1
2
3
4
5
6
7
8
9
10
11
12#include "ext4_jbd2.h"
13#include "mballoc.h"
14#include <linux/log2.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/nospec.h>
18#include <linux/backing-dev.h>
19#include <trace/events/ext4.h>
20
21#ifdef CONFIG_EXT4_DEBUG
22ushort ext4_mballoc_debug __read_mostly;
23
24module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
25MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
26#endif
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339static struct kmem_cache *ext4_pspace_cachep;
340static struct kmem_cache *ext4_ac_cachep;
341static struct kmem_cache *ext4_free_data_cachep;
342
343
344
345
346#define NR_GRPINFO_CACHES 8
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
349static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
350 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
351 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
352 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
353};
354
355static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
356 ext4_group_t group);
357static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
358 ext4_group_t group);
359
360static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
361{
362#if BITS_PER_LONG == 64
363 *bit += ((unsigned long) addr & 7UL) << 3;
364 addr = (void *) ((unsigned long) addr & ~7UL);
365#elif BITS_PER_LONG == 32
366 *bit += ((unsigned long) addr & 3UL) << 3;
367 addr = (void *) ((unsigned long) addr & ~3UL);
368#else
369#error "how many bits you are?!"
370#endif
371 return addr;
372}
373
374static inline int mb_test_bit(int bit, void *addr)
375{
376
377
378
379
380 addr = mb_correct_addr_and_bit(&bit, addr);
381 return ext4_test_bit(bit, addr);
382}
383
384static inline void mb_set_bit(int bit, void *addr)
385{
386 addr = mb_correct_addr_and_bit(&bit, addr);
387 ext4_set_bit(bit, addr);
388}
389
390static inline void mb_clear_bit(int bit, void *addr)
391{
392 addr = mb_correct_addr_and_bit(&bit, addr);
393 ext4_clear_bit(bit, addr);
394}
395
396static inline int mb_test_and_clear_bit(int bit, void *addr)
397{
398 addr = mb_correct_addr_and_bit(&bit, addr);
399 return ext4_test_and_clear_bit(bit, addr);
400}
401
402static inline int mb_find_next_zero_bit(void *addr, int max, int start)
403{
404 int fix = 0, ret, tmpmax;
405 addr = mb_correct_addr_and_bit(&fix, addr);
406 tmpmax = max + fix;
407 start += fix;
408
409 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
410 if (ret > max)
411 return max;
412 return ret;
413}
414
415static inline int mb_find_next_bit(void *addr, int max, int start)
416{
417 int fix = 0, ret, tmpmax;
418 addr = mb_correct_addr_and_bit(&fix, addr);
419 tmpmax = max + fix;
420 start += fix;
421
422 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
423 if (ret > max)
424 return max;
425 return ret;
426}
427
428static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
429{
430 char *bb;
431
432 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
433 BUG_ON(max == NULL);
434
435 if (order > e4b->bd_blkbits + 1) {
436 *max = 0;
437 return NULL;
438 }
439
440
441 if (order == 0) {
442 *max = 1 << (e4b->bd_blkbits + 3);
443 return e4b->bd_bitmap;
444 }
445
446 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
447 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
448
449 return bb;
450}
451
452#ifdef DOUBLE_CHECK
453static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
454 int first, int count)
455{
456 int i;
457 struct super_block *sb = e4b->bd_sb;
458
459 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
460 return;
461 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
462 for (i = 0; i < count; i++) {
463 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
464 ext4_fsblk_t blocknr;
465
466 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
467 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
468 ext4_grp_locked_error(sb, e4b->bd_group,
469 inode ? inode->i_ino : 0,
470 blocknr,
471 "freeing block already freed "
472 "(bit %u)",
473 first + i);
474 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
475 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
476 }
477 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
478 }
479}
480
481static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
482{
483 int i;
484
485 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
486 return;
487 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
488 for (i = 0; i < count; i++) {
489 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
490 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
491 }
492}
493
494static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
495{
496 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
497 unsigned char *b1, *b2;
498 int i;
499 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
500 b2 = (unsigned char *) bitmap;
501 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
502 if (b1[i] != b2[i]) {
503 ext4_msg(e4b->bd_sb, KERN_ERR,
504 "corruption in group %u "
505 "at byte %u(%u): %x in copy != %x "
506 "on disk/prealloc",
507 e4b->bd_group, i, i * 8, b1[i], b2[i]);
508 BUG();
509 }
510 }
511 }
512}
513
514#else
515static inline void mb_free_blocks_double(struct inode *inode,
516 struct ext4_buddy *e4b, int first, int count)
517{
518 return;
519}
520static inline void mb_mark_used_double(struct ext4_buddy *e4b,
521 int first, int count)
522{
523 return;
524}
525static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
526{
527 return;
528}
529#endif
530
531#ifdef AGGRESSIVE_CHECK
532
533#define MB_CHECK_ASSERT(assert) \
534do { \
535 if (!(assert)) { \
536 printk(KERN_EMERG \
537 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
538 function, file, line, # assert); \
539 BUG(); \
540 } \
541} while (0)
542
543static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
544 const char *function, int line)
545{
546 struct super_block *sb = e4b->bd_sb;
547 int order = e4b->bd_blkbits + 1;
548 int max;
549 int max2;
550 int i;
551 int j;
552 int k;
553 int count;
554 struct ext4_group_info *grp;
555 int fragments = 0;
556 int fstart;
557 struct list_head *cur;
558 void *buddy;
559 void *buddy2;
560
561 {
562 static int mb_check_counter;
563 if (mb_check_counter++ % 100 != 0)
564 return 0;
565 }
566
567 while (order > 1) {
568 buddy = mb_find_buddy(e4b, order, &max);
569 MB_CHECK_ASSERT(buddy);
570 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
571 MB_CHECK_ASSERT(buddy2);
572 MB_CHECK_ASSERT(buddy != buddy2);
573 MB_CHECK_ASSERT(max * 2 == max2);
574
575 count = 0;
576 for (i = 0; i < max; i++) {
577
578 if (mb_test_bit(i, buddy)) {
579
580 if (!mb_test_bit(i << 1, buddy2)) {
581 MB_CHECK_ASSERT(
582 mb_test_bit((i<<1)+1, buddy2));
583 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
584 MB_CHECK_ASSERT(
585 mb_test_bit(i << 1, buddy2));
586 }
587 continue;
588 }
589
590
591 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
592 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
593
594 for (j = 0; j < (1 << order); j++) {
595 k = (i * (1 << order)) + j;
596 MB_CHECK_ASSERT(
597 !mb_test_bit(k, e4b->bd_bitmap));
598 }
599 count++;
600 }
601 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
602 order--;
603 }
604
605 fstart = -1;
606 buddy = mb_find_buddy(e4b, 0, &max);
607 for (i = 0; i < max; i++) {
608 if (!mb_test_bit(i, buddy)) {
609 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
610 if (fstart == -1) {
611 fragments++;
612 fstart = i;
613 }
614 continue;
615 }
616 fstart = -1;
617
618 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
619 buddy2 = mb_find_buddy(e4b, j, &max2);
620 k = i >> j;
621 MB_CHECK_ASSERT(k < max2);
622 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
623 }
624 }
625 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
626 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
627
628 grp = ext4_get_group_info(sb, e4b->bd_group);
629 list_for_each(cur, &grp->bb_prealloc_list) {
630 ext4_group_t groupnr;
631 struct ext4_prealloc_space *pa;
632 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
633 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
634 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
635 for (i = 0; i < pa->pa_len; i++)
636 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
637 }
638 return 0;
639}
640#undef MB_CHECK_ASSERT
641#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
642 __FILE__, __func__, __LINE__)
643#else
644#define mb_check_buddy(e4b)
645#endif
646
647
648
649
650
651
652
653static void ext4_mb_mark_free_simple(struct super_block *sb,
654 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
655 struct ext4_group_info *grp)
656{
657 struct ext4_sb_info *sbi = EXT4_SB(sb);
658 ext4_grpblk_t min;
659 ext4_grpblk_t max;
660 ext4_grpblk_t chunk;
661 unsigned int border;
662
663 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
664
665 border = 2 << sb->s_blocksize_bits;
666
667 while (len > 0) {
668
669 max = ffs(first | border) - 1;
670
671
672 min = fls(len) - 1;
673
674 if (max < min)
675 min = max;
676 chunk = 1 << min;
677
678
679 grp->bb_counters[min]++;
680 if (min > 0)
681 mb_clear_bit(first >> min,
682 buddy + sbi->s_mb_offsets[min]);
683
684 len -= chunk;
685 first += chunk;
686 }
687}
688
689
690
691
692
693static void
694mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
695{
696 int i;
697 int bits;
698
699 grp->bb_largest_free_order = -1;
700
701 bits = sb->s_blocksize_bits + 1;
702 for (i = bits; i >= 0; i--) {
703 if (grp->bb_counters[i] > 0) {
704 grp->bb_largest_free_order = i;
705 break;
706 }
707 }
708}
709
710static noinline_for_stack
711void ext4_mb_generate_buddy(struct super_block *sb,
712 void *buddy, void *bitmap, ext4_group_t group)
713{
714 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_sb_info *sbi = EXT4_SB(sb);
716 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
717 ext4_grpblk_t i = 0;
718 ext4_grpblk_t first;
719 ext4_grpblk_t len;
720 unsigned free = 0;
721 unsigned fragments = 0;
722 unsigned long long period = get_cycles();
723
724
725
726 i = mb_find_next_zero_bit(bitmap, max, 0);
727 grp->bb_first_free = i;
728 while (i < max) {
729 fragments++;
730 first = i;
731 i = mb_find_next_bit(bitmap, max, i);
732 len = i - first;
733 free += len;
734 if (len > 1)
735 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
736 else
737 grp->bb_counters[0]++;
738 if (i < max)
739 i = mb_find_next_zero_bit(bitmap, max, i);
740 }
741 grp->bb_fragments = fragments;
742
743 if (free != grp->bb_free) {
744 ext4_grp_locked_error(sb, group, 0, 0,
745 "block bitmap and bg descriptor "
746 "inconsistent: %u vs %u free clusters",
747 free, grp->bb_free);
748
749
750
751
752 grp->bb_free = free;
753 ext4_mark_group_bitmap_corrupted(sb, group,
754 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
755 }
756 mb_set_largest_free_order(sb, grp);
757
758 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
759
760 period = get_cycles() - period;
761 spin_lock(&sbi->s_bal_lock);
762 sbi->s_mb_buddies_generated++;
763 sbi->s_mb_generation_time += period;
764 spin_unlock(&sbi->s_bal_lock);
765}
766
767static void mb_regenerate_buddy(struct ext4_buddy *e4b)
768{
769 int count;
770 int order = 1;
771 void *buddy;
772
773 while ((buddy = mb_find_buddy(e4b, order++, &count))) {
774 ext4_set_bits(buddy, 0, count);
775 }
776 e4b->bd_info->bb_fragments = 0;
777 memset(e4b->bd_info->bb_counters, 0,
778 sizeof(*e4b->bd_info->bb_counters) *
779 (e4b->bd_sb->s_blocksize_bits + 2));
780
781 ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
782 e4b->bd_bitmap, e4b->bd_group);
783}
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
806{
807 ext4_group_t ngroups;
808 int blocksize;
809 int blocks_per_page;
810 int groups_per_page;
811 int err = 0;
812 int i;
813 ext4_group_t first_group, group;
814 int first_block;
815 struct super_block *sb;
816 struct buffer_head *bhs;
817 struct buffer_head **bh = NULL;
818 struct inode *inode;
819 char *data;
820 char *bitmap;
821 struct ext4_group_info *grinfo;
822
823 mb_debug(1, "init page %lu\n", page->index);
824
825 inode = page->mapping->host;
826 sb = inode->i_sb;
827 ngroups = ext4_get_groups_count(sb);
828 blocksize = i_blocksize(inode);
829 blocks_per_page = PAGE_SIZE / blocksize;
830
831 groups_per_page = blocks_per_page >> 1;
832 if (groups_per_page == 0)
833 groups_per_page = 1;
834
835
836 if (groups_per_page > 1) {
837 i = sizeof(struct buffer_head *) * groups_per_page;
838 bh = kzalloc(i, gfp);
839 if (bh == NULL) {
840 err = -ENOMEM;
841 goto out;
842 }
843 } else
844 bh = &bhs;
845
846 first_group = page->index * blocks_per_page / 2;
847
848
849 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
850 if (group >= ngroups)
851 break;
852
853 grinfo = ext4_get_group_info(sb, group);
854
855
856
857
858
859
860 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
861 bh[i] = NULL;
862 continue;
863 }
864 bh[i] = ext4_read_block_bitmap_nowait(sb, group);
865 if (IS_ERR(bh[i])) {
866 err = PTR_ERR(bh[i]);
867 bh[i] = NULL;
868 goto out;
869 }
870 mb_debug(1, "read bitmap for group %u\n", group);
871 }
872
873
874 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
875 int err2;
876
877 if (!bh[i])
878 continue;
879 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
880 if (!err)
881 err = err2;
882 }
883
884 first_block = page->index * blocks_per_page;
885 for (i = 0; i < blocks_per_page; i++) {
886 group = (first_block + i) >> 1;
887 if (group >= ngroups)
888 break;
889
890 if (!bh[group - first_group])
891
892 continue;
893
894 if (!buffer_verified(bh[group - first_group]))
895
896 continue;
897 err = 0;
898
899
900
901
902
903
904
905 data = page_address(page) + (i * blocksize);
906 bitmap = bh[group - first_group]->b_data;
907
908
909
910
911
912 if ((first_block + i) & 1) {
913
914 BUG_ON(incore == NULL);
915 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
916 group, page->index, i * blocksize);
917 trace_ext4_mb_buddy_bitmap_load(sb, group);
918 grinfo = ext4_get_group_info(sb, group);
919 grinfo->bb_fragments = 0;
920 memset(grinfo->bb_counters, 0,
921 sizeof(*grinfo->bb_counters) *
922 (sb->s_blocksize_bits+2));
923
924
925
926 ext4_lock_group(sb, group);
927
928 memset(data, 0xff, blocksize);
929 ext4_mb_generate_buddy(sb, data, incore, group);
930 ext4_unlock_group(sb, group);
931 incore = NULL;
932 } else {
933
934 BUG_ON(incore != NULL);
935 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
936 group, page->index, i * blocksize);
937 trace_ext4_mb_bitmap_load(sb, group);
938
939
940 ext4_lock_group(sb, group);
941 memcpy(data, bitmap, blocksize);
942
943
944 ext4_mb_generate_from_pa(sb, data, group);
945 ext4_mb_generate_from_freelist(sb, data, group);
946 ext4_unlock_group(sb, group);
947
948
949
950
951 incore = data;
952 }
953 }
954 SetPageUptodate(page);
955
956out:
957 if (bh) {
958 for (i = 0; i < groups_per_page; i++)
959 brelse(bh[i]);
960 if (bh != &bhs)
961 kfree(bh);
962 }
963 return err;
964}
965
966
967
968
969
970
971
972static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
973 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
974{
975 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
976 int block, pnum, poff;
977 int blocks_per_page;
978 struct page *page;
979
980 e4b->bd_buddy_page = NULL;
981 e4b->bd_bitmap_page = NULL;
982
983 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
984
985
986
987
988
989 block = group * 2;
990 pnum = block / blocks_per_page;
991 poff = block % blocks_per_page;
992 page = find_or_create_page(inode->i_mapping, pnum, gfp);
993 if (!page)
994 return -ENOMEM;
995 BUG_ON(page->mapping != inode->i_mapping);
996 e4b->bd_bitmap_page = page;
997 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
998
999 if (blocks_per_page >= 2) {
1000
1001 return 0;
1002 }
1003
1004 block++;
1005 pnum = block / blocks_per_page;
1006 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1007 if (!page)
1008 return -ENOMEM;
1009 BUG_ON(page->mapping != inode->i_mapping);
1010 e4b->bd_buddy_page = page;
1011 return 0;
1012}
1013
1014static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1015{
1016 if (e4b->bd_bitmap_page) {
1017 unlock_page(e4b->bd_bitmap_page);
1018 put_page(e4b->bd_bitmap_page);
1019 }
1020 if (e4b->bd_buddy_page) {
1021 unlock_page(e4b->bd_buddy_page);
1022 put_page(e4b->bd_buddy_page);
1023 }
1024}
1025
1026
1027
1028
1029
1030
1031static noinline_for_stack
1032int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
1033{
1034
1035 struct ext4_group_info *this_grp;
1036 struct ext4_buddy e4b;
1037 struct page *page;
1038 int ret = 0;
1039
1040 might_sleep();
1041 mb_debug(1, "init group %u\n", group);
1042 this_grp = ext4_get_group_info(sb, group);
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
1053 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1054
1055
1056
1057
1058 goto err;
1059 }
1060
1061 page = e4b.bd_bitmap_page;
1062 ret = ext4_mb_init_cache(page, NULL, gfp);
1063 if (ret)
1064 goto err;
1065 if (!PageUptodate(page)) {
1066 ret = -EIO;
1067 goto err;
1068 }
1069
1070 if (e4b.bd_buddy_page == NULL) {
1071
1072
1073
1074
1075
1076 ret = 0;
1077 goto err;
1078 }
1079
1080 page = e4b.bd_buddy_page;
1081 ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
1082 if (ret)
1083 goto err;
1084 if (!PageUptodate(page)) {
1085 ret = -EIO;
1086 goto err;
1087 }
1088err:
1089 ext4_mb_put_buddy_page_lock(&e4b);
1090 return ret;
1091}
1092
1093
1094
1095
1096
1097
1098static noinline_for_stack int
1099ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
1100 struct ext4_buddy *e4b, gfp_t gfp)
1101{
1102 int blocks_per_page;
1103 int block;
1104 int pnum;
1105 int poff;
1106 struct page *page;
1107 int ret;
1108 struct ext4_group_info *grp;
1109 struct ext4_sb_info *sbi = EXT4_SB(sb);
1110 struct inode *inode = sbi->s_buddy_cache;
1111
1112 might_sleep();
1113 mb_debug(1, "load group %u\n", group);
1114
1115 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1116 grp = ext4_get_group_info(sb, group);
1117
1118 e4b->bd_blkbits = sb->s_blocksize_bits;
1119 e4b->bd_info = grp;
1120 e4b->bd_sb = sb;
1121 e4b->bd_group = group;
1122 e4b->bd_buddy_page = NULL;
1123 e4b->bd_bitmap_page = NULL;
1124
1125 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1126
1127
1128
1129
1130 ret = ext4_mb_init_group(sb, group, gfp);
1131 if (ret)
1132 return ret;
1133 }
1134
1135
1136
1137
1138
1139
1140 block = group * 2;
1141 pnum = block / blocks_per_page;
1142 poff = block % blocks_per_page;
1143
1144
1145
1146 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1147 if (page == NULL || !PageUptodate(page)) {
1148 if (page)
1149
1150
1151
1152
1153
1154
1155
1156
1157 put_page(page);
1158 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1159 if (page) {
1160 BUG_ON(page->mapping != inode->i_mapping);
1161 if (!PageUptodate(page)) {
1162 ret = ext4_mb_init_cache(page, NULL, gfp);
1163 if (ret) {
1164 unlock_page(page);
1165 goto err;
1166 }
1167 mb_cmp_bitmaps(e4b, page_address(page) +
1168 (poff * sb->s_blocksize));
1169 }
1170 unlock_page(page);
1171 }
1172 }
1173 if (page == NULL) {
1174 ret = -ENOMEM;
1175 goto err;
1176 }
1177 if (!PageUptodate(page)) {
1178 ret = -EIO;
1179 goto err;
1180 }
1181
1182
1183 e4b->bd_bitmap_page = page;
1184 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1185
1186 block++;
1187 pnum = block / blocks_per_page;
1188 poff = block % blocks_per_page;
1189
1190 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1191 if (page == NULL || !PageUptodate(page)) {
1192 if (page)
1193 put_page(page);
1194 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1195 if (page) {
1196 BUG_ON(page->mapping != inode->i_mapping);
1197 if (!PageUptodate(page)) {
1198 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
1199 gfp);
1200 if (ret) {
1201 unlock_page(page);
1202 goto err;
1203 }
1204 }
1205 unlock_page(page);
1206 }
1207 }
1208 if (page == NULL) {
1209 ret = -ENOMEM;
1210 goto err;
1211 }
1212 if (!PageUptodate(page)) {
1213 ret = -EIO;
1214 goto err;
1215 }
1216
1217
1218 e4b->bd_buddy_page = page;
1219 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1220
1221 BUG_ON(e4b->bd_bitmap_page == NULL);
1222 BUG_ON(e4b->bd_buddy_page == NULL);
1223
1224 return 0;
1225
1226err:
1227 if (page)
1228 put_page(page);
1229 if (e4b->bd_bitmap_page)
1230 put_page(e4b->bd_bitmap_page);
1231 if (e4b->bd_buddy_page)
1232 put_page(e4b->bd_buddy_page);
1233 e4b->bd_buddy = NULL;
1234 e4b->bd_bitmap = NULL;
1235 return ret;
1236}
1237
1238static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1239 struct ext4_buddy *e4b)
1240{
1241 return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
1242}
1243
1244static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1245{
1246 if (e4b->bd_bitmap_page)
1247 put_page(e4b->bd_bitmap_page);
1248 if (e4b->bd_buddy_page)
1249 put_page(e4b->bd_buddy_page);
1250}
1251
1252
1253static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1254{
1255 int order = 1;
1256 int bb_incr = 1 << (e4b->bd_blkbits - 1);
1257 void *bb;
1258
1259 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1260 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1261
1262 bb = e4b->bd_buddy;
1263 while (order <= e4b->bd_blkbits + 1) {
1264 block = block >> 1;
1265 if (!mb_test_bit(block, bb)) {
1266
1267 return order;
1268 }
1269 bb += bb_incr;
1270 bb_incr >>= 1;
1271 order++;
1272 }
1273 return 0;
1274}
1275
1276static void mb_clear_bits(void *bm, int cur, int len)
1277{
1278 __u32 *addr;
1279
1280 len = cur + len;
1281 while (cur < len) {
1282 if ((cur & 31) == 0 && (len - cur) >= 32) {
1283
1284 addr = bm + (cur >> 3);
1285 *addr = 0;
1286 cur += 32;
1287 continue;
1288 }
1289 mb_clear_bit(cur, bm);
1290 cur++;
1291 }
1292}
1293
1294
1295
1296
1297static int mb_test_and_clear_bits(void *bm, int cur, int len)
1298{
1299 __u32 *addr;
1300 int zero_bit = -1;
1301
1302 len = cur + len;
1303 while (cur < len) {
1304 if ((cur & 31) == 0 && (len - cur) >= 32) {
1305
1306 addr = bm + (cur >> 3);
1307 if (*addr != (__u32)(-1) && zero_bit == -1)
1308 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1309 *addr = 0;
1310 cur += 32;
1311 continue;
1312 }
1313 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1314 zero_bit = cur;
1315 cur++;
1316 }
1317
1318 return zero_bit;
1319}
1320
1321void ext4_set_bits(void *bm, int cur, int len)
1322{
1323 __u32 *addr;
1324
1325 len = cur + len;
1326 while (cur < len) {
1327 if ((cur & 31) == 0 && (len - cur) >= 32) {
1328
1329 addr = bm + (cur >> 3);
1330 *addr = 0xffffffff;
1331 cur += 32;
1332 continue;
1333 }
1334 mb_set_bit(cur, bm);
1335 cur++;
1336 }
1337}
1338
1339
1340
1341
1342static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1343{
1344 if (mb_test_bit(*bit + side, bitmap)) {
1345 mb_clear_bit(*bit, bitmap);
1346 (*bit) -= side;
1347 return 1;
1348 }
1349 else {
1350 (*bit) += side;
1351 mb_set_bit(*bit, bitmap);
1352 return -1;
1353 }
1354}
1355
1356static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1357{
1358 int max;
1359 int order = 1;
1360 void *buddy = mb_find_buddy(e4b, order, &max);
1361
1362 while (buddy) {
1363 void *buddy2;
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394 if (first & 1)
1395 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1396 if (!(last & 1))
1397 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1398 if (first > last)
1399 break;
1400 order++;
1401
1402 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1403 mb_clear_bits(buddy, first, last - first + 1);
1404 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1405 break;
1406 }
1407 first >>= 1;
1408 last >>= 1;
1409 buddy = buddy2;
1410 }
1411}
1412
1413static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1414 int first, int count)
1415{
1416 int left_is_free = 0;
1417 int right_is_free = 0;
1418 int block;
1419 int last = first + count - 1;
1420 struct super_block *sb = e4b->bd_sb;
1421
1422 if (WARN_ON(count == 0))
1423 return;
1424 BUG_ON(last >= (sb->s_blocksize << 3));
1425 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1426
1427 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1428 return;
1429
1430 mb_check_buddy(e4b);
1431 mb_free_blocks_double(inode, e4b, first, count);
1432
1433 e4b->bd_info->bb_free += count;
1434 if (first < e4b->bd_info->bb_first_free)
1435 e4b->bd_info->bb_first_free = first;
1436
1437
1438
1439
1440 if (first != 0)
1441 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1442 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1443 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1444 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1445
1446 if (unlikely(block != -1)) {
1447 struct ext4_sb_info *sbi = EXT4_SB(sb);
1448 ext4_fsblk_t blocknr;
1449
1450 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1451 blocknr += EXT4_C2B(sbi, block);
1452 ext4_grp_locked_error(sb, e4b->bd_group,
1453 inode ? inode->i_ino : 0,
1454 blocknr,
1455 "freeing already freed block "
1456 "(bit %u); block bitmap corrupt.",
1457 block);
1458 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1459 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1460 mb_regenerate_buddy(e4b);
1461 goto done;
1462 }
1463
1464
1465 if (left_is_free && right_is_free)
1466 e4b->bd_info->bb_fragments--;
1467 else if (!left_is_free && !right_is_free)
1468 e4b->bd_info->bb_fragments++;
1469
1470
1471
1472
1473
1474
1475
1476 if (first & 1) {
1477 first += !left_is_free;
1478 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1479 }
1480 if (!(last & 1)) {
1481 last -= !right_is_free;
1482 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1483 }
1484
1485 if (first <= last)
1486 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1487
1488done:
1489 mb_set_largest_free_order(sb, e4b->bd_info);
1490 mb_check_buddy(e4b);
1491}
1492
1493static int mb_find_extent(struct ext4_buddy *e4b, int block,
1494 int needed, struct ext4_free_extent *ex)
1495{
1496 int next = block;
1497 int max, order;
1498 void *buddy;
1499
1500 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1501 BUG_ON(ex == NULL);
1502
1503 buddy = mb_find_buddy(e4b, 0, &max);
1504 BUG_ON(buddy == NULL);
1505 BUG_ON(block >= max);
1506 if (mb_test_bit(block, buddy)) {
1507 ex->fe_len = 0;
1508 ex->fe_start = 0;
1509 ex->fe_group = 0;
1510 return 0;
1511 }
1512
1513
1514 order = mb_find_order_for_block(e4b, block);
1515 block = block >> order;
1516
1517 ex->fe_len = 1 << order;
1518 ex->fe_start = block << order;
1519 ex->fe_group = e4b->bd_group;
1520
1521
1522 next = next - ex->fe_start;
1523 ex->fe_len -= next;
1524 ex->fe_start += next;
1525
1526 while (needed > ex->fe_len &&
1527 mb_find_buddy(e4b, order, &max)) {
1528
1529 if (block + 1 >= max)
1530 break;
1531
1532 next = (block + 1) * (1 << order);
1533 if (mb_test_bit(next, e4b->bd_bitmap))
1534 break;
1535
1536 order = mb_find_order_for_block(e4b, next);
1537
1538 block = next >> order;
1539 ex->fe_len += 1 << order;
1540 }
1541
1542 if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
1543
1544 WARN_ON(1);
1545 ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
1546 "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
1547 block, order, needed, ex->fe_group, ex->fe_start,
1548 ex->fe_len, ex->fe_logical);
1549 ex->fe_len = 0;
1550 ex->fe_start = 0;
1551 ex->fe_group = 0;
1552 }
1553 return ex->fe_len;
1554}
1555
1556static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1557{
1558 int ord;
1559 int mlen = 0;
1560 int max = 0;
1561 int cur;
1562 int start = ex->fe_start;
1563 int len = ex->fe_len;
1564 unsigned ret = 0;
1565 int len0 = len;
1566 void *buddy;
1567
1568 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1569 BUG_ON(e4b->bd_group != ex->fe_group);
1570 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1571 mb_check_buddy(e4b);
1572 mb_mark_used_double(e4b, start, len);
1573
1574 e4b->bd_info->bb_free -= len;
1575 if (e4b->bd_info->bb_first_free == start)
1576 e4b->bd_info->bb_first_free += len;
1577
1578
1579 if (start != 0)
1580 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1581 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1582 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1583 if (mlen && max)
1584 e4b->bd_info->bb_fragments++;
1585 else if (!mlen && !max)
1586 e4b->bd_info->bb_fragments--;
1587
1588
1589 while (len) {
1590 ord = mb_find_order_for_block(e4b, start);
1591
1592 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1593
1594 mlen = 1 << ord;
1595 buddy = mb_find_buddy(e4b, ord, &max);
1596 BUG_ON((start >> ord) >= max);
1597 mb_set_bit(start >> ord, buddy);
1598 e4b->bd_info->bb_counters[ord]--;
1599 start += mlen;
1600 len -= mlen;
1601 BUG_ON(len < 0);
1602 continue;
1603 }
1604
1605
1606 if (ret == 0)
1607 ret = len | (ord << 16);
1608
1609
1610 BUG_ON(ord <= 0);
1611 buddy = mb_find_buddy(e4b, ord, &max);
1612 mb_set_bit(start >> ord, buddy);
1613 e4b->bd_info->bb_counters[ord]--;
1614
1615 ord--;
1616 cur = (start >> ord) & ~1U;
1617 buddy = mb_find_buddy(e4b, ord, &max);
1618 mb_clear_bit(cur, buddy);
1619 mb_clear_bit(cur + 1, buddy);
1620 e4b->bd_info->bb_counters[ord]++;
1621 e4b->bd_info->bb_counters[ord]++;
1622 }
1623 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1624
1625 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1626 mb_check_buddy(e4b);
1627
1628 return ret;
1629}
1630
1631
1632
1633
1634static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1635 struct ext4_buddy *e4b)
1636{
1637 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1638 int ret;
1639
1640 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1641 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1642
1643 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1644 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1645 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1646
1647
1648
1649 ac->ac_f_ex = ac->ac_b_ex;
1650
1651 ac->ac_status = AC_STATUS_FOUND;
1652 ac->ac_tail = ret & 0xffff;
1653 ac->ac_buddy = ret >> 16;
1654
1655
1656
1657
1658
1659
1660
1661
1662 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1663 get_page(ac->ac_bitmap_page);
1664 ac->ac_buddy_page = e4b->bd_buddy_page;
1665 get_page(ac->ac_buddy_page);
1666
1667 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1668 spin_lock(&sbi->s_md_lock);
1669 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1670 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1671 spin_unlock(&sbi->s_md_lock);
1672 }
1673}
1674
1675
1676
1677
1678
1679static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1680 struct ext4_buddy *e4b,
1681 int finish_group)
1682{
1683 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1684 struct ext4_free_extent *bex = &ac->ac_b_ex;
1685 struct ext4_free_extent *gex = &ac->ac_g_ex;
1686 struct ext4_free_extent ex;
1687 int max;
1688
1689 if (ac->ac_status == AC_STATUS_FOUND)
1690 return;
1691
1692
1693
1694 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1695 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1696 ac->ac_status = AC_STATUS_BREAK;
1697 return;
1698 }
1699
1700
1701
1702
1703 if (bex->fe_len < gex->fe_len)
1704 return;
1705
1706 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1707 && bex->fe_group == e4b->bd_group) {
1708
1709
1710
1711 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1712 if (max >= gex->fe_len) {
1713 ext4_mb_use_best_found(ac, e4b);
1714 return;
1715 }
1716 }
1717}
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1730 struct ext4_free_extent *ex,
1731 struct ext4_buddy *e4b)
1732{
1733 struct ext4_free_extent *bex = &ac->ac_b_ex;
1734 struct ext4_free_extent *gex = &ac->ac_g_ex;
1735
1736 BUG_ON(ex->fe_len <= 0);
1737 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1738 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1739 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1740
1741 ac->ac_found++;
1742
1743
1744
1745
1746 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1747 *bex = *ex;
1748 ext4_mb_use_best_found(ac, e4b);
1749 return;
1750 }
1751
1752
1753
1754
1755 if (ex->fe_len == gex->fe_len) {
1756 *bex = *ex;
1757 ext4_mb_use_best_found(ac, e4b);
1758 return;
1759 }
1760
1761
1762
1763
1764 if (bex->fe_len == 0) {
1765 *bex = *ex;
1766 return;
1767 }
1768
1769
1770
1771
1772 if (bex->fe_len < gex->fe_len) {
1773
1774
1775 if (ex->fe_len > bex->fe_len)
1776 *bex = *ex;
1777 } else if (ex->fe_len > gex->fe_len) {
1778
1779
1780
1781 if (ex->fe_len < bex->fe_len)
1782 *bex = *ex;
1783 }
1784
1785 ext4_mb_check_limits(ac, e4b, 0);
1786}
1787
1788static noinline_for_stack
1789int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1790 struct ext4_buddy *e4b)
1791{
1792 struct ext4_free_extent ex = ac->ac_b_ex;
1793 ext4_group_t group = ex.fe_group;
1794 int max;
1795 int err;
1796
1797 BUG_ON(ex.fe_len <= 0);
1798 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1799 if (err)
1800 return err;
1801
1802 ext4_lock_group(ac->ac_sb, group);
1803 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1804
1805 if (max > 0) {
1806 ac->ac_b_ex = ex;
1807 ext4_mb_use_best_found(ac, e4b);
1808 }
1809
1810 ext4_unlock_group(ac->ac_sb, group);
1811 ext4_mb_unload_buddy(e4b);
1812
1813 return 0;
1814}
1815
1816static noinline_for_stack
1817int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1818 struct ext4_buddy *e4b)
1819{
1820 ext4_group_t group = ac->ac_g_ex.fe_group;
1821 int max;
1822 int err;
1823 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1824 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1825 struct ext4_free_extent ex;
1826
1827 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1828 return 0;
1829 if (grp->bb_free == 0)
1830 return 0;
1831
1832 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1833 if (err)
1834 return err;
1835
1836 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1837 ext4_mb_unload_buddy(e4b);
1838 return 0;
1839 }
1840
1841 ext4_lock_group(ac->ac_sb, group);
1842 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1843 ac->ac_g_ex.fe_len, &ex);
1844 ex.fe_logical = 0xDEADFA11;
1845
1846 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1847 ext4_fsblk_t start;
1848
1849 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1850 ex.fe_start;
1851
1852 if (do_div(start, sbi->s_stripe) == 0) {
1853 ac->ac_found++;
1854 ac->ac_b_ex = ex;
1855 ext4_mb_use_best_found(ac, e4b);
1856 }
1857 } else if (max >= ac->ac_g_ex.fe_len) {
1858 BUG_ON(ex.fe_len <= 0);
1859 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1860 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1861 ac->ac_found++;
1862 ac->ac_b_ex = ex;
1863 ext4_mb_use_best_found(ac, e4b);
1864 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1865
1866
1867 BUG_ON(ex.fe_len <= 0);
1868 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1869 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1870 ac->ac_found++;
1871 ac->ac_b_ex = ex;
1872 ext4_mb_use_best_found(ac, e4b);
1873 }
1874 ext4_unlock_group(ac->ac_sb, group);
1875 ext4_mb_unload_buddy(e4b);
1876
1877 return 0;
1878}
1879
1880
1881
1882
1883
1884static noinline_for_stack
1885void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1886 struct ext4_buddy *e4b)
1887{
1888 struct super_block *sb = ac->ac_sb;
1889 struct ext4_group_info *grp = e4b->bd_info;
1890 void *buddy;
1891 int i;
1892 int k;
1893 int max;
1894
1895 BUG_ON(ac->ac_2order <= 0);
1896 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1897 if (grp->bb_counters[i] == 0)
1898 continue;
1899
1900 buddy = mb_find_buddy(e4b, i, &max);
1901 BUG_ON(buddy == NULL);
1902
1903 k = mb_find_next_zero_bit(buddy, max, 0);
1904 BUG_ON(k >= max);
1905
1906 ac->ac_found++;
1907
1908 ac->ac_b_ex.fe_len = 1 << i;
1909 ac->ac_b_ex.fe_start = k << i;
1910 ac->ac_b_ex.fe_group = e4b->bd_group;
1911
1912 ext4_mb_use_best_found(ac, e4b);
1913
1914 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1915
1916 if (EXT4_SB(sb)->s_mb_stats)
1917 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1918
1919 break;
1920 }
1921}
1922
1923
1924
1925
1926
1927
1928static noinline_for_stack
1929void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1930 struct ext4_buddy *e4b)
1931{
1932 struct super_block *sb = ac->ac_sb;
1933 void *bitmap = e4b->bd_bitmap;
1934 struct ext4_free_extent ex;
1935 int i;
1936 int free;
1937
1938 free = e4b->bd_info->bb_free;
1939 BUG_ON(free <= 0);
1940
1941 i = e4b->bd_info->bb_first_free;
1942
1943 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1944 i = mb_find_next_zero_bit(bitmap,
1945 EXT4_CLUSTERS_PER_GROUP(sb), i);
1946 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1947
1948
1949
1950
1951
1952 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1953 "%d free clusters as per "
1954 "group info. But bitmap says 0",
1955 free);
1956 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1957 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1958 break;
1959 }
1960
1961 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1962 BUG_ON(ex.fe_len <= 0);
1963 if (free < ex.fe_len) {
1964 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1965 "%d free clusters as per "
1966 "group info. But got %d blocks",
1967 free, ex.fe_len);
1968 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1969 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1970
1971
1972
1973
1974
1975 break;
1976 }
1977 ex.fe_logical = 0xDEADC0DE;
1978 ext4_mb_measure_extent(ac, &ex, e4b);
1979
1980 i += ex.fe_len;
1981 free -= ex.fe_len;
1982 }
1983
1984 ext4_mb_check_limits(ac, e4b, 1);
1985}
1986
1987
1988
1989
1990
1991static noinline_for_stack
1992void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1993 struct ext4_buddy *e4b)
1994{
1995 struct super_block *sb = ac->ac_sb;
1996 struct ext4_sb_info *sbi = EXT4_SB(sb);
1997 void *bitmap = e4b->bd_bitmap;
1998 struct ext4_free_extent ex;
1999 ext4_fsblk_t first_group_block;
2000 ext4_fsblk_t a;
2001 ext4_grpblk_t i;
2002 int max;
2003
2004 BUG_ON(sbi->s_stripe == 0);
2005
2006
2007 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
2008
2009 a = first_group_block + sbi->s_stripe - 1;
2010 do_div(a, sbi->s_stripe);
2011 i = (a * sbi->s_stripe) - first_group_block;
2012
2013 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
2014 if (!mb_test_bit(i, bitmap)) {
2015 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
2016 if (max >= sbi->s_stripe) {
2017 ac->ac_found++;
2018 ex.fe_logical = 0xDEADF00D;
2019 ac->ac_b_ex = ex;
2020 ext4_mb_use_best_found(ac, e4b);
2021 break;
2022 }
2023 }
2024 i += sbi->s_stripe;
2025 }
2026}
2027
2028
2029
2030
2031
2032
2033
2034static int ext4_mb_good_group(struct ext4_allocation_context *ac,
2035 ext4_group_t group, int cr)
2036{
2037 unsigned free, fragments;
2038 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
2039 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2040
2041 BUG_ON(cr < 0 || cr >= 4);
2042
2043 free = grp->bb_free;
2044 if (free == 0)
2045 return 0;
2046 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
2047 return 0;
2048
2049 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2050 return 0;
2051
2052
2053 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
2054 int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
2055 if (ret)
2056 return ret;
2057 }
2058
2059 fragments = grp->bb_fragments;
2060 if (fragments == 0)
2061 return 0;
2062
2063 switch (cr) {
2064 case 0:
2065 BUG_ON(ac->ac_2order == 0);
2066
2067
2068 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2069 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2070 ((group % flex_size) == 0))
2071 return 0;
2072
2073 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
2074 (free / fragments) >= ac->ac_g_ex.fe_len)
2075 return 1;
2076
2077 if (grp->bb_largest_free_order < ac->ac_2order)
2078 return 0;
2079
2080 return 1;
2081 case 1:
2082 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2083 return 1;
2084 break;
2085 case 2:
2086 if (free >= ac->ac_g_ex.fe_len)
2087 return 1;
2088 break;
2089 case 3:
2090 return 1;
2091 default:
2092 BUG();
2093 }
2094
2095 return 0;
2096}
2097
2098static noinline_for_stack int
2099ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2100{
2101 ext4_group_t ngroups, group, i;
2102 int cr;
2103 int err = 0, first_err = 0;
2104 struct ext4_sb_info *sbi;
2105 struct super_block *sb;
2106 struct ext4_buddy e4b;
2107
2108 sb = ac->ac_sb;
2109 sbi = EXT4_SB(sb);
2110 ngroups = ext4_get_groups_count(sb);
2111
2112 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2113 ngroups = sbi->s_blockfile_groups;
2114
2115 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2116
2117
2118 err = ext4_mb_find_by_goal(ac, &e4b);
2119 if (err || ac->ac_status == AC_STATUS_FOUND)
2120 goto out;
2121
2122 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2123 goto out;
2124
2125
2126
2127
2128
2129
2130 i = fls(ac->ac_g_ex.fe_len);
2131 ac->ac_2order = 0;
2132
2133
2134
2135
2136
2137
2138
2139 if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
2140
2141
2142
2143 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2144 ac->ac_2order = array_index_nospec(i - 1,
2145 sb->s_blocksize_bits + 2);
2146 }
2147
2148
2149 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2150
2151 spin_lock(&sbi->s_md_lock);
2152 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2153 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2154 spin_unlock(&sbi->s_md_lock);
2155 }
2156
2157
2158 cr = ac->ac_2order ? 0 : 1;
2159
2160
2161
2162
2163repeat:
2164 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2165 ac->ac_criteria = cr;
2166
2167
2168
2169
2170 group = ac->ac_g_ex.fe_group;
2171
2172 for (i = 0; i < ngroups; group++, i++) {
2173 int ret = 0;
2174 cond_resched();
2175
2176
2177
2178
2179 if (group >= ngroups)
2180 group = 0;
2181
2182
2183 ret = ext4_mb_good_group(ac, group, cr);
2184 if (ret <= 0) {
2185 if (!first_err)
2186 first_err = ret;
2187 continue;
2188 }
2189
2190 err = ext4_mb_load_buddy(sb, group, &e4b);
2191 if (err)
2192 goto out;
2193
2194 ext4_lock_group(sb, group);
2195
2196
2197
2198
2199
2200 ret = ext4_mb_good_group(ac, group, cr);
2201 if (ret <= 0) {
2202 ext4_unlock_group(sb, group);
2203 ext4_mb_unload_buddy(&e4b);
2204 if (!first_err)
2205 first_err = ret;
2206 continue;
2207 }
2208
2209 ac->ac_groups_scanned++;
2210 if (cr == 0)
2211 ext4_mb_simple_scan_group(ac, &e4b);
2212 else if (cr == 1 && sbi->s_stripe &&
2213 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2214 ext4_mb_scan_aligned(ac, &e4b);
2215 else
2216 ext4_mb_complex_scan_group(ac, &e4b);
2217
2218 ext4_unlock_group(sb, group);
2219 ext4_mb_unload_buddy(&e4b);
2220
2221 if (ac->ac_status != AC_STATUS_CONTINUE)
2222 break;
2223 }
2224 }
2225
2226 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2227 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2228
2229
2230
2231
2232
2233 ext4_mb_try_best_found(ac, &e4b);
2234 if (ac->ac_status != AC_STATUS_FOUND) {
2235
2236
2237
2238
2239
2240
2241 ac->ac_b_ex.fe_group = 0;
2242 ac->ac_b_ex.fe_start = 0;
2243 ac->ac_b_ex.fe_len = 0;
2244 ac->ac_status = AC_STATUS_CONTINUE;
2245 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2246 cr = 3;
2247 atomic_inc(&sbi->s_mb_lost_chunks);
2248 goto repeat;
2249 }
2250 }
2251out:
2252 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2253 err = first_err;
2254 return err;
2255}
2256
2257static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2258{
2259 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2260 ext4_group_t group;
2261
2262 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2263 return NULL;
2264 group = *pos + 1;
2265 return (void *) ((unsigned long) group);
2266}
2267
2268static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2269{
2270 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2271 ext4_group_t group;
2272
2273 ++*pos;
2274 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2275 return NULL;
2276 group = *pos + 1;
2277 return (void *) ((unsigned long) group);
2278}
2279
2280static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2281{
2282 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2283 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2284 int i;
2285 int err, buddy_loaded = 0;
2286 struct ext4_buddy e4b;
2287 struct ext4_group_info *grinfo;
2288 unsigned char blocksize_bits = min_t(unsigned char,
2289 sb->s_blocksize_bits,
2290 EXT4_MAX_BLOCK_LOG_SIZE);
2291 struct sg {
2292 struct ext4_group_info info;
2293 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2294 } sg;
2295
2296 group--;
2297 if (group == 0)
2298 seq_puts(seq, "#group: free frags first ["
2299 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2300 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2301
2302 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2303 sizeof(struct ext4_group_info);
2304
2305 grinfo = ext4_get_group_info(sb, group);
2306
2307 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2308 err = ext4_mb_load_buddy(sb, group, &e4b);
2309 if (err) {
2310 seq_printf(seq, "#%-5u: I/O error\n", group);
2311 return 0;
2312 }
2313 buddy_loaded = 1;
2314 }
2315
2316 memcpy(&sg, ext4_get_group_info(sb, group), i);
2317
2318 if (buddy_loaded)
2319 ext4_mb_unload_buddy(&e4b);
2320
2321 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2322 sg.info.bb_fragments, sg.info.bb_first_free);
2323 for (i = 0; i <= 13; i++)
2324 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2325 sg.info.bb_counters[i] : 0);
2326 seq_printf(seq, " ]\n");
2327
2328 return 0;
2329}
2330
2331static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2332{
2333}
2334
2335const struct seq_operations ext4_mb_seq_groups_ops = {
2336 .start = ext4_mb_seq_groups_start,
2337 .next = ext4_mb_seq_groups_next,
2338 .stop = ext4_mb_seq_groups_stop,
2339 .show = ext4_mb_seq_groups_show,
2340};
2341
2342static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2343{
2344 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2345 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2346
2347 BUG_ON(!cachep);
2348 return cachep;
2349}
2350
2351
2352
2353
2354
2355int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2356{
2357 struct ext4_sb_info *sbi = EXT4_SB(sb);
2358 unsigned size;
2359 struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
2360
2361 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2362 EXT4_DESC_PER_BLOCK_BITS(sb);
2363 if (size <= sbi->s_group_info_size)
2364 return 0;
2365
2366 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2367 new_groupinfo = kvzalloc(size, GFP_KERNEL);
2368 if (!new_groupinfo) {
2369 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2370 return -ENOMEM;
2371 }
2372 rcu_read_lock();
2373 old_groupinfo = rcu_dereference(sbi->s_group_info);
2374 if (old_groupinfo)
2375 memcpy(new_groupinfo, old_groupinfo,
2376 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2377 rcu_read_unlock();
2378 rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
2379 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2380 if (old_groupinfo)
2381 ext4_kvfree_array_rcu(old_groupinfo);
2382 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2383 sbi->s_group_info_size);
2384 return 0;
2385}
2386
2387
2388int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2389 struct ext4_group_desc *desc)
2390{
2391 int i;
2392 int metalen = 0;
2393 int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2394 struct ext4_sb_info *sbi = EXT4_SB(sb);
2395 struct ext4_group_info **meta_group_info;
2396 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2397
2398
2399
2400
2401
2402
2403 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2404 metalen = sizeof(*meta_group_info) <<
2405 EXT4_DESC_PER_BLOCK_BITS(sb);
2406 meta_group_info = kmalloc(metalen, GFP_NOFS);
2407 if (meta_group_info == NULL) {
2408 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2409 "for a buddy group");
2410 goto exit_meta_group_info;
2411 }
2412 rcu_read_lock();
2413 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
2414 rcu_read_unlock();
2415 }
2416
2417 meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
2418 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2419
2420 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
2421 if (meta_group_info[i] == NULL) {
2422 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2423 goto exit_group_info;
2424 }
2425 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2426 &(meta_group_info[i]->bb_state));
2427
2428
2429
2430
2431
2432 if (ext4_has_group_desc_csum(sb) &&
2433 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
2434 meta_group_info[i]->bb_free =
2435 ext4_free_clusters_after_init(sb, group, desc);
2436 } else {
2437 meta_group_info[i]->bb_free =
2438 ext4_free_group_clusters(sb, desc);
2439 }
2440
2441 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2442 init_rwsem(&meta_group_info[i]->alloc_sem);
2443 meta_group_info[i]->bb_free_root = RB_ROOT;
2444 meta_group_info[i]->bb_largest_free_order = -1;
2445
2446#ifdef DOUBLE_CHECK
2447 {
2448 struct buffer_head *bh;
2449 meta_group_info[i]->bb_bitmap =
2450 kmalloc(sb->s_blocksize, GFP_NOFS);
2451 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2452 bh = ext4_read_block_bitmap(sb, group);
2453 BUG_ON(IS_ERR_OR_NULL(bh));
2454 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2455 sb->s_blocksize);
2456 put_bh(bh);
2457 }
2458#endif
2459
2460 return 0;
2461
2462exit_group_info:
2463
2464 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2465 struct ext4_group_info ***group_info;
2466
2467 rcu_read_lock();
2468 group_info = rcu_dereference(sbi->s_group_info);
2469 kfree(group_info[idx]);
2470 group_info[idx] = NULL;
2471 rcu_read_unlock();
2472 }
2473exit_meta_group_info:
2474 return -ENOMEM;
2475}
2476
2477static int ext4_mb_init_backend(struct super_block *sb)
2478{
2479 ext4_group_t ngroups = ext4_get_groups_count(sb);
2480 ext4_group_t i;
2481 struct ext4_sb_info *sbi = EXT4_SB(sb);
2482 int err;
2483 struct ext4_group_desc *desc;
2484 struct ext4_group_info ***group_info;
2485 struct kmem_cache *cachep;
2486
2487 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2488 if (err)
2489 return err;
2490
2491 sbi->s_buddy_cache = new_inode(sb);
2492 if (sbi->s_buddy_cache == NULL) {
2493 ext4_msg(sb, KERN_ERR, "can't get new inode");
2494 goto err_freesgi;
2495 }
2496
2497
2498
2499
2500 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2501 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2502 for (i = 0; i < ngroups; i++) {
2503 cond_resched();
2504 desc = ext4_get_group_desc(sb, i, NULL);
2505 if (desc == NULL) {
2506 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2507 goto err_freebuddy;
2508 }
2509 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2510 goto err_freebuddy;
2511 }
2512
2513 return 0;
2514
2515err_freebuddy:
2516 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2517 while (i-- > 0)
2518 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2519 i = sbi->s_group_info_size;
2520 rcu_read_lock();
2521 group_info = rcu_dereference(sbi->s_group_info);
2522 while (i-- > 0)
2523 kfree(group_info[i]);
2524 rcu_read_unlock();
2525 iput(sbi->s_buddy_cache);
2526err_freesgi:
2527 rcu_read_lock();
2528 kvfree(rcu_dereference(sbi->s_group_info));
2529 rcu_read_unlock();
2530 return -ENOMEM;
2531}
2532
2533static void ext4_groupinfo_destroy_slabs(void)
2534{
2535 int i;
2536
2537 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2538 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2539 ext4_groupinfo_caches[i] = NULL;
2540 }
2541}
2542
2543static int ext4_groupinfo_create_slab(size_t size)
2544{
2545 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2546 int slab_size;
2547 int blocksize_bits = order_base_2(size);
2548 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2549 struct kmem_cache *cachep;
2550
2551 if (cache_index >= NR_GRPINFO_CACHES)
2552 return -EINVAL;
2553
2554 if (unlikely(cache_index < 0))
2555 cache_index = 0;
2556
2557 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2558 if (ext4_groupinfo_caches[cache_index]) {
2559 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2560 return 0;
2561 }
2562
2563 slab_size = offsetof(struct ext4_group_info,
2564 bb_counters[blocksize_bits + 2]);
2565
2566 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2567 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2568 NULL);
2569
2570 ext4_groupinfo_caches[cache_index] = cachep;
2571
2572 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2573 if (!cachep) {
2574 printk(KERN_EMERG
2575 "EXT4-fs: no memory for groupinfo slab cache\n");
2576 return -ENOMEM;
2577 }
2578
2579 return 0;
2580}
2581
2582int ext4_mb_init(struct super_block *sb)
2583{
2584 struct ext4_sb_info *sbi = EXT4_SB(sb);
2585 unsigned i, j;
2586 unsigned offset, offset_incr;
2587 unsigned max;
2588 int ret;
2589
2590 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2591
2592 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2593 if (sbi->s_mb_offsets == NULL) {
2594 ret = -ENOMEM;
2595 goto out;
2596 }
2597
2598 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2599 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2600 if (sbi->s_mb_maxs == NULL) {
2601 ret = -ENOMEM;
2602 goto out;
2603 }
2604
2605 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2606 if (ret < 0)
2607 goto out;
2608
2609
2610 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2611 sbi->s_mb_offsets[0] = 0;
2612
2613 i = 1;
2614 offset = 0;
2615 offset_incr = 1 << (sb->s_blocksize_bits - 1);
2616 max = sb->s_blocksize << 2;
2617 do {
2618 sbi->s_mb_offsets[i] = offset;
2619 sbi->s_mb_maxs[i] = max;
2620 offset += offset_incr;
2621 offset_incr = offset_incr >> 1;
2622 max = max >> 1;
2623 i++;
2624 } while (i <= sb->s_blocksize_bits + 1);
2625
2626 spin_lock_init(&sbi->s_md_lock);
2627 spin_lock_init(&sbi->s_bal_lock);
2628 sbi->s_mb_free_pending = 0;
2629 INIT_LIST_HEAD(&sbi->s_freed_data_list);
2630
2631 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2632 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2633 sbi->s_mb_stats = MB_DEFAULT_STATS;
2634 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2635 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2649 sbi->s_cluster_bits, 32);
2650
2651
2652
2653
2654
2655
2656
2657
2658 if (sbi->s_stripe > 1) {
2659 sbi->s_mb_group_prealloc = roundup(
2660 sbi->s_mb_group_prealloc, sbi->s_stripe);
2661 }
2662
2663 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2664 if (sbi->s_locality_groups == NULL) {
2665 ret = -ENOMEM;
2666 goto out;
2667 }
2668 for_each_possible_cpu(i) {
2669 struct ext4_locality_group *lg;
2670 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2671 mutex_init(&lg->lg_mutex);
2672 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2673 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2674 spin_lock_init(&lg->lg_prealloc_lock);
2675 }
2676
2677
2678 ret = ext4_mb_init_backend(sb);
2679 if (ret != 0)
2680 goto out_free_locality_groups;
2681
2682 return 0;
2683
2684out_free_locality_groups:
2685 free_percpu(sbi->s_locality_groups);
2686 sbi->s_locality_groups = NULL;
2687out:
2688 kfree(sbi->s_mb_offsets);
2689 sbi->s_mb_offsets = NULL;
2690 kfree(sbi->s_mb_maxs);
2691 sbi->s_mb_maxs = NULL;
2692 return ret;
2693}
2694
2695
2696static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2697{
2698 struct ext4_prealloc_space *pa;
2699 struct list_head *cur, *tmp;
2700 int count = 0;
2701
2702 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2703 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2704 list_del(&pa->pa_group_list);
2705 count++;
2706 kmem_cache_free(ext4_pspace_cachep, pa);
2707 }
2708 if (count)
2709 mb_debug(1, "mballoc: %u PAs left\n", count);
2710
2711}
2712
2713int ext4_mb_release(struct super_block *sb)
2714{
2715 ext4_group_t ngroups = ext4_get_groups_count(sb);
2716 ext4_group_t i;
2717 int num_meta_group_infos;
2718 struct ext4_group_info *grinfo, ***group_info;
2719 struct ext4_sb_info *sbi = EXT4_SB(sb);
2720 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2721
2722 if (sbi->s_group_info) {
2723 for (i = 0; i < ngroups; i++) {
2724 cond_resched();
2725 grinfo = ext4_get_group_info(sb, i);
2726#ifdef DOUBLE_CHECK
2727 kfree(grinfo->bb_bitmap);
2728#endif
2729 ext4_lock_group(sb, i);
2730 ext4_mb_cleanup_pa(grinfo);
2731 ext4_unlock_group(sb, i);
2732 kmem_cache_free(cachep, grinfo);
2733 }
2734 num_meta_group_infos = (ngroups +
2735 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2736 EXT4_DESC_PER_BLOCK_BITS(sb);
2737 rcu_read_lock();
2738 group_info = rcu_dereference(sbi->s_group_info);
2739 for (i = 0; i < num_meta_group_infos; i++)
2740 kfree(group_info[i]);
2741 kvfree(group_info);
2742 rcu_read_unlock();
2743 }
2744 kfree(sbi->s_mb_offsets);
2745 kfree(sbi->s_mb_maxs);
2746 iput(sbi->s_buddy_cache);
2747 if (sbi->s_mb_stats) {
2748 ext4_msg(sb, KERN_INFO,
2749 "mballoc: %u blocks %u reqs (%u success)",
2750 atomic_read(&sbi->s_bal_allocated),
2751 atomic_read(&sbi->s_bal_reqs),
2752 atomic_read(&sbi->s_bal_success));
2753 ext4_msg(sb, KERN_INFO,
2754 "mballoc: %u extents scanned, %u goal hits, "
2755 "%u 2^N hits, %u breaks, %u lost",
2756 atomic_read(&sbi->s_bal_ex_scanned),
2757 atomic_read(&sbi->s_bal_goals),
2758 atomic_read(&sbi->s_bal_2orders),
2759 atomic_read(&sbi->s_bal_breaks),
2760 atomic_read(&sbi->s_mb_lost_chunks));
2761 ext4_msg(sb, KERN_INFO,
2762 "mballoc: %lu generated and it took %Lu",
2763 sbi->s_mb_buddies_generated,
2764 sbi->s_mb_generation_time);
2765 ext4_msg(sb, KERN_INFO,
2766 "mballoc: %u preallocated, %u discarded",
2767 atomic_read(&sbi->s_mb_preallocated),
2768 atomic_read(&sbi->s_mb_discarded));
2769 }
2770
2771 free_percpu(sbi->s_locality_groups);
2772
2773 return 0;
2774}
2775
2776static inline int ext4_issue_discard(struct super_block *sb,
2777 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2778 struct bio **biop)
2779{
2780 ext4_fsblk_t discard_block;
2781
2782 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2783 ext4_group_first_block_no(sb, block_group));
2784 count = EXT4_C2B(EXT4_SB(sb), count);
2785 trace_ext4_discard_blocks(sb,
2786 (unsigned long long) discard_block, count);
2787 if (biop) {
2788 return __blkdev_issue_discard(sb->s_bdev,
2789 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2790 (sector_t)count << (sb->s_blocksize_bits - 9),
2791 GFP_NOFS, 0, biop);
2792 } else
2793 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2794}
2795
2796static void ext4_free_data_in_buddy(struct super_block *sb,
2797 struct ext4_free_data *entry)
2798{
2799 struct ext4_buddy e4b;
2800 struct ext4_group_info *db;
2801 int err, count = 0, count2 = 0;
2802
2803 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2804 entry->efd_count, entry->efd_group, entry);
2805
2806 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2807
2808 BUG_ON(err != 0);
2809
2810 spin_lock(&EXT4_SB(sb)->s_md_lock);
2811 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2812 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2813
2814 db = e4b.bd_info;
2815
2816 count += entry->efd_count;
2817 count2++;
2818 ext4_lock_group(sb, entry->efd_group);
2819
2820 rb_erase(&entry->efd_node, &(db->bb_free_root));
2821 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2822
2823
2824
2825
2826
2827
2828
2829 if (!test_opt(sb, DISCARD))
2830 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2831
2832 if (!db->bb_free_root.rb_node) {
2833
2834
2835
2836 put_page(e4b.bd_buddy_page);
2837 put_page(e4b.bd_bitmap_page);
2838 }
2839 ext4_unlock_group(sb, entry->efd_group);
2840 kmem_cache_free(ext4_free_data_cachep, entry);
2841 ext4_mb_unload_buddy(&e4b);
2842
2843 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2844}
2845
2846
2847
2848
2849
2850void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2851{
2852 struct ext4_sb_info *sbi = EXT4_SB(sb);
2853 struct ext4_free_data *entry, *tmp;
2854 struct bio *discard_bio = NULL;
2855 struct list_head freed_data_list;
2856 struct list_head *cut_pos = NULL;
2857 int err;
2858
2859 INIT_LIST_HEAD(&freed_data_list);
2860
2861 spin_lock(&sbi->s_md_lock);
2862 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2863 if (entry->efd_tid != commit_tid)
2864 break;
2865 cut_pos = &entry->efd_list;
2866 }
2867 if (cut_pos)
2868 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2869 cut_pos);
2870 spin_unlock(&sbi->s_md_lock);
2871
2872 if (test_opt(sb, DISCARD)) {
2873 list_for_each_entry(entry, &freed_data_list, efd_list) {
2874 err = ext4_issue_discard(sb, entry->efd_group,
2875 entry->efd_start_cluster,
2876 entry->efd_count,
2877 &discard_bio);
2878 if (err && err != -EOPNOTSUPP) {
2879 ext4_msg(sb, KERN_WARNING, "discard request in"
2880 " group:%d block:%d count:%d failed"
2881 " with %d", entry->efd_group,
2882 entry->efd_start_cluster,
2883 entry->efd_count, err);
2884 } else if (err == -EOPNOTSUPP)
2885 break;
2886 }
2887
2888 if (discard_bio) {
2889 submit_bio_wait(discard_bio);
2890 bio_put(discard_bio);
2891 }
2892 }
2893
2894 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2895 ext4_free_data_in_buddy(sb, entry);
2896}
2897
2898int __init ext4_init_mballoc(void)
2899{
2900 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2901 SLAB_RECLAIM_ACCOUNT);
2902 if (ext4_pspace_cachep == NULL)
2903 return -ENOMEM;
2904
2905 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2906 SLAB_RECLAIM_ACCOUNT);
2907 if (ext4_ac_cachep == NULL) {
2908 kmem_cache_destroy(ext4_pspace_cachep);
2909 return -ENOMEM;
2910 }
2911
2912 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2913 SLAB_RECLAIM_ACCOUNT);
2914 if (ext4_free_data_cachep == NULL) {
2915 kmem_cache_destroy(ext4_pspace_cachep);
2916 kmem_cache_destroy(ext4_ac_cachep);
2917 return -ENOMEM;
2918 }
2919 return 0;
2920}
2921
2922void ext4_exit_mballoc(void)
2923{
2924
2925
2926
2927
2928 rcu_barrier();
2929 kmem_cache_destroy(ext4_pspace_cachep);
2930 kmem_cache_destroy(ext4_ac_cachep);
2931 kmem_cache_destroy(ext4_free_data_cachep);
2932 ext4_groupinfo_destroy_slabs();
2933}
2934
2935
2936
2937
2938
2939
2940static noinline_for_stack int
2941ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2942 handle_t *handle, unsigned int reserv_clstrs)
2943{
2944 struct buffer_head *bitmap_bh = NULL;
2945 struct ext4_group_desc *gdp;
2946 struct buffer_head *gdp_bh;
2947 struct ext4_sb_info *sbi;
2948 struct super_block *sb;
2949 ext4_fsblk_t block;
2950 int err, len;
2951
2952 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2953 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2954
2955 sb = ac->ac_sb;
2956 sbi = EXT4_SB(sb);
2957
2958 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2959 if (IS_ERR(bitmap_bh)) {
2960 err = PTR_ERR(bitmap_bh);
2961 bitmap_bh = NULL;
2962 goto out_err;
2963 }
2964
2965 BUFFER_TRACE(bitmap_bh, "getting write access");
2966 err = ext4_journal_get_write_access(handle, bitmap_bh);
2967 if (err)
2968 goto out_err;
2969
2970 err = -EIO;
2971 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2972 if (!gdp)
2973 goto out_err;
2974
2975 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2976 ext4_free_group_clusters(sb, gdp));
2977
2978 BUFFER_TRACE(gdp_bh, "get_write_access");
2979 err = ext4_journal_get_write_access(handle, gdp_bh);
2980 if (err)
2981 goto out_err;
2982
2983 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2984
2985 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2986 if (!ext4_data_block_valid(sbi, block, len)) {
2987 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2988 "fs metadata", block, block+len);
2989
2990
2991
2992
2993 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2994 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2995 ac->ac_b_ex.fe_len);
2996 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2997 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2998 if (!err)
2999 err = -EFSCORRUPTED;
3000 goto out_err;
3001 }
3002
3003 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3004#ifdef AGGRESSIVE_CHECK
3005 {
3006 int i;
3007 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3008 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3009 bitmap_bh->b_data));
3010 }
3011 }
3012#endif
3013 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3014 ac->ac_b_ex.fe_len);
3015 if (ext4_has_group_desc_csum(sb) &&
3016 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3017 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3018 ext4_free_group_clusters_set(sb, gdp,
3019 ext4_free_clusters_after_init(sb,
3020 ac->ac_b_ex.fe_group, gdp));
3021 }
3022 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3023 ext4_free_group_clusters_set(sb, gdp, len);
3024 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3025 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3026
3027 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3028 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3029
3030
3031
3032 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3033
3034 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3035 reserv_clstrs);
3036
3037 if (sbi->s_log_groups_per_flex) {
3038 ext4_group_t flex_group = ext4_flex_group(sbi,
3039 ac->ac_b_ex.fe_group);
3040 atomic64_sub(ac->ac_b_ex.fe_len,
3041 &sbi_array_rcu_deref(sbi, s_flex_groups,
3042 flex_group)->free_clusters);
3043 }
3044
3045 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3046 if (err)
3047 goto out_err;
3048 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3049
3050out_err:
3051 brelse(bitmap_bh);
3052 return err;
3053}
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3065{
3066 struct super_block *sb = ac->ac_sb;
3067 struct ext4_locality_group *lg = ac->ac_lg;
3068
3069 BUG_ON(lg == NULL);
3070 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3071 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3072 current->pid, ac->ac_g_ex.fe_len);
3073}
3074
3075
3076
3077
3078
3079static noinline_for_stack void
3080ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3081 struct ext4_allocation_request *ar)
3082{
3083 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3084 int bsbits, max;
3085 ext4_lblk_t end;
3086 loff_t size, start_off;
3087 loff_t orig_size __maybe_unused;
3088 ext4_lblk_t start;
3089 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3090 struct ext4_prealloc_space *pa;
3091
3092
3093
3094 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3095 return;
3096
3097
3098 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3099 return;
3100
3101
3102
3103 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3104 return;
3105
3106 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3107 ext4_mb_normalize_group_request(ac);
3108 return ;
3109 }
3110
3111 bsbits = ac->ac_sb->s_blocksize_bits;
3112
3113
3114
3115 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3116 size = size << bsbits;
3117 if (size < i_size_read(ac->ac_inode))
3118 size = i_size_read(ac->ac_inode);
3119 orig_size = size;
3120
3121
3122 max = 2 << bsbits;
3123
3124#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3125 (req <= (size) || max <= (chunk_size))
3126
3127
3128
3129 start_off = 0;
3130 if (size <= 16 * 1024) {
3131 size = 16 * 1024;
3132 } else if (size <= 32 * 1024) {
3133 size = 32 * 1024;
3134 } else if (size <= 64 * 1024) {
3135 size = 64 * 1024;
3136 } else if (size <= 128 * 1024) {
3137 size = 128 * 1024;
3138 } else if (size <= 256 * 1024) {
3139 size = 256 * 1024;
3140 } else if (size <= 512 * 1024) {
3141 size = 512 * 1024;
3142 } else if (size <= 1024 * 1024) {
3143 size = 1024 * 1024;
3144 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3145 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3146 (21 - bsbits)) << 21;
3147 size = 2 * 1024 * 1024;
3148 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3149 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3150 (22 - bsbits)) << 22;
3151 size = 4 * 1024 * 1024;
3152 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3153 (8<<20)>>bsbits, max, 8 * 1024)) {
3154 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3155 (23 - bsbits)) << 23;
3156 size = 8 * 1024 * 1024;
3157 } else {
3158 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3159 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3160 ac->ac_o_ex.fe_len) << bsbits;
3161 }
3162 size = size >> bsbits;
3163 start = start_off >> bsbits;
3164
3165
3166 if (ar->pleft && start <= ar->lleft) {
3167 size -= ar->lleft + 1 - start;
3168 start = ar->lleft + 1;
3169 }
3170 if (ar->pright && start + size - 1 >= ar->lright)
3171 size -= start + size - ar->lright;
3172
3173
3174
3175
3176
3177 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3178 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3179
3180 end = start + size;
3181
3182
3183 rcu_read_lock();
3184 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3185 ext4_lblk_t pa_end;
3186
3187 if (pa->pa_deleted)
3188 continue;
3189 spin_lock(&pa->pa_lock);
3190 if (pa->pa_deleted) {
3191 spin_unlock(&pa->pa_lock);
3192 continue;
3193 }
3194
3195 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3196 pa->pa_len);
3197
3198
3199 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3200 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3201
3202
3203 if (pa->pa_lstart >= end || pa_end <= start) {
3204 spin_unlock(&pa->pa_lock);
3205 continue;
3206 }
3207 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3208
3209
3210 if (pa_end <= ac->ac_o_ex.fe_logical) {
3211 BUG_ON(pa_end < start);
3212 start = pa_end;
3213 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3214 BUG_ON(pa->pa_lstart > end);
3215 end = pa->pa_lstart;
3216 }
3217 spin_unlock(&pa->pa_lock);
3218 }
3219 rcu_read_unlock();
3220 size = end - start;
3221
3222
3223 rcu_read_lock();
3224 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3225 ext4_lblk_t pa_end;
3226
3227 spin_lock(&pa->pa_lock);
3228 if (pa->pa_deleted == 0) {
3229 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3230 pa->pa_len);
3231 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3232 }
3233 spin_unlock(&pa->pa_lock);
3234 }
3235 rcu_read_unlock();
3236
3237 if (start + size <= ac->ac_o_ex.fe_logical &&
3238 start > ac->ac_o_ex.fe_logical) {
3239 ext4_msg(ac->ac_sb, KERN_ERR,
3240 "start %lu, size %lu, fe_logical %lu",
3241 (unsigned long) start, (unsigned long) size,
3242 (unsigned long) ac->ac_o_ex.fe_logical);
3243 BUG();
3244 }
3245 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3246
3247
3248
3249
3250
3251 ac->ac_g_ex.fe_logical = start;
3252 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3253
3254
3255 if (ar->pright && (ar->lright == (start + size))) {
3256
3257 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3258 &ac->ac_f_ex.fe_group,
3259 &ac->ac_f_ex.fe_start);
3260 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3261 }
3262 if (ar->pleft && (ar->lleft + 1 == start)) {
3263
3264 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3265 &ac->ac_f_ex.fe_group,
3266 &ac->ac_f_ex.fe_start);
3267 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3268 }
3269
3270 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3271 (unsigned) orig_size, (unsigned) start);
3272}
3273
3274static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3275{
3276 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3277
3278 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3279 atomic_inc(&sbi->s_bal_reqs);
3280 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3281 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3282 atomic_inc(&sbi->s_bal_success);
3283 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3284 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3285 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3286 atomic_inc(&sbi->s_bal_goals);
3287 if (ac->ac_found > sbi->s_mb_max_to_scan)
3288 atomic_inc(&sbi->s_bal_breaks);
3289 }
3290
3291 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3292 trace_ext4_mballoc_alloc(ac);
3293 else
3294 trace_ext4_mballoc_prealloc(ac);
3295}
3296
3297
3298
3299
3300
3301
3302
3303static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3304{
3305 struct ext4_prealloc_space *pa = ac->ac_pa;
3306 struct ext4_buddy e4b;
3307 int err;
3308
3309 if (pa == NULL) {
3310 if (ac->ac_f_ex.fe_len == 0)
3311 return;
3312 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3313 if (err) {
3314
3315
3316
3317
3318
3319 WARN(1, "mb_load_buddy failed (%d)", err);
3320 return;
3321 }
3322 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3323 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3324 ac->ac_f_ex.fe_len);
3325 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3326 ext4_mb_unload_buddy(&e4b);
3327 return;
3328 }
3329 if (pa->pa_type == MB_INODE_PA)
3330 pa->pa_free += ac->ac_b_ex.fe_len;
3331}
3332
3333
3334
3335
3336static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3337 struct ext4_prealloc_space *pa)
3338{
3339 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3340 ext4_fsblk_t start;
3341 ext4_fsblk_t end;
3342 int len;
3343
3344
3345 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3346 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3347 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3348 len = EXT4_NUM_B2C(sbi, end - start);
3349 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3350 &ac->ac_b_ex.fe_start);
3351 ac->ac_b_ex.fe_len = len;
3352 ac->ac_status = AC_STATUS_FOUND;
3353 ac->ac_pa = pa;
3354
3355 BUG_ON(start < pa->pa_pstart);
3356 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3357 BUG_ON(pa->pa_free < len);
3358 pa->pa_free -= len;
3359
3360 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3361}
3362
3363
3364
3365
3366static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3367 struct ext4_prealloc_space *pa)
3368{
3369 unsigned int len = ac->ac_o_ex.fe_len;
3370
3371 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3372 &ac->ac_b_ex.fe_group,
3373 &ac->ac_b_ex.fe_start);
3374 ac->ac_b_ex.fe_len = len;
3375 ac->ac_status = AC_STATUS_FOUND;
3376 ac->ac_pa = pa;
3377
3378
3379
3380
3381
3382
3383
3384 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3385}
3386
3387
3388
3389
3390
3391
3392
3393static struct ext4_prealloc_space *
3394ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3395 struct ext4_prealloc_space *pa,
3396 struct ext4_prealloc_space *cpa)
3397{
3398 ext4_fsblk_t cur_distance, new_distance;
3399
3400 if (cpa == NULL) {
3401 atomic_inc(&pa->pa_count);
3402 return pa;
3403 }
3404 cur_distance = abs(goal_block - cpa->pa_pstart);
3405 new_distance = abs(goal_block - pa->pa_pstart);
3406
3407 if (cur_distance <= new_distance)
3408 return cpa;
3409
3410
3411 atomic_dec(&cpa->pa_count);
3412 atomic_inc(&pa->pa_count);
3413 return pa;
3414}
3415
3416
3417
3418
3419static noinline_for_stack int
3420ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3421{
3422 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3423 int order, i;
3424 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3425 struct ext4_locality_group *lg;
3426 struct ext4_prealloc_space *pa, *cpa = NULL;
3427 ext4_fsblk_t goal_block;
3428
3429
3430 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3431 return 0;
3432
3433
3434 rcu_read_lock();
3435 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3436
3437
3438
3439 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3440 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3441 EXT4_C2B(sbi, pa->pa_len)))
3442 continue;
3443
3444
3445 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3446 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3447 EXT4_MAX_BLOCK_FILE_PHYS))
3448 continue;
3449
3450
3451 spin_lock(&pa->pa_lock);
3452 if (pa->pa_deleted == 0 && pa->pa_free) {
3453 atomic_inc(&pa->pa_count);
3454 ext4_mb_use_inode_pa(ac, pa);
3455 spin_unlock(&pa->pa_lock);
3456 ac->ac_criteria = 10;
3457 rcu_read_unlock();
3458 return 1;
3459 }
3460 spin_unlock(&pa->pa_lock);
3461 }
3462 rcu_read_unlock();
3463
3464
3465 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3466 return 0;
3467
3468
3469 lg = ac->ac_lg;
3470 if (lg == NULL)
3471 return 0;
3472 order = fls(ac->ac_o_ex.fe_len) - 1;
3473 if (order > PREALLOC_TB_SIZE - 1)
3474
3475 order = PREALLOC_TB_SIZE - 1;
3476
3477 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3478
3479
3480
3481
3482 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3483 rcu_read_lock();
3484 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3485 pa_inode_list) {
3486 spin_lock(&pa->pa_lock);
3487 if (pa->pa_deleted == 0 &&
3488 pa->pa_free >= ac->ac_o_ex.fe_len) {
3489
3490 cpa = ext4_mb_check_group_pa(goal_block,
3491 pa, cpa);
3492 }
3493 spin_unlock(&pa->pa_lock);
3494 }
3495 rcu_read_unlock();
3496 }
3497 if (cpa) {
3498 ext4_mb_use_group_pa(ac, cpa);
3499 ac->ac_criteria = 20;
3500 return 1;
3501 }
3502 return 0;
3503}
3504
3505
3506
3507
3508
3509
3510
3511static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3512 ext4_group_t group)
3513{
3514 struct rb_node *n;
3515 struct ext4_group_info *grp;
3516 struct ext4_free_data *entry;
3517
3518 grp = ext4_get_group_info(sb, group);
3519 n = rb_first(&(grp->bb_free_root));
3520
3521 while (n) {
3522 entry = rb_entry(n, struct ext4_free_data, efd_node);
3523 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3524 n = rb_next(n);
3525 }
3526 return;
3527}
3528
3529
3530
3531
3532
3533
3534static noinline_for_stack
3535void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3536 ext4_group_t group)
3537{
3538 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3539 struct ext4_prealloc_space *pa;
3540 struct list_head *cur;
3541 ext4_group_t groupnr;
3542 ext4_grpblk_t start;
3543 int preallocated = 0;
3544 int len;
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554 list_for_each(cur, &grp->bb_prealloc_list) {
3555 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3556 spin_lock(&pa->pa_lock);
3557 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3558 &groupnr, &start);
3559 len = pa->pa_len;
3560 spin_unlock(&pa->pa_lock);
3561 if (unlikely(len == 0))
3562 continue;
3563 BUG_ON(groupnr != group);
3564 ext4_set_bits(bitmap, start, len);
3565 preallocated += len;
3566 }
3567 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3568}
3569
3570static void ext4_mb_pa_callback(struct rcu_head *head)
3571{
3572 struct ext4_prealloc_space *pa;
3573 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3574
3575 BUG_ON(atomic_read(&pa->pa_count));
3576 BUG_ON(pa->pa_deleted == 0);
3577 kmem_cache_free(ext4_pspace_cachep, pa);
3578}
3579
3580
3581
3582
3583
3584static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3585 struct super_block *sb, struct ext4_prealloc_space *pa)
3586{
3587 ext4_group_t grp;
3588 ext4_fsblk_t grp_blk;
3589
3590
3591 spin_lock(&pa->pa_lock);
3592 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3593 spin_unlock(&pa->pa_lock);
3594 return;
3595 }
3596
3597 if (pa->pa_deleted == 1) {
3598 spin_unlock(&pa->pa_lock);
3599 return;
3600 }
3601
3602 pa->pa_deleted = 1;
3603 spin_unlock(&pa->pa_lock);
3604
3605 grp_blk = pa->pa_pstart;
3606
3607
3608
3609
3610 if (pa->pa_type == MB_GROUP_PA)
3611 grp_blk--;
3612
3613 grp = ext4_get_group_number(sb, grp_blk);
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629 ext4_lock_group(sb, grp);
3630 list_del(&pa->pa_group_list);
3631 ext4_unlock_group(sb, grp);
3632
3633 spin_lock(pa->pa_obj_lock);
3634 list_del_rcu(&pa->pa_inode_list);
3635 spin_unlock(pa->pa_obj_lock);
3636
3637 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3638}
3639
3640
3641
3642
3643static noinline_for_stack int
3644ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3645{
3646 struct super_block *sb = ac->ac_sb;
3647 struct ext4_sb_info *sbi = EXT4_SB(sb);
3648 struct ext4_prealloc_space *pa;
3649 struct ext4_group_info *grp;
3650 struct ext4_inode_info *ei;
3651
3652
3653 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3654 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3655 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3656
3657 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3658 if (pa == NULL)
3659 return -ENOMEM;
3660
3661 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3662 int winl;
3663 int wins;
3664 int win;
3665 int offs;
3666
3667
3668
3669
3670 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3671 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3672
3673
3674
3675
3676 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3677
3678
3679 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3680
3681
3682 win = min(winl, wins);
3683
3684 offs = ac->ac_o_ex.fe_logical %
3685 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3686 if (offs && offs < win)
3687 win = offs;
3688
3689 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3690 EXT4_NUM_B2C(sbi, win);
3691 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3692 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3693 }
3694
3695
3696
3697 ac->ac_f_ex = ac->ac_b_ex;
3698
3699 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3700 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3701 pa->pa_len = ac->ac_b_ex.fe_len;
3702 pa->pa_free = pa->pa_len;
3703 atomic_set(&pa->pa_count, 1);
3704 spin_lock_init(&pa->pa_lock);
3705 INIT_LIST_HEAD(&pa->pa_inode_list);
3706 INIT_LIST_HEAD(&pa->pa_group_list);
3707 pa->pa_deleted = 0;
3708 pa->pa_type = MB_INODE_PA;
3709
3710 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3711 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3712 trace_ext4_mb_new_inode_pa(ac, pa);
3713
3714 ext4_mb_use_inode_pa(ac, pa);
3715 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3716
3717 ei = EXT4_I(ac->ac_inode);
3718 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3719
3720 pa->pa_obj_lock = &ei->i_prealloc_lock;
3721 pa->pa_inode = ac->ac_inode;
3722
3723 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3724 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3725 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3726
3727 spin_lock(pa->pa_obj_lock);
3728 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3729 spin_unlock(pa->pa_obj_lock);
3730
3731 return 0;
3732}
3733
3734
3735
3736
3737static noinline_for_stack int
3738ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3739{
3740 struct super_block *sb = ac->ac_sb;
3741 struct ext4_locality_group *lg;
3742 struct ext4_prealloc_space *pa;
3743 struct ext4_group_info *grp;
3744
3745
3746 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3747 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3748 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3749
3750 BUG_ON(ext4_pspace_cachep == NULL);
3751 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3752 if (pa == NULL)
3753 return -ENOMEM;
3754
3755
3756
3757 ac->ac_f_ex = ac->ac_b_ex;
3758
3759 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3760 pa->pa_lstart = pa->pa_pstart;
3761 pa->pa_len = ac->ac_b_ex.fe_len;
3762 pa->pa_free = pa->pa_len;
3763 atomic_set(&pa->pa_count, 1);
3764 spin_lock_init(&pa->pa_lock);
3765 INIT_LIST_HEAD(&pa->pa_inode_list);
3766 INIT_LIST_HEAD(&pa->pa_group_list);
3767 pa->pa_deleted = 0;
3768 pa->pa_type = MB_GROUP_PA;
3769
3770 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3771 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3772 trace_ext4_mb_new_group_pa(ac, pa);
3773
3774 ext4_mb_use_group_pa(ac, pa);
3775 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3776
3777 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3778 lg = ac->ac_lg;
3779 BUG_ON(lg == NULL);
3780
3781 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3782 pa->pa_inode = NULL;
3783
3784 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3785 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3786 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3787
3788
3789
3790
3791
3792 return 0;
3793}
3794
3795static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3796{
3797 int err;
3798
3799 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3800 err = ext4_mb_new_group_pa(ac);
3801 else
3802 err = ext4_mb_new_inode_pa(ac);
3803 return err;
3804}
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814static noinline_for_stack int
3815ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3816 struct ext4_prealloc_space *pa)
3817{
3818 struct super_block *sb = e4b->bd_sb;
3819 struct ext4_sb_info *sbi = EXT4_SB(sb);
3820 unsigned int end;
3821 unsigned int next;
3822 ext4_group_t group;
3823 ext4_grpblk_t bit;
3824 unsigned long long grp_blk_start;
3825 int free = 0;
3826
3827 BUG_ON(pa->pa_deleted == 0);
3828 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3829 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3830 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3831 end = bit + pa->pa_len;
3832
3833 while (bit < end) {
3834 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3835 if (bit >= end)
3836 break;
3837 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3838 mb_debug(1, " free preallocated %u/%u in group %u\n",
3839 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3840 (unsigned) next - bit, (unsigned) group);
3841 free += next - bit;
3842
3843 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3844 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3845 EXT4_C2B(sbi, bit)),
3846 next - bit);
3847 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3848 bit = next + 1;
3849 }
3850 if (free != pa->pa_free) {
3851 ext4_msg(e4b->bd_sb, KERN_CRIT,
3852 "pa %p: logic %lu, phys. %lu, len %lu",
3853 pa, (unsigned long) pa->pa_lstart,
3854 (unsigned long) pa->pa_pstart,
3855 (unsigned long) pa->pa_len);
3856 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3857 free, pa->pa_free);
3858
3859
3860
3861
3862 }
3863 atomic_add(free, &sbi->s_mb_discarded);
3864
3865 return 0;
3866}
3867
3868static noinline_for_stack int
3869ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3870 struct ext4_prealloc_space *pa)
3871{
3872 struct super_block *sb = e4b->bd_sb;
3873 ext4_group_t group;
3874 ext4_grpblk_t bit;
3875
3876 trace_ext4_mb_release_group_pa(sb, pa);
3877 BUG_ON(pa->pa_deleted == 0);
3878 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3879 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3880 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3881 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3882 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3883
3884 return 0;
3885}
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896static noinline_for_stack int
3897ext4_mb_discard_group_preallocations(struct super_block *sb,
3898 ext4_group_t group, int needed)
3899{
3900 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3901 struct buffer_head *bitmap_bh = NULL;
3902 struct ext4_prealloc_space *pa, *tmp;
3903 struct list_head list;
3904 struct ext4_buddy e4b;
3905 int err;
3906 int busy = 0;
3907 int free = 0;
3908
3909 mb_debug(1, "discard preallocation for group %u\n", group);
3910
3911 if (list_empty(&grp->bb_prealloc_list))
3912 return 0;
3913
3914 bitmap_bh = ext4_read_block_bitmap(sb, group);
3915 if (IS_ERR(bitmap_bh)) {
3916 err = PTR_ERR(bitmap_bh);
3917 ext4_set_errno(sb, -err);
3918 ext4_error(sb, "Error %d reading block bitmap for %u",
3919 err, group);
3920 return 0;
3921 }
3922
3923 err = ext4_mb_load_buddy(sb, group, &e4b);
3924 if (err) {
3925 ext4_warning(sb, "Error %d loading buddy information for %u",
3926 err, group);
3927 put_bh(bitmap_bh);
3928 return 0;
3929 }
3930
3931 if (needed == 0)
3932 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3933
3934 INIT_LIST_HEAD(&list);
3935repeat:
3936 ext4_lock_group(sb, group);
3937 list_for_each_entry_safe(pa, tmp,
3938 &grp->bb_prealloc_list, pa_group_list) {
3939 spin_lock(&pa->pa_lock);
3940 if (atomic_read(&pa->pa_count)) {
3941 spin_unlock(&pa->pa_lock);
3942 busy = 1;
3943 continue;
3944 }
3945 if (pa->pa_deleted) {
3946 spin_unlock(&pa->pa_lock);
3947 continue;
3948 }
3949
3950
3951 pa->pa_deleted = 1;
3952
3953
3954 free += pa->pa_free;
3955
3956 spin_unlock(&pa->pa_lock);
3957
3958 list_del(&pa->pa_group_list);
3959 list_add(&pa->u.pa_tmp_list, &list);
3960 }
3961
3962
3963 if (free < needed && busy) {
3964 busy = 0;
3965 ext4_unlock_group(sb, group);
3966 cond_resched();
3967 goto repeat;
3968 }
3969
3970
3971 if (list_empty(&list)) {
3972 BUG_ON(free != 0);
3973 goto out;
3974 }
3975
3976
3977 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3978
3979
3980 spin_lock(pa->pa_obj_lock);
3981 list_del_rcu(&pa->pa_inode_list);
3982 spin_unlock(pa->pa_obj_lock);
3983
3984 if (pa->pa_type == MB_GROUP_PA)
3985 ext4_mb_release_group_pa(&e4b, pa);
3986 else
3987 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3988
3989 list_del(&pa->u.pa_tmp_list);
3990 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3991 }
3992
3993out:
3994 ext4_unlock_group(sb, group);
3995 ext4_mb_unload_buddy(&e4b);
3996 put_bh(bitmap_bh);
3997 return free;
3998}
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009void ext4_discard_preallocations(struct inode *inode)
4010{
4011 struct ext4_inode_info *ei = EXT4_I(inode);
4012 struct super_block *sb = inode->i_sb;
4013 struct buffer_head *bitmap_bh = NULL;
4014 struct ext4_prealloc_space *pa, *tmp;
4015 ext4_group_t group = 0;
4016 struct list_head list;
4017 struct ext4_buddy e4b;
4018 int err;
4019
4020 if (!S_ISREG(inode->i_mode)) {
4021
4022 return;
4023 }
4024
4025 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
4026 trace_ext4_discard_preallocations(inode);
4027
4028 INIT_LIST_HEAD(&list);
4029
4030repeat:
4031
4032 spin_lock(&ei->i_prealloc_lock);
4033 while (!list_empty(&ei->i_prealloc_list)) {
4034 pa = list_entry(ei->i_prealloc_list.next,
4035 struct ext4_prealloc_space, pa_inode_list);
4036 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4037 spin_lock(&pa->pa_lock);
4038 if (atomic_read(&pa->pa_count)) {
4039
4040
4041 spin_unlock(&pa->pa_lock);
4042 spin_unlock(&ei->i_prealloc_lock);
4043 ext4_msg(sb, KERN_ERR,
4044 "uh-oh! used pa while discarding");
4045 WARN_ON(1);
4046 schedule_timeout_uninterruptible(HZ);
4047 goto repeat;
4048
4049 }
4050 if (pa->pa_deleted == 0) {
4051 pa->pa_deleted = 1;
4052 spin_unlock(&pa->pa_lock);
4053 list_del_rcu(&pa->pa_inode_list);
4054 list_add(&pa->u.pa_tmp_list, &list);
4055 continue;
4056 }
4057
4058
4059 spin_unlock(&pa->pa_lock);
4060 spin_unlock(&ei->i_prealloc_lock);
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074 schedule_timeout_uninterruptible(HZ);
4075 goto repeat;
4076 }
4077 spin_unlock(&ei->i_prealloc_lock);
4078
4079 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4080 BUG_ON(pa->pa_type != MB_INODE_PA);
4081 group = ext4_get_group_number(sb, pa->pa_pstart);
4082
4083 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4084 GFP_NOFS|__GFP_NOFAIL);
4085 if (err) {
4086 ext4_set_errno(sb, -err);
4087 ext4_error(sb, "Error %d loading buddy information for %u",
4088 err, group);
4089 continue;
4090 }
4091
4092 bitmap_bh = ext4_read_block_bitmap(sb, group);
4093 if (IS_ERR(bitmap_bh)) {
4094 err = PTR_ERR(bitmap_bh);
4095 ext4_set_errno(sb, -err);
4096 ext4_error(sb, "Error %d reading block bitmap for %u",
4097 err, group);
4098 ext4_mb_unload_buddy(&e4b);
4099 continue;
4100 }
4101
4102 ext4_lock_group(sb, group);
4103 list_del(&pa->pa_group_list);
4104 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4105 ext4_unlock_group(sb, group);
4106
4107 ext4_mb_unload_buddy(&e4b);
4108 put_bh(bitmap_bh);
4109
4110 list_del(&pa->u.pa_tmp_list);
4111 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4112 }
4113}
4114
4115#ifdef CONFIG_EXT4_DEBUG
4116static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4117{
4118 struct super_block *sb = ac->ac_sb;
4119 ext4_group_t ngroups, i;
4120
4121 if (!ext4_mballoc_debug ||
4122 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4123 return;
4124
4125 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4126 " Allocation context details:");
4127 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4128 ac->ac_status, ac->ac_flags);
4129 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4130 "goal %lu/%lu/%lu@%lu, "
4131 "best %lu/%lu/%lu@%lu cr %d",
4132 (unsigned long)ac->ac_o_ex.fe_group,
4133 (unsigned long)ac->ac_o_ex.fe_start,
4134 (unsigned long)ac->ac_o_ex.fe_len,
4135 (unsigned long)ac->ac_o_ex.fe_logical,
4136 (unsigned long)ac->ac_g_ex.fe_group,
4137 (unsigned long)ac->ac_g_ex.fe_start,
4138 (unsigned long)ac->ac_g_ex.fe_len,
4139 (unsigned long)ac->ac_g_ex.fe_logical,
4140 (unsigned long)ac->ac_b_ex.fe_group,
4141 (unsigned long)ac->ac_b_ex.fe_start,
4142 (unsigned long)ac->ac_b_ex.fe_len,
4143 (unsigned long)ac->ac_b_ex.fe_logical,
4144 (int)ac->ac_criteria);
4145 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4146 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4147 ngroups = ext4_get_groups_count(sb);
4148 for (i = 0; i < ngroups; i++) {
4149 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4150 struct ext4_prealloc_space *pa;
4151 ext4_grpblk_t start;
4152 struct list_head *cur;
4153 ext4_lock_group(sb, i);
4154 list_for_each(cur, &grp->bb_prealloc_list) {
4155 pa = list_entry(cur, struct ext4_prealloc_space,
4156 pa_group_list);
4157 spin_lock(&pa->pa_lock);
4158 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4159 NULL, &start);
4160 spin_unlock(&pa->pa_lock);
4161 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4162 start, pa->pa_len);
4163 }
4164 ext4_unlock_group(sb, i);
4165
4166 if (grp->bb_free == 0)
4167 continue;
4168 printk(KERN_ERR "%u: %d/%d \n",
4169 i, grp->bb_free, grp->bb_fragments);
4170 }
4171 printk(KERN_ERR "\n");
4172}
4173#else
4174static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4175{
4176 return;
4177}
4178#endif
4179
4180
4181
4182
4183
4184
4185
4186
4187static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4188{
4189 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4190 int bsbits = ac->ac_sb->s_blocksize_bits;
4191 loff_t size, isize;
4192
4193 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4194 return;
4195
4196 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4197 return;
4198
4199 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4200 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4201 >> bsbits;
4202
4203 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
4204 !inode_is_open_for_write(ac->ac_inode)) {
4205 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4206 return;
4207 }
4208
4209 if (sbi->s_mb_group_prealloc <= 0) {
4210 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4211 return;
4212 }
4213
4214
4215 size = max(size, isize);
4216 if (size > sbi->s_mb_stream_request) {
4217 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4218 return;
4219 }
4220
4221 BUG_ON(ac->ac_lg != NULL);
4222
4223
4224
4225
4226
4227 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4228
4229
4230 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4231
4232
4233 mutex_lock(&ac->ac_lg->lg_mutex);
4234}
4235
4236static noinline_for_stack int
4237ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4238 struct ext4_allocation_request *ar)
4239{
4240 struct super_block *sb = ar->inode->i_sb;
4241 struct ext4_sb_info *sbi = EXT4_SB(sb);
4242 struct ext4_super_block *es = sbi->s_es;
4243 ext4_group_t group;
4244 unsigned int len;
4245 ext4_fsblk_t goal;
4246 ext4_grpblk_t block;
4247
4248
4249 len = ar->len;
4250
4251
4252 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4253 len = EXT4_CLUSTERS_PER_GROUP(sb);
4254
4255
4256 goal = ar->goal;
4257 if (goal < le32_to_cpu(es->s_first_data_block) ||
4258 goal >= ext4_blocks_count(es))
4259 goal = le32_to_cpu(es->s_first_data_block);
4260 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4261
4262
4263 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4264 ac->ac_status = AC_STATUS_CONTINUE;
4265 ac->ac_sb = sb;
4266 ac->ac_inode = ar->inode;
4267 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4268 ac->ac_o_ex.fe_group = group;
4269 ac->ac_o_ex.fe_start = block;
4270 ac->ac_o_ex.fe_len = len;
4271 ac->ac_g_ex = ac->ac_o_ex;
4272 ac->ac_flags = ar->flags;
4273
4274
4275
4276 ext4_mb_group_or_file(ac);
4277
4278 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4279 "left: %u/%u, right %u/%u to %swritable\n",
4280 (unsigned) ar->len, (unsigned) ar->logical,
4281 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4282 (unsigned) ar->lleft, (unsigned) ar->pleft,
4283 (unsigned) ar->lright, (unsigned) ar->pright,
4284 inode_is_open_for_write(ar->inode) ? "" : "non-");
4285 return 0;
4286
4287}
4288
4289static noinline_for_stack void
4290ext4_mb_discard_lg_preallocations(struct super_block *sb,
4291 struct ext4_locality_group *lg,
4292 int order, int total_entries)
4293{
4294 ext4_group_t group = 0;
4295 struct ext4_buddy e4b;
4296 struct list_head discard_list;
4297 struct ext4_prealloc_space *pa, *tmp;
4298
4299 mb_debug(1, "discard locality group preallocation\n");
4300
4301 INIT_LIST_HEAD(&discard_list);
4302
4303 spin_lock(&lg->lg_prealloc_lock);
4304 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4305 pa_inode_list) {
4306 spin_lock(&pa->pa_lock);
4307 if (atomic_read(&pa->pa_count)) {
4308
4309
4310
4311
4312
4313 spin_unlock(&pa->pa_lock);
4314 continue;
4315 }
4316 if (pa->pa_deleted) {
4317 spin_unlock(&pa->pa_lock);
4318 continue;
4319 }
4320
4321 BUG_ON(pa->pa_type != MB_GROUP_PA);
4322
4323
4324 pa->pa_deleted = 1;
4325 spin_unlock(&pa->pa_lock);
4326
4327 list_del_rcu(&pa->pa_inode_list);
4328 list_add(&pa->u.pa_tmp_list, &discard_list);
4329
4330 total_entries--;
4331 if (total_entries <= 5) {
4332
4333
4334
4335
4336
4337
4338 break;
4339 }
4340 }
4341 spin_unlock(&lg->lg_prealloc_lock);
4342
4343 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4344 int err;
4345
4346 group = ext4_get_group_number(sb, pa->pa_pstart);
4347 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4348 GFP_NOFS|__GFP_NOFAIL);
4349 if (err) {
4350 ext4_set_errno(sb, -err);
4351 ext4_error(sb, "Error %d loading buddy information for %u",
4352 err, group);
4353 continue;
4354 }
4355 ext4_lock_group(sb, group);
4356 list_del(&pa->pa_group_list);
4357 ext4_mb_release_group_pa(&e4b, pa);
4358 ext4_unlock_group(sb, group);
4359
4360 ext4_mb_unload_buddy(&e4b);
4361 list_del(&pa->u.pa_tmp_list);
4362 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4363 }
4364}
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4376{
4377 int order, added = 0, lg_prealloc_count = 1;
4378 struct super_block *sb = ac->ac_sb;
4379 struct ext4_locality_group *lg = ac->ac_lg;
4380 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4381
4382 order = fls(pa->pa_free) - 1;
4383 if (order > PREALLOC_TB_SIZE - 1)
4384
4385 order = PREALLOC_TB_SIZE - 1;
4386
4387 spin_lock(&lg->lg_prealloc_lock);
4388 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4389 pa_inode_list) {
4390 spin_lock(&tmp_pa->pa_lock);
4391 if (tmp_pa->pa_deleted) {
4392 spin_unlock(&tmp_pa->pa_lock);
4393 continue;
4394 }
4395 if (!added && pa->pa_free < tmp_pa->pa_free) {
4396
4397 list_add_tail_rcu(&pa->pa_inode_list,
4398 &tmp_pa->pa_inode_list);
4399 added = 1;
4400
4401
4402
4403
4404 }
4405 spin_unlock(&tmp_pa->pa_lock);
4406 lg_prealloc_count++;
4407 }
4408 if (!added)
4409 list_add_tail_rcu(&pa->pa_inode_list,
4410 &lg->lg_prealloc_list[order]);
4411 spin_unlock(&lg->lg_prealloc_lock);
4412
4413
4414 if (lg_prealloc_count > 8) {
4415 ext4_mb_discard_lg_preallocations(sb, lg,
4416 order, lg_prealloc_count);
4417 return;
4418 }
4419 return ;
4420}
4421
4422
4423
4424
4425static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4426{
4427 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4428 struct ext4_prealloc_space *pa = ac->ac_pa;
4429 if (pa) {
4430 if (pa->pa_type == MB_GROUP_PA) {
4431
4432 spin_lock(&pa->pa_lock);
4433 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4434 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4435 pa->pa_free -= ac->ac_b_ex.fe_len;
4436 pa->pa_len -= ac->ac_b_ex.fe_len;
4437 spin_unlock(&pa->pa_lock);
4438 }
4439 }
4440 if (pa) {
4441
4442
4443
4444
4445
4446
4447 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4448 spin_lock(pa->pa_obj_lock);
4449 list_del_rcu(&pa->pa_inode_list);
4450 spin_unlock(pa->pa_obj_lock);
4451 ext4_mb_add_n_trim(ac);
4452 }
4453 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4454 }
4455 if (ac->ac_bitmap_page)
4456 put_page(ac->ac_bitmap_page);
4457 if (ac->ac_buddy_page)
4458 put_page(ac->ac_buddy_page);
4459 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4460 mutex_unlock(&ac->ac_lg->lg_mutex);
4461 ext4_mb_collect_stats(ac);
4462 return 0;
4463}
4464
4465static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4466{
4467 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4468 int ret;
4469 int freed = 0;
4470
4471 trace_ext4_mb_discard_preallocations(sb, needed);
4472 for (i = 0; i < ngroups && needed > 0; i++) {
4473 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4474 freed += ret;
4475 needed -= ret;
4476 }
4477
4478 return freed;
4479}
4480
4481
4482
4483
4484
4485
4486ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4487 struct ext4_allocation_request *ar, int *errp)
4488{
4489 int freed;
4490 struct ext4_allocation_context *ac = NULL;
4491 struct ext4_sb_info *sbi;
4492 struct super_block *sb;
4493 ext4_fsblk_t block = 0;
4494 unsigned int inquota = 0;
4495 unsigned int reserv_clstrs = 0;
4496
4497 might_sleep();
4498 sb = ar->inode->i_sb;
4499 sbi = EXT4_SB(sb);
4500
4501 trace_ext4_request_blocks(ar);
4502
4503
4504 if (ext4_is_quota_file(ar->inode))
4505 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4506
4507 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
4508
4509
4510
4511
4512 while (ar->len &&
4513 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4514
4515
4516 cond_resched();
4517 ar->len = ar->len >> 1;
4518 }
4519 if (!ar->len) {
4520 *errp = -ENOSPC;
4521 return 0;
4522 }
4523 reserv_clstrs = ar->len;
4524 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4525 dquot_alloc_block_nofail(ar->inode,
4526 EXT4_C2B(sbi, ar->len));
4527 } else {
4528 while (ar->len &&
4529 dquot_alloc_block(ar->inode,
4530 EXT4_C2B(sbi, ar->len))) {
4531
4532 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4533 ar->len--;
4534 }
4535 }
4536 inquota = ar->len;
4537 if (ar->len == 0) {
4538 *errp = -EDQUOT;
4539 goto out;
4540 }
4541 }
4542
4543 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4544 if (!ac) {
4545 ar->len = 0;
4546 *errp = -ENOMEM;
4547 goto out;
4548 }
4549
4550 *errp = ext4_mb_initialize_context(ac, ar);
4551 if (*errp) {
4552 ar->len = 0;
4553 goto out;
4554 }
4555
4556 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4557 if (!ext4_mb_use_preallocated(ac)) {
4558 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4559 ext4_mb_normalize_request(ac, ar);
4560repeat:
4561
4562 *errp = ext4_mb_regular_allocator(ac);
4563 if (*errp)
4564 goto discard_and_exit;
4565
4566
4567
4568
4569 if (ac->ac_status == AC_STATUS_FOUND &&
4570 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4571 *errp = ext4_mb_new_preallocation(ac);
4572 if (*errp) {
4573 discard_and_exit:
4574 ext4_discard_allocated_blocks(ac);
4575 goto errout;
4576 }
4577 }
4578 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4579 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4580 if (*errp) {
4581 ext4_discard_allocated_blocks(ac);
4582 goto errout;
4583 } else {
4584 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4585 ar->len = ac->ac_b_ex.fe_len;
4586 }
4587 } else {
4588 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4589 if (freed)
4590 goto repeat;
4591 *errp = -ENOSPC;
4592 }
4593
4594errout:
4595 if (*errp) {
4596 ac->ac_b_ex.fe_len = 0;
4597 ar->len = 0;
4598 ext4_mb_show_ac(ac);
4599 }
4600 ext4_mb_release_context(ac);
4601out:
4602 if (ac)
4603 kmem_cache_free(ext4_ac_cachep, ac);
4604 if (inquota && ar->len < inquota)
4605 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4606 if (!ar->len) {
4607 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4608
4609 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4610 reserv_clstrs);
4611 }
4612
4613 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4614
4615 return block;
4616}
4617
4618
4619
4620
4621
4622
4623static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4624 struct ext4_free_data *entry,
4625 struct ext4_free_data *new_entry,
4626 struct rb_root *entry_rb_root)
4627{
4628 if ((entry->efd_tid != new_entry->efd_tid) ||
4629 (entry->efd_group != new_entry->efd_group))
4630 return;
4631 if (entry->efd_start_cluster + entry->efd_count ==
4632 new_entry->efd_start_cluster) {
4633 new_entry->efd_start_cluster = entry->efd_start_cluster;
4634 new_entry->efd_count += entry->efd_count;
4635 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4636 entry->efd_start_cluster) {
4637 new_entry->efd_count += entry->efd_count;
4638 } else
4639 return;
4640 spin_lock(&sbi->s_md_lock);
4641 list_del(&entry->efd_list);
4642 spin_unlock(&sbi->s_md_lock);
4643 rb_erase(&entry->efd_node, entry_rb_root);
4644 kmem_cache_free(ext4_free_data_cachep, entry);
4645}
4646
4647static noinline_for_stack int
4648ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4649 struct ext4_free_data *new_entry)
4650{
4651 ext4_group_t group = e4b->bd_group;
4652 ext4_grpblk_t cluster;
4653 ext4_grpblk_t clusters = new_entry->efd_count;
4654 struct ext4_free_data *entry;
4655 struct ext4_group_info *db = e4b->bd_info;
4656 struct super_block *sb = e4b->bd_sb;
4657 struct ext4_sb_info *sbi = EXT4_SB(sb);
4658 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4659 struct rb_node *parent = NULL, *new_node;
4660
4661 BUG_ON(!ext4_handle_valid(handle));
4662 BUG_ON(e4b->bd_bitmap_page == NULL);
4663 BUG_ON(e4b->bd_buddy_page == NULL);
4664
4665 new_node = &new_entry->efd_node;
4666 cluster = new_entry->efd_start_cluster;
4667
4668 if (!*n) {
4669
4670
4671
4672
4673
4674 get_page(e4b->bd_buddy_page);
4675 get_page(e4b->bd_bitmap_page);
4676 }
4677 while (*n) {
4678 parent = *n;
4679 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4680 if (cluster < entry->efd_start_cluster)
4681 n = &(*n)->rb_left;
4682 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4683 n = &(*n)->rb_right;
4684 else {
4685 ext4_grp_locked_error(sb, group, 0,
4686 ext4_group_first_block_no(sb, group) +
4687 EXT4_C2B(sbi, cluster),
4688 "Block already on to-be-freed list");
4689 return 0;
4690 }
4691 }
4692
4693 rb_link_node(new_node, parent, n);
4694 rb_insert_color(new_node, &db->bb_free_root);
4695
4696
4697 node = rb_prev(new_node);
4698 if (node) {
4699 entry = rb_entry(node, struct ext4_free_data, efd_node);
4700 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4701 &(db->bb_free_root));
4702 }
4703
4704 node = rb_next(new_node);
4705 if (node) {
4706 entry = rb_entry(node, struct ext4_free_data, efd_node);
4707 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4708 &(db->bb_free_root));
4709 }
4710
4711 spin_lock(&sbi->s_md_lock);
4712 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4713 sbi->s_mb_free_pending += clusters;
4714 spin_unlock(&sbi->s_md_lock);
4715 return 0;
4716}
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727void ext4_free_blocks(handle_t *handle, struct inode *inode,
4728 struct buffer_head *bh, ext4_fsblk_t block,
4729 unsigned long count, int flags)
4730{
4731 struct buffer_head *bitmap_bh = NULL;
4732 struct super_block *sb = inode->i_sb;
4733 struct ext4_group_desc *gdp;
4734 unsigned int overflow;
4735 ext4_grpblk_t bit;
4736 struct buffer_head *gd_bh;
4737 ext4_group_t block_group;
4738 struct ext4_sb_info *sbi;
4739 struct ext4_buddy e4b;
4740 unsigned int count_clusters;
4741 int err = 0;
4742 int ret;
4743
4744 might_sleep();
4745 if (bh) {
4746 if (block)
4747 BUG_ON(block != bh->b_blocknr);
4748 else
4749 block = bh->b_blocknr;
4750 }
4751
4752 sbi = EXT4_SB(sb);
4753 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4754 !ext4_data_block_valid(sbi, block, count)) {
4755 ext4_error(sb, "Freeing blocks not in datazone - "
4756 "block = %llu, count = %lu", block, count);
4757 goto error_return;
4758 }
4759
4760 ext4_debug("freeing block %llu\n", block);
4761 trace_ext4_free_blocks(inode, block, count, flags);
4762
4763 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4764 BUG_ON(count > 1);
4765
4766 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4767 inode, bh, block);
4768 }
4769
4770
4771
4772
4773
4774
4775
4776
4777 overflow = EXT4_PBLK_COFF(sbi, block);
4778 if (overflow) {
4779 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4780 overflow = sbi->s_cluster_ratio - overflow;
4781 block += overflow;
4782 if (count > overflow)
4783 count -= overflow;
4784 else
4785 return;
4786 } else {
4787 block -= overflow;
4788 count += overflow;
4789 }
4790 }
4791 overflow = EXT4_LBLK_COFF(sbi, count);
4792 if (overflow) {
4793 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4794 if (count > overflow)
4795 count -= overflow;
4796 else
4797 return;
4798 } else
4799 count += sbi->s_cluster_ratio - overflow;
4800 }
4801
4802 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4803 int i;
4804 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4805
4806 for (i = 0; i < count; i++) {
4807 cond_resched();
4808 if (is_metadata)
4809 bh = sb_find_get_block(inode->i_sb, block + i);
4810 ext4_forget(handle, is_metadata, inode, bh, block + i);
4811 }
4812 }
4813
4814do_more:
4815 overflow = 0;
4816 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4817
4818 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4819 ext4_get_group_info(sb, block_group))))
4820 return;
4821
4822
4823
4824
4825
4826 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4827 overflow = EXT4_C2B(sbi, bit) + count -
4828 EXT4_BLOCKS_PER_GROUP(sb);
4829 count -= overflow;
4830 }
4831 count_clusters = EXT4_NUM_B2C(sbi, count);
4832 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4833 if (IS_ERR(bitmap_bh)) {
4834 err = PTR_ERR(bitmap_bh);
4835 bitmap_bh = NULL;
4836 goto error_return;
4837 }
4838 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4839 if (!gdp) {
4840 err = -EIO;
4841 goto error_return;
4842 }
4843
4844 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4845 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4846 in_range(block, ext4_inode_table(sb, gdp),
4847 sbi->s_itb_per_group) ||
4848 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4849 sbi->s_itb_per_group)) {
4850
4851 ext4_error(sb, "Freeing blocks in system zone - "
4852 "Block = %llu, count = %lu", block, count);
4853
4854 goto error_return;
4855 }
4856
4857 BUFFER_TRACE(bitmap_bh, "getting write access");
4858 err = ext4_journal_get_write_access(handle, bitmap_bh);
4859 if (err)
4860 goto error_return;
4861
4862
4863
4864
4865
4866
4867 BUFFER_TRACE(gd_bh, "get_write_access");
4868 err = ext4_journal_get_write_access(handle, gd_bh);
4869 if (err)
4870 goto error_return;
4871#ifdef AGGRESSIVE_CHECK
4872 {
4873 int i;
4874 for (i = 0; i < count_clusters; i++)
4875 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4876 }
4877#endif
4878 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4879
4880
4881 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4882 GFP_NOFS|__GFP_NOFAIL);
4883 if (err)
4884 goto error_return;
4885
4886
4887
4888
4889
4890
4891
4892 if (ext4_handle_valid(handle) &&
4893 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4894 !ext4_should_writeback_data(inode))) {
4895 struct ext4_free_data *new_entry;
4896
4897
4898
4899
4900 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4901 GFP_NOFS|__GFP_NOFAIL);
4902 new_entry->efd_start_cluster = bit;
4903 new_entry->efd_group = block_group;
4904 new_entry->efd_count = count_clusters;
4905 new_entry->efd_tid = handle->h_transaction->t_tid;
4906
4907 ext4_lock_group(sb, block_group);
4908 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4909 ext4_mb_free_metadata(handle, &e4b, new_entry);
4910 } else {
4911
4912
4913
4914
4915 if (test_opt(sb, DISCARD)) {
4916 err = ext4_issue_discard(sb, block_group, bit, count,
4917 NULL);
4918 if (err && err != -EOPNOTSUPP)
4919 ext4_msg(sb, KERN_WARNING, "discard request in"
4920 " group:%d block:%d count:%lu failed"
4921 " with %d", block_group, bit, count,
4922 err);
4923 } else
4924 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4925
4926 ext4_lock_group(sb, block_group);
4927 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4928 mb_free_blocks(inode, &e4b, bit, count_clusters);
4929 }
4930
4931 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4932 ext4_free_group_clusters_set(sb, gdp, ret);
4933 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4934 ext4_group_desc_csum_set(sb, block_group, gdp);
4935 ext4_unlock_group(sb, block_group);
4936
4937 if (sbi->s_log_groups_per_flex) {
4938 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4939 atomic64_add(count_clusters,
4940 &sbi_array_rcu_deref(sbi, s_flex_groups,
4941 flex_group)->free_clusters);
4942 }
4943
4944
4945
4946
4947
4948
4949 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
4950 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4951 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4952 percpu_counter_add(&sbi->s_freeclusters_counter,
4953 count_clusters);
4954 }
4955
4956 ext4_mb_unload_buddy(&e4b);
4957
4958
4959 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4960 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4961
4962
4963 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4964 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4965 if (!err)
4966 err = ret;
4967
4968 if (overflow && !err) {
4969 block += count;
4970 count = overflow;
4971 put_bh(bitmap_bh);
4972 goto do_more;
4973 }
4974error_return:
4975 brelse(bitmap_bh);
4976 ext4_std_error(sb, err);
4977 return;
4978}
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4990 ext4_fsblk_t block, unsigned long count)
4991{
4992 struct buffer_head *bitmap_bh = NULL;
4993 struct buffer_head *gd_bh;
4994 ext4_group_t block_group;
4995 ext4_grpblk_t bit;
4996 unsigned int i;
4997 struct ext4_group_desc *desc;
4998 struct ext4_sb_info *sbi = EXT4_SB(sb);
4999 struct ext4_buddy e4b;
5000 int err = 0, ret, free_clusters_count;
5001 ext4_grpblk_t clusters_freed;
5002 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
5003 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
5004 unsigned long cluster_count = last_cluster - first_cluster + 1;
5005
5006 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
5007
5008 if (count == 0)
5009 return 0;
5010
5011 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5012
5013
5014
5015
5016 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
5017 ext4_warning(sb, "too many blocks added to group %u",
5018 block_group);
5019 err = -EINVAL;
5020 goto error_return;
5021 }
5022
5023 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5024 if (IS_ERR(bitmap_bh)) {
5025 err = PTR_ERR(bitmap_bh);
5026 bitmap_bh = NULL;
5027 goto error_return;
5028 }
5029
5030 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
5031 if (!desc) {
5032 err = -EIO;
5033 goto error_return;
5034 }
5035
5036 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
5037 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
5038 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
5039 in_range(block + count - 1, ext4_inode_table(sb, desc),
5040 sbi->s_itb_per_group)) {
5041 ext4_error(sb, "Adding blocks in system zones - "
5042 "Block = %llu, count = %lu",
5043 block, count);
5044 err = -EINVAL;
5045 goto error_return;
5046 }
5047
5048 BUFFER_TRACE(bitmap_bh, "getting write access");
5049 err = ext4_journal_get_write_access(handle, bitmap_bh);
5050 if (err)
5051 goto error_return;
5052
5053
5054
5055
5056
5057
5058 BUFFER_TRACE(gd_bh, "get_write_access");
5059 err = ext4_journal_get_write_access(handle, gd_bh);
5060 if (err)
5061 goto error_return;
5062
5063 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
5064 BUFFER_TRACE(bitmap_bh, "clear bit");
5065 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
5066 ext4_error(sb, "bit already cleared for block %llu",
5067 (ext4_fsblk_t)(block + i));
5068 BUFFER_TRACE(bitmap_bh, "bit already cleared");
5069 } else {
5070 clusters_freed++;
5071 }
5072 }
5073
5074 err = ext4_mb_load_buddy(sb, block_group, &e4b);
5075 if (err)
5076 goto error_return;
5077
5078
5079
5080
5081
5082
5083 ext4_lock_group(sb, block_group);
5084 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
5085 mb_free_blocks(NULL, &e4b, bit, cluster_count);
5086 free_clusters_count = clusters_freed +
5087 ext4_free_group_clusters(sb, desc);
5088 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
5089 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
5090 ext4_group_desc_csum_set(sb, block_group, desc);
5091 ext4_unlock_group(sb, block_group);
5092 percpu_counter_add(&sbi->s_freeclusters_counter,
5093 clusters_freed);
5094
5095 if (sbi->s_log_groups_per_flex) {
5096 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
5097 atomic64_add(clusters_freed,
5098 &sbi_array_rcu_deref(sbi, s_flex_groups,
5099 flex_group)->free_clusters);
5100 }
5101
5102 ext4_mb_unload_buddy(&e4b);
5103
5104
5105 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
5106 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
5107
5108
5109 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
5110 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
5111 if (!err)
5112 err = ret;
5113
5114error_return:
5115 brelse(bitmap_bh);
5116 ext4_std_error(sb, err);
5117 return err;
5118}
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132static int ext4_trim_extent(struct super_block *sb, int start, int count,
5133 ext4_group_t group, struct ext4_buddy *e4b)
5134__releases(bitlock)
5135__acquires(bitlock)
5136{
5137 struct ext4_free_extent ex;
5138 int ret = 0;
5139
5140 trace_ext4_trim_extent(sb, group, start, count);
5141
5142 assert_spin_locked(ext4_group_lock_ptr(sb, group));
5143
5144 ex.fe_start = start;
5145 ex.fe_group = group;
5146 ex.fe_len = count;
5147
5148
5149
5150
5151
5152 mb_mark_used(e4b, &ex);
5153 ext4_unlock_group(sb, group);
5154 ret = ext4_issue_discard(sb, group, start, count, NULL);
5155 ext4_lock_group(sb, group);
5156 mb_free_blocks(NULL, e4b, start, ex.fe_len);
5157 return ret;
5158}
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178static ext4_grpblk_t
5179ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
5180 ext4_grpblk_t start, ext4_grpblk_t max,
5181 ext4_grpblk_t minblocks)
5182{
5183 void *bitmap;
5184 ext4_grpblk_t next, count = 0, free_count = 0;
5185 struct ext4_buddy e4b;
5186 int ret = 0;
5187
5188 trace_ext4_trim_all_free(sb, group, start, max);
5189
5190 ret = ext4_mb_load_buddy(sb, group, &e4b);
5191 if (ret) {
5192 ext4_warning(sb, "Error %d loading buddy information for %u",
5193 ret, group);
5194 return ret;
5195 }
5196 bitmap = e4b.bd_bitmap;
5197
5198 ext4_lock_group(sb, group);
5199 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
5200 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
5201 goto out;
5202
5203 start = (e4b.bd_info->bb_first_free > start) ?
5204 e4b.bd_info->bb_first_free : start;
5205
5206 while (start <= max) {
5207 start = mb_find_next_zero_bit(bitmap, max + 1, start);
5208 if (start > max)
5209 break;
5210 next = mb_find_next_bit(bitmap, max + 1, start);
5211
5212 if ((next - start) >= minblocks) {
5213 ret = ext4_trim_extent(sb, start,
5214 next - start, group, &e4b);
5215 if (ret && ret != -EOPNOTSUPP)
5216 break;
5217 ret = 0;
5218 count += next - start;
5219 }
5220 free_count += next - start;
5221 start = next + 1;
5222
5223 if (fatal_signal_pending(current)) {
5224 count = -ERESTARTSYS;
5225 break;
5226 }
5227
5228 if (need_resched()) {
5229 ext4_unlock_group(sb, group);
5230 cond_resched();
5231 ext4_lock_group(sb, group);
5232 }
5233
5234 if ((e4b.bd_info->bb_free - free_count) < minblocks)
5235 break;
5236 }
5237
5238 if (!ret) {
5239 ret = count;
5240 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
5241 }
5242out:
5243 ext4_unlock_group(sb, group);
5244 ext4_mb_unload_buddy(&e4b);
5245
5246 ext4_debug("trimmed %d blocks in the group %d\n",
5247 count, group);
5248
5249 return ret;
5250}
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5265{
5266 struct ext4_group_info *grp;
5267 ext4_group_t group, first_group, last_group;
5268 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
5269 uint64_t start, end, minlen, trimmed = 0;
5270 ext4_fsblk_t first_data_blk =
5271 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5272 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
5273 int ret = 0;
5274
5275 start = range->start >> sb->s_blocksize_bits;
5276 end = start + (range->len >> sb->s_blocksize_bits) - 1;
5277 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
5278 range->minlen >> sb->s_blocksize_bits);
5279
5280 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
5281 start >= max_blks ||
5282 range->len < sb->s_blocksize)
5283 return -EINVAL;
5284 if (end >= max_blks)
5285 end = max_blks - 1;
5286 if (end <= first_data_blk)
5287 goto out;
5288 if (start < first_data_blk)
5289 start = first_data_blk;
5290
5291
5292 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5293 &first_group, &first_cluster);
5294 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5295 &last_group, &last_cluster);
5296
5297
5298 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5299
5300 for (group = first_group; group <= last_group; group++) {
5301 grp = ext4_get_group_info(sb, group);
5302
5303 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
5304 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
5305 if (ret)
5306 break;
5307 }
5308
5309
5310
5311
5312
5313
5314
5315 if (group == last_group)
5316 end = last_cluster;
5317
5318 if (grp->bb_free >= minlen) {
5319 cnt = ext4_trim_all_free(sb, group, first_cluster,
5320 end, minlen);
5321 if (cnt < 0) {
5322 ret = cnt;
5323 break;
5324 }
5325 trimmed += cnt;
5326 }
5327
5328
5329
5330
5331
5332 first_cluster = 0;
5333 }
5334
5335 if (!ret)
5336 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5337
5338out:
5339 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5340 return ret;
5341}
5342
5343
5344int
5345ext4_mballoc_query_range(
5346 struct super_block *sb,
5347 ext4_group_t group,
5348 ext4_grpblk_t start,
5349 ext4_grpblk_t end,
5350 ext4_mballoc_query_range_fn formatter,
5351 void *priv)
5352{
5353 void *bitmap;
5354 ext4_grpblk_t next;
5355 struct ext4_buddy e4b;
5356 int error;
5357
5358 error = ext4_mb_load_buddy(sb, group, &e4b);
5359 if (error)
5360 return error;
5361 bitmap = e4b.bd_bitmap;
5362
5363 ext4_lock_group(sb, group);
5364
5365 start = (e4b.bd_info->bb_first_free > start) ?
5366 e4b.bd_info->bb_first_free : start;
5367 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
5368 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5369
5370 while (start <= end) {
5371 start = mb_find_next_zero_bit(bitmap, end + 1, start);
5372 if (start > end)
5373 break;
5374 next = mb_find_next_bit(bitmap, end + 1, start);
5375
5376 ext4_unlock_group(sb, group);
5377 error = formatter(sb, group, start, next - start, priv);
5378 if (error)
5379 goto out_unload;
5380 ext4_lock_group(sb, group);
5381
5382 start = next + 1;
5383 }
5384
5385 ext4_unlock_group(sb, group);
5386out_unload:
5387 ext4_mb_unload_buddy(&e4b);
5388
5389 return error;
5390}
5391