/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE; see the GNU
 * General Public License for more details.
 */

/*
 * mballoc.c contains the multiblocks allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;

module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif

/*
 * MULTIBLOCK ALLOCATOR (mballoc) OVERVIEW
 *
 * The allocator maintains, for every block group, an in-memory "buddy"
 * structure derived from the on-disk block bitmap.  Both are cached in
 * the page cache of a per-filesystem virtual inode (sbi->s_buddy_cache):
 * for group N, block N*2 of that inode holds a copy of the block bitmap
 * and block N*2+1 holds the buddy bitmaps for all orders, packed back
 * to back (see sbi->s_mb_offsets[] and sbi->s_mb_maxs[]).  Buddy data
 * for a group is generated lazily, the first time the group is used.
 *
 * Free extents are tracked per group via bb_counters[order] (the number
 * of free 2^order-cluster buddies), bb_free, bb_fragments,
 * bb_first_free and bb_largest_free_order in struct ext4_group_info.
 *
 * An allocation request is turned into a goal extent and the groups are
 * scanned under increasingly relaxed criteria (cr 0..3):
 *
 *	cr 0 - power-of-two requests served directly from buddy data
 *	cr 1 - groups whose average fragment size fits the request
 *	cr 2 - groups with enough total free clusters
 *	cr 3 - any group with free space (the request may be split)
 *
 * The scan is bounded by the tunables s_mb_max_to_scan and
 * s_mb_min_to_scan; s_mb_order2_reqs controls when the power-of-two
 * path is attempted, s_mb_stream_request selects stream (goal-based)
 * allocation, and s_mb_group_prealloc sizes per-locality-group
 * preallocation.  Preallocated space is carried in
 * struct ext4_prealloc_space objects linked on each group's
 * bb_prealloc_list.
 *
 * Locking: buddy and bitmap data for a group are modified under
 * ext4_lock_group(); the pages themselves are protected by the page
 * lock while being initialized.
 */

static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/*
 * We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each unique s_blocksize_bits.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);

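/*
 * The ext4_*_bit() helpers operate on longs, so the address passed in
 * must be long-aligned.  mb_correct_addr_and_bit() rounds the address
 * down to the nearest long boundary and compensates by adjusting the
 * bit offset accordingly.
 */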
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architectures like powerpc
	 * needs an unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

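/*
 * Return the buddy bitmap for the given order.  Order 0 is the block
 * bitmap itself; higher orders live in the buddy block at the offset
 * precomputed in sbi->s_mb_offsets[].  *max is set to the number of
 * valid bits at that order.
 */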
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

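/*
 * With DOUBLE_CHECK defined, mballoc keeps a private shadow copy of each
 * group's block bitmap (bb_bitmap) and replays every free/allocate
 * against it, so double frees and double allocations are caught early.
 */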
#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* at most one of the two halves may be free */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* a free buddy at this order: both halves are set below */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif

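/*
 * Divide blocks started from @first with length @len into
 * (2^order)-sized chunks, clear the corresponding bits in the buddy
 * bitmaps and bump bb_counters[] for each chunk size used.
 */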
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

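/*
 * Cache the order of the largest free extent we have available in this
 * block group; -1 means the group has no free space at all.
 */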
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int i;
	int bits;

	grp->bb_largest_free_order = -1;

	bits = sb->s_blocksize_bits + 1;
	for (i = bits; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor "
				      "inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider the group descriptor
		 * corrupt and update bb_free using the bitmap value
		 */
		grp->bb_free = free;
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}

static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
	int count;
	int order = 1;
	void *buddy;

	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
		ext4_set_bits(buddy, 0, count);
	}
	e4b->bd_info->bb_fragments = 0;
	memset(e4b->bd_info->bb_counters, 0,
	       sizeof(*e4b->bd_info->bb_counters) *
	       (e4b->bd_sb->s_blocksize_bits + 2));

	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
			       e4b->bd_bitmap, e4b->bd_group);
}

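/*
 * The buddy information is attached to the buddy cache inode for
 * convenience.  Each group stores one block of bitmap data followed by
 * one block of buddy data:
 *
 *	{                       page                        }
 *	[ group 0 bitmap ][ group 0 buddy ][ group 1 bitmap ] ...
 *
 * So each group takes two blocks, and a page holds
 * PAGE_SIZE / blocksize blocks, i.e. blocks_per_page / 2 groups.
 *
 * Locking note: this routine takes the block group lock of all groups
 * for this page; do not hold such a lock when calling it.
 */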
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = i_blocksize(inode);
	blocks_per_page = PAGE_SIZE / blocksize;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, gfp);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * If page is uptodate then we came here after online resize
		 * which added some new uninitialized group info structs, so
		 * we must skip all initialized uptodate buddies on the page,
		 * which may be currently in use by an allocating task.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		bh[i] = ext4_read_block_bitmap_nowait(sb, group);
		if (IS_ERR(bh[i])) {
			err = PTR_ERR(bh[i]);
			bh[i] = NULL;
			goto out;
		}
		mb_debug(1, "read bitmap for group %u\n", group);
	}

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		int err2;

		if (!bh[i])
			continue;
		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
		if (!err)
			err = err2;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carries the information regarding this
		 * particular group in the format specified above
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is block of buddy */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			/* init the buddy */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block of bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}

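/*
 * Lock the buddy and bitmap pages.  This makes sure no parallel
 * init_group runs on the same pages while we hold them.  On success the
 * locked pages are returned in e4b; if buddy and bitmap share a page,
 * e4b->bd_buddy_page is left NULL.
 */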
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		put_page(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		put_page(e4b->bd_buddy_page);
	}
}

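/*
 * Locking note: this routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups covered by the page; do not hold any
 * group lock when calling it.
 */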
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(1, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache
	 * page which maps to the group from which we are already
	 * allocating.  If we are looking at the buddy cache we would
	 * have taken a reference using ext4_mb_load_buddy and that
	 * would have pinned the buddy page to the page cache.
	 * The call to ext4_mb_get_buddy_page_lock will mark the
	 * page accessed.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group;
		 * return without doing anything
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		ret = 0;
		goto err;
	}
	/* init buddy cache */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}

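/*
 * Locking note: this routine may call ext4_mb_init_group(), which takes
 * the block group lock of all groups covered by the page; do not hold
 * any group lock when calling it.
 */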
static noinline_for_stack int
ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
		       struct ext4_buddy *e4b, gfp_t gfp)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group, gfp);
		if (ret)
			return ret;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks page
	 * what we'd like to avoid in fast path ... */
	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock. If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize the same. So
			 * wait for it to initialize.
			 */
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL, gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
							 gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (page)
		put_page(page);
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}

static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
			      struct ext4_buddy *e4b)
{
	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
}

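/*
 * Find the order of the free buddy that contains @block, by walking
 * the buddy bitmaps from order 1 upward; returns 0 if the block is not
 * part of any tracked free buddy.
 */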
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	int bb_incr = 1 << (e4b->bd_blkbits - 1);
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = e4b->bd_buddy;
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		bb += bb_incr;
		bb_incr >>= 1;
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}

/* clear bits in given range
 * will return first found zero bit if any, -1 otherwise
 */
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}

void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}

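/*
 * Helpers for mb_free_blocks(): while freeing the range [first, last]
 * we walk the buddy bitmaps order by order.  At each order only the
 * borders of the range need attention: if the bit next to a border is
 * busy, the border bit itself becomes a free buddy of this order (bit
 * cleared, counter incremented, range shrunk); if the neighbour is
 * free, it is absorbed (marked busy at this order, counter
 * decremented) and the range grows so the pair merges at the next
 * order.  mb_buddy_mark_free() repeats this until the range collapses
 * or the highest order is reached.
 */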
static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	}
	else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}

static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

		/*
		 * Bits in range [first; last] are known to be set since
		 * corresponding blocks were allocated. Bits in range
		 * [first; last] will stay set because they form buddies on
		 * the upper layer. We just deal with borders if they don't
		 * align with the upper layer and then go up.
		 */
		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}

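/*
 * Free @count clusters starting at @first in the group loaded in @e4b.
 * Updates the on-page bitmap, the buddy bitmaps and all per-group
 * statistics; must be called with the group lock held.
 */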
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	/* Don't bother if the block group is corrupt. */
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* access memory sequentially: check left neighbour,
	 * clear range and then check right neighbour
	 */
	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(EXT4_SB(sb), block);
		ext4_grp_locked_error(sb, e4b->bd_group,
				      inode ? inode->i_ino : 0,
				      blocknr,
				      "freeing already freed block "
				      "(bit %u); block bitmap corrupt.",
				      block);
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   e4b->bd_info->bb_free);
		/* Mark the block group as corrupt. */
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
			&e4b->bd_info->bb_state);
		mb_regenerate_buddy(e4b);
		goto done;
	}

	/* let's maintain fragments counter */
	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	/* buddy[0] == bd_bitmap is a special case, so handle
	 * it right away and let mb_buddy_mark_free stay free of
	 * zero order checks.
	 * Check if neighbours are to be coalesced,
	 * adjust bitmap bb_counters and borders appropriately.
	 */
	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}

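/*
 * Find how much free space is available around @block, up to @needed
 * clusters, and describe it in @ex.  Returns the extent length found.
 */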
static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
		/* Should never happen! (but apparently sometimes does?!?) */
		WARN_ON(1);
		ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
			   "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
			   block, order, needed, ex->fe_group, ex->fe_start,
			   ex->fe_len, ex->fe_logical);
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
	}
	return ex->fe_len;
}

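/*
 * Mark the extent @ex as in use: set the range in the block bitmap and
 * carve it out of the buddy bitmaps, splitting larger buddies as
 * needed.  Returns (remaining length | order << 16) recorded at the
 * first buddy split, which the caller stores as ac_tail/ac_buddy.
 */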
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}

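/*
 * Must be called under group lock!
 */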
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store the actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get an ext4_mb_init_cache() call for this
	 * group until we update the bitmap. That would mean we
	 * double allocate blocks. The reference is dropped
	 * in ext4_mb_release_context().
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}

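/*
 * regular allocator, for general purposes allocation
 *
 * Stop the scan once s_mb_max_to_scan extents have been examined, and
 * accept the best extent found so far once s_mb_min_to_scan extents
 * have been seen and the goal length is satisfied.
 */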
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}

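/*
 * The routine checks whether the found extent is good enough. If it is,
 * then the extent gets marked used and the flag is set in the context
 * to stop scanning. Otherwise, the extent is compared with the
 * previously found extent and if the new one is better, it is stored
 * in the context. Later, the best found extent will be used if mballoc
 * can't find a good enough extent.
 */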
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but is
		 * smaller than the previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11; /* debug value */

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, caller may want to merge even small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

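/*
 * The routine scans buddy structures (not the bitmap!) from the given
 * order up to the maximum order, looking for a big enough chunk to
 * satisfy the request.
 */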
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}

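/*
 * The routine scans the group bitmap and measures all found free
 * extents.  To bound the scan, the number of free blocks in the group
 * (bb_free) is used as an upper limit.
 */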
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			/*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}
		ex.fe_logical = 0xDEADC0DE; /* debug value */
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}

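/*
 * This is a special case for storages like raid5: we try to find
 * stripe-aligned chunks for stripe-size-multiple requests.
 */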
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D; /* debug value */
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}

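/*
 * This could return a negative error code if something goes wrong
 * during ext4_mb_init_group(). This should not be called with
 * ext4_lock_group() held.
 */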
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	free = grp->bb_free;
	if (free == 0)
		return 0;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		return 0;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return 0;

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
		if (ret)
			return ret;
	}

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
		    (free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}

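/*
 * The main allocator loop: first try the goal extent, then walk all
 * groups under criteria cr 0 (power-of-two via buddy data) through
 * cr 3 (anything goes), loading each candidate group's buddy and
 * scanning it with the appropriate strategy.
 */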
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0, first_err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2
	 * and we can satisfy the request from a single buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the request
	 * is greater than or equal to sbi->s_mb_order2_reqs.
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req.
	 * We also support searching for power-of-two requests only for
	 * requests up to the maximum buddy size we have constructed.
	 */
	if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
		/*
		 * This should tell if fe_len is exactly a power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	/* if stream allocation is enabled, use global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-or-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group start
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			int ret = 0;
			cond_resched();
			/*
			 * Artificially restricted ngroups for non-extent
			 * files makes group > ngroups possible on first loop.
			 */
			if (group >= ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				if (!first_err)
					first_err = ret;
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far
		 */

		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take the first
			 * found block(s).
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;
	return err;
}

static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = seq->private;
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err, buddy_loaded = 0;
	struct ext4_buddy e4b;
	struct ext4_group_info *grinfo;
	unsigned char blocksize_bits = min_t(unsigned char,
					     sb->s_blocksize_bits,
					     EXT4_MAX_BLOCK_LOG_SIZE);
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
	} sg;

	group--;
	if (group == 0)
		seq_puts(seq, "#group: free frags first ["
			      " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
			      " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");

	i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);

	grinfo = ext4_get_group_info(sb, group);
	/* Load the group info in memory only if not already initialized. */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			seq_printf(seq, "#%-5u: I/O error\n", group);
			return 0;
		}
		buddy_loaded = 1;
	}

	memcpy(&sg, ext4_get_group_info(sb, group), i);

	if (buddy_loaded)
		ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start	= ext4_mb_seq_groups_start,
	.next	= ext4_mb_seq_groups_next,
	.stop	= ext4_mb_seq_groups_stop,
	.show	= ext4_mb_seq_groups_show,
};

static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
{
	struct super_block *sb = PDE_DATA(inode);
	int rc;

	rc = seq_open(file, &ext4_mb_seq_groups_ops);
	if (rc == 0) {
		struct seq_file *m = file->private_data;
		m->private = sb;
	}
	return rc;
}

const struct file_operations ext4_seq_mb_groups_fops = {
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}

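/*
 * Allocate the top-level s_group_info array for the specified number
 * of groups
 */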
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned size;
	struct ext4_group_info ***new_groupinfo;

	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
		EXT4_DESC_PER_BLOCK_BITS(sb);
	if (size <= sbi->s_group_info_size)
		return 0;

	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
	new_groupinfo = kvzalloc(size, GFP_KERNEL);
	if (!new_groupinfo) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
		return -ENOMEM;
	}
	if (sbi->s_group_info) {
		memcpy(new_groupinfo, sbi->s_group_info,
		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
		kvfree(sbi->s_group_info);
	}
	sbi->s_group_info = new_groupinfo;
	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
		   sbi->s_group_info_size);
	return 0;
}

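/* Create and initialize ext4_group_info data for the given group. */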
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_NOFS);
		if (meta_group_info == NULL) {
			ext4_msg(sb, KERN_ERR, "can't allocate mem "
				 "for a buddy group");
			goto exit_meta_group_info;
		}
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
			meta_group_info;
	}

	meta_group_info =
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
	if (meta_group_info[i] == NULL) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
		goto exit_group_info;
	}
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		meta_group_info[i]->bb_free =
			ext4_free_clusters_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_group_clusters(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */

#ifdef DOUBLE_CHECK
	{
		struct buffer_head *bh;
		meta_group_info[i]->bb_bitmap =
			kmalloc(sb->s_blocksize, GFP_NOFS);
		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
		bh = ext4_read_block_bitmap(sb, group);
		BUG_ON(IS_ERR_OR_NULL(bh));
		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
			sb->s_blocksize);
		put_bh(bh);
	}
#endif

	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
	}
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	err = ext4_mb_alloc_groupinfo(sb, ngroups);
	if (err)
		return err;

	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		ext4_msg(sb, KERN_ERR, "can't get new inode");
		goto err_freesgi;
	}
	/*
	 * To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * this will avoid confusion if it ever shows up during debugging.
	 */
	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = sbi->s_group_info_size;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kvfree(sbi->s_group_info);
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		if (ext4_groupinfo_caches[i])
			kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG
		       "EXT4-fs: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}

2597int ext4_mb_init(struct super_block *sb)
2598{
2599 struct ext4_sb_info *sbi = EXT4_SB(sb);
2600 unsigned i, j;
2601 unsigned offset, offset_incr;
2602 unsigned max;
2603 int ret;
2604
2605 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2606
2607 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2608 if (sbi->s_mb_offsets == NULL) {
2609 ret = -ENOMEM;
2610 goto out;
2611 }
2612
2613 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2614 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2615 if (sbi->s_mb_maxs == NULL) {
2616 ret = -ENOMEM;
2617 goto out;
2618 }
2619
2620 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2621 if (ret < 0)
2622 goto out;
2623
2624
2625 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2626 sbi->s_mb_offsets[0] = 0;
2627
2628 i = 1;
2629 offset = 0;
2630 offset_incr = 1 << (sb->s_blocksize_bits - 1);
2631 max = sb->s_blocksize << 2;
2632 do {
2633 sbi->s_mb_offsets[i] = offset;
2634 sbi->s_mb_maxs[i] = max;
2635 offset += offset_incr;
2636 offset_incr = offset_incr >> 1;
2637 max = max >> 1;
2638 i++;
2639 } while (i <= sb->s_blocksize_bits + 1);
2640
2641 spin_lock_init(&sbi->s_md_lock);
2642 spin_lock_init(&sbi->s_bal_lock);
2643 sbi->s_mb_free_pending = 0;
2644 INIT_LIST_HEAD(&sbi->s_freed_data_list);
2645
2646 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2647 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2648 sbi->s_mb_stats = MB_DEFAULT_STATS;
2649 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2650 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2664 sbi->s_cluster_bits, 32);
2665
2666
2667
2668
2669
2670
2671
2672
2673 if (sbi->s_stripe > 1) {
2674 sbi->s_mb_group_prealloc = roundup(
2675 sbi->s_mb_group_prealloc, sbi->s_stripe);
2676 }
2677
2678 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2679 if (sbi->s_locality_groups == NULL) {
2680 ret = -ENOMEM;
2681 goto out;
2682 }
2683 for_each_possible_cpu(i) {
2684 struct ext4_locality_group *lg;
2685 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2686 mutex_init(&lg->lg_mutex);
2687 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2688 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2689 spin_lock_init(&lg->lg_prealloc_lock);
2690 }
2691
2692
2693 ret = ext4_mb_init_backend(sb);
2694 if (ret != 0)
2695 goto out_free_locality_groups;
2696
2697 return 0;
2698
2699out_free_locality_groups:
2700 free_percpu(sbi->s_locality_groups);
2701 sbi->s_locality_groups = NULL;
2702out:
2703 kfree(sbi->s_mb_offsets);
2704 sbi->s_mb_offsets = NULL;
2705 kfree(sbi->s_mb_maxs);
2706 sbi->s_mb_maxs = NULL;
2707 return ret;
2708}
2709
2710 /* must be called with the ext4 group lock held */
2711static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2712{
2713 struct ext4_prealloc_space *pa;
2714 struct list_head *cur, *tmp;
2715 int count = 0;
2716
2717 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2718 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2719 list_del(&pa->pa_group_list);
2720 count++;
2721 kmem_cache_free(ext4_pspace_cachep, pa);
2722 }
2723 if (count)
2724 mb_debug(1, "mballoc: %u PAs left\n", count);
2725
2726}
2727
2728int ext4_mb_release(struct super_block *sb)
2729{
2730 ext4_group_t ngroups = ext4_get_groups_count(sb);
2731 ext4_group_t i;
2732 int num_meta_group_infos;
2733 struct ext4_group_info *grinfo;
2734 struct ext4_sb_info *sbi = EXT4_SB(sb);
2735 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2736
2737 if (sbi->s_group_info) {
2738 for (i = 0; i < ngroups; i++) {
2739 grinfo = ext4_get_group_info(sb, i);
2740#ifdef DOUBLE_CHECK
2741 kfree(grinfo->bb_bitmap);
2742#endif
2743 ext4_lock_group(sb, i);
2744 ext4_mb_cleanup_pa(grinfo);
2745 ext4_unlock_group(sb, i);
2746 kmem_cache_free(cachep, grinfo);
2747 }
2748 num_meta_group_infos = (ngroups +
2749 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2750 EXT4_DESC_PER_BLOCK_BITS(sb);
2751 for (i = 0; i < num_meta_group_infos; i++)
2752 kfree(sbi->s_group_info[i]);
2753 kvfree(sbi->s_group_info);
2754 }
2755 kfree(sbi->s_mb_offsets);
2756 kfree(sbi->s_mb_maxs);
2757 iput(sbi->s_buddy_cache);
2758 if (sbi->s_mb_stats) {
2759 ext4_msg(sb, KERN_INFO,
2760 "mballoc: %u blocks %u reqs (%u success)",
2761 atomic_read(&sbi->s_bal_allocated),
2762 atomic_read(&sbi->s_bal_reqs),
2763 atomic_read(&sbi->s_bal_success));
2764 ext4_msg(sb, KERN_INFO,
2765 "mballoc: %u extents scanned, %u goal hits, "
2766 "%u 2^N hits, %u breaks, %u lost",
2767 atomic_read(&sbi->s_bal_ex_scanned),
2768 atomic_read(&sbi->s_bal_goals),
2769 atomic_read(&sbi->s_bal_2orders),
2770 atomic_read(&sbi->s_bal_breaks),
2771 atomic_read(&sbi->s_mb_lost_chunks));
2772 ext4_msg(sb, KERN_INFO,
2773 "mballoc: %lu generated and it took %Lu",
2774 sbi->s_mb_buddies_generated,
2775 sbi->s_mb_generation_time);
2776 ext4_msg(sb, KERN_INFO,
2777 "mballoc: %u preallocated, %u discarded",
2778 atomic_read(&sbi->s_mb_preallocated),
2779 atomic_read(&sbi->s_mb_discarded));
2780 }
2781
2782 free_percpu(sbi->s_locality_groups);
2783
2784 return 0;
2785}
2786
2787static inline int ext4_issue_discard(struct super_block *sb,
2788 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2789 struct bio **biop)
2790{
2791 ext4_fsblk_t discard_block;
2792
2793 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2794 ext4_group_first_block_no(sb, block_group));
2795 count = EXT4_C2B(EXT4_SB(sb), count);
2796 trace_ext4_discard_blocks(sb,
2797 (unsigned long long) discard_block, count);
2798 if (biop) {
2799 return __blkdev_issue_discard(sb->s_bdev,
2800 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2801 (sector_t)count << (sb->s_blocksize_bits - 9),
2802 GFP_NOFS, 0, biop);
2803 } else
2804 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2805}
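/*
 * Editor's note on the shifts above (not from the original source):
 * the block layer works in 512-byte sectors, hence the
 * "<< (s_blocksize_bits - 9)" conversion; e.g. with 4k blocks
 * (s_blocksize_bits == 12) block 100 starts at sector 100 << 3 == 800,
 * and a count of 16 blocks becomes 128 sectors.
 */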
2806
2807static void ext4_free_data_in_buddy(struct super_block *sb,
2808 struct ext4_free_data *entry)
2809{
2810 struct ext4_buddy e4b;
2811 struct ext4_group_info *db;
2812 int err, count = 0, count2 = 0;
2813
2814 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2815 entry->efd_count, entry->efd_group, entry);
2816
2817 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2818
2819 BUG_ON(err != 0);
2820
2821 spin_lock(&EXT4_SB(sb)->s_md_lock);
2822 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2823 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2824
2825 db = e4b.bd_info;
2826
2827 count += entry->efd_count;
2828 count2++;
2829 ext4_lock_group(sb, entry->efd_group);
2830
2831 rb_erase(&entry->efd_node, &(db->bb_free_root));
2832 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2833
2834 /*
2835  * Clear the trimmed flag for the group so that the next
2836  * ext4_trim_fs can trim it.
2837  * If the volume is mounted with -o discard, online discard
2838  * is supported and the free blocks will be trimmed online.
2839  */
2840 if (!test_opt(sb, DISCARD))
2841 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2842
2843 if (!db->bb_free_root.rb_node) {
2844 /* No more items in the per group rb tree -
2845  * balance refcounts from ext4_mb_free_metadata()
2846  */
2847 put_page(e4b.bd_buddy_page);
2848 put_page(e4b.bd_bitmap_page);
2849 }
2850 ext4_unlock_group(sb, entry->efd_group);
2851 kmem_cache_free(ext4_free_data_cachep, entry);
2852 ext4_mb_unload_buddy(&e4b);
2853
2854 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2855}
2856
2857 /*
2858  * Called by the jbd2 layer once the commit has finished, so we know
2859  * we can free the blocks that were released with that commit.
2860  */
2861void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2862{
2863 struct ext4_sb_info *sbi = EXT4_SB(sb);
2864 struct ext4_free_data *entry, *tmp;
2865 struct bio *discard_bio = NULL;
2866 struct list_head freed_data_list;
2867 struct list_head *cut_pos = NULL;
2868 int err;
2869
2870 INIT_LIST_HEAD(&freed_data_list);
2871
2872 spin_lock(&sbi->s_md_lock);
2873 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2874 if (entry->efd_tid != commit_tid)
2875 break;
2876 cut_pos = &entry->efd_list;
2877 }
2878 if (cut_pos)
2879 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2880 cut_pos);
2881 spin_unlock(&sbi->s_md_lock);
2882
2883 if (test_opt(sb, DISCARD)) {
2884 list_for_each_entry(entry, &freed_data_list, efd_list) {
2885 err = ext4_issue_discard(sb, entry->efd_group,
2886 entry->efd_start_cluster,
2887 entry->efd_count,
2888 &discard_bio);
2889 if (err && err != -EOPNOTSUPP) {
2890 ext4_msg(sb, KERN_WARNING, "discard request in"
2891 " group:%d block:%d count:%d failed"
2892 " with %d", entry->efd_group,
2893 entry->efd_start_cluster,
2894 entry->efd_count, err);
2895 } else if (err == -EOPNOTSUPP)
2896 break;
2897 }
2898
2899 if (discard_bio) {
2900 submit_bio_wait(discard_bio);
2901 bio_put(discard_bio);
2902 }
2903 }
2904
2905 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2906 ext4_free_data_in_buddy(sb, entry);
2907}
2908
2909int __init ext4_init_mballoc(void)
2910{
2911 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2912 SLAB_RECLAIM_ACCOUNT);
2913 if (ext4_pspace_cachep == NULL)
2914 return -ENOMEM;
2915
2916 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2917 SLAB_RECLAIM_ACCOUNT);
2918 if (ext4_ac_cachep == NULL) {
2919 kmem_cache_destroy(ext4_pspace_cachep);
2920 return -ENOMEM;
2921 }
2922
2923 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2924 SLAB_RECLAIM_ACCOUNT);
2925 if (ext4_free_data_cachep == NULL) {
2926 kmem_cache_destroy(ext4_pspace_cachep);
2927 kmem_cache_destroy(ext4_ac_cachep);
2928 return -ENOMEM;
2929 }
2930 return 0;
2931}
2932
2933void ext4_exit_mballoc(void)
2934{
2935
2936
2937
2938
2939 rcu_barrier();
2940 kmem_cache_destroy(ext4_pspace_cachep);
2941 kmem_cache_destroy(ext4_ac_cachep);
2942 kmem_cache_destroy(ext4_free_data_cachep);
2943 ext4_groupinfo_destroy_slabs();
2944}
2945
2946
2947 /*
2948  * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
2949  * Returns 0 on success or an error code
2950  */
2951static noinline_for_stack int
2952ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2953 handle_t *handle, unsigned int reserv_clstrs)
2954{
2955 struct buffer_head *bitmap_bh = NULL;
2956 struct ext4_group_desc *gdp;
2957 struct buffer_head *gdp_bh;
2958 struct ext4_sb_info *sbi;
2959 struct super_block *sb;
2960 ext4_fsblk_t block;
2961 int err, len;
2962
2963 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2964 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2965
2966 sb = ac->ac_sb;
2967 sbi = EXT4_SB(sb);
2968
2969 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2970 if (IS_ERR(bitmap_bh)) {
2971 err = PTR_ERR(bitmap_bh);
2972 bitmap_bh = NULL;
2973 goto out_err;
2974 }
2975
2976 BUFFER_TRACE(bitmap_bh, "getting write access");
2977 err = ext4_journal_get_write_access(handle, bitmap_bh);
2978 if (err)
2979 goto out_err;
2980
2981 err = -EIO;
2982 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2983 if (!gdp)
2984 goto out_err;
2985
2986 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2987 ext4_free_group_clusters(sb, gdp));
2988
2989 BUFFER_TRACE(gdp_bh, "get_write_access");
2990 err = ext4_journal_get_write_access(handle, gdp_bh);
2991 if (err)
2992 goto out_err;
2993
2994 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2995
2996 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2997 if (!ext4_data_block_valid(sbi, block, len)) {
2998 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2999 "fs metadata", block, block+len);
3000 /* File system mounted not to panic on error.
3001  * Fix the bitmap and return EFSCORRUPTED.
3002  * We leak some of the blocks here.
3003  */
3004 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3005 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3006 ac->ac_b_ex.fe_len);
3007 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3008 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3009 if (!err)
3010 err = -EFSCORRUPTED;
3011 goto out_err;
3012 }
3013
3014 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3015#ifdef AGGRESSIVE_CHECK
3016 {
3017 int i;
3018 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3019 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3020 bitmap_bh->b_data));
3021 }
3022 }
3023#endif
3024 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3025 ac->ac_b_ex.fe_len);
3026 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3027 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3028 ext4_free_group_clusters_set(sb, gdp,
3029 ext4_free_clusters_after_init(sb,
3030 ac->ac_b_ex.fe_group, gdp));
3031 }
3032 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3033 ext4_free_group_clusters_set(sb, gdp, len);
3034 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3035 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3036
3037 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3038 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3039
3040 /* Now reduce the dirty cluster count also. Should
3041  * not go negative. */
3042 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3043 /* release all the reserved blocks if non delalloc */
3044 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3045 reserv_clstrs);
3046
3047 if (sbi->s_log_groups_per_flex) {
3048 ext4_group_t flex_group = ext4_flex_group(sbi,
3049 ac->ac_b_ex.fe_group);
3050 atomic64_sub(ac->ac_b_ex.fe_len,
3051 &sbi->s_flex_groups[flex_group].free_clusters);
3052 }
3053
3054 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3055 if (err)
3056 goto out_err;
3057 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3058
3059out_err:
3060 brelse(bitmap_bh);
3061 return err;
3062}
3063
3064 /*
3065  * Here we normalize the request for a locality group.
3066  * Group requests are normalized to s_mb_group_prealloc, which
3067  * follows s_stripe if a stripe is set via mount option.
3068  * s_mb_group_prealloc can be configured via
3069  * /sys/fs/ext4/<partition>/mb_group_prealloc
3070  *
3071  * XXX: should we try to preallocate more than the group has now?
3072  */
3073static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3074{
3075 struct super_block *sb = ac->ac_sb;
3076 struct ext4_locality_group *lg = ac->ac_lg;
3077
3078 BUG_ON(lg == NULL);
3079 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3080 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3081 current->pid, ac->ac_g_ex.fe_len);
3082}
3083
3084 /*
3085  * Normalization means making the request better in terms of
3086  * size and alignment
3087  */
3088static noinline_for_stack void
3089ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3090 struct ext4_allocation_request *ar)
3091{
3092 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3093 int bsbits, max;
3094 ext4_lblk_t end;
3095 loff_t size, start_off;
3096 loff_t orig_size __maybe_unused;
3097 ext4_lblk_t start;
3098 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3099 struct ext4_prealloc_space *pa;
3100
3101 /* normalize only data requests; metadata requests
3102  * do not need preallocation */
3103 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3104 return;
3105
3106
3107 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3108 return;
3109
3110
3111
3112 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3113 return;
3114
3115 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3116 ext4_mb_normalize_group_request(ac);
3117 return;
3118 }
3119
3120 bsbits = ac->ac_sb->s_blocksize_bits;
3121
3122
3123
3124 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3125 size = size << bsbits;
3126 if (size < i_size_read(ac->ac_inode))
3127 size = i_size_read(ac->ac_inode);
3128 orig_size = size;
3129
3130
3131 max = 2 << bsbits;
3132
3133#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3134 (req <= (size) || max <= (chunk_size))
3135
3136 /* first, try to predict the file size */
3137 /* XXX: should this table be tunable? */
3138 start_off = 0;
3139 if (size <= 16 * 1024) {
3140 size = 16 * 1024;
3141 } else if (size <= 32 * 1024) {
3142 size = 32 * 1024;
3143 } else if (size <= 64 * 1024) {
3144 size = 64 * 1024;
3145 } else if (size <= 128 * 1024) {
3146 size = 128 * 1024;
3147 } else if (size <= 256 * 1024) {
3148 size = 256 * 1024;
3149 } else if (size <= 512 * 1024) {
3150 size = 512 * 1024;
3151 } else if (size <= 1024 * 1024) {
3152 size = 1024 * 1024;
3153 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3154 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3155 (21 - bsbits)) << 21;
3156 size = 2 * 1024 * 1024;
3157 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3158 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3159 (22 - bsbits)) << 22;
3160 size = 4 * 1024 * 1024;
3161 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3162 (8<<20)>>bsbits, max, 8 * 1024)) {
3163 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3164 (23 - bsbits)) << 23;
3165 size = 8 * 1024 * 1024;
3166 } else {
3167 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3168 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3169 ac->ac_o_ex.fe_len) << bsbits;
3170 }
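/*
 * Editor's worked example of the table above (not from the original
 * source): a request that brings the file to ~100k (size == 102400)
 * falls into the "<= 128 * 1024" bucket, so the goal becomes 128k with
 * start_off left at 0; i.e. small files are rounded up to the next
 * bucket and kept aligned at logical zero, while the multi-megabyte
 * buckets also align start_off to 2MB/4MB/8MB boundaries.
 */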
3171 size = size >> bsbits;
3172 start = start_off >> bsbits;
3173
3174
3175 if (ar->pleft && start <= ar->lleft) {
3176 size -= ar->lleft + 1 - start;
3177 start = ar->lleft + 1;
3178 }
3179 if (ar->pright && start + size - 1 >= ar->lright)
3180 size -= start + size - ar->lright;
3181
3182 /*
3183  * Trim the allocation request for filesystems with artificially
3184  * small groups.
3185  */
3186 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3187 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3188
3189 end = start + size;
3190
3191
3192 rcu_read_lock();
3193 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3194 ext4_lblk_t pa_end;
3195
3196 if (pa->pa_deleted)
3197 continue;
3198 spin_lock(&pa->pa_lock);
3199 if (pa->pa_deleted) {
3200 spin_unlock(&pa->pa_lock);
3201 continue;
3202 }
3203
3204 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3205 pa->pa_len);
3206
3207
3208 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3209 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3210
3211
3212 if (pa->pa_lstart >= end || pa_end <= start) {
3213 spin_unlock(&pa->pa_lock);
3214 continue;
3215 }
3216 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3217
3218
3219 if (pa_end <= ac->ac_o_ex.fe_logical) {
3220 BUG_ON(pa_end < start);
3221 start = pa_end;
3222 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3223 BUG_ON(pa->pa_lstart > end);
3224 end = pa->pa_lstart;
3225 }
3226 spin_unlock(&pa->pa_lock);
3227 }
3228 rcu_read_unlock();
3229 size = end - start;
3230
3231
3232 rcu_read_lock();
3233 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3234 ext4_lblk_t pa_end;
3235
3236 spin_lock(&pa->pa_lock);
3237 if (pa->pa_deleted == 0) {
3238 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3239 pa->pa_len);
3240 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3241 }
3242 spin_unlock(&pa->pa_lock);
3243 }
3244 rcu_read_unlock();
3245
3246 if (start + size <= ac->ac_o_ex.fe_logical &&
3247 start > ac->ac_o_ex.fe_logical) {
3248 ext4_msg(ac->ac_sb, KERN_ERR,
3249 "start %lu, size %lu, fe_logical %lu",
3250 (unsigned long) start, (unsigned long) size,
3251 (unsigned long) ac->ac_o_ex.fe_logical);
3252 BUG();
3253 }
3254 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3255
3256 /* now prepare the goal request */
3257
3258 /* XXX: is it better to align blocks WRT to logical
3259  * placement or satisfy a big request as is? */
3260 ac->ac_g_ex.fe_logical = start;
3261 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3262
3263
3264 if (ar->pright && (ar->lright == (start + size))) {
3265
3266 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3267 &ac->ac_f_ex.fe_group,
3268 &ac->ac_f_ex.fe_start);
3269 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3270 }
3271 if (ar->pleft && (ar->lleft + 1 == start)) {
3272
3273 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3274 &ac->ac_f_ex.fe_group,
3275 &ac->ac_f_ex.fe_start);
3276 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3277 }
3278
3279 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3280 (unsigned) orig_size, (unsigned) start);
3281}
3282
3283static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3284{
3285 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3286
3287 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3288 atomic_inc(&sbi->s_bal_reqs);
3289 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3290 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3291 atomic_inc(&sbi->s_bal_success);
3292 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3293 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3294 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3295 atomic_inc(&sbi->s_bal_goals);
3296 if (ac->ac_found > sbi->s_mb_max_to_scan)
3297 atomic_inc(&sbi->s_bal_breaks);
3298 }
3299
3300 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3301 trace_ext4_mballoc_alloc(ac);
3302 else
3303 trace_ext4_mballoc_prealloc(ac);
3304}
3305
3306 /*
3307  * Called on failure; free up any blocks from the inode PA for this
3308  * context.  We don't need this for MB_GROUP_PA because we only change
3309  * pa_free in ext4_mb_release_context(), but on failure, we've already
3310  * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3311  */
3312static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3313{
3314 struct ext4_prealloc_space *pa = ac->ac_pa;
3315 struct ext4_buddy e4b;
3316 int err;
3317
3318 if (pa == NULL) {
3319 if (ac->ac_f_ex.fe_len == 0)
3320 return;
3321 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3322 if (err) {
3323 /*
3324  * This should never happen since we pin the
3325  * pages in the ext4_allocation_context so
3326  * ext4_mb_load_buddy() should never fail.
3327  */
3328 WARN(1, "mb_load_buddy failed (%d)", err);
3329 return;
3330 }
3331 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3332 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3333 ac->ac_f_ex.fe_len);
3334 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3335 ext4_mb_unload_buddy(&e4b);
3336 return;
3337 }
3338 if (pa->pa_type == MB_INODE_PA)
3339 pa->pa_free += ac->ac_b_ex.fe_len;
3340}
3341
3342 /*
3343  * use blocks preallocated to inode
3344  */
3345static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3346 struct ext4_prealloc_space *pa)
3347{
3348 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3349 ext4_fsblk_t start;
3350 ext4_fsblk_t end;
3351 int len;
3352
3353
3354 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3355 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3356 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3357 len = EXT4_NUM_B2C(sbi, end - start);
3358 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3359 &ac->ac_b_ex.fe_start);
3360 ac->ac_b_ex.fe_len = len;
3361 ac->ac_status = AC_STATUS_FOUND;
3362 ac->ac_pa = pa;
3363
3364 BUG_ON(start < pa->pa_pstart);
3365 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3366 BUG_ON(pa->pa_free < len);
3367 pa->pa_free -= len;
3368
3369 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3370}
3371
3372 /*
3373  * use blocks preallocated to locality group
3374  */
3375static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3376 struct ext4_prealloc_space *pa)
3377{
3378 unsigned int len = ac->ac_o_ex.fe_len;
3379
3380 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3381 &ac->ac_b_ex.fe_group,
3382 &ac->ac_b_ex.fe_start);
3383 ac->ac_b_ex.fe_len = len;
3384 ac->ac_status = AC_STATUS_FOUND;
3385 ac->ac_pa = pa;
3386
3387 /* We don't correct pa_pstart or pa_len here to avoid
3388  * possible races when the group is being loaded concurrently;
3389  * instead we correct the pa later, after blocks are marked
3390  * in the on-disk bitmap -- see ext4_mb_release_context().
3391  * Other CPUs are prevented from allocating from this pa by lg_mutex.
3392  */
3393 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3394}
3395
3396 /*
3397  * Return the prealloc space that has the minimal distance from the
3398  * goal block.
3399  * @cpa is the prealloc space with the currently known minimal
3400  * distance from the goal block.
3401  */
3402static struct ext4_prealloc_space *
3403ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3404 struct ext4_prealloc_space *pa,
3405 struct ext4_prealloc_space *cpa)
3406{
3407 ext4_fsblk_t cur_distance, new_distance;
3408
3409 if (cpa == NULL) {
3410 atomic_inc(&pa->pa_count);
3411 return pa;
3412 }
3413 cur_distance = abs(goal_block - cpa->pa_pstart);
3414 new_distance = abs(goal_block - pa->pa_pstart);
3415
3416 if (cur_distance <= new_distance)
3417 return cpa;
3418
3419
3420 atomic_dec(&cpa->pa_count);
3421 atomic_inc(&pa->pa_count);
3422 return pa;
3423}
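/*
 * Editor's example (illustrative, not from the original source): with
 * a goal block of 5000, a current best pa at pa_pstart 4000 (distance
 * 1000) and a candidate at 5200 (distance 200), the candidate wins:
 * its refcount is taken and the old best's reference is dropped.
 */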
3424
3425 /*
3426  * search goal blocks in preallocated space
3427  */
3428static noinline_for_stack int
3429ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3430{
3431 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3432 int order, i;
3433 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3434 struct ext4_locality_group *lg;
3435 struct ext4_prealloc_space *pa, *cpa = NULL;
3436 ext4_fsblk_t goal_block;
3437
3438
3439 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3440 return 0;
3441
3442
3443 rcu_read_lock();
3444 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3445
3446
3447
3448 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3449 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3450 EXT4_C2B(sbi, pa->pa_len)))
3451 continue;
3452
3453 /* non-extent files can't have physical blocks past 2^32 */
3454 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3455 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3456 EXT4_MAX_BLOCK_FILE_PHYS))
3457 continue;
3458
3459
3460 spin_lock(&pa->pa_lock);
3461 if (pa->pa_deleted == 0 && pa->pa_free) {
3462 atomic_inc(&pa->pa_count);
3463 ext4_mb_use_inode_pa(ac, pa);
3464 spin_unlock(&pa->pa_lock);
3465 ac->ac_criteria = 10;
3466 rcu_read_unlock();
3467 return 1;
3468 }
3469 spin_unlock(&pa->pa_lock);
3470 }
3471 rcu_read_unlock();
3472
3473
3474 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3475 return 0;
3476
3477
3478 lg = ac->ac_lg;
3479 if (lg == NULL)
3480 return 0;
3481 order = fls(ac->ac_o_ex.fe_len) - 1;
3482 if (order > PREALLOC_TB_SIZE - 1)
3483
3484 order = PREALLOC_TB_SIZE - 1;
3485
3486 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3487
3488 /* search for the prealloc space that has minimal
3489  * distance from the goal block
3490  */
3491 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3492 rcu_read_lock();
3493 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3494 pa_inode_list) {
3495 spin_lock(&pa->pa_lock);
3496 if (pa->pa_deleted == 0 &&
3497 pa->pa_free >= ac->ac_o_ex.fe_len) {
3498
3499 cpa = ext4_mb_check_group_pa(goal_block,
3500 pa, cpa);
3501 }
3502 spin_unlock(&pa->pa_lock);
3503 }
3504 rcu_read_unlock();
3505 }
3506 if (cpa) {
3507 ext4_mb_use_group_pa(ac, cpa);
3508 ac->ac_criteria = 20;
3509 return 1;
3510 }
3511 return 0;
3512}
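/*
 * Editor's note on the bucket scan above (derived, not from the
 * original source): a request for 12 clusters gives
 * order = fls(12) - 1 = 3, so the search starts at lg_prealloc_list[3]
 * and walks the larger buckets, and only picks a pa whose pa_free
 * covers the whole request.
 */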
3513
3514 /*
3515  * The function goes through all blocks freed in the group but not
3516  * yet committed and marks them used in the in-core bitmap.
3517  * The buddy must be generated from this bitmap.
3518  * Needs to be called with the ext4 group lock held.
3519  */
3520static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3521 ext4_group_t group)
3522{
3523 struct rb_node *n;
3524 struct ext4_group_info *grp;
3525 struct ext4_free_data *entry;
3526
3527 grp = ext4_get_group_info(sb, group);
3528 n = rb_first(&(grp->bb_free_root));
3529
3530 while (n) {
3531 entry = rb_entry(n, struct ext4_free_data, efd_node);
3532 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3533 n = rb_next(n);
3534 }
3535 return;
3536}
3537
3538 /*
3539  * The function goes through all preallocations in this group and
3540  * marks them used in the in-core bitmap. The buddy must be
3541  * generated from this bitmap. Needs the ext4 group lock held.
3542  */
3543static noinline_for_stack
3544void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3545 ext4_group_t group)
3546{
3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3548 struct ext4_prealloc_space *pa;
3549 struct list_head *cur;
3550 ext4_group_t groupnr;
3551 ext4_grpblk_t start;
3552 int preallocated = 0;
3553 int len;
3554
3555 /* all forms of preallocation discard first load the group,
3556  * so the only competing code is preallocation use.
3557  * we don't need any locking here.
3558  * notice that we do NOT ignore preallocations with pa_deleted set,
3559  * otherwise we could leave used blocks available for
3560  * allocation in the buddy when a concurrent ext4_mb_put_pa()
3561  * is killing that pa.
3562  */
3563 list_for_each(cur, &grp->bb_prealloc_list) {
3564 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3565 spin_lock(&pa->pa_lock);
3566 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3567 &groupnr, &start);
3568 len = pa->pa_len;
3569 spin_unlock(&pa->pa_lock);
3570 if (unlikely(len == 0))
3571 continue;
3572 BUG_ON(groupnr != group);
3573 ext4_set_bits(bitmap, start, len);
3574 preallocated += len;
3575 }
3576 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3577}
3578
3579static void ext4_mb_pa_callback(struct rcu_head *head)
3580{
3581 struct ext4_prealloc_space *pa;
3582 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3583
3584 BUG_ON(atomic_read(&pa->pa_count));
3585 BUG_ON(pa->pa_deleted == 0);
3586 kmem_cache_free(ext4_pspace_cachep, pa);
3587}
3588
3589 /*
3590  * drops a reference to the preallocated space descriptor;
3591  * the descriptor is freed via RCU once the last reference is gone
3592  */
3593static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3594 struct super_block *sb, struct ext4_prealloc_space *pa)
3595{
3596 ext4_group_t grp;
3597 ext4_fsblk_t grp_blk;
3598
3599
3600 spin_lock(&pa->pa_lock);
3601 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3602 spin_unlock(&pa->pa_lock);
3603 return;
3604 }
3605
3606 if (pa->pa_deleted == 1) {
3607 spin_unlock(&pa->pa_lock);
3608 return;
3609 }
3610
3611 pa->pa_deleted = 1;
3612 spin_unlock(&pa->pa_lock);
3613
3614 grp_blk = pa->pa_pstart;
3615
3616 /* If doing group-based preallocation, pa_pstart may be in
3617  * the next group when the pa is used up
3618  */
3619 if (pa->pa_type == MB_GROUP_PA)
3620 grp_blk--;
3621
3622 grp = ext4_get_group_number(sb, grp_blk);
3623
3624 /*
3625  * possible race:
3626  *
3627  *  P1 (buddy init)                  P2 (regular allocation)
3628  *                                   find block B in PA
3629  *  copy on-disk bitmap to buddy
3630  *                                   mark B in on-disk bitmap
3631  *                                   drop PA from group
3632  *  mark all PAs in buddy
3633  *
3634  * thus, P1 initializes the buddy with B available. to prevent this
3635  * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
3636  * against that pair
3637  */
3638 ext4_lock_group(sb, grp);
3639 list_del(&pa->pa_group_list);
3640 ext4_unlock_group(sb, grp);
3641
3642 spin_lock(pa->pa_obj_lock);
3643 list_del_rcu(&pa->pa_inode_list);
3644 spin_unlock(pa->pa_obj_lock);
3645
3646 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3647}
3648
3649 /*
3650  * creates new preallocated space for given inode
3651  */
3652static noinline_for_stack int
3653ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3654{
3655 struct super_block *sb = ac->ac_sb;
3656 struct ext4_sb_info *sbi = EXT4_SB(sb);
3657 struct ext4_prealloc_space *pa;
3658 struct ext4_group_info *grp;
3659 struct ext4_inode_info *ei;
3660
3661 /* preallocate only when found space is larger than requested */
3662 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3663 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3664 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3665
3666 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3667 if (pa == NULL)
3668 return -ENOMEM;
3669
3670 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3671 int winl;
3672 int wins;
3673 int win;
3674 int offs;
3675
3676 /* we can't allocate as much as the normalizer wants,
3677  * so the found space must get a proper lstart
3678  * to cover the original request */
3679 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3680 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3681
3682 /* we're limited by the original request in that
3683  * the logical block must be covered any way;
3684  * winl is the window we can move our chunk within */
3685 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3686
3687
3688 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3689
3690
3691 win = min(winl, wins);
3692
3693 offs = ac->ac_o_ex.fe_logical %
3694 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3695 if (offs && offs < win)
3696 win = offs;
3697
3698 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3699 EXT4_NUM_B2C(sbi, win);
3700 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3701 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3702 }
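/*
 * Editor's worked example of the window logic above (not from the
 * original source): goal at logical 0 for 64 blocks, original request
 * at logical 40 for 8 blocks, but only 16 blocks found.  Then
 * winl = 40, wins = 8, offs = 40 % 16 = 8, win = min(40, 8) = 8, so
 * fe_logical becomes 40 - 8 = 32 and the pa covers logical 32..47,
 * still containing the original 40..47.
 */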
3703
3704 /* preallocation can change ac_b_ex, thus we store
3705  * the actually allocated blocks for history */
3706 ac->ac_f_ex = ac->ac_b_ex;
3707
3708 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3709 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3710 pa->pa_len = ac->ac_b_ex.fe_len;
3711 pa->pa_free = pa->pa_len;
3712 atomic_set(&pa->pa_count, 1);
3713 spin_lock_init(&pa->pa_lock);
3714 INIT_LIST_HEAD(&pa->pa_inode_list);
3715 INIT_LIST_HEAD(&pa->pa_group_list);
3716 pa->pa_deleted = 0;
3717 pa->pa_type = MB_INODE_PA;
3718
3719 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3720 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3721 trace_ext4_mb_new_inode_pa(ac, pa);
3722
3723 ext4_mb_use_inode_pa(ac, pa);
3724 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3725
3726 ei = EXT4_I(ac->ac_inode);
3727 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3728
3729 pa->pa_obj_lock = &ei->i_prealloc_lock;
3730 pa->pa_inode = ac->ac_inode;
3731
3732 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3733 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3734 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3735
3736 spin_lock(pa->pa_obj_lock);
3737 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3738 spin_unlock(pa->pa_obj_lock);
3739
3740 return 0;
3741}
3742
3743 /*
3744  * creates new preallocated space for a locality group
3745  */
3746static noinline_for_stack int
3747ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3748{
3749 struct super_block *sb = ac->ac_sb;
3750 struct ext4_locality_group *lg;
3751 struct ext4_prealloc_space *pa;
3752 struct ext4_group_info *grp;
3753
3754
3755 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3756 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3757 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3758
3759 BUG_ON(ext4_pspace_cachep == NULL);
3760 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3761 if (pa == NULL)
3762 return -ENOMEM;
3763
3764
3765
3766 ac->ac_f_ex = ac->ac_b_ex;
3767
3768 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3769 pa->pa_lstart = pa->pa_pstart;
3770 pa->pa_len = ac->ac_b_ex.fe_len;
3771 pa->pa_free = pa->pa_len;
3772 atomic_set(&pa->pa_count, 1);
3773 spin_lock_init(&pa->pa_lock);
3774 INIT_LIST_HEAD(&pa->pa_inode_list);
3775 INIT_LIST_HEAD(&pa->pa_group_list);
3776 pa->pa_deleted = 0;
3777 pa->pa_type = MB_GROUP_PA;
3778
3779 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3780 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3781 trace_ext4_mb_new_group_pa(ac, pa);
3782
3783 ext4_mb_use_group_pa(ac, pa);
3784 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3785
3786 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3787 lg = ac->ac_lg;
3788 BUG_ON(lg == NULL);
3789
3790 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3791 pa->pa_inode = NULL;
3792
3793 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3794 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3795 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3796
3797 /*
3798  * We will later add the new pa to the right bucket
3799  * after updating pa_free in ext4_mb_release_context
3800  */
3801 return 0;
3802}
3803
3804static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3805{
3806 int err;
3807
3808 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3809 err = ext4_mb_new_group_pa(ac);
3810 else
3811 err = ext4_mb_new_inode_pa(ac);
3812 return err;
3813}
3814
3815
3816 /*
3817  * finds all unused blocks in the on-disk bitmap and frees them in
3818  * the in-core bitmap and buddy.
3819  * @pa must be unlinked from the inode and group lists, so that
3820  * nobody else can find/use it.
3821  * the caller MUST hold the group/inode locks.
3822  */
3823static noinline_for_stack int
3824ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3825 struct ext4_prealloc_space *pa)
3826{
3827 struct super_block *sb = e4b->bd_sb;
3828 struct ext4_sb_info *sbi = EXT4_SB(sb);
3829 unsigned int end;
3830 unsigned int next;
3831 ext4_group_t group;
3832 ext4_grpblk_t bit;
3833 unsigned long long grp_blk_start;
3834 int err = 0;
3835 int free = 0;
3836
3837 BUG_ON(pa->pa_deleted == 0);
3838 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3839 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3840 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3841 end = bit + pa->pa_len;
3842
3843 while (bit < end) {
3844 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3845 if (bit >= end)
3846 break;
3847 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3848 mb_debug(1, " free preallocated %u/%u in group %u\n",
3849 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3850 (unsigned) next - bit, (unsigned) group);
3851 free += next - bit;
3852
3853 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3854 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3855 EXT4_C2B(sbi, bit)),
3856 next - bit);
3857 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3858 bit = next + 1;
3859 }
3860 if (free != pa->pa_free) {
3861 ext4_msg(e4b->bd_sb, KERN_CRIT,
3862 "pa %p: logic %lu, phys. %lu, len %lu",
3863 pa, (unsigned long) pa->pa_lstart,
3864 (unsigned long) pa->pa_pstart,
3865 (unsigned long) pa->pa_len);
3866 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3867 free, pa->pa_free);
3868 /*
3869  * pa is already deleted so we use the value obtained
3870  * from the bitmap and continue.
3871  */
3872 }
3873 atomic_add(free, &sbi->s_mb_discarded);
3874
3875 return err;
3876}
3877
3878static noinline_for_stack int
3879ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3880 struct ext4_prealloc_space *pa)
3881{
3882 struct super_block *sb = e4b->bd_sb;
3883 ext4_group_t group;
3884 ext4_grpblk_t bit;
3885
3886 trace_ext4_mb_release_group_pa(sb, pa);
3887 BUG_ON(pa->pa_deleted == 0);
3888 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3889 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3890 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3891 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3892 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3893
3894 return 0;
3895}
3896
3897 /*
3898  * releases all preallocations in given group
3899  *
3900  * first, we need to decide discard policy:
3901  * - when do we discard
3902  *   1) ENOSPC
3903  * - how many do we discard
3904  *   1) how many requested
3905  */
3906static noinline_for_stack int
3907ext4_mb_discard_group_preallocations(struct super_block *sb,
3908 ext4_group_t group, int needed)
3909{
3910 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3911 struct buffer_head *bitmap_bh = NULL;
3912 struct ext4_prealloc_space *pa, *tmp;
3913 struct list_head list;
3914 struct ext4_buddy e4b;
3915 int err;
3916 int busy = 0;
3917 int free = 0;
3918
3919 mb_debug(1, "discard preallocation for group %u\n", group);
3920
3921 if (list_empty(&grp->bb_prealloc_list))
3922 return 0;
3923
3924 bitmap_bh = ext4_read_block_bitmap(sb, group);
3925 if (IS_ERR(bitmap_bh)) {
3926 err = PTR_ERR(bitmap_bh);
3927 ext4_error(sb, "Error %d reading block bitmap for %u",
3928 err, group);
3929 return 0;
3930 }
3931
3932 err = ext4_mb_load_buddy(sb, group, &e4b);
3933 if (err) {
3934 ext4_warning(sb, "Error %d loading buddy information for %u",
3935 err, group);
3936 put_bh(bitmap_bh);
3937 return 0;
3938 }
3939
3940 if (needed == 0)
3941 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3942
3943 INIT_LIST_HEAD(&list);
3944repeat:
3945 ext4_lock_group(sb, group);
3946 list_for_each_entry_safe(pa, tmp,
3947 &grp->bb_prealloc_list, pa_group_list) {
3948 spin_lock(&pa->pa_lock);
3949 if (atomic_read(&pa->pa_count)) {
3950 spin_unlock(&pa->pa_lock);
3951 busy = 1;
3952 continue;
3953 }
3954 if (pa->pa_deleted) {
3955 spin_unlock(&pa->pa_lock);
3956 continue;
3957 }
3958
3959
3960 pa->pa_deleted = 1;
3961
3962
3963 free += pa->pa_free;
3964
3965 spin_unlock(&pa->pa_lock);
3966
3967 list_del(&pa->pa_group_list);
3968 list_add(&pa->u.pa_tmp_list, &list);
3969 }
3970
3971
3972 if (free < needed && busy) {
3973 busy = 0;
3974 ext4_unlock_group(sb, group);
3975 cond_resched();
3976 goto repeat;
3977 }
3978
3979
3980 if (list_empty(&list)) {
3981 BUG_ON(free != 0);
3982 goto out;
3983 }
3984
3985
3986 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3987
3988
3989 spin_lock(pa->pa_obj_lock);
3990 list_del_rcu(&pa->pa_inode_list);
3991 spin_unlock(pa->pa_obj_lock);
3992
3993 if (pa->pa_type == MB_GROUP_PA)
3994 ext4_mb_release_group_pa(&e4b, pa);
3995 else
3996 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3997
3998 list_del(&pa->u.pa_tmp_list);
3999 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4000 }
4001
4002out:
4003 ext4_unlock_group(sb, group);
4004 ext4_mb_unload_buddy(&e4b);
4005 put_bh(bitmap_bh);
4006 return free;
4007}
4008
4009
4010 /*
4011  * releases all non-used preallocated blocks for a given inode
4012  *
4013  * It's important to discard preallocations under i_data_sem;
4014  * we don't want another block to be served from the prealloc
4015  * space while we are discarding the inode prealloc space.
4016  * FIXME!! Make sure it is valid at all the call sites
4017  */
4018void ext4_discard_preallocations(struct inode *inode)
4019{
4020 struct ext4_inode_info *ei = EXT4_I(inode);
4021 struct super_block *sb = inode->i_sb;
4022 struct buffer_head *bitmap_bh = NULL;
4023 struct ext4_prealloc_space *pa, *tmp;
4024 ext4_group_t group = 0;
4025 struct list_head list;
4026 struct ext4_buddy e4b;
4027 int err;
4028
4029 if (!S_ISREG(inode->i_mode)) {
4030
4031 return;
4032 }
4033
4034 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
4035 trace_ext4_discard_preallocations(inode);
4036
4037 INIT_LIST_HEAD(&list);
4038
4039repeat:
4040
4041 spin_lock(&ei->i_prealloc_lock);
4042 while (!list_empty(&ei->i_prealloc_list)) {
4043 pa = list_entry(ei->i_prealloc_list.next,
4044 struct ext4_prealloc_space, pa_inode_list);
4045 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4046 spin_lock(&pa->pa_lock);
4047 if (atomic_read(&pa->pa_count)) {
4048 /* this shouldn't happen often - nobody should
4049  * use preallocation while we're discarding it */
4050 spin_unlock(&pa->pa_lock);
4051 spin_unlock(&ei->i_prealloc_lock);
4052 ext4_msg(sb, KERN_ERR,
4053 "uh-oh! used pa while discarding");
4054 WARN_ON(1);
4055 schedule_timeout_uninterruptible(HZ);
4056 goto repeat;
4057
4058 }
4059 if (pa->pa_deleted == 0) {
4060 pa->pa_deleted = 1;
4061 spin_unlock(&pa->pa_lock);
4062 list_del_rcu(&pa->pa_inode_list);
4063 list_add(&pa->u.pa_tmp_list, &list);
4064 continue;
4065 }
4066
4067
4068 spin_unlock(&pa->pa_lock);
4069 spin_unlock(&ei->i_prealloc_lock);
4070
4071 /* we have to wait here because pa_deleted
4072  * doesn't mean the pa is already unlinked from
4073  * the list. as we might be called from
4074  * ->clear_inode() the inode will get freed
4075  * and a concurrent thread which is unlinking
4076  * the pa from the inode's list may access already
4077  * freed memory, bad-bad-bad */
4078
4079 /* XXX: if this happens too often, we can
4080  * add a flag to force wait only in case
4081  * of ->clear_inode(), but not in case of
4082  * regular truncate */
4083 schedule_timeout_uninterruptible(HZ);
4084 goto repeat;
4085 }
4086 spin_unlock(&ei->i_prealloc_lock);
4087
4088 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4089 BUG_ON(pa->pa_type != MB_INODE_PA);
4090 group = ext4_get_group_number(sb, pa->pa_pstart);
4091
4092 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4093 GFP_NOFS|__GFP_NOFAIL);
4094 if (err) {
4095 ext4_error(sb, "Error %d loading buddy information for %u",
4096 err, group);
4097 continue;
4098 }
4099
4100 bitmap_bh = ext4_read_block_bitmap(sb, group);
4101 if (IS_ERR(bitmap_bh)) {
4102 err = PTR_ERR(bitmap_bh);
4103 ext4_error(sb, "Error %d reading block bitmap for %u",
4104 err, group);
4105 ext4_mb_unload_buddy(&e4b);
4106 continue;
4107 }
4108
4109 ext4_lock_group(sb, group);
4110 list_del(&pa->pa_group_list);
4111 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4112 ext4_unlock_group(sb, group);
4113
4114 ext4_mb_unload_buddy(&e4b);
4115 put_bh(bitmap_bh);
4116
4117 list_del(&pa->u.pa_tmp_list);
4118 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4119 }
4120}
4121
4122#ifdef CONFIG_EXT4_DEBUG
4123static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4124{
4125 struct super_block *sb = ac->ac_sb;
4126 ext4_group_t ngroups, i;
4127
4128 if (!ext4_mballoc_debug ||
4129 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4130 return;
4131
4132 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4133 " Allocation context details:");
4134 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4135 ac->ac_status, ac->ac_flags);
4136 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4137 "goal %lu/%lu/%lu@%lu, "
4138 "best %lu/%lu/%lu@%lu cr %d",
4139 (unsigned long)ac->ac_o_ex.fe_group,
4140 (unsigned long)ac->ac_o_ex.fe_start,
4141 (unsigned long)ac->ac_o_ex.fe_len,
4142 (unsigned long)ac->ac_o_ex.fe_logical,
4143 (unsigned long)ac->ac_g_ex.fe_group,
4144 (unsigned long)ac->ac_g_ex.fe_start,
4145 (unsigned long)ac->ac_g_ex.fe_len,
4146 (unsigned long)ac->ac_g_ex.fe_logical,
4147 (unsigned long)ac->ac_b_ex.fe_group,
4148 (unsigned long)ac->ac_b_ex.fe_start,
4149 (unsigned long)ac->ac_b_ex.fe_len,
4150 (unsigned long)ac->ac_b_ex.fe_logical,
4151 (int)ac->ac_criteria);
4152 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4153 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4154 ngroups = ext4_get_groups_count(sb);
4155 for (i = 0; i < ngroups; i++) {
4156 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4157 struct ext4_prealloc_space *pa;
4158 ext4_grpblk_t start;
4159 struct list_head *cur;
4160 ext4_lock_group(sb, i);
4161 list_for_each(cur, &grp->bb_prealloc_list) {
4162 pa = list_entry(cur, struct ext4_prealloc_space,
4163 pa_group_list);
4164 spin_lock(&pa->pa_lock);
4165 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4166 NULL, &start);
4167 spin_unlock(&pa->pa_lock);
4168 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4169 start, pa->pa_len);
4170 }
4171 ext4_unlock_group(sb, i);
4172
4173 if (grp->bb_free == 0)
4174 continue;
4175 printk(KERN_ERR "%u: %d/%d \n",
4176 i, grp->bb_free, grp->bb_fragments);
4177 }
4178 printk(KERN_ERR "\n");
4179}
4180#else
4181static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4182{
4183 return;
4184}
4185#endif
4186
4187 /*
4188  * We use locality group preallocation for small files. The size of
4189  * the file is determined by the current size or the resulting size
4190  * after allocation, whichever is larger.
4191  *
4192  * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
4193  */
4194static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4195{
4196 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4197 int bsbits = ac->ac_sb->s_blocksize_bits;
4198 loff_t size, isize;
4199
4200 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4201 return;
4202
4203 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4204 return;
4205
4206 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4207 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4208 >> bsbits;
4209
4210 if ((size == isize) &&
4211 !ext4_fs_is_busy(sbi) &&
4212 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4213 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4214 return;
4215 }
4216
4217 if (sbi->s_mb_group_prealloc <= 0) {
4218 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4219 return;
4220 }
4221
4222
4223 size = max(size, isize);
4224 if (size > sbi->s_mb_stream_request) {
4225 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4226 return;
4227 }
4228
4229 BUG_ON(ac->ac_lg != NULL);
4230
4231 /* locality group prealloc space is per cpu. The reason for
4232  * having per-cpu locality groups is to reduce contention
4233  * between block requests from multiple CPUs.
4234  */
4235 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4236
4237
4238 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4239
4240
4241 mutex_lock(&ac->ac_lg->lg_mutex);
4242}
4243
4244static noinline_for_stack int
4245ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4246 struct ext4_allocation_request *ar)
4247{
4248 struct super_block *sb = ar->inode->i_sb;
4249 struct ext4_sb_info *sbi = EXT4_SB(sb);
4250 struct ext4_super_block *es = sbi->s_es;
4251 ext4_group_t group;
4252 unsigned int len;
4253 ext4_fsblk_t goal;
4254 ext4_grpblk_t block;
4255
4256
4257 len = ar->len;
4258
4259
4260 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4261 len = EXT4_CLUSTERS_PER_GROUP(sb);
4262
4263
4264 goal = ar->goal;
4265 if (goal < le32_to_cpu(es->s_first_data_block) ||
4266 goal >= ext4_blocks_count(es))
4267 goal = le32_to_cpu(es->s_first_data_block);
4268 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4269
4270
4271 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4272 ac->ac_status = AC_STATUS_CONTINUE;
4273 ac->ac_sb = sb;
4274 ac->ac_inode = ar->inode;
4275 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4276 ac->ac_o_ex.fe_group = group;
4277 ac->ac_o_ex.fe_start = block;
4278 ac->ac_o_ex.fe_len = len;
4279 ac->ac_g_ex = ac->ac_o_ex;
4280 ac->ac_flags = ar->flags;
4281
4282 /* we have to define the context: we'll work with a file or
4283  * a locality group. this is a policy, actually */
4284 ext4_mb_group_or_file(ac);
4285
4286 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4287 "left: %u/%u, right %u/%u to %swritable\n",
4288 (unsigned) ar->len, (unsigned) ar->logical,
4289 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4290 (unsigned) ar->lleft, (unsigned) ar->pleft,
4291 (unsigned) ar->lright, (unsigned) ar->pright,
4292 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4293 return 0;
4294
4295}
4296
4297static noinline_for_stack void
4298ext4_mb_discard_lg_preallocations(struct super_block *sb,
4299 struct ext4_locality_group *lg,
4300 int order, int total_entries)
4301{
4302 ext4_group_t group = 0;
4303 struct ext4_buddy e4b;
4304 struct list_head discard_list;
4305 struct ext4_prealloc_space *pa, *tmp;
4306
4307 mb_debug(1, "discard locality group preallocation\n");
4308
4309 INIT_LIST_HEAD(&discard_list);
4310
4311 spin_lock(&lg->lg_prealloc_lock);
4312 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4313 pa_inode_list) {
4314 spin_lock(&pa->pa_lock);
4315 if (atomic_read(&pa->pa_count)) {
4316 /*
4317  * This is the pa that we just used
4318  * for block allocation, so don't
4319  * free it
4320  */
4321 spin_unlock(&pa->pa_lock);
4322 continue;
4323 }
4324 if (pa->pa_deleted) {
4325 spin_unlock(&pa->pa_lock);
4326 continue;
4327 }
4328
4329 BUG_ON(pa->pa_type != MB_GROUP_PA);
4330
4331
4332 pa->pa_deleted = 1;
4333 spin_unlock(&pa->pa_lock);
4334
4335 list_del_rcu(&pa->pa_inode_list);
4336 list_add(&pa->u.pa_tmp_list, &discard_list);
4337
4338 total_entries--;
4339 if (total_entries <= 5) {
4340 /*
4341  * we want to keep only 5 entries,
4342  * allowing the list to grow to 8.
4343  * This makes sure lg_prealloc_list
4344  * keeps only a small number of entries.
4345  */
4346 break;
4347 }
4348 }
4349 spin_unlock(&lg->lg_prealloc_lock);
4350
4351 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4352 int err;
4353
4354 group = ext4_get_group_number(sb, pa->pa_pstart);
4355 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4356 GFP_NOFS|__GFP_NOFAIL);
4357 if (err) {
4358 ext4_error(sb, "Error %d loading buddy information for %u",
4359 err, group);
4360 continue;
4361 }
4362 ext4_lock_group(sb, group);
4363 list_del(&pa->pa_group_list);
4364 ext4_mb_release_group_pa(&e4b, pa);
4365 ext4_unlock_group(sb, group);
4366
4367 ext4_mb_unload_buddy(&e4b);
4368 list_del(&pa->u.pa_tmp_list);
4369 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4370 }
4371}
4372
4373
4374 /*
4375  * We have incremented pa_count, so it cannot be freed at this
4376  * point. Also we hold lg_mutex, so no parallel allocation is
4377  * possible from this lg. That means pa_free cannot be updated.
4378  *
4379  * A parallel ext4_mb_discard_group_preallocations is possible,
4380  * which can cause the lg_prealloc_list to be updated.
4381  */
4382static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4383{
4384 int order, added = 0, lg_prealloc_count = 1;
4385 struct super_block *sb = ac->ac_sb;
4386 struct ext4_locality_group *lg = ac->ac_lg;
4387 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4388
4389 order = fls(pa->pa_free) - 1;
4390 if (order > PREALLOC_TB_SIZE - 1)
4391
4392 order = PREALLOC_TB_SIZE - 1;
4393
4394 spin_lock(&lg->lg_prealloc_lock);
4395 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4396 pa_inode_list) {
4397 spin_lock(&tmp_pa->pa_lock);
4398 if (tmp_pa->pa_deleted) {
4399 spin_unlock(&tmp_pa->pa_lock);
4400 continue;
4401 }
4402 if (!added && pa->pa_free < tmp_pa->pa_free) {
4403
4404 list_add_tail_rcu(&pa->pa_inode_list,
4405 &tmp_pa->pa_inode_list);
4406 added = 1;
4407 /*
4408  * we want to count the total
4409  * number of entries in the list
4410  */
4411 }
4412 spin_unlock(&tmp_pa->pa_lock);
4413 lg_prealloc_count++;
4414 }
4415 if (!added)
4416 list_add_tail_rcu(&pa->pa_inode_list,
4417 &lg->lg_prealloc_list[order]);
4418 spin_unlock(&lg->lg_prealloc_lock);
4419
4420
4421 if (lg_prealloc_count > 8) {
4422 ext4_mb_discard_lg_preallocations(sb, lg,
4423 order, lg_prealloc_count);
4424 return;
4425 }
4426 return;
4427}
4428
4429 /*
4430  * release all resources used by the allocation routines
4431  */
4432static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4433{
4434 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4435 struct ext4_prealloc_space *pa = ac->ac_pa;
4436 if (pa) {
4437 if (pa->pa_type == MB_GROUP_PA) {
4438
4439 spin_lock(&pa->pa_lock);
4440 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4441 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4442 pa->pa_free -= ac->ac_b_ex.fe_len;
4443 pa->pa_len -= ac->ac_b_ex.fe_len;
4444 spin_unlock(&pa->pa_lock);
4445 }
4446 }
4447 if (pa) {
4448 /*
4449  * We want to add the pa to the right bucket.
4450  * Remove it from the list and while adding
4451  * make sure the list to which we are adding
4452  * doesn't grow big.
4453  */
4454 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4455 spin_lock(pa->pa_obj_lock);
4456 list_del_rcu(&pa->pa_inode_list);
4457 spin_unlock(pa->pa_obj_lock);
4458 ext4_mb_add_n_trim(ac);
4459 }
4460 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4461 }
4462 if (ac->ac_bitmap_page)
4463 put_page(ac->ac_bitmap_page);
4464 if (ac->ac_buddy_page)
4465 put_page(ac->ac_buddy_page);
4466 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4467 mutex_unlock(&ac->ac_lg->lg_mutex);
4468 ext4_mb_collect_stats(ac);
4469 return 0;
4470}
4471
4472static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4473{
4474 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4475 int ret;
4476 int freed = 0;
4477
4478 trace_ext4_mb_discard_preallocations(sb, needed);
4479 for (i = 0; i < ngroups && needed > 0; i++) {
4480 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4481 freed += ret;
4482 needed -= ret;
4483 }
4484
4485 return freed;
4486}
4487
4488 /*
4489  * Main entry point into mballoc to allocate blocks;
4490  * it tries to use preallocation first, then falls back
4491  * to the usual allocation path.
4492  */
4493ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4494 struct ext4_allocation_request *ar, int *errp)
4495{
4496 int freed;
4497 struct ext4_allocation_context *ac = NULL;
4498 struct ext4_sb_info *sbi;
4499 struct super_block *sb;
4500 ext4_fsblk_t block = 0;
4501 unsigned int inquota = 0;
4502 unsigned int reserv_clstrs = 0;
4503
4504 might_sleep();
4505 sb = ar->inode->i_sb;
4506 sbi = EXT4_SB(sb);
4507
4508 trace_ext4_request_blocks(ar);
4509
4510
4511 if (ext4_is_quota_file(ar->inode))
4512 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4513
4514 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
4515 /* Without delayed allocation we need to verify
4516  * there are enough free blocks to do the allocation
4517  * and that the allocation doesn't exceed the quota limits.
4518  */
4519 while (ar->len &&
4520 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4521
4522
4523 cond_resched();
4524 ar->len = ar->len >> 1;
4525 }
4526 if (!ar->len) {
4527 *errp = -ENOSPC;
4528 return 0;
4529 }
4530 reserv_clstrs = ar->len;
4531 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4532 dquot_alloc_block_nofail(ar->inode,
4533 EXT4_C2B(sbi, ar->len));
4534 } else {
4535 while (ar->len &&
4536 dquot_alloc_block(ar->inode,
4537 EXT4_C2B(sbi, ar->len))) {
4538
4539 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4540 ar->len--;
4541 }
4542 }
4543 inquota = ar->len;
4544 if (ar->len == 0) {
4545 *errp = -EDQUOT;
4546 goto out;
4547 }
4548 }
4549
4550 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4551 if (!ac) {
4552 ar->len = 0;
4553 *errp = -ENOMEM;
4554 goto out;
4555 }
4556
4557 *errp = ext4_mb_initialize_context(ac, ar);
4558 if (*errp) {
4559 ar->len = 0;
4560 goto out;
4561 }
4562
4563 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4564 if (!ext4_mb_use_preallocated(ac)) {
4565 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4566 ext4_mb_normalize_request(ac, ar);
4567repeat:
4568
4569 *errp = ext4_mb_regular_allocator(ac);
4570 if (*errp)
4571 goto discard_and_exit;
4572
4573 /* as we've just preallocated more space than
4574  * the user requested originally, we store the allocated
4575  * space in a special descriptor */
4576 if (ac->ac_status == AC_STATUS_FOUND &&
4577 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4578 *errp = ext4_mb_new_preallocation(ac);
4579 if (*errp) {
4580 discard_and_exit:
4581 ext4_discard_allocated_blocks(ac);
4582 goto errout;
4583 }
4584 }
4585 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4586 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4587 if (*errp) {
4588 ext4_discard_allocated_blocks(ac);
4589 goto errout;
4590 } else {
4591 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4592 ar->len = ac->ac_b_ex.fe_len;
4593 }
4594 } else {
4595 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4596 if (freed)
4597 goto repeat;
4598 *errp = -ENOSPC;
4599 }
4600
4601errout:
4602 if (*errp) {
4603 ac->ac_b_ex.fe_len = 0;
4604 ar->len = 0;
4605 ext4_mb_show_ac(ac);
4606 }
4607 ext4_mb_release_context(ac);
4608out:
4609 if (ac)
4610 kmem_cache_free(ext4_ac_cachep, ac);
4611 if (inquota && ar->len < inquota)
4612 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4613 if (!ar->len) {
4614 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4615
4616 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4617 reserv_clstrs);
4618 }
4619
4620 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4621
4622 return block;
4623}
4624
4625 /*
4626  * We can merge two free data extents only if the physical blocks
4627  * are contiguous, AND the extents were freed by the same transaction,
4628  * AND the blocks are associated with the same group.
4629  */
4630static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4631 struct ext4_free_data *entry,
4632 struct ext4_free_data *new_entry,
4633 struct rb_root *entry_rb_root)
4634{
4635 if ((entry->efd_tid != new_entry->efd_tid) ||
4636 (entry->efd_group != new_entry->efd_group))
4637 return;
4638 if (entry->efd_start_cluster + entry->efd_count ==
4639 new_entry->efd_start_cluster) {
4640 new_entry->efd_start_cluster = entry->efd_start_cluster;
4641 new_entry->efd_count += entry->efd_count;
4642 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4643 entry->efd_start_cluster) {
4644 new_entry->efd_count += entry->efd_count;
4645 } else
4646 return;
4647 spin_lock(&sbi->s_md_lock);
4648 list_del(&entry->efd_list);
4649 spin_unlock(&sbi->s_md_lock);
4650 rb_erase(&entry->efd_node, entry_rb_root);
4651 kmem_cache_free(ext4_free_data_cachep, entry);
4652}
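/*
 * Editor's example (illustrative, not from the original source): if
 * clusters 0..15 and then 16..31 of the same group are freed in the
 * same transaction, the second entry absorbs the first
 * (efd_start_cluster 0, efd_count 32) and the now-redundant node is
 * unlinked from the list and rb tree and freed.
 */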
4653
4654static noinline_for_stack int
4655ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4656 struct ext4_free_data *new_entry)
4657{
4658 ext4_group_t group = e4b->bd_group;
4659 ext4_grpblk_t cluster;
4660 ext4_grpblk_t clusters = new_entry->efd_count;
4661 struct ext4_free_data *entry;
4662 struct ext4_group_info *db = e4b->bd_info;
4663 struct super_block *sb = e4b->bd_sb;
4664 struct ext4_sb_info *sbi = EXT4_SB(sb);
4665 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4666 struct rb_node *parent = NULL, *new_node;
4667
4668 BUG_ON(!ext4_handle_valid(handle));
4669 BUG_ON(e4b->bd_bitmap_page == NULL);
4670 BUG_ON(e4b->bd_buddy_page == NULL);
4671
4672 new_node = &new_entry->efd_node;
4673 cluster = new_entry->efd_start_cluster;
4674
4675 if (!*n) {
4676 /* first free block extent. We need to
4677  * protect the buddy cache from being freed,
4678  * otherwise we'll refresh it from the
4679  * on-disk bitmap and lose not-yet-available
4680  * blocks */
4681 get_page(e4b->bd_buddy_page);
4682 get_page(e4b->bd_bitmap_page);
4683 }
4684 while (*n) {
4685 parent = *n;
4686 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4687 if (cluster < entry->efd_start_cluster)
4688 n = &(*n)->rb_left;
4689 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4690 n = &(*n)->rb_right;
4691 else {
4692 ext4_grp_locked_error(sb, group, 0,
4693 ext4_group_first_block_no(sb, group) +
4694 EXT4_C2B(sbi, cluster),
4695 "Block already on to-be-freed list");
4696 return 0;
4697 }
4698 }
4699
4700 rb_link_node(new_node, parent, n);
4701 rb_insert_color(new_node, &db->bb_free_root);
4702
4703
4704 node = rb_prev(new_node);
4705 if (node) {
4706 entry = rb_entry(node, struct ext4_free_data, efd_node);
4707 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4708 &(db->bb_free_root));
4709 }
4710
4711 node = rb_next(new_node);
4712 if (node) {
4713 entry = rb_entry(node, struct ext4_free_data, efd_node);
4714 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4715 &(db->bb_free_root));
4716 }
4717
4718 spin_lock(&sbi->s_md_lock);
4719 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4720 sbi->s_mb_free_pending += clusters;
4721 spin_unlock(&sbi->s_md_lock);
4722 return 0;
4723}
4724
4725 /**
4726  * ext4_free_blocks() -- Free given blocks and update quota
4727  * @handle: handle for this transaction
4728  * @inode: inode
4729  * @bh: optional buffer of the block to be freed
4730  * @block: starting physical block to be freed
4731  * @count: number of blocks; @flags: EXT4_FREE_BLOCKS_* flags
4732  */
4733void ext4_free_blocks(handle_t *handle, struct inode *inode,
4734 struct buffer_head *bh, ext4_fsblk_t block,
4735 unsigned long count, int flags)
4736{
4737 struct buffer_head *bitmap_bh = NULL;
4738 struct super_block *sb = inode->i_sb;
4739 struct ext4_group_desc *gdp;
4740 unsigned int overflow;
4741 ext4_grpblk_t bit;
4742 struct buffer_head *gd_bh;
4743 ext4_group_t block_group;
4744 struct ext4_sb_info *sbi;
4745 struct ext4_buddy e4b;
4746 unsigned int count_clusters;
4747 int err = 0;
4748 int ret;
4749
4750 might_sleep();
4751 if (bh) {
4752 if (block)
4753 BUG_ON(block != bh->b_blocknr);
4754 else
4755 block = bh->b_blocknr;
4756 }
4757
4758 sbi = EXT4_SB(sb);
4759 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4760 !ext4_data_block_valid(sbi, block, count)) {
4761 ext4_error(sb, "Freeing blocks not in datazone - "
4762 "block = %llu, count = %lu", block, count);
4763 goto error_return;
4764 }
4765
4766 ext4_debug("freeing block %llu\n", block);
4767 trace_ext4_free_blocks(inode, block, count, flags);
4768
4769 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4770 BUG_ON(count > 1);
4771
4772 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4773 inode, bh, block);
4774 }
4775
4776
4777 /*
4778  * If the extent to be freed does not begin on a cluster
4779  * boundary, we need to deal with partial clusters at the
4780  * beginning and end of the extent.  Normally we will free
4781  * blocks at the beginning or the end unless we are explicitly
4782  * requested to avoid doing so. */
4783 overflow = EXT4_PBLK_COFF(sbi, block);
4784 if (overflow) {
4785 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4786 overflow = sbi->s_cluster_ratio - overflow;
4787 block += overflow;
4788 if (count > overflow)
4789 count -= overflow;
4790 else
4791 return;
4792 } else {
4793 block -= overflow;
4794 count += overflow;
4795 }
4796 }
4797 overflow = EXT4_LBLK_COFF(sbi, count);
4798 if (overflow) {
4799 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4800 if (count > overflow)
4801 count -= overflow;
4802 else
4803 return;
4804 } else
4805 count += sbi->s_cluster_ratio - overflow;
4806 }
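/*
 * Editor's bigalloc example for the logic above (derived, not from the
 * original source): with a cluster ratio of 16, freeing blocks 18..47
 * (count 30) has a leading partial cluster (EXT4_PBLK_COFF == 2); by
 * default block is moved down to 16 and count grows to 32, so whole
 * clusters 1 and 2 are freed.  A trailing partial cluster would be
 * widened upward the same way.
 */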
4807
4808 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4809 int i;
4810 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4811
4812 for (i = 0; i < count; i++) {
4813 cond_resched();
4814 if (is_metadata)
4815 bh = sb_find_get_block(inode->i_sb, block + i);
4816 ext4_forget(handle, is_metadata, inode, bh, block + i);
4817 }
4818 }
4819
4820do_more:
4821 overflow = 0;
4822 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4823
4824 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4825 ext4_get_group_info(sb, block_group))))
4826 return;
4827
4828 /*
4829  * Check to see if we are freeing blocks across a group
4830  * boundary.
4831  */
4832 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4833 overflow = EXT4_C2B(sbi, bit) + count -
4834 EXT4_BLOCKS_PER_GROUP(sb);
4835 count -= overflow;
4836 }
4837 count_clusters = EXT4_NUM_B2C(sbi, count);
4838 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4839 if (IS_ERR(bitmap_bh)) {
4840 err = PTR_ERR(bitmap_bh);
4841 bitmap_bh = NULL;
4842 goto error_return;
4843 }
4844 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4845 if (!gdp) {
4846 err = -EIO;
4847 goto error_return;
4848 }
4849
4850 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4851 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4852 in_range(block, ext4_inode_table(sb, gdp),
4853 EXT4_SB(sb)->s_itb_per_group) ||
4854 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4855 EXT4_SB(sb)->s_itb_per_group)) {
4856
4857 ext4_error(sb, "Freeing blocks in system zone - "
4858 "Block = %llu, count = %lu", block, count);
4859
4860 goto error_return;
4861 }
4862
4863 BUFFER_TRACE(bitmap_bh, "getting write access");
4864 err = ext4_journal_get_write_access(handle, bitmap_bh);
4865 if (err)
4866 goto error_return;
4867
4868 /*
4869  * We are about to modify some metadata.  Call the journal APIs
4870  * to unshare ->b_data if a currently-committing transaction is
4871  * using it
4872  */
4873 BUFFER_TRACE(gd_bh, "get_write_access");
4874 err = ext4_journal_get_write_access(handle, gd_bh);
4875 if (err)
4876 goto error_return;
4877#ifdef AGGRESSIVE_CHECK
4878 {
4879 int i;
4880 for (i = 0; i < count_clusters; i++)
4881 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4882 }
4883#endif
4884 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4885
4886
4887 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4888 GFP_NOFS|__GFP_NOFAIL);
4889 if (err)
4890 goto error_return;
4891
4892 /*
4893  * We need to make sure we don't reuse the freed block until after the
4894  * transaction is committed. We make an exception if the inode is to be
4895  * written in writeback mode since writeback mode has weak data
4896  * consistency guarantees.
4897  */
4898 if (ext4_handle_valid(handle) &&
4899 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4900 !ext4_should_writeback_data(inode))) {
4901 struct ext4_free_data *new_entry;
4902
4903 /* We use __GFP_NOFAIL because ext4_free_blocks() is not
4904  * allowed to fail.
4905  */
4906 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4907 GFP_NOFS|__GFP_NOFAIL);
4908 new_entry->efd_start_cluster = bit;
4909 new_entry->efd_group = block_group;
4910 new_entry->efd_count = count_clusters;
4911 new_entry->efd_tid = handle->h_transaction->t_tid;
4912
4913 ext4_lock_group(sb, block_group);
4914 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4915 ext4_mb_free_metadata(handle, &e4b, new_entry);
4916 } else {
4917 /* need to update group_info->bb_free and the bitmap
4918  * with the group lock held; generate_buddy looks at
4919  * them with the group lock held as well
4920  */
4921 if (test_opt(sb, DISCARD)) {
4922 err = ext4_issue_discard(sb, block_group, bit, count,
4923 NULL);
4924 if (err && err != -EOPNOTSUPP)
4925 ext4_msg(sb, KERN_WARNING, "discard request in"
4926 " group:%d block:%d count:%lu failed"
4927 " with %d", block_group, bit, count,
4928 err);
4929 } else
4930 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4931
4932 ext4_lock_group(sb, block_group);
4933 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4934 mb_free_blocks(inode, &e4b, bit, count_clusters);
4935 }

	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
	ext4_free_group_clusters_set(sb, gdp, ret);
	ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, gdp);
	ext4_unlock_group(sb, block_group);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(count_clusters,
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

	if (overflow && !err) {
		block += count;
		count = overflow;
		put_bh(bitmap_bh);
		goto do_more;
	}
error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return;
}
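
/*
 * Illustrative sketch (hypothetical wrapper, not part of the original
 * file): a minimal example of how a caller might free a validated extent
 * of data blocks through ext4_free_blocks().  The wrapper name is an
 * assumption for illustration; the EXT4_FREE_BLOCKS_* flags are the real
 * ones from ext4.h.
 */
static void __maybe_unused example_free_data_extent(handle_t *handle,
						    struct inode *inode,
						    ext4_fsblk_t pblk,
						    unsigned long len)
{
	/* FORGET: have the journal revoke any buffers for these blocks;
	 * VALIDATED: the caller has already checked the extent. */
	ext4_free_blocks(handle, inode, NULL, pblk, len,
			 EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED);
}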

/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the free blocks
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */
int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned int i;
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_buddy e4b;
	int err = 0, ret, blk_free_count;
	ext4_grpblk_t blocks_freed;

	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);

	if (count == 0)
		return 0;

	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
	/*
	 * Check to see if we are adding blocks across a group
	 * boundary.
	 */
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u",
			     block_group);
		err = -EINVAL;
		goto error_return;
	}

	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto error_return;
	}

	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!desc) {
		err = -EIO;
		goto error_return;
	}

	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		err = -EINVAL;
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

	for (i = 0, blocks_freed = 0; i < count; i++) {
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
			ext4_error(sb, "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			blocks_freed++;
		}
	}

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

	/*
	 * Need to update group_info->bb_free and the bitmap
	 * with the group lock held; mb_generate_buddy() looks
	 * at them under that lock.
	 */
	ext4_lock_group(sb, block_group);
	mb_clear_bits(bitmap_bh->b_data, bit, count);
	mb_free_blocks(NULL, &e4b, bit, count);
	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
	ext4_free_group_clusters_set(sb, desc, blk_free_count);
	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, desc);
	ext4_unlock_group(sb, block_group);
	percpu_counter_add(&sbi->s_freeclusters_counter,
			   EXT4_NUM_B2C(sbi, blocks_freed));

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return err;
}
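
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): ext4_group_add_blocks() refuses spans that cross a block-group
 * boundary, so a caller holding an arbitrary span would feed it one
 * group-sized chunk at a time, roughly like this.
 */
static int __maybe_unused example_add_span(handle_t *handle,
					   struct super_block *sb,
					   ext4_fsblk_t block,
					   unsigned long count)
{
	ext4_group_t group;
	ext4_grpblk_t offset;
	unsigned long chunk;
	int err = 0;

	while (count) {
		/* Split the span at the next group boundary. */
		ext4_get_group_no_and_offset(sb, block, &group, &offset);
		chunk = min_t(unsigned long, count,
			      EXT4_BLOCKS_PER_GROUP(sb) - offset);
		err = ext4_group_add_blocks(handle, sb, block, chunk);
		if (err)
			break;
		block += chunk;
		count -= chunk;
	}
	return err;
}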

/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the group
 * @count:	number of blocks to TRIM
 * @group:	alloc. group we are working with
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group".  To assure that
 * no one will allocate those blocks, mark them as used in the buddy bitmap.
 * This must be called under the group lock.
 */
static int ext4_trim_extent(struct super_block *sb, int start, int count,
			    ext4_group_t group, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_free_extent ex;
	int ret = 0;

	trace_ext4_trim_extent(sb, group, start, count);

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	ex.fe_start = start;
	ex.fe_group = group;
	ex.fe_len = count;

	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */
	mb_mark_used(e4b, &ex);
	ext4_unlock_group(sb, group);
	ret = ext4_issue_discard(sb, group, start, count, NULL);
	ext4_lock_group(sb, group);
	mb_free_blocks(NULL, e4b, start, ex.fe_len);
	return ret;
}
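
/*
 * Note on the pattern above (editorial gloss on the code as written): the
 * extent is marked used *before* the group lock is dropped, so concurrent
 * allocators cannot hand out the range while the potentially slow,
 * sleeping discard runs unlocked; once the lock is retaken,
 * mb_free_blocks() returns the range to the free pool.
 */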

/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:		super block for file system
 * @group:	group to be trimmed
 * @start:	first group block to examine
 * @max:	last group block to examine
 * @minblocks:	minimum extent block count
 *
 * ext4_trim_all_free walks through the group's block bitmap searching for
 * free extents.  When a free extent of at least @minblocks is found, it is
 * marked as used in the group's buddy bitmap, a TRIM command is issued on
 * the extent, and the extent is freed again in the buddy bitmap.  This is
 * repeated until the whole group has been scanned.
 */
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0, free_count = 0;
	struct ext4_buddy e4b;
	int ret = 0;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_warning(sb, "Error %d loading buddy information for %u",
			     ret, group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
		goto out;

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

	while (start <= max) {
		start = mb_find_next_zero_bit(bitmap, max + 1, start);
		if (start > max)
			break;
		next = mb_find_next_bit(bitmap, max + 1, start);

		if ((next - start) >= minblocks) {
			ret = ext4_trim_extent(sb, start,
					       next - start, group, &e4b);
			if (ret && ret != -EOPNOTSUPP)
				break;
			ret = 0;
			count += next - start;
		}
		free_count += next - start;
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		if ((e4b.bd_info->bb_free - free_count) < minblocks)
			break;
	}

	if (!ret) {
		ret = count;
		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
	}
out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		   count, group);

	return ret;
}
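
/*
 * Illustrative sketch (hypothetical wrapper, not part of the original
 * file): trimming every free extent of at least 64 clusters in a single
 * group.  A negative return is an error; otherwise it is the number of
 * clusters trimmed.
 */
static ext4_grpblk_t __maybe_unused example_trim_one_group(
		struct super_block *sb, ext4_group_t group)
{
	return ext4_trim_all_free(sb, group, 0,
				  EXT4_CLUSTERS_PER_GROUP(sb) - 1, 64);
}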

/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:		superblock for filesystem
 * @range:	fstrim_range structure
 *
 * start:	first byte to trim
 * len:		number of bytes to trim from start
 * minlen:	minimum extent length in bytes
 *
 * ext4_trim_fs goes through all allocation groups containing bytes from
 * start to start+len.  For each such group, ext4_trim_all_free() is
 * invoked to trim all free space.
 */
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_group_info *grp;
	ext4_group_t group, first_group, last_group;
	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
	uint64_t start, end, minlen, trimmed = 0;
	ext4_fsblk_t first_data_blk =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
	int ret = 0;

	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
			      range->minlen >> sb->s_blocksize_bits);

	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	/* Determine first and last group to examine based on start and end */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_cluster);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
				     &last_group, &last_cluster);

	/* end now represents the last cluster to discard in this group */
	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		grp = ext4_get_group_info(sb, group);
		/* We only do this if the grp has never been initialized */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
			if (ret)
				break;
		}

		/*
		 * For all the groups except the last one, the last cluster
		 * will always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only
		 * need to change it for the last group; note that
		 * last_cluster was already computed above by
		 * ext4_get_group_no_and_offset().
		 */
		if (group == last_group)
			end = last_cluster;

		if (grp->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, group, first_cluster,
						 end, minlen);
			if (cnt < 0) {
				ret = cnt;
				break;
			}
			trimmed += cnt;
		}

		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */
		first_cluster = 0;
	}

	if (!ret)
		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

out:
	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
	return ret;
}
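
/*
 * Illustrative sketch (hypothetical wrapper, not part of the original
 * file): ext4_trim_fs() is what backs the FITRIM ioctl.  struct
 * fstrim_range and its byte-based fields are the real kernel interface;
 * the wrapper itself is an assumption for illustration.
 */
static int __maybe_unused example_trim_whole_fs(struct super_block *sb)
{
	struct fstrim_range range = {
		.start = 0,
		.len = ULLONG_MAX,	/* scan to the end of the fs */
		.minlen = 0,		/* consider every free extent */
	};
	int ret = ext4_trim_fs(sb, &range);

	/* On return, range.len holds the number of bytes trimmed. */
	return ret;
}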

int
ext4_mballoc_query_range(
	struct super_block		*sb,
	ext4_group_t			group,
	ext4_grpblk_t			start,
	ext4_grpblk_t			end,
	ext4_mballoc_query_range_fn	formatter,
	void				*priv)
{
	void *bitmap;
	ext4_grpblk_t next;
	struct ext4_buddy e4b;
	int error;

	error = ext4_mb_load_buddy(sb, group, &e4b);
	if (error)
		return error;
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;
	if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
		end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	while (start <= end) {
		start = mb_find_next_zero_bit(bitmap, end + 1, start);
		if (start > end)
			break;
		next = mb_find_next_bit(bitmap, end + 1, start);

		ext4_unlock_group(sb, group);
		error = formatter(sb, group, start, next - start, priv);
		if (error)
			goto out_unload;
		ext4_lock_group(sb, group);

		start = next + 1;
	}

	ext4_unlock_group(sb, group);
out_unload:
	ext4_mb_unload_buddy(&e4b);

	return error;
}
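
/*
 * Illustrative sketch (hypothetical callback, not part of the original
 * file): a minimal ext4_mballoc_query_range_fn that just totals the free
 * clusters it is shown; the real user of this hook is the GETFSMAP code.
 * Note that the walk above calls the formatter with the group lock
 * dropped, so the callback is allowed to sleep.
 */
static int __maybe_unused example_count_free_fn(struct super_block *sb,
						ext4_group_t group,
						ext4_grpblk_t start,
						ext4_grpblk_t len,
						void *priv)
{
	ext4_grpblk_t *total = priv;	/* caller-supplied accumulator */

	*total += len;
	return 0;			/* non-zero would abort the walk */
}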