/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  You should have
 * received a copy of the GNU General Public License along with this
 * program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * mballoc.c contains the multiblocks allocation routines
 */
#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <trace/events/ext4.h>

#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;

module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif

/*
 * Overview of the multi-block allocator
 *
 * The multi-block allocator serves requests for one or more clusters at
 * a time.  For every block group it keeps two pieces of in-core metadata
 * in the "buddy cache", a page cache attached to a private inode
 * (sbi->s_buddy_cache):
 *
 *  - a copy of the on-disk block bitmap, and
 *  - a "buddy" bitmap recording free extents at every power-of-two
 *    order up to 2^(blocksize bits + 1) blocks.
 *
 * Groups are stored pairwise in that inode: group N keeps its bitmap in
 * block N * 2 and its buddy data in block N * 2 + 1, so one page covers
 * PAGE_CACHE_SIZE / blocksize / 2 groups (see ext4_mb_init_cache()).
 *
 * An allocation request is described by struct ext4_allocation_context:
 * ac_o_ex is the original request, ac_g_ex the normalized goal and
 * ac_b_ex the best extent found so far.  The regular allocator scans the
 * groups under four successively weaker criteria (cr 0..3): cr 0 looks
 * only for whole power-of-two chunks via the buddy bitmaps, cr 1 and 2
 * apply cheap per-group heuristics (average fragment size and total free
 * space), and cr 3 takes anything available.
 *
 * To limit fragmentation, allocations are served from preallocated
 * spaces (struct ext4_prealloc_space, tracked on each group's
 * bb_prealloc_list).  Stream (large) allocations also share a
 * per-filesystem goal (s_mb_last_group/s_mb_last_start) so that they are
 * packed together sequentially.
 *
 * The main tunables, exported under /sys/fs/ext4/<partition>/, are
 * mb_min_to_scan and mb_max_to_scan (bounds on the number of extents
 * examined per request), mb_order2_req (minimum request size for buddy
 * scanning), mb_stream_req (threshold for stream allocation) and
 * mb_group_prealloc (locality-group preallocation size).
 */

static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/*
 * We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each of the possible block sizes.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_free_data_callback(struct super_block *sb,
				struct ext4_journal_cb_entry *jce, int rc);

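/*
 * ext4's bit manipulation helpers (ext4_set_bit() and friends) expect a
 * long-aligned address.  Fold any misalignment of @addr into *@bit so
 * that callers may pass an arbitrary byte address.
 */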
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architecture like powerpc
	 * needs unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 1 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
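/*
 * Divide blocks started from @first with length @len into
 * smaller chunks with power of 2 blocks.
 * Clear the bits in bitmap which the blocks of the chunk(s) covered,
 * then increase bb_counters[] for corresponding chunk size.
 */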
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}
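/*
 * Cache the order of the largest free extent we have available in this
 * block group.
 */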
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int i;
	int bits;

	grp->bb_largest_free_order = -1; /* uninit */

	bits = sb->s_blocksize_bits + 1;
	for (i = bits; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor "
				      "inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider group descriptor
		 * corrupt and update bb_free using bitmap value
		 */
		grp->bb_free = free;
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}

static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
	int count;
	int order = 1;
	void *buddy;

	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
		ext4_set_bits(buddy, 0, count);
	}
	e4b->bd_info->bb_fragments = 0;
	memset(e4b->bd_info->bb_counters, 0,
		sizeof(*e4b->bd_info->bb_counters) *
		(e4b->bd_sb->s_blocksize_bits + 2));

	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
		e4b->bd_bitmap, e4b->bd_group);
}
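/*
 * The buddy information is attached to the buddy cache inode for
 * convenience.  The information regarding each group is loaded via
 * ext4_mb_load_buddy.  The information involves block bitmap and buddy
 * information, stored in the inode as:
 *
 * {                        page                        }
 * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1 buddy] ...
 *
 * one block each for bitmap and buddy information.  So for each group we
 * take up 2 blocks.  A page can contain blocks_per_page
 * (PAGE_CACHE_SIZE / blocksize) blocks, so it can hold information for
 * groups_per_page, which is blocks_per_page / 2.
 *
 * Locking note:  This routine takes the block group lock of all groups
 * for this page; do not hold this lock when calling this routine!
 */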
static int ext4_mb_init_cache(struct page *page, char *incore)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_CACHE_SIZE / blocksize;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, GFP_NOFS);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * If page is uptodate then we came here after online resize
		 * which added some new uninitialized group info structs, so
		 * we must skip all initialized uptodate buddies on the page,
		 * which may be currently in use by an allocating task.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
			err = -ENOMEM;
			goto out;
		}
		mb_debug(1, "read bitmap for group %u\n", group);
	}

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i]))
			err = -EIO;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carry information regarding this
		 * particular group in the format specified
		 * above
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is block of buddy */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			/* init the buddy */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block of bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			/* see comments in ext4_mb_put_pa() */
			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
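/*
 * Lock the buddy and bitmap pages.  This makes sure another parallel
 * init_group on the same buddy page doesn't happen while holding the
 * buddy page lock.  Return the locked buddy and bitmap pages on the e4b
 * struct.  If buddy and bitmap are on the same page, e4b->bd_buddy_page
 * is NULL and the return value is 0.
 */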
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		page_cache_release(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		page_cache_release(e4b->bd_buddy_page);
	}
}
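/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock
 * when calling this routine!
 */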
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(1, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache
	 * page which map to the group from which we are already
	 * allocating. If we are looking at the buddy cache we would
	 * have taken a reference using ext4_mb_load_buddy and that
	 * would have pinned buddy page to page cache.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group
		 * return without doing anything
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		ret = 0;
		goto err;
	}
	/* init buddy cache */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}
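/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock
 * when calling this routine!
 */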
static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group);
		if (ret)
			return ret;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks page
	 * what we'd like to avoid in fast path ... */
	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock. If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize the same. So
			 * wait for it to initialize.
			 */
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (page)
		page_cache_release(page);
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
}

static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	int bb_incr = 1 << (e4b->bd_blkbits - 1);
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = e4b->bd_buddy;
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		bb += bb_incr;
		bb_incr >>= 1;
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}
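/*
 * Clear bits in the given range and return the first zero bit found (or
 * -1 if the whole range was set), so callers can detect double frees.
 */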
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}

void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}
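/*
 * Helper for mb_buddy_mark_free(): handle one unaligned border of a
 * freed range in the order-N buddy bitmap (where a set bit means "not a
 * free buddy at this order").  If the neighbour on @side is not free,
 * the border chunk becomes a free buddy of this order (clear its bit,
 * shrink the range, return +1 for bb_counters[]); if the neighbour is
 * free, the pair merges upward (set the neighbour's bit, extend the
 * range over it, return -1).
 */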
static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	}
	else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}

static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

		/*
		 * The chunks [first; last] at this order are now entirely
		 * free.  If a border is unaligned (its buddy lies outside
		 * the range), mb_buddy_adjust_border() either records the
		 * border chunk as a free buddy of this order or merges it
		 * with an already-free neighbour, moving the border and
		 * returning the +-1 change to bb_counters[order].
		 *
		 * Everything left between the (now aligned) borders
		 * consists of whole buddy pairs, which keep merging
		 * upward; the range is cleared wholesale once it can no
		 * longer be promoted to a higher order.
		 */
		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	/* Don't bother if the block group is corrupt. */
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* access memory sequentially: check left neighbour,
	 * clear range and then check right neighbour
	 */
	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(EXT4_SB(sb), block);
		ext4_grp_locked_error(sb, e4b->bd_group,
				      inode ? inode->i_ino : 0,
				      blocknr,
				      "freeing already freed block "
				      "(bit %u); block bitmap corrupt.",
				      block);
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   e4b->bd_info->bb_free);
		/* Mark the block group as corrupt. */
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
			&e4b->bd_info->bb_state);
		mb_regenerate_buddy(e4b);
		goto done;
	}

	/* let's maintain fragments counter */
	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	/* buddy[0] == bd_bitmap is a special case, so handle
	 * it right away and let mb_buddy_mark_free stay free of
	 * zero order checks.
	 * Check if neighbours are to be coalesced,
	 * adjust bitmap bb_counters and borders appropriately.
	 */
	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}

static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}

static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
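/*
 * Must be called under group lock!
 */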
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get a ext4_mb_init_cache_call for this
	 * group until we update the bitmap. That would mean we
	 * double allocate blocks. The reference is dropped
	 * in ext4_mb_release_context
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}
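/*
 * Decide whether scanning should stop: give up after s_mb_max_to_scan
 * extents, and book the best extent found once s_mb_min_to_scan extents
 * have been examined (or the group scan has finished).
 */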
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
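/*
 * The routine checks whether the found extent is good enough.  If it is,
 * the extent gets booked in the allocation context; otherwise it is kept
 * as a candidate or discarded.  In order of preference: an exact-length
 * match is taken immediately; while the goal is unsatisfied, a larger
 * extent beats the current best; once the goal is satisfied, a tighter
 * fit beats the current best.
 */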
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but is
		 * smaller than the previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11; /* debug value */

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, caller may want to merge even small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
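/*
 * Scan only the buddy bitmaps, starting at the requested order
 * (criteria 0), and take the first free chunk found.  bb_counters[]
 * already says which orders have free chunks, so the block bitmap
 * itself is never examined here.
 */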
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}
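/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, caller must pass number of
 * free blocks in the group, so the routine can know upper limit.
 */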
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			/*
			 * IF we have corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}
		ex.fe_logical = 0xDEADC0DE; /* debug value */
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
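/*
 * This is a special case for storage like raid5: we try to find
 * stripe-aligned chunks for requests that are multiples of the stripe
 * size.
 */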
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D; /* debug value */
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
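/*
 * This could return negative error code if something goes wrong
 * during ext4_mb_init_group().  This should not be called with
 * ext4_lock_group() held.
 */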
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	free = grp->bb_free;
	if (free == 0)
		return 0;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		return 0;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return 0;

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group);
		if (ret)
			return ret;
	}

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
		    (free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}

static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0, first_err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2
	 * if ac_2order is set we also set criteria to 0 so that we
	 * try exact allocation using buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the
	 * request is greater than equal to the sbi_s_mb_order2_reqs
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		/*
		 * This should tell if fe_len is exactly power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = array_index_nospec(i - 1,
							   sb->s_blocksize_bits + 2);
	}

	/* if stream allocation is enabled, use global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group start
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			int ret = 0;
			cond_resched();
			/*
			 * Artificially restricted ngroups for non-extent
			 * files makes group > ngroups possible on first loop.
			 */
			if (group >= ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				if (!first_err)
					first_err = ret;
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far
		 */

		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take first
			 * found block(s)
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;
	return err;
}

static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = seq->private;
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err, buddy_loaded = 0;
	struct ext4_buddy e4b;
	struct ext4_group_info *grinfo;
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
	} sg;

	group--;
	if (group == 0)
		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
				"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
				  "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
			   "group", "free", "frags", "first",
			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");

	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);
	grinfo = ext4_get_group_info(sb, group);
	/* Load the group info in memory only if not already loaded. */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			seq_printf(seq, "#%-5u: I/O error\n", group);
			return 0;
		}
		buddy_loaded = 1;
	}

	memcpy(&sg, ext4_get_group_info(sb, group), i);

	if (buddy_loaded)
		ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};

static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
{
	struct super_block *sb = PDE_DATA(inode);
	int rc;

	rc = seq_open(file, &ext4_mb_seq_groups_ops);
	if (rc == 0) {
		struct seq_file *m = file->private_data;
		m->private = sb;
	}
	return rc;
}

static const struct file_operations ext4_mb_seq_groups_fops = {
	.owner		= THIS_MODULE,
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}
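/*
 * Allocate the top-level s_group_info array for the specified number of
 * groups
 */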
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned size;
	struct ext4_group_info ***new_groupinfo;

	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
		EXT4_DESC_PER_BLOCK_BITS(sb);
	if (size <= sbi->s_group_info_size)
		return 0;

	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
	new_groupinfo = kvzalloc(size, GFP_KERNEL);
	if (!new_groupinfo) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
		return -ENOMEM;
	}
	if (sbi->s_group_info) {
		memcpy(new_groupinfo, sbi->s_group_info,
		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
		ext4_kvfree(sbi->s_group_info);
	}
	sbi->s_group_info = new_groupinfo;
	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
		   sbi->s_group_info_size);
	return 0;
}
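/* Create and initialize ext4_group_info data for the given group. */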
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_NOFS);
		if (meta_group_info == NULL) {
			ext4_msg(sb, KERN_ERR, "can't allocate mem "
				 "for a buddy group");
			goto exit_meta_group_info;
		}
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
			meta_group_info;
	}

	meta_group_info =
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
	if (meta_group_info[i] == NULL) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
		goto exit_group_info;
	}
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (ext4_has_group_desc_csum(sb) &&
	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		meta_group_info[i]->bb_free =
			ext4_free_clusters_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_group_clusters(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */

#ifdef DOUBLE_CHECK
	{
		struct buffer_head *bh;
		meta_group_info[i]->bb_bitmap =
			kmalloc(sb->s_blocksize, GFP_NOFS);
		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
		bh = ext4_read_block_bitmap(sb, group);
		BUG_ON(bh == NULL);
		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
			sb->s_blocksize);
		put_bh(bh);
	}
#endif

	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
	}
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	err = ext4_mb_alloc_groupinfo(sb, ngroups);
	if (err)
		return err;

	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		ext4_msg(sb, KERN_ERR, "can't get new inode");
		goto err_freesgi;
	}
	/* To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * this should avoid confusion if it ever shows up during debugging. */
	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = sbi->s_group_info_size;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	ext4_kvfree(sbi->s_group_info);
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		if (ext4_groupinfo_caches[i])
			kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG
		       "EXT4-fs: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}

int ext4_mb_init(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset, offset_incr;
	unsigned max;
	int ret;

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	offset_incr = 1 << (sb->s_blocksize_bits - 1);
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += offset_incr;
		offset_incr = offset_incr >> 1;
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	/*
	 * The default group preallocation is 512, which for 4k block
	 * sizes translates to 2 megabytes.  However for bigalloc file
	 * systems, this is probably too big (i.e, if the cluster size
	 * is 1 megabyte, then group preallocation size becomes half a
	 * gigabyte!).  So scale the default by the cluster size, with a
	 * floor of 32 clusters enforced by the max() below.
	 */
	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
				       sbi->s_cluster_bits, 32);
	/*
	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
	 * to the lowest multiple of s_stripe which is bigger than
	 * the s_mb_group_prealloc as determined above.  We want the
	 * preallocation size to be an exact multiple of the RAID stripe
	 * size so that preallocations don't fragment the stripes.
	 */
	if (sbi->s_stripe > 1) {
		sbi->s_mb_group_prealloc = roundup(
			sbi->s_mb_group_prealloc, sbi->s_stripe);
	}

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0)
		goto out_free_locality_groups;

	if (sbi->s_proc)
		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
				 &ext4_mb_seq_groups_fops, sb);

	return 0;

out_free_locality_groups:
	free_percpu(sbi->s_locality_groups);
	sbi->s_locality_groups = NULL;
out:
	kfree(sbi->s_mb_offsets);
	sbi->s_mb_offsets = NULL;
	kfree(sbi->s_mb_maxs);
	sbi->s_mb_maxs = NULL;
	return ret;
}
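/* needs to be called with the ext4 group lock held */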
static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
{
	struct ext4_prealloc_space *pa;
	struct list_head *cur, *tmp;
	int count = 0;

	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		list_del(&pa->pa_group_list);
		count++;
		kmem_cache_free(ext4_pspace_cachep, pa);
	}
	if (count)
		mb_debug(1, "mballoc: %u PAs left\n", count);
}
2705
2706int ext4_mb_release(struct super_block *sb)
2707{
2708 ext4_group_t ngroups = ext4_get_groups_count(sb);
2709 ext4_group_t i;
2710 int num_meta_group_infos;
2711 struct ext4_group_info *grinfo;
2712 struct ext4_sb_info *sbi = EXT4_SB(sb);
2713 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2714
2715 if (sbi->s_proc)
2716 remove_proc_entry("mb_groups", sbi->s_proc);
2717
2718 if (sbi->s_group_info) {
2719 for (i = 0; i < ngroups; i++) {
2720 grinfo = ext4_get_group_info(sb, i);
2721#ifdef DOUBLE_CHECK
2722 kfree(grinfo->bb_bitmap);
2723#endif
2724 ext4_lock_group(sb, i);
2725 ext4_mb_cleanup_pa(grinfo);
2726 ext4_unlock_group(sb, i);
2727 kmem_cache_free(cachep, grinfo);
2728 }
2729 num_meta_group_infos = (ngroups +
2730 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2731 EXT4_DESC_PER_BLOCK_BITS(sb);
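  /*
   * Worked example (an illustration, assuming 4k blocks and
   * 32-byte group descriptors, i.e. EXT4_DESC_PER_BLOCK == 128):
   * a filesystem with 1000 groups needs (1000 + 127) >> 7 == 8
   * meta group info blocks here.
   */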
2732 for (i = 0; i < num_meta_group_infos; i++)
2733 kfree(sbi->s_group_info[i]);
2734 ext4_kvfree(sbi->s_group_info);
2735 }
2736 kfree(sbi->s_mb_offsets);
2737 kfree(sbi->s_mb_maxs);
2738 iput(sbi->s_buddy_cache);
2739 if (sbi->s_mb_stats) {
2740 ext4_msg(sb, KERN_INFO,
2741 "mballoc: %u blocks %u reqs (%u success)",
2742 atomic_read(&sbi->s_bal_allocated),
2743 atomic_read(&sbi->s_bal_reqs),
2744 atomic_read(&sbi->s_bal_success));
2745 ext4_msg(sb, KERN_INFO,
2746 "mballoc: %u extents scanned, %u goal hits, "
2747 "%u 2^N hits, %u breaks, %u lost",
2748 atomic_read(&sbi->s_bal_ex_scanned),
2749 atomic_read(&sbi->s_bal_goals),
2750 atomic_read(&sbi->s_bal_2orders),
2751 atomic_read(&sbi->s_bal_breaks),
2752 atomic_read(&sbi->s_mb_lost_chunks));
2753 ext4_msg(sb, KERN_INFO,
2754 "mballoc: %lu generated and it took %Lu",
2755 sbi->s_mb_buddies_generated,
2756 sbi->s_mb_generation_time);
2757 ext4_msg(sb, KERN_INFO,
2758 "mballoc: %u preallocated, %u discarded",
2759 atomic_read(&sbi->s_mb_preallocated),
2760 atomic_read(&sbi->s_mb_discarded));
2761 }
2762
2763 free_percpu(sbi->s_locality_groups);
2764
2765 return 0;
2766}
2767
2768static inline int ext4_issue_discard(struct super_block *sb,
2769 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2770{
2771 ext4_fsblk_t discard_block;
2772
2773 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2774 ext4_group_first_block_no(sb, block_group));
2775 count = EXT4_C2B(EXT4_SB(sb), count);
2776 trace_ext4_discard_blocks(sb,
2777 (unsigned long long) discard_block, count);
2778 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2779}
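
/*
 * An illustration of the cluster-to-block conversion performed in
 * ext4_issue_discard() above (assumed values, with a bigalloc cluster
 * ratio of 16): for cluster 10 of a group whose first block is 32768,
 * the discard starts at block 32768 + 10 * 16 = 32928, and a count of
 * 4 clusters becomes 64 blocks.
 */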
2780
/*
 * This function is called by the jbd2 layer once the commit has finished,
 * so we know we can free the blocks that were released with that commit.
 */
2785static void ext4_free_data_callback(struct super_block *sb,
2786 struct ext4_journal_cb_entry *jce,
2787 int rc)
2788{
2789 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2790 struct ext4_buddy e4b;
2791 struct ext4_group_info *db;
2792 int err, count = 0, count2 = 0;
2793
2794 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2795 entry->efd_count, entry->efd_group, entry);
2796
2797 if (test_opt(sb, DISCARD)) {
2798 err = ext4_issue_discard(sb, entry->efd_group,
2799 entry->efd_start_cluster,
2800 entry->efd_count);
2801 if (err && err != -EOPNOTSUPP)
2802 ext4_msg(sb, KERN_WARNING, "discard request in"
2803 " group:%d block:%d count:%d failed"
2804 " with %d", entry->efd_group,
2805 entry->efd_start_cluster,
2806 entry->efd_count, err);
2807 }
2808
2809 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
 /* we expect to find an existing buddy because it's pinned in bd_buddy_page */
 BUG_ON(err != 0);

2814 db = e4b.bd_info;
2815
2816 count += entry->efd_count;
2817 count2++;
2818 ext4_lock_group(sb, entry->efd_group);
2819
2820 rb_erase(&entry->efd_node, &(db->bb_free_root));
2821 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2822
 /*
  * Clear the trimmed flag for the group so that the next
  * ext4_trim_fs can trim it.
  * If the volume is mounted with -o discard, online discard
  * is supported and the free blocks will be trimmed online.
  */
2829 if (!test_opt(sb, DISCARD))
2830 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2831
2832 if (!db->bb_free_root.rb_node) {
  /* No more items in the per-group rb tree;
   * balance refcounts from ext4_mb_free_metadata()
   */
2836 page_cache_release(e4b.bd_buddy_page);
2837 page_cache_release(e4b.bd_bitmap_page);
2838 }
2839 ext4_unlock_group(sb, entry->efd_group);
2840 kmem_cache_free(ext4_free_data_cachep, entry);
2841 ext4_mb_unload_buddy(&e4b);
2842
2843 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2844}
2845
2846int __init ext4_init_mballoc(void)
2847{
2848 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2849 SLAB_RECLAIM_ACCOUNT);
2850 if (ext4_pspace_cachep == NULL)
2851 return -ENOMEM;
2852
2853 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2854 SLAB_RECLAIM_ACCOUNT);
2855 if (ext4_ac_cachep == NULL) {
2856 kmem_cache_destroy(ext4_pspace_cachep);
2857 return -ENOMEM;
2858 }
2859
2860 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2861 SLAB_RECLAIM_ACCOUNT);
2862 if (ext4_free_data_cachep == NULL) {
2863 kmem_cache_destroy(ext4_pspace_cachep);
2864 kmem_cache_destroy(ext4_ac_cachep);
2865 return -ENOMEM;
2866 }
2867 return 0;
2868}
2869
2870void ext4_exit_mballoc(void)
2871{
 /*
  * Wait for completion of call_rcu()'s on ext4_pspace_cachep
  * before destroying the slab cache.
  */
2876 rcu_barrier();
2877 kmem_cache_destroy(ext4_pspace_cachep);
2878 kmem_cache_destroy(ext4_ac_cachep);
2879 kmem_cache_destroy(ext4_free_data_cachep);
2880 ext4_groupinfo_destroy_slabs();
2881}
2882
/*
 * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
 * Returns 0 if success or error code
 */
2888static noinline_for_stack int
2889ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2890 handle_t *handle, unsigned int reserv_clstrs)
2891{
2892 struct buffer_head *bitmap_bh = NULL;
2893 struct ext4_group_desc *gdp;
2894 struct buffer_head *gdp_bh;
2895 struct ext4_sb_info *sbi;
2896 struct super_block *sb;
2897 ext4_fsblk_t block;
2898 int err, len;
2899
2900 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2901 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2902
2903 sb = ac->ac_sb;
2904 sbi = EXT4_SB(sb);
2905
2906 err = -EIO;
2907 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2908 if (!bitmap_bh)
2909 goto out_err;
2910
2911 BUFFER_TRACE(bitmap_bh, "getting write access");
2912 err = ext4_journal_get_write_access(handle, bitmap_bh);
2913 if (err)
2914 goto out_err;
2915
2916 err = -EIO;
2917 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2918 if (!gdp)
2919 goto out_err;
2920
2921 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2922 ext4_free_group_clusters(sb, gdp));
2923
2924 BUFFER_TRACE(gdp_bh, "get_write_access");
2925 err = ext4_journal_get_write_access(handle, gdp_bh);
2926 if (err)
2927 goto out_err;
2928
2929 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2930
2931 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2932 if (!ext4_data_block_valid(sbi, block, len)) {
2933 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2934 "fs metadata", block, block+len);
  /* File system mounted not to panic on error
   * Fix the bitmap and return EFSCORRUPTED
   * We leak some of the blocks here.
   */
2939 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2940 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2941 ac->ac_b_ex.fe_len);
2942 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2943 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2944 if (!err)
2945 err = -EFSCORRUPTED;
2946 goto out_err;
2947 }
2948
2949 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2950#ifdef AGGRESSIVE_CHECK
2951 {
2952 int i;
2953 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2954 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2955 bitmap_bh->b_data));
2956 }
2957 }
2958#endif
2959 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2960 ac->ac_b_ex.fe_len);
2961 if (ext4_has_group_desc_csum(sb) &&
2962 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
2963 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2964 ext4_free_group_clusters_set(sb, gdp,
2965 ext4_free_clusters_after_init(sb,
2966 ac->ac_b_ex.fe_group, gdp));
2967 }
2968 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2969 ext4_free_group_clusters_set(sb, gdp, len);
2970 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
2971 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
2972
2973 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2974 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2975
 /*
  * Now reduce the dirty block count also. Should not go negative
  */
2978 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
  /* release all the reserved blocks if non delalloc */
2980 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2981 reserv_clstrs);
2982
2983 if (sbi->s_log_groups_per_flex) {
2984 ext4_group_t flex_group = ext4_flex_group(sbi,
2985 ac->ac_b_ex.fe_group);
2986 atomic64_sub(ac->ac_b_ex.fe_len,
2987 &sbi->s_flex_groups[flex_group].free_clusters);
2988 }
2989
2990 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2991 if (err)
2992 goto out_err;
2993 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2994
2995out_err:
2996 brelse(bitmap_bh);
2997 return err;
2998}
2999
/*
 * here we normalize request for locality group
 * Group requests are normalized to s_mb_group_prealloc, which goes to
 * s_stripe if we set the same via mount option.
 * s_mb_group_prealloc can be configured via
 * /sys/fs/ext4/<partition>/mb_group_prealloc
 *
 * XXX: should we try to preallocate more than the group has now?
 */
3009static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3010{
3011 struct super_block *sb = ac->ac_sb;
3012 struct ext4_locality_group *lg = ac->ac_lg;
3013
3014 BUG_ON(lg == NULL);
3015 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3016 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3017 current->pid, ac->ac_g_ex.fe_len);
3018}
3019
/*
 * Normalization means making request better in terms of
 * size and alignment
 */
3024static noinline_for_stack void
3025ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3026 struct ext4_allocation_request *ar)
3027{
3028 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3029 int bsbits, max;
3030 ext4_lblk_t end;
3031 loff_t size, start_off;
3032 loff_t orig_size __maybe_unused;
3033 ext4_lblk_t start;
3034 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3035 struct ext4_prealloc_space *pa;
3036
3037
 /* do normalize only data requests, metadata requests
    do not need preallocation */
3039 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3040 return;
3041
 /* sometimes the caller may want exact blocks */
3043 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3044 return;
3045
 /* caller may indicate that preallocation isn't
  * required (it's a tail, for example) */
3048 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3049 return;
3050
3051 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3052 ext4_mb_normalize_group_request(ac);
 return;
3054 }
3055
3056 bsbits = ac->ac_sb->s_blocksize_bits;
3057
 /* first, let's learn actual file size
  * given current request is allocated */
3060 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3061 size = size << bsbits;
3062 if (size < i_size_read(ac->ac_inode))
3063 size = i_size_read(ac->ac_inode);
3064 orig_size = size;
3065
 /* max size of free chunks */
3067 max = 2 << bsbits;
3068
3069#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3070 (req <= (size) || max <= (chunk_size))
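/*
 * Reading the predicate (an explanatory note): a request is put into
 * a bucket either because the predicted size fits under the bucket's
 * limit, or because the largest chunk the buddy system can hand out
 * ("max") is no bigger than the bucket's chunk size anyway, so
 * rounding further up would gain nothing.
 */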
3071
 /* first, try to predict filesize */
 /* XXX: should this table be tunable? */
3074 start_off = 0;
3075 if (size <= 16 * 1024) {
3076 size = 16 * 1024;
3077 } else if (size <= 32 * 1024) {
3078 size = 32 * 1024;
3079 } else if (size <= 64 * 1024) {
3080 size = 64 * 1024;
3081 } else if (size <= 128 * 1024) {
3082 size = 128 * 1024;
3083 } else if (size <= 256 * 1024) {
3084 size = 256 * 1024;
3085 } else if (size <= 512 * 1024) {
3086 size = 512 * 1024;
3087 } else if (size <= 1024 * 1024) {
3088 size = 1024 * 1024;
3089 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3090 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3091 (21 - bsbits)) << 21;
3092 size = 2 * 1024 * 1024;
3093 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3094 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3095 (22 - bsbits)) << 22;
3096 size = 4 * 1024 * 1024;
3097 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3098 (8<<20)>>bsbits, max, 8 * 1024)) {
3099 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3100 (23 - bsbits)) << 23;
3101 size = 8 * 1024 * 1024;
3102 } else {
3103 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3104 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3105 ac->ac_o_ex.fe_len) << bsbits;
3106 }
3107 size = size >> bsbits;
3108 start = start_off >> bsbits;
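 /*
  * Worked example of the rounding above (illustrative numbers,
  * assuming 4k blocks, so bsbits == 12): an allocation whose
  * predicted file size is about 100k falls into the
  * "size <= 128 * 1024" bucket, so the goal becomes 128k, i.e.
  * size == 32 blocks starting at start == 0.
  */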
3109
 /* don't cover already allocated blocks in selected range */
3111 if (ar->pleft && start <= ar->lleft) {
3112 size -= ar->lleft + 1 - start;
3113 start = ar->lleft + 1;
3114 }
3115 if (ar->pright && start + size - 1 >= ar->lright)
3116 size -= start + size - ar->lright;
3117
 /*
  * Trim allocation request for filesystems with artificially small
  * groups.
  */
3122 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3123 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3124
3125 end = start + size;
3126
 /* check we don't cross already preallocated blocks */
3128 rcu_read_lock();
3129 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3130 ext4_lblk_t pa_end;
3131
3132 if (pa->pa_deleted)
3133 continue;
3134 spin_lock(&pa->pa_lock);
3135 if (pa->pa_deleted) {
3136 spin_unlock(&pa->pa_lock);
3137 continue;
3138 }
3139
3140 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3141 pa->pa_len);
3142
  /* PA must not overlap original request */
3144 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3145 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3146
  /* skip PAs this normalized request doesn't overlap with */
3148 if (pa->pa_lstart >= end || pa_end <= start) {
3149 spin_unlock(&pa->pa_lock);
3150 continue;
3151 }
3152 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3153
  /* adjust start or end to be adjacent to this pa */
3155 if (pa_end <= ac->ac_o_ex.fe_logical) {
3156 BUG_ON(pa_end < start);
3157 start = pa_end;
3158 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3159 BUG_ON(pa->pa_lstart > end);
3160 end = pa->pa_lstart;
3161 }
3162 spin_unlock(&pa->pa_lock);
3163 }
3164 rcu_read_unlock();
3165 size = end - start;
3166
 /* XXX: extra loop to verify we really don't overlap preallocations */
3168 rcu_read_lock();
3169 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3170 ext4_lblk_t pa_end;
3171
3172 spin_lock(&pa->pa_lock);
3173 if (pa->pa_deleted == 0) {
3174 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3175 pa->pa_len);
3176 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3177 }
3178 spin_unlock(&pa->pa_lock);
3179 }
3180 rcu_read_unlock();
3181
3182 if (start + size <= ac->ac_o_ex.fe_logical &&
3183 start > ac->ac_o_ex.fe_logical) {
3184 ext4_msg(ac->ac_sb, KERN_ERR,
3185 "start %lu, size %lu, fe_logical %lu",
3186 (unsigned long) start, (unsigned long) size,
3187 (unsigned long) ac->ac_o_ex.fe_logical);
3188 BUG();
3189 }
3190 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3191
 /* now prepare goal request */

 /* XXX: is it better to align blocks WRT to logical
  * placement or satisfy big request as is */
3196 ac->ac_g_ex.fe_logical = start;
3197 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3198
 /* define goal start in order to merge */
3200 if (ar->pright && (ar->lright == (start + size))) {
  /* merge to the right */
3202 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3203 &ac->ac_f_ex.fe_group,
3204 &ac->ac_f_ex.fe_start);
3205 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3206 }
3207 if (ar->pleft && (ar->lleft + 1 == start)) {
  /* merge to the left */
3209 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3210 &ac->ac_f_ex.fe_group,
3211 &ac->ac_f_ex.fe_start);
3212 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3213 }
3214
3215 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3216 (unsigned) orig_size, (unsigned) start);
3217}
3218
3219static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3220{
3221 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3222
3223 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3224 atomic_inc(&sbi->s_bal_reqs);
3225 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3226 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3227 atomic_inc(&sbi->s_bal_success);
3228 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3229 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3230 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3231 atomic_inc(&sbi->s_bal_goals);
3232 if (ac->ac_found > sbi->s_mb_max_to_scan)
3233 atomic_inc(&sbi->s_bal_breaks);
3234 }
3235
3236 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3237 trace_ext4_mballoc_alloc(ac);
3238 else
3239 trace_ext4_mballoc_prealloc(ac);
3240}
3241
/*
 * Called on failure; free up any blocks from the inode PA for this
 * context.  We don't need this for MB_GROUP_PA because we only change
 * pa_free in ext4_mb_release_context(), but on failure, we've already
 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
 */
3248static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3249{
3250 struct ext4_prealloc_space *pa = ac->ac_pa;
3251 struct ext4_buddy e4b;
3252 int err;
3253
3254 if (pa == NULL) {
3255 if (ac->ac_f_ex.fe_len == 0)
3256 return;
3257 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3258 if (err) {
   /*
    * This should never happen since we pin the
    * pages in the ext4_allocation_context so
    * ext4_mb_load_buddy() should never fail.
    */
3264 WARN(1, "mb_load_buddy failed (%d)", err);
3265 return;
3266 }
3267 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3268 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3269 ac->ac_f_ex.fe_len);
3270 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3271 ext4_mb_unload_buddy(&e4b);
3272 return;
3273 }
3274 if (pa->pa_type == MB_INODE_PA)
3275 pa->pa_free += ac->ac_b_ex.fe_len;
3276}
3277
/*
 * use blocks preallocated to inode
 */
3281static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3282 struct ext4_prealloc_space *pa)
3283{
3284 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3285 ext4_fsblk_t start;
3286 ext4_fsblk_t end;
3287 int len;
3288

 /* found preallocated blocks, use them */
3290 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3291 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3292 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3293 len = EXT4_NUM_B2C(sbi, end - start);
3294 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3295 &ac->ac_b_ex.fe_start);
3296 ac->ac_b_ex.fe_len = len;
3297 ac->ac_status = AC_STATUS_FOUND;
3298 ac->ac_pa = pa;
3299
3300 BUG_ON(start < pa->pa_pstart);
3301 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3302 BUG_ON(pa->pa_free < len);
3303 pa->pa_free -= len;
3304
3305 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3306}
3307
/*
 * use blocks preallocated to locality group
 */
3311static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3312 struct ext4_prealloc_space *pa)
3313{
3314 unsigned int len = ac->ac_o_ex.fe_len;
3315
3316 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3317 &ac->ac_b_ex.fe_group,
3318 &ac->ac_b_ex.fe_start);
3319 ac->ac_b_ex.fe_len = len;
3320 ac->ac_status = AC_STATUS_FOUND;
3321 ac->ac_pa = pa;
3322
 /* we don't correct pa_pstart or pa_plen here to avoid
  * possible race when the group is being loaded concurrently
  * instead we correct pa later, after blocks are marked
  * in on-disk bitmap -- see ext4_mb_release_context()
  * Other CPUs are prevented from allocating from this pa by lg_mutex
  */
3329 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3330}
3331
/*
 * Return the prealloc space that has minimal distance from the goal
 * block.  @cpa is the currently known closest prealloc space.
 */
3338static struct ext4_prealloc_space *
3339ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3340 struct ext4_prealloc_space *pa,
3341 struct ext4_prealloc_space *cpa)
3342{
3343 ext4_fsblk_t cur_distance, new_distance;
3344
3345 if (cpa == NULL) {
3346 atomic_inc(&pa->pa_count);
3347 return pa;
3348 }
3349 cur_distance = abs(goal_block - cpa->pa_pstart);
3350 new_distance = abs(goal_block - pa->pa_pstart);
3351
3352 if (cur_distance <= new_distance)
3353 return cpa;
3354
 /* drop the previous reference */
3356 atomic_dec(&cpa->pa_count);
3357 atomic_inc(&pa->pa_count);
3358 return pa;
3359}
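
/*
 * Illustration of the distance test above (assumed values): with
 * goal_block 1000, a current candidate cpa at pa_pstart 1100
 * (distance 100) and a new pa at pa_pstart 980 (distance 20), the
 * reference moves from cpa to pa, since 20 < 100.
 */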
3360
/*
 * search goal blocks in preallocated space
 */
3364static noinline_for_stack int
3365ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3366{
3367 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3368 int order, i;
3369 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3370 struct ext4_locality_group *lg;
3371 struct ext4_prealloc_space *pa, *cpa = NULL;
3372 ext4_fsblk_t goal_block;
3373
 /* only data can be preallocated */
3375 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3376 return 0;
3377
 /* first, try per-file preallocation */
3379 rcu_read_lock();
3380 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
  /* all fields in this condition don't change,
   * so we can skip locking for them */
3384 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3385 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3386 EXT4_C2B(sbi, pa->pa_len)))
3387 continue;
3388
  /* non-extent files can't have physical blocks past 2^32 */
3390 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3391 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3392 EXT4_MAX_BLOCK_FILE_PHYS))
3393 continue;
3394
  /* found preallocated blocks, use them */
3396 spin_lock(&pa->pa_lock);
3397 if (pa->pa_deleted == 0 && pa->pa_free) {
3398 atomic_inc(&pa->pa_count);
3399 ext4_mb_use_inode_pa(ac, pa);
3400 spin_unlock(&pa->pa_lock);
3401 ac->ac_criteria = 10;
3402 rcu_read_unlock();
3403 return 1;
3404 }
3405 spin_unlock(&pa->pa_lock);
3406 }
3407 rcu_read_unlock();
3408
 /* can we use group allocation? */
3410 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3411 return 0;
3412
 /* inode may have no locality group for allocations */
3414 lg = ac->ac_lg;
3415 if (lg == NULL)
3416 return 0;
3417 order = fls(ac->ac_o_ex.fe_len) - 1;
3418 if (order > PREALLOC_TB_SIZE - 1)
  /* The max size of hash table is PREALLOC_TB_SIZE */
3420 order = PREALLOC_TB_SIZE - 1;
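  /*
   * Example (illustrative): a request for 100 clusters gives
   * fls(100) - 1 == 6, so the scan below starts at
   * lg_prealloc_list[6] -- PAs with 64..127 free clusters --
   * and walks up towards buckets of larger PAs.
   */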
3421
3422 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3423
 /*
  * search for the prealloc space with minimal distance
  * from the goal block
  */
3427 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3428 rcu_read_lock();
3429 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3430 pa_inode_list) {
3431 spin_lock(&pa->pa_lock);
3432 if (pa->pa_deleted == 0 &&
3433 pa->pa_free >= ac->ac_o_ex.fe_len) {
3434
3435 cpa = ext4_mb_check_group_pa(goal_block,
3436 pa, cpa);
3437 }
3438 spin_unlock(&pa->pa_lock);
3439 }
3440 rcu_read_unlock();
3441 }
3442 if (cpa) {
3443 ext4_mb_use_group_pa(ac, cpa);
3444 ac->ac_criteria = 20;
3445 return 1;
3446 }
3447 return 0;
3448}
3449
/*
 * the function goes through all blocks freed in the group
 * but not yet committed and marks them used in in-core bitmap.
 * buddy must be generated from this bitmap
 * Need to be called with the ext4 group lock held
 */
3456static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3457 ext4_group_t group)
3458{
3459 struct rb_node *n;
3460 struct ext4_group_info *grp;
3461 struct ext4_free_data *entry;
3462
3463 grp = ext4_get_group_info(sb, group);
3464 n = rb_first(&(grp->bb_free_root));
3465
3466 while (n) {
3467 entry = rb_entry(n, struct ext4_free_data, efd_node);
3468 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3469 n = rb_next(n);
3470 }
3471 return;
3472}
3473
/*
 * the function goes through all preallocation in this group and marks them
 * used in in-core bitmap. buddy must be generated from this bitmap
 * Need to be called with ext4 group lock held
 */
3479static noinline_for_stack
3480void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3481 ext4_group_t group)
3482{
3483 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3484 struct ext4_prealloc_space *pa;
3485 struct list_head *cur;
3486 ext4_group_t groupnr;
3487 ext4_grpblk_t start;
3488 int preallocated = 0;
3489 int len;
3490
 /* all forms of preallocation discard first load the group, so the
  * only competing code is preallocation use; we don't need any
  * locking here.  Notice we do NOT ignore preallocations with
  * pa_deleted set: otherwise we could leave used blocks available
  * for allocation in the buddy while a concurrent ext4_mb_put_pa()
  * is killing that preallocation.
  */
3499 list_for_each(cur, &grp->bb_prealloc_list) {
3500 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3501 spin_lock(&pa->pa_lock);
3502 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3503 &groupnr, &start);
3504 len = pa->pa_len;
3505 spin_unlock(&pa->pa_lock);
3506 if (unlikely(len == 0))
3507 continue;
3508 BUG_ON(groupnr != group);
3509 ext4_set_bits(bitmap, start, len);
3510 preallocated += len;
3511 }
 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3513}
3514
3515static void ext4_mb_pa_callback(struct rcu_head *head)
3516{
3517 struct ext4_prealloc_space *pa;
3518 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3519
3520 BUG_ON(atomic_read(&pa->pa_count));
3521 BUG_ON(pa->pa_deleted == 0);
3522 kmem_cache_free(ext4_pspace_cachep, pa);
3523}
3524
/*
 * drops a reference to preallocated space descriptor
 * if this was the last reference and the space is consumed
 */
3529static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3530 struct super_block *sb, struct ext4_prealloc_space *pa)
3531{
3532 ext4_group_t grp;
3533 ext4_fsblk_t grp_blk;
3534
3535
3536 spin_lock(&pa->pa_lock);
3537 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3538 spin_unlock(&pa->pa_lock);
3539 return;
3540 }
3541
3542 if (pa->pa_deleted == 1) {
3543 spin_unlock(&pa->pa_lock);
3544 return;
3545 }
3546
3547 pa->pa_deleted = 1;
3548 spin_unlock(&pa->pa_lock);
3549
3550 grp_blk = pa->pa_pstart;
3551
 /*
  * If doing group-based preallocation, pa_pstart may be in the
  * next group when pa is used up
  */
3555 if (pa->pa_type == MB_GROUP_PA)
3556 grp_blk--;
3557
3558 grp = ext4_get_group_number(sb, grp_blk);
3559
 /*
  * possible race:
  *
  *  P1 (buddy init)                 P2 (regular allocation)
  *                                  find block B in PA
  *  copy on-disk bitmap to buddy
  *                                  mark B in on-disk bitmap
  *                                  drop PA from group
  *  mark all PAs in buddy
  *
  * thus, P1 initializes buddy with B available. to prevent this
  * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
  * against that pair
  */
3574 ext4_lock_group(sb, grp);
3575 list_del(&pa->pa_group_list);
3576 ext4_unlock_group(sb, grp);
3577
3578 spin_lock(pa->pa_obj_lock);
3579 list_del_rcu(&pa->pa_inode_list);
3580 spin_unlock(pa->pa_obj_lock);
3581
3582 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3583}
3584
/*
 * creates new preallocated space for given inode
 */
3588static noinline_for_stack int
3589ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3590{
3591 struct super_block *sb = ac->ac_sb;
3592 struct ext4_sb_info *sbi = EXT4_SB(sb);
3593 struct ext4_prealloc_space *pa;
3594 struct ext4_group_info *grp;
3595 struct ext4_inode_info *ei;
3596
 /* preallocate only when found space is larger than requested */
3598 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3599 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3600 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3601
3602 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3603 if (pa == NULL)
3604 return -ENOMEM;
3605
3606 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3607 int winl;
3608 int wins;
3609 int win;
3610 int offs;
3611
  /* we can't allocate as much as normalizer wants.
   * so, found space must get proper lstart
   * to cover original request */
3615 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3616 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3617
  /* we're limited by original request in that
   * logical block must be covered any way
   * winl is window we can move our chunk within */
3621 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3622
  /* also, we should cover whole original request */
3624 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3625
  /* the smallest one defines real window */
3627 win = min(winl, wins);
3628
3629 offs = ac->ac_o_ex.fe_logical %
3630 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3631 if (offs && offs < win)
3632 win = offs;
3633
3634 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3635 EXT4_NUM_B2C(sbi, win);
3636 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3637 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3638 }
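 /*
  * Worked example of the window logic above (illustrative numbers,
  * assuming the goal starts at logical 0 and a 1:1 cluster ratio):
  * original request at logical 100 for 16 blocks, best extent found
  * is 64 blocks.  Then winl = 100, wins = 64 - 16 = 48 and
  * offs = 100 % 64 = 36; win becomes min(100, 48) = 48 and then 36,
  * so the PA is placed at logical 100 - 36 = 64: aligned to its own
  * length while still covering the original request.
  */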
3639
 /* preallocation can change ac_b_ex, thus we store actually
  * allocated blocks for history */
3642 ac->ac_f_ex = ac->ac_b_ex;
3643
3644 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3645 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3646 pa->pa_len = ac->ac_b_ex.fe_len;
3647 pa->pa_free = pa->pa_len;
3648 atomic_set(&pa->pa_count, 1);
3649 spin_lock_init(&pa->pa_lock);
3650 INIT_LIST_HEAD(&pa->pa_inode_list);
3651 INIT_LIST_HEAD(&pa->pa_group_list);
3652 pa->pa_deleted = 0;
3653 pa->pa_type = MB_INODE_PA;
3654
3655 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3656 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3657 trace_ext4_mb_new_inode_pa(ac, pa);
3658
3659 ext4_mb_use_inode_pa(ac, pa);
3660 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3661
3662 ei = EXT4_I(ac->ac_inode);
3663 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3664
3665 pa->pa_obj_lock = &ei->i_prealloc_lock;
3666 pa->pa_inode = ac->ac_inode;
3667
3668 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3669 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3670 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3671
3672 spin_lock(pa->pa_obj_lock);
3673 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3674 spin_unlock(pa->pa_obj_lock);
3675
3676 return 0;
3677}
3678
/*
 * creates new preallocated space for the locality group
 * the inode belongs to
 */
3682static noinline_for_stack int
3683ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3684{
3685 struct super_block *sb = ac->ac_sb;
3686 struct ext4_locality_group *lg;
3687 struct ext4_prealloc_space *pa;
3688 struct ext4_group_info *grp;
3689
 /* preallocate only when found space is larger than requested */
3691 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3692 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3693 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3694
3695 BUG_ON(ext4_pspace_cachep == NULL);
3696 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3697 if (pa == NULL)
3698 return -ENOMEM;
3699
 /* preallocation can change ac_b_ex, thus we store actually
  * allocated blocks for history */
3702 ac->ac_f_ex = ac->ac_b_ex;
3703
3704 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3705 pa->pa_lstart = pa->pa_pstart;
3706 pa->pa_len = ac->ac_b_ex.fe_len;
3707 pa->pa_free = pa->pa_len;
3708 atomic_set(&pa->pa_count, 1);
3709 spin_lock_init(&pa->pa_lock);
3710 INIT_LIST_HEAD(&pa->pa_inode_list);
3711 INIT_LIST_HEAD(&pa->pa_group_list);
3712 pa->pa_deleted = 0;
3713 pa->pa_type = MB_GROUP_PA;
3714
3715 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3716 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3717 trace_ext4_mb_new_group_pa(ac, pa);
3718
3719 ext4_mb_use_group_pa(ac, pa);
3720 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3721
3722 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3723 lg = ac->ac_lg;
3724 BUG_ON(lg == NULL);
3725
3726 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3727 pa->pa_inode = NULL;
3728
3729 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3730 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3731 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3732
 /*
  * We will later add the new pa to the right bucket
  * after updating the pa_free in ext4_mb_release_context
  */
3737 return 0;
3738}
3739
3740static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3741{
3742 int err;
3743
3744 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3745 err = ext4_mb_new_group_pa(ac);
3746 else
3747 err = ext4_mb_new_inode_pa(ac);
3748 return err;
3749}
3750
/*
 * frees the blocks that are still unused in the given inode
 * preallocation back to the buddy; both callers hold the group
 * lock while the pa is being discarded
 */
3759static noinline_for_stack int
3760ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3761 struct ext4_prealloc_space *pa)
3762{
3763 struct super_block *sb = e4b->bd_sb;
3764 struct ext4_sb_info *sbi = EXT4_SB(sb);
3765 unsigned int end;
3766 unsigned int next;
3767 ext4_group_t group;
3768 ext4_grpblk_t bit;
3769 unsigned long long grp_blk_start;
3770 int err = 0;
3771 int free = 0;
3772
3773 BUG_ON(pa->pa_deleted == 0);
3774 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3775 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3776 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3777 end = bit + pa->pa_len;
3778
3779 while (bit < end) {
3780 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3781 if (bit >= end)
3782 break;
3783 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3784 mb_debug(1, " free preallocated %u/%u in group %u\n",
3785 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3786 (unsigned) next - bit, (unsigned) group);
3787 free += next - bit;
3788
3789 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3790 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3791 EXT4_C2B(sbi, bit)),
3792 next - bit);
3793 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3794 bit = next + 1;
3795 }
3796 if (free != pa->pa_free) {
3797 ext4_msg(e4b->bd_sb, KERN_CRIT,
3798 "pa %p: logic %lu, phys. %lu, len %lu",
3799 pa, (unsigned long) pa->pa_lstart,
3800 (unsigned long) pa->pa_pstart,
3801 (unsigned long) pa->pa_len);
3802 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3803 free, pa->pa_free);
  /*
   * pa is already deleted so we use the value obtained
   * from the bitmap and continue.
   */
3808 }
3809 atomic_add(free, &sbi->s_mb_discarded);
3810
3811 return err;
3812}
3813
3814static noinline_for_stack int
3815ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3816 struct ext4_prealloc_space *pa)
3817{
3818 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group;
3820 ext4_grpblk_t bit;
3821
3822 trace_ext4_mb_release_group_pa(sb, pa);
3823 BUG_ON(pa->pa_deleted == 0);
3824 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3825 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3826 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3827 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3828 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3829
3830 return 0;
3831}
3832
/*
 * releases all preallocations in given group
 *
 * first, we need to decide discard policy:
 * - when do we discard
 *   1) ENOSPC
 * - how many do we discard
 *   1) how many requested
 */
3842static noinline_for_stack int
3843ext4_mb_discard_group_preallocations(struct super_block *sb,
3844 ext4_group_t group, int needed)
3845{
3846 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3847 struct buffer_head *bitmap_bh = NULL;
3848 struct ext4_prealloc_space *pa, *tmp;
3849 struct list_head list;
3850 struct ext4_buddy e4b;
3851 int err;
3852 int busy = 0;
3853 int free = 0;
3854
3855 mb_debug(1, "discard preallocation for group %u\n", group);
3856
3857 if (list_empty(&grp->bb_prealloc_list))
3858 return 0;
3859
3860 bitmap_bh = ext4_read_block_bitmap(sb, group);
3861 if (bitmap_bh == NULL) {
3862 ext4_error(sb, "Error reading block bitmap for %u", group);
3863 return 0;
3864 }
3865
3866 err = ext4_mb_load_buddy(sb, group, &e4b);
3867 if (err) {
3868 ext4_error(sb, "Error loading buddy information for %u", group);
3869 put_bh(bitmap_bh);
3870 return 0;
3871 }
3872
3873 if (needed == 0)
3874 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3875
3876 INIT_LIST_HEAD(&list);
3877repeat:
3878 ext4_lock_group(sb, group);
3879 list_for_each_entry_safe(pa, tmp,
3880 &grp->bb_prealloc_list, pa_group_list) {
3881 spin_lock(&pa->pa_lock);
3882 if (atomic_read(&pa->pa_count)) {
3883 spin_unlock(&pa->pa_lock);
3884 busy = 1;
3885 continue;
3886 }
3887 if (pa->pa_deleted) {
3888 spin_unlock(&pa->pa_lock);
3889 continue;
3890 }
  /* seems this one can be freed ... */
3893 pa->pa_deleted = 1;
  /* we can trust pa_free ... */
3896 free += pa->pa_free;
3897
3898 spin_unlock(&pa->pa_lock);
3899
3900 list_del(&pa->pa_group_list);
3901 list_add(&pa->u.pa_tmp_list, &list);
3902 }
3903
 /* if we still need more blocks and some PAs were used, try again */
3905 if (free < needed && busy) {
3906 busy = 0;
3907 ext4_unlock_group(sb, group);
3908 cond_resched();
3909 goto repeat;
3910 }
3911
 /* found anything to free? */
3913 if (list_empty(&list)) {
3914 BUG_ON(free != 0);
3915 goto out;
3916 }
3917
 /* now free all selected PAs */
3919 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3920
  /* remove from object (inode or locality group) */
3922 spin_lock(pa->pa_obj_lock);
3923 list_del_rcu(&pa->pa_inode_list);
3924 spin_unlock(pa->pa_obj_lock);
3925
3926 if (pa->pa_type == MB_GROUP_PA)
3927 ext4_mb_release_group_pa(&e4b, pa);
3928 else
3929 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3930
3931 list_del(&pa->u.pa_tmp_list);
3932 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3933 }
3934
3935out:
3936 ext4_unlock_group(sb, group);
3937 ext4_mb_unload_buddy(&e4b);
3938 put_bh(bitmap_bh);
3939 return free;
3940}
3941
/*
 * releases all non-used preallocated blocks for given inode
 *
 * It's important to discard preallocations under i_data_sem
 * We don't want another block to be served from the prealloc
 * space when we are discarding the inode prealloc space.
 *
 * FIXME!! Make sure it is valid at all the call sites
 */
3951void ext4_discard_preallocations(struct inode *inode)
3952{
3953 struct ext4_inode_info *ei = EXT4_I(inode);
3954 struct super_block *sb = inode->i_sb;
3955 struct buffer_head *bitmap_bh = NULL;
3956 struct ext4_prealloc_space *pa, *tmp;
3957 ext4_group_t group = 0;
3958 struct list_head list;
3959 struct ext4_buddy e4b;
3960 int err;
3961
3962 if (!S_ISREG(inode->i_mode)) {
3963
3964 return;
3965 }
3966
3967 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3968 trace_ext4_discard_preallocations(inode);
3969
3970 INIT_LIST_HEAD(&list);
3971
3972repeat:
 /* first, collect all pa's in the inode */
3974 spin_lock(&ei->i_prealloc_lock);
3975 while (!list_empty(&ei->i_prealloc_list)) {
3976 pa = list_entry(ei->i_prealloc_list.next,
3977 struct ext4_prealloc_space, pa_inode_list);
3978 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3979 spin_lock(&pa->pa_lock);
3980 if (atomic_read(&pa->pa_count)) {
   /* this shouldn't happen often - nobody should
    * use preallocation while we're discarding it */
3983 spin_unlock(&pa->pa_lock);
3984 spin_unlock(&ei->i_prealloc_lock);
3985 ext4_msg(sb, KERN_ERR,
3986 "uh-oh! used pa while discarding");
3987 WARN_ON(1);
3988 schedule_timeout_uninterruptible(HZ);
3989 goto repeat;
3990
3991 }
3992 if (pa->pa_deleted == 0) {
3993 pa->pa_deleted = 1;
3994 spin_unlock(&pa->pa_lock);
3995 list_del_rcu(&pa->pa_inode_list);
3996 list_add(&pa->u.pa_tmp_list, &list);
3997 continue;
3998 }
3999
  /* someone is deleting pa right now */
4001 spin_unlock(&pa->pa_lock);
4002 spin_unlock(&ei->i_prealloc_lock);
4003
  /* we have to wait here because pa_deleted
   * doesn't mean pa is already unlinked from
   * the inode's list.  since we might be called
   * from ->clear_inode(), the inode could get
   * freed while a concurrent thread is still
   * unlinking pa from the inode's list and might
   * then access already freed memory --
   * ext4_mb_pa_callback may not have run yet.
   */
4016 schedule_timeout_uninterruptible(HZ);
4017 goto repeat;
4018 }
4019 spin_unlock(&ei->i_prealloc_lock);
4020
4021 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4022 BUG_ON(pa->pa_type != MB_INODE_PA);
4023 group = ext4_get_group_number(sb, pa->pa_pstart);
4024
4025 err = ext4_mb_load_buddy(sb, group, &e4b);
4026 if (err) {
4027 ext4_error(sb, "Error loading buddy information for %u",
4028 group);
4029 continue;
4030 }
4031
4032 bitmap_bh = ext4_read_block_bitmap(sb, group);
4033 if (bitmap_bh == NULL) {
4034 ext4_error(sb, "Error reading block bitmap for %u",
4035 group);
4036 ext4_mb_unload_buddy(&e4b);
4037 continue;
4038 }
4039
4040 ext4_lock_group(sb, group);
4041 list_del(&pa->pa_group_list);
4042 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4043 ext4_unlock_group(sb, group);
4044
4045 ext4_mb_unload_buddy(&e4b);
4046 put_bh(bitmap_bh);
4047
4048 list_del(&pa->u.pa_tmp_list);
4049 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4050 }
4051}
4052
4053#ifdef CONFIG_EXT4_DEBUG
4054static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4055{
4056 struct super_block *sb = ac->ac_sb;
4057 ext4_group_t ngroups, i;
4058
4059 if (!ext4_mballoc_debug ||
4060 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4061 return;
4062
4063 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4064 " Allocation context details:");
4065 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4066 ac->ac_status, ac->ac_flags);
4067 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4068 "goal %lu/%lu/%lu@%lu, "
4069 "best %lu/%lu/%lu@%lu cr %d",
4070 (unsigned long)ac->ac_o_ex.fe_group,
4071 (unsigned long)ac->ac_o_ex.fe_start,
4072 (unsigned long)ac->ac_o_ex.fe_len,
4073 (unsigned long)ac->ac_o_ex.fe_logical,
4074 (unsigned long)ac->ac_g_ex.fe_group,
4075 (unsigned long)ac->ac_g_ex.fe_start,
4076 (unsigned long)ac->ac_g_ex.fe_len,
4077 (unsigned long)ac->ac_g_ex.fe_logical,
4078 (unsigned long)ac->ac_b_ex.fe_group,
4079 (unsigned long)ac->ac_b_ex.fe_start,
4080 (unsigned long)ac->ac_b_ex.fe_len,
4081 (unsigned long)ac->ac_b_ex.fe_logical,
4082 (int)ac->ac_criteria);
4083 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4084 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4085 ngroups = ext4_get_groups_count(sb);
4086 for (i = 0; i < ngroups; i++) {
4087 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4088 struct ext4_prealloc_space *pa;
4089 ext4_grpblk_t start;
4090 struct list_head *cur;
4091 ext4_lock_group(sb, i);
4092 list_for_each(cur, &grp->bb_prealloc_list) {
4093 pa = list_entry(cur, struct ext4_prealloc_space,
4094 pa_group_list);
4095 spin_lock(&pa->pa_lock);
4096 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4097 NULL, &start);
4098 spin_unlock(&pa->pa_lock);
4099 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4100 start, pa->pa_len);
4101 }
4102 ext4_unlock_group(sb, i);
4103
4104 if (grp->bb_free == 0)
4105 continue;
4106 printk(KERN_ERR "%u: %d/%d \n",
4107 i, grp->bb_free, grp->bb_fragments);
4108 }
4109 printk(KERN_ERR "\n");
4110}
4111#else
4112static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4113{
4114 return;
4115}
4116#endif
4117
/*
 * We use locality group preallocation for small files.  The size of
 * the file is determined by the current size or the resulting size
 * after allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 */
4125static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4126{
4127 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4128 int bsbits = ac->ac_sb->s_blocksize_bits;
4129 loff_t size, isize;
4130
4131 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4132 return;
4133
4134 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4135 return;
4136
4137 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4138 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4139 >> bsbits;
4140
4141 if ((size == isize) &&
4142 !ext4_fs_is_busy(sbi) &&
4143 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4144 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4145 return;
4146 }
4147
4148 if (sbi->s_mb_group_prealloc <= 0) {
4149 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4150 return;
4151 }
4152
 /* don't use group allocation for large files */
4154 size = max(size, isize);
4155 if (size > sbi->s_mb_stream_request) {
4156 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4157 return;
4158 }
4159
4160 BUG_ON(ac->ac_lg != NULL);
4161
 /*
  * locality group prealloc space is a per cpu resource; having
  * per cpu locality groups reduces contention between block
  * requests from different CPUs.
  */
4166 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
4167
 /* we're going to use group allocation */
4169 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4170
 /* serialize all allocations in the group */
4172 mutex_lock(&ac->ac_lg->lg_mutex);
4173}
4174
4175static noinline_for_stack int
4176ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4177 struct ext4_allocation_request *ar)
4178{
4179 struct super_block *sb = ar->inode->i_sb;
4180 struct ext4_sb_info *sbi = EXT4_SB(sb);
4181 struct ext4_super_block *es = sbi->s_es;
4182 ext4_group_t group;
4183 unsigned int len;
4184 ext4_fsblk_t goal;
4185 ext4_grpblk_t block;
4186
 /* we can't allocate > group size */
4188 len = ar->len;
4189
 /* just a dirty hack to filter too big requests */
4191 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4192 len = EXT4_CLUSTERS_PER_GROUP(sb);
4193
 /* start searching from the goal */
4195 goal = ar->goal;
4196 if (goal < le32_to_cpu(es->s_first_data_block) ||
4197 goal >= ext4_blocks_count(es))
4198 goal = le32_to_cpu(es->s_first_data_block);
4199 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4200
 /* set up allocation goals */
4202 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4203 ac->ac_status = AC_STATUS_CONTINUE;
4204 ac->ac_sb = sb;
4205 ac->ac_inode = ar->inode;
4206 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4207 ac->ac_o_ex.fe_group = group;
4208 ac->ac_o_ex.fe_start = block;
4209 ac->ac_o_ex.fe_len = len;
4210 ac->ac_g_ex = ac->ac_o_ex;
4211 ac->ac_flags = ar->flags;
4212
 /* we have to define context: we'll work with a file or
  * locality group. this is a policy, actually */
4215 ext4_mb_group_or_file(ac);
4216
4217 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4218 "left: %u/%u, right %u/%u to %swritable\n",
4219 (unsigned) ar->len, (unsigned) ar->logical,
4220 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4221 (unsigned) ar->lleft, (unsigned) ar->pleft,
4222 (unsigned) ar->lright, (unsigned) ar->pright,
4223 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4224 return 0;
4225
4226}
4227
4228static noinline_for_stack void
4229ext4_mb_discard_lg_preallocations(struct super_block *sb,
4230 struct ext4_locality_group *lg,
4231 int order, int total_entries)
4232{
4233 ext4_group_t group = 0;
4234 struct ext4_buddy e4b;
4235 struct list_head discard_list;
4236 struct ext4_prealloc_space *pa, *tmp;
4237
4238 mb_debug(1, "discard locality group preallocation\n");
4239
4240 INIT_LIST_HEAD(&discard_list);
4241
4242 spin_lock(&lg->lg_prealloc_lock);
4243 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4244 pa_inode_list) {
4245 spin_lock(&pa->pa_lock);
4246 if (atomic_read(&pa->pa_count)) {
   /*
    * This is the pa that is just used
    * for block allocation, so don't
    * free it.
    */
4252 spin_unlock(&pa->pa_lock);
4253 continue;
4254 }
4255 if (pa->pa_deleted) {
4256 spin_unlock(&pa->pa_lock);
4257 continue;
4258 }
4259
4260 BUG_ON(pa->pa_type != MB_GROUP_PA);
4261
  /* seems this one can be freed ... */
4263 pa->pa_deleted = 1;
4264 spin_unlock(&pa->pa_lock);
4265
4266 list_del_rcu(&pa->pa_inode_list);
4267 list_add(&pa->u.pa_tmp_list, &discard_list);
4268
4269 total_entries--;
4270 if (total_entries <= 5) {
   /*
    * stop collecting once only 5 entries
    * would remain on the list; that keeps
    * the lg prealloc list from being
    * drained completely.
    */
4277 break;
4278 }
4279 }
4280 spin_unlock(&lg->lg_prealloc_lock);
4281
4282 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4283
4284 group = ext4_get_group_number(sb, pa->pa_pstart);
4285 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4286 ext4_error(sb, "Error loading buddy information for %u",
4287 group);
4288 continue;
4289 }
4290 ext4_lock_group(sb, group);
4291 list_del(&pa->pa_group_list);
4292 ext4_mb_release_group_pa(&e4b, pa);
4293 ext4_unlock_group(sb, group);
4294
4295 ext4_mb_unload_buddy(&e4b);
4296 list_del(&pa->u.pa_tmp_list);
4297 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4298 }
4299}
4300
/*
 * We have incremented pa_count.  So it cannot be freed at this
 * point.  Also we hold lg_mutex, so no parallel allocation is
 * possible from this lg.  That means pa_free cannot be updated.
 *
 * A parallel ext4_mb_discard_group_preallocations is possible,
 * though, which can cause the lg_prealloc_list to change.
 */
4310static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4311{
4312 int order, added = 0, lg_prealloc_count = 1;
4313 struct super_block *sb = ac->ac_sb;
4314 struct ext4_locality_group *lg = ac->ac_lg;
4315 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4316
4317 order = fls(pa->pa_free) - 1;
4318 if (order > PREALLOC_TB_SIZE - 1)
  /* The max size of hash table is PREALLOC_TB_SIZE */
4320 order = PREALLOC_TB_SIZE - 1;
4321
4322 spin_lock(&lg->lg_prealloc_lock);
4323 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4324 pa_inode_list) {
4325 spin_lock(&tmp_pa->pa_lock);
4326 if (tmp_pa->pa_deleted) {
4327 spin_unlock(&tmp_pa->pa_lock);
4328 continue;
4329 }
4330 if (!added && pa->pa_free < tmp_pa->pa_free) {
   /* Add the prealloc space to the right list */
4332 list_add_tail_rcu(&pa->pa_inode_list,
4333 &tmp_pa->pa_inode_list);
4334 added = 1;
   /*
    * we want to count the total
    * number of entries in the list
    */
4339 }
4340 spin_unlock(&tmp_pa->pa_lock);
4341 lg_prealloc_count++;
4342 }
4343 if (!added)
4344 list_add_tail_rcu(&pa->pa_inode_list,
4345 &lg->lg_prealloc_list[order]);
4346 spin_unlock(&lg->lg_prealloc_lock);
4347
 /* Now trim the list to be not more than 8 elements */
4349 if (lg_prealloc_count > 8) {
4350 ext4_mb_discard_lg_preallocations(sb, lg,
4351 order, lg_prealloc_count);
4352 return;
4353 }
 return;
4355}
4356
/*
 * release all resources used by allocation context
 */
4360static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4361{
4362 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4363 struct ext4_prealloc_space *pa = ac->ac_pa;
4364 if (pa) {
4365 if (pa->pa_type == MB_GROUP_PA) {
   /* see comment in ext4_mb_use_group_pa() */
4367 spin_lock(&pa->pa_lock);
4368 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4369 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4370 pa->pa_free -= ac->ac_b_ex.fe_len;
4371 pa->pa_len -= ac->ac_b_ex.fe_len;
4372 spin_unlock(&pa->pa_lock);
4373 }
4374 }
4375 if (pa) {
  /*
   * We want to add the pa to the right bucket.
   * Remove it from the list and while adding
   * make sure the list to which we are adding
   * doesn't grow big.
   */
4382 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4383 spin_lock(pa->pa_obj_lock);
4384 list_del_rcu(&pa->pa_inode_list);
4385 spin_unlock(pa->pa_obj_lock);
4386 ext4_mb_add_n_trim(ac);
4387 }
4388 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4389 }
4390 if (ac->ac_bitmap_page)
4391 page_cache_release(ac->ac_bitmap_page);
4392 if (ac->ac_buddy_page)
4393 page_cache_release(ac->ac_buddy_page);
4394 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4395 mutex_unlock(&ac->ac_lg->lg_mutex);
4396 ext4_mb_collect_stats(ac);
4397 return 0;
4398}
4399
4400static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4401{
4402 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4403 int ret;
4404 int freed = 0;
4405
4406 trace_ext4_mb_discard_preallocations(sb, needed);
4407 for (i = 0; i < ngroups && needed > 0; i++) {
4408 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4409 freed += ret;
4410 needed -= ret;
4411 }
4412
4413 return freed;
4414}
4415
/*
 * Main entry point into mballoc to allocate blocks:
 * it tries to use preallocation first, then falls back
 * to the usual allocation path.
 */
4421ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4422 struct ext4_allocation_request *ar, int *errp)
4423{
4424 int freed;
4425 struct ext4_allocation_context *ac = NULL;
4426 struct ext4_sb_info *sbi;
4427 struct super_block *sb;
4428 ext4_fsblk_t block = 0;
4429 unsigned int inquota = 0;
4430 unsigned int reserv_clstrs = 0;
4431
4432 might_sleep();
4433 sb = ar->inode->i_sb;
4434 sbi = EXT4_SB(sb);
4435
4436 trace_ext4_request_blocks(ar);
4437
 /* Allow to use superuser reservation for quota file */
4439 if (IS_NOQUOTA(ar->inode))
4440 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4441
4442 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
  /* Without delayed allocation we need to verify
   * there are enough free blocks to do block allocation
   * and verify that the allocation doesn't exceed the
   * quota limits.
   */
4447 while (ar->len &&
4448 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4449
   /* let others free the space */
4451 cond_resched();
4452 ar->len = ar->len >> 1;
4453 }
4454 if (!ar->len) {
4455 *errp = -ENOSPC;
4456 return 0;
4457 }
4458 reserv_clstrs = ar->len;
4459 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4460 dquot_alloc_block_nofail(ar->inode,
4461 EXT4_C2B(sbi, ar->len));
4462 } else {
4463 while (ar->len &&
4464 dquot_alloc_block(ar->inode,
4465 EXT4_C2B(sbi, ar->len))) {
4466
4467 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4468 ar->len--;
4469 }
4470 }
4471 inquota = ar->len;
4472 if (ar->len == 0) {
4473 *errp = -EDQUOT;
4474 goto out;
4475 }
4476 }
4477
4478 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4479 if (!ac) {
4480 ar->len = 0;
4481 *errp = -ENOMEM;
4482 goto out;
4483 }
4484
4485 *errp = ext4_mb_initialize_context(ac, ar);
4486 if (*errp) {
4487 ar->len = 0;
4488 goto out;
4489 }
4490
4491 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4492 if (!ext4_mb_use_preallocated(ac)) {
4493 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4494 ext4_mb_normalize_request(ac, ar);
4495repeat:
  /* allocate space in core */
4497 *errp = ext4_mb_regular_allocator(ac);
4498 if (*errp)
4499 goto discard_and_exit;
4500
  /*
   * pa allocated above is added to grp->bb_prealloc_list only
   * when we were able to allocate some block i.e. when
   * ac->ac_status == AC_STATUS_FOUND.
   * An error from above means ac->ac_b_ex.fe_len == 0.
   */
4504 if (ac->ac_status == AC_STATUS_FOUND &&
4505 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4506 *errp = ext4_mb_new_preallocation(ac);
4507 if (*errp) {
4508 discard_and_exit:
4509 ext4_discard_allocated_blocks(ac);
4510 goto errout;
4511 }
4512 }
4513 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4514 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4515 if (*errp) {
4516 ext4_discard_allocated_blocks(ac);
4517 goto errout;
4518 } else {
4519 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4520 ar->len = ac->ac_b_ex.fe_len;
4521 }
4522 } else {
4523 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4524 if (freed)
4525 goto repeat;
4526 *errp = -ENOSPC;
4527 }
4528
4529errout:
4530 if (*errp) {
4531 ac->ac_b_ex.fe_len = 0;
4532 ar->len = 0;
4533 ext4_mb_show_ac(ac);
4534 }
4535 ext4_mb_release_context(ac);
4536out:
4537 if (ac)
4538 kmem_cache_free(ext4_ac_cachep, ac);
4539 if (inquota && ar->len < inquota)
4540 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4541 if (!ar->len) {
4542 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
   /* release all the reserved blocks if non delalloc */
4544 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4545 reserv_clstrs);
4546 }
4547
4548 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4549
4550 return block;
4551}
4552
/*
 * We can merge two free data extents only if the physical blocks
 * are contiguous, AND the extents were freed by the same transaction,
 * AND the blocks are associated with the same group.
 */
4558static int can_merge(struct ext4_free_data *entry1,
4559 struct ext4_free_data *entry2)
4560{
4561 if ((entry1->efd_tid == entry2->efd_tid) &&
4562 (entry1->efd_group == entry2->efd_group) &&
4563 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4564 return 1;
4565 return 0;
4566}
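
/*
 * Example (illustrative): two entries freed by the same transaction
 * in the same group, one covering clusters [0, 16) and the next
 * starting at cluster 16, pass all three tests and will be merged
 * into a single rb-tree node by ext4_mb_free_metadata() below.
 */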
4567
4568static noinline_for_stack int
4569ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4570 struct ext4_free_data *new_entry)
4571{
4572 ext4_group_t group = e4b->bd_group;
4573 ext4_grpblk_t cluster;
4574 struct ext4_free_data *entry;
4575 struct ext4_group_info *db = e4b->bd_info;
4576 struct super_block *sb = e4b->bd_sb;
4577 struct ext4_sb_info *sbi = EXT4_SB(sb);
4578 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4579 struct rb_node *parent = NULL, *new_node;
4580
4581 BUG_ON(!ext4_handle_valid(handle));
4582 BUG_ON(e4b->bd_bitmap_page == NULL);
4583 BUG_ON(e4b->bd_buddy_page == NULL);
4584
4585 new_node = &new_entry->efd_node;
4586 cluster = new_entry->efd_start_cluster;
4587
4588 if (!*n) {
  /*
   * first free block extent.  We have to protect buddy cache
   * from being freed, otherwise we'll refresh it from the
   * on-disk bitmap and lose not-yet-available blocks.
   */
4594 page_cache_get(e4b->bd_buddy_page);
4595 page_cache_get(e4b->bd_bitmap_page);
4596 }
4597 while (*n) {
4598 parent = *n;
4599 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4600 if (cluster < entry->efd_start_cluster)
4601 n = &(*n)->rb_left;
4602 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4603 n = &(*n)->rb_right;
4604 else {
4605 ext4_grp_locked_error(sb, group, 0,
4606 ext4_group_first_block_no(sb, group) +
4607 EXT4_C2B(sbi, cluster),
4608 "Block already on to-be-freed list");
4609 return 0;
4610 }
4611 }
4612
4613 rb_link_node(new_node, parent, n);
4614 rb_insert_color(new_node, &db->bb_free_root);
4615
 /* Now try to see whether the extent can be merged to left and right */
4617 node = rb_prev(new_node);
4618 if (node) {
4619 entry = rb_entry(node, struct ext4_free_data, efd_node);
4620 if (can_merge(entry, new_entry) &&
4621 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4622 new_entry->efd_start_cluster = entry->efd_start_cluster;
4623 new_entry->efd_count += entry->efd_count;
4624 rb_erase(node, &(db->bb_free_root));
4625 kmem_cache_free(ext4_free_data_cachep, entry);
4626 }
4627 }
4628
4629 node = rb_next(new_node);
4630 if (node) {
4631 entry = rb_entry(node, struct ext4_free_data, efd_node);
4632 if (can_merge(new_entry, entry) &&
4633 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4634 new_entry->efd_count += entry->efd_count;
4635 rb_erase(node, &(db->bb_free_root));
4636 kmem_cache_free(ext4_free_data_cachep, entry);
4637 }
4638 }
4639
4640 ext4_journal_callback_add(handle, ext4_free_data_callback,
4641 &new_entry->efd_jce);
4642 return 0;
4643}
4644
/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:  handle for this transaction
 * @inode:   inode
 * @bh:      optional buffer of the block to be freed
 * @block:   starting physical block to be freed
 * @count:   number of blocks to be freed
 * @flags:   flags used by ext4_free_blocks
 */
4653void ext4_free_blocks(handle_t *handle, struct inode *inode,
4654 struct buffer_head *bh, ext4_fsblk_t block,
4655 unsigned long count, int flags)
4656{
4657 struct buffer_head *bitmap_bh = NULL;
4658 struct super_block *sb = inode->i_sb;
4659 struct ext4_group_desc *gdp;
4660 unsigned int overflow;
4661 ext4_grpblk_t bit;
4662 struct buffer_head *gd_bh;
4663 ext4_group_t block_group;
4664 struct ext4_sb_info *sbi;
4665 struct ext4_buddy e4b;
4666 unsigned int count_clusters;
4667 int err = 0;
4668 int ret;
4669
4670 might_sleep();
4671 if (bh) {
4672 if (block)
4673 BUG_ON(block != bh->b_blocknr);
4674 else
4675 block = bh->b_blocknr;
4676 }
4677
4678 sbi = EXT4_SB(sb);
4679 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4680 !ext4_data_block_valid(sbi, block, count)) {
4681 ext4_error(sb, "Freeing blocks not in datazone - "
4682 "block = %llu, count = %lu", block, count);
4683 goto error_return;
4684 }
4685
4686 ext4_debug("freeing block %llu\n", block);
4687 trace_ext4_free_blocks(inode, block, count, flags);
4688
4689 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4690 struct buffer_head *tbh = bh;
4691 int i;
4692
4693 BUG_ON(bh && (count > 1));
4694
4695 for (i = 0; i < count; i++) {
4696 cond_resched();
4697 if (!bh)
4698 tbh = sb_find_get_block(inode->i_sb,
4699 block + i);
4700 if (!tbh)
4701 continue;
4702 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4703 inode, tbh, block + i);
4704 }
4705 }
4706
 /*
  * If the data will not be going through the writeback path, we
  * must not reuse the freed blocks until the transaction that
  * freed them commits; treating them as metadata below keeps them
  * on the to-be-freed list until then.
  */
4714 if (!ext4_should_writeback_data(inode))
4715 flags |= EXT4_FREE_BLOCKS_METADATA;
4716
 /*
  * If the extent to be freed does not begin on a cluster
  * boundary, we need to deal with partial clusters at the
  * beginning and end of the extent.  Normally we will free
  * blocks at the beginning or the end unless we are explicitly
  * requested to avoid doing so.
  */
4724 overflow = EXT4_PBLK_COFF(sbi, block);
4725 if (overflow) {
4726 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4727 overflow = sbi->s_cluster_ratio - overflow;
4728 block += overflow;
4729 if (count > overflow)
4730 count -= overflow;
4731 else
4732 return;
4733 } else {
4734 block -= overflow;
4735 count += overflow;
4736 }
4737 }
4738 overflow = EXT4_LBLK_COFF(sbi, count);
4739 if (overflow) {
4740 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4741 if (count > overflow)
4742 count -= overflow;
4743 else
4744 return;
4745 } else
4746 count += sbi->s_cluster_ratio - overflow;
4747 }
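 /*
  * Worked example of the alignment above (a sketch, assuming a
  * bigalloc cluster ratio of 16): freeing block 18 with count 30
  * gives EXT4_PBLK_COFF == 2, so without NOFREE_FIRST_CLUSTER the
  * range widens to block 16, count 32; count is then already
  * cluster-aligned (EXT4_LBLK_COFF == 0), and both partial clusters
  * are freed whole.
  */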
4748
4749do_more:
4750 overflow = 0;
4751 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4752
4753 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4754 ext4_get_group_info(sb, block_group))))
4755 return;
4756
 /*
  * Check to see if we are freeing blocks across a group
  * boundary.
  */
4761 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4762 overflow = EXT4_C2B(sbi, bit) + count -
4763 EXT4_BLOCKS_PER_GROUP(sb);
4764 count -= overflow;
4765 }
4766 count_clusters = EXT4_NUM_B2C(sbi, count);
4767 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4768 if (!bitmap_bh) {
4769 err = -EIO;
4770 goto error_return;
4771 }
4772 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4773 if (!gdp) {
4774 err = -EIO;
4775 goto error_return;
4776 }
4777
4778 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4779 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4780 in_range(block, ext4_inode_table(sb, gdp),
4781 EXT4_SB(sb)->s_itb_per_group) ||
4782 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4783 EXT4_SB(sb)->s_itb_per_group)) {
4784
4785 ext4_error(sb, "Freeing blocks in system zone - "
4786 "Block = %llu, count = %lu", block, count);
  /* err = 0. ext4_std_error should be a no op */
4788 goto error_return;
4789 }
4790
4791 BUFFER_TRACE(bitmap_bh, "getting write access");
4792 err = ext4_journal_get_write_access(handle, bitmap_bh);
4793 if (err)
4794 goto error_return;
4795
 /*
  * We are about to modify some metadata.  Call the journal APIs
  * to unshare ->b_data if a currently-committing transaction is
  * using it
  */
4801 BUFFER_TRACE(gd_bh, "get_write_access");
4802 err = ext4_journal_get_write_access(handle, gd_bh);
4803 if (err)
4804 goto error_return;
4805#ifdef AGGRESSIVE_CHECK
4806 {
4807 int i;
4808 for (i = 0; i < count_clusters; i++)
4809 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4810 }
4811#endif
4812 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4813
4814 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4815 if (err)
4816 goto error_return;
4817
4818 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4819 struct ext4_free_data *new_entry;
4820
  /*
   * blocks being freed are metadata. these blocks shouldn't
   * be used until this transaction is committed
   *
   * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
   * to fail.
   */
4827 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4828 GFP_NOFS|__GFP_NOFAIL);
4829 new_entry->efd_start_cluster = bit;
4830 new_entry->efd_group = block_group;
4831 new_entry->efd_count = count_clusters;
4832 new_entry->efd_tid = handle->h_transaction->t_tid;
4833
4834 ext4_lock_group(sb, block_group);
4835 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4836 ext4_mb_free_metadata(handle, &e4b, new_entry);
4837 } else {
  /* the blocks being freed are data; once the bitmap is
   * updated they may be reused immediately, so discard them
   * now (if requested) and return them to the buddy */
4842 if (test_opt(sb, DISCARD)) {
4843 err = ext4_issue_discard(sb, block_group, bit, count);
4844 if (err && err != -EOPNOTSUPP)
4845 ext4_msg(sb, KERN_WARNING, "discard request in"
4846 " group:%d block:%d count:%lu failed"
4847 " with %d", block_group, bit, count,
4848 err);
4849 } else
4850 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4851
4852 ext4_lock_group(sb, block_group);
4853 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4854 mb_free_blocks(inode, &e4b, bit, count_clusters);
4855 }
4856
4857 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4858 ext4_free_group_clusters_set(sb, gdp, ret);
4859 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4860 ext4_group_desc_csum_set(sb, block_group, gdp);
4861 ext4_unlock_group(sb, block_group);
4862
4863 if (sbi->s_log_groups_per_flex) {
4864 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4865 atomic64_add(count_clusters,
4866 &sbi->s_flex_groups[flex_group].free_clusters);
4867 }
4868
4869 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4870 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4871 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4872
4873 ext4_mb_unload_buddy(&e4b);
4874
 /* We dirtied the bitmap block */
4876 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4877 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4878
 /* And the group descriptor block */
4880 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4881 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4882 if (!err)
4883 err = ret;
4884
4885 if (overflow && !err) {
4886 block += count;
4887 count = overflow;
4888 put_bh(bitmap_bh);
4889 goto do_more;
4890 }
4891error_return:
4892 brelse(bitmap_bh);
4893 ext4_std_error(sb, err);
4894 return;
4895}
4896
/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:  handle to this transaction
 * @sb:      super block
 * @block:   start physical block to add to the block group
 * @count:   number of blocks to free
 *
 * This marks the blocks as free in the bitmap and buddy.
 */
int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
			  ext4_fsblk_t block, unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned int i;
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_buddy e4b;
	int err = 0, ret, blk_free_count;
	ext4_grpblk_t blocks_freed;

	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);

	if (count == 0)
		return 0;

	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
	/*
	 * Check to see if we are adding blocks across a group
	 * boundary.
	 */
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u\n",
			     block_group);
		err = -EINVAL;
		goto error_return;
	}

	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (!bitmap_bh) {
		err = -EIO;
		goto error_return;
	}

	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!desc) {
		err = -EIO;
		goto error_return;
	}

	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		err = -EINVAL;
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * again to make sure that the (journal) buffer is in a
	 * consistent state and is not reused unnecessarily.
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

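	/*
	 * Count only bits that are currently set in the bitmap; a bit
	 * that is already clear means the block is already free and must
	 * not be counted toward blocks_freed a second time.
	 */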
	for (i = 0, blocks_freed = 0; i < count; i++) {
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
			ext4_error(sb, "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			blocks_freed++;
		}
	}

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

	/*
	 * need to update group_info->bb_free and bitmap
	 * with group lock held; generate_buddy looks at
	 * them with the group lock held
	 */
	ext4_lock_group(sb, block_group);
	mb_clear_bits(bitmap_bh->b_data, bit, count);
	mb_free_blocks(NULL, &e4b, bit, count);
	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
	ext4_free_group_clusters_set(sb, desc, blk_free_count);
	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, desc);
	ext4_unlock_group(sb, block_group);
	percpu_counter_add(&sbi->s_freeclusters_counter,
			   EXT4_NUM_B2C(sbi, blocks_freed));

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return err;
}

/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @group:	alloc. group we are working with
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group". To assure that no
 * one will allocate those blocks, mark them as used in the buddy bitmap.
 * This must be called under the group lock.
 */
static int ext4_trim_extent(struct super_block *sb, int start, int count,
			    ext4_group_t group, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_free_extent ex;
	int ret = 0;

	trace_ext4_trim_extent(sb, group, start, count);

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	ex.fe_start = start;
	ex.fe_group = group;
	ex.fe_len = count;

	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */
	mb_mark_used(e4b, &ex);
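	/*
	 * Drop the group lock while the (potentially blocking) discard is
	 * issued, then retake it before returning the extent to the buddy.
	 */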
	ext4_unlock_group(sb, group);
	ret = ext4_issue_discard(sb, group, start, count);
	ext4_lock_group(sb, group);
	mb_free_blocks(NULL, e4b, start, ex.fe_len);
	return ret;
}

/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:			super block for file system
 * @group:		group to be trimmed
 * @start:		first group block to examine
 * @max:		last group block to examine
 * @minblocks:		minimum extent block count
 *
 * ext4_trim_all_free walks through group's block bitmap searching for free
 * extents. When a free extent is found, it is marked as used in the group
 * buddy bitmap, a TRIM is issued on the extent, and the extent is then
 * freed back to the group buddy bitmap. This continues until the whole
 * group has been scanned.
 */
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0, free_count = 0;
	struct ext4_buddy e4b;
	int ret = 0;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_error(sb, "Error in loading buddy "
			   "information for %u", group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
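	/*
	 * Skip the group if it is already marked trimmed and the previous
	 * trim used a minimum length no larger than this one.
	 */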
	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
		goto out;

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

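	/*
	 * Scan the bitmap: find the next free (zero) bit, then the first
	 * used bit after it; [start, next) is a candidate free extent.
	 */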
	while (start <= max) {
		start = mb_find_next_zero_bit(bitmap, max + 1, start);
		if (start > max)
			break;
		next = mb_find_next_bit(bitmap, max + 1, start);

		if ((next - start) >= minblocks) {
			ret = ext4_trim_extent(sb, start,
					       next - start, group, &e4b);
			if (ret && ret != -EOPNOTSUPP)
				break;
			ret = 0;
			count += next - start;
		}
		free_count += next - start;
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

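		/*
		 * Not enough free clusters remain unscanned in this group
		 * to yield another extent of at least minblocks; stop.
		 */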
		if ((e4b.bd_info->bb_free - free_count) < minblocks)
			break;
	}

	if (!ret) {
		ret = count;
		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
	}
out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		   count, group);

	return ret;
}

/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:			superblock for filesystem
 * @range:		fstrim_range structure
 *
 * start:	First Byte to trim
 * len:		number of Bytes to trim from start
 * minlen:	minimum extent length in Bytes
 *
 * ext4_trim_fs goes through all allocation groups containing Bytes from
 * start to start+len. For each such group, ext4_trim_all_free() is invoked
 * to trim all free space.
 */
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_group_info *grp;
	ext4_group_t group, first_group, last_group;
	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
	uint64_t start, end, minlen, trimmed = 0;
	ext4_fsblk_t first_data_blk =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
	int ret = 0;

	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
			      range->minlen >> sb->s_blocksize_bits);

	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
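	/* Clamp the requested range to the filesystem's data blocks */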
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	/* Determine first and last group to examine based on start and end */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_cluster);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
				     &last_group, &last_cluster);

	/* end now represents the last cluster to discard in this group */
	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		grp = ext4_get_group_info(sb, group);
		/* We only do this if the grp has never been initialized */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			ret = ext4_mb_init_group(sb, group);
			if (ret)
				break;
		}

		/*
		 * For all the groups except the last one, last cluster will
		 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
		 * change it for the last group, note that last_cluster is
		 * already computed earlier by ext4_get_group_no_and_offset()
		 */
		if (group == last_group)
			end = last_cluster;

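		/*
		 * Only scan groups that have at least minlen free clusters
		 * in total; others cannot contain a qualifying extent.
		 */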
		if (grp->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, group, first_cluster,
						 end, minlen);
			if (cnt < 0) {
				ret = cnt;
				break;
			}
			trimmed += cnt;
		}

		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */
		first_cluster = 0;
	}

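	/*
	 * Remember the minimum length used for this trim so that a later
	 * FITRIM with minlen >= s_last_trim_minblks can skip groups that
	 * are still marked trimmed.
	 */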
	if (!ret)
		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

out:
	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
	return ret;
}
