1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "mballoc.h"
25#include <linux/debugfs.h>
26#include <linux/slab.h>
27#include <trace/events/ext4.h>
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
/*
 * Slab caches used by the allocator.  Judging by their names they back
 * preallocation-space, allocation-context and free-extent objects; the
 * code that creates and uses them is outside this part of the file.
 */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;

/*
 * Group-info slab caches: one slot per entry of the name table below.
 * The names run from "1k" to "128k" in powers of two (presumably one
 * cache per supported block size -- confirm against the cache setup code).
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k",
	"ext4_groupinfo_2k",
	"ext4_groupinfo_4k",
	"ext4_groupinfo_8k",
	"ext4_groupinfo_16k",
	"ext4_groupinfo_32k",
	"ext4_groupinfo_64k",
	"ext4_groupinfo_128k"
};
354
355static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
356 ext4_group_t group);
357static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
358 ext4_group_t group);
359static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
360
361static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
362{
363#if BITS_PER_LONG == 64
364 *bit += ((unsigned long) addr & 7UL) << 3;
365 addr = (void *) ((unsigned long) addr & ~7UL);
366#elif BITS_PER_LONG == 32
367 *bit += ((unsigned long) addr & 3UL) << 3;
368 addr = (void *) ((unsigned long) addr & ~3UL);
369#else
370#error "how many bits you are?!"
371#endif
372 return addr;
373}
374
/*
 * Test bit @bit of the bitmap at @addr.  @addr need not be long-aligned;
 * it is aligned (and @bit adjusted to match) before ext4_test_bit() is
 * called.  Returns whatever ext4_test_bit() returns.
 */
static inline int mb_test_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, aligned);
}
384
/*
 * Set bit @bit of the bitmap at @addr, tolerating an @addr that is not
 * long-aligned.
 */
static inline void mb_set_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, aligned);
}
390
/*
 * Clear bit @bit of the bitmap at @addr, tolerating an @addr that is not
 * long-aligned.
 */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, aligned);
}
396
/*
 * Find the first zero bit at or after @start in the bitmap at @addr,
 * which holds @max bits and need not be long-aligned.
 *
 * Returns the bit index relative to the caller's @addr, clamped to @max
 * when nothing is found below it.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;

	/* search from the aligned base, with all positions shifted to match */
	addr = mb_correct_addr_and_bit(&shift, addr);
	found = ext4_find_next_zero_bit(addr, max + shift, start + shift);
	found -= shift;

	return found > max ? max : found;
}
409
/*
 * Find the first set bit at or after @start in the bitmap at @addr,
 * which holds @max bits and need not be long-aligned.
 *
 * Returns the bit index relative to the caller's @addr, clamped to @max
 * when nothing is found below it.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int shift = 0;
	int found;

	/* search from the aligned base, with all positions shifted to match */
	addr = mb_correct_addr_and_bit(&shift, addr);
	found = ext4_find_next_bit(addr, max + shift, start + shift);
	found -= shift;

	return found > max ? max : found;
}
422
423static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
424{
425 char *bb;
426
427 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
428 BUG_ON(max == NULL);
429
430 if (order > e4b->bd_blkbits + 1) {
431 *max = 0;
432 return NULL;
433 }
434
435
436 if (order == 0) {
437 *max = 1 << (e4b->bd_blkbits + 3);
438 return EXT4_MB_BITMAP(e4b);
439 }
440
441 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
442 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
443
444 return bb;
445}
446
447#ifdef DOUBLE_CHECK
448static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
449 int first, int count)
450{
451 int i;
452 struct super_block *sb = e4b->bd_sb;
453
454 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
455 return;
456 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
457 for (i = 0; i < count; i++) {
458 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
459 ext4_fsblk_t blocknr;
460
461 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
462 blocknr += first + i;
463 ext4_grp_locked_error(sb, e4b->bd_group,
464 inode ? inode->i_ino : 0,
465 blocknr,
466 "freeing block already freed "
467 "(bit %u)",
468 first + i);
469 }
470 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
471 }
472}
473
474static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
475{
476 int i;
477
478 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
479 return;
480 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
481 for (i = 0; i < count; i++) {
482 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
483 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
484 }
485}
486
487static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
488{
489 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
490 unsigned char *b1, *b2;
491 int i;
492 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
493 b2 = (unsigned char *) bitmap;
494 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
495 if (b1[i] != b2[i]) {
496 ext4_msg(e4b->bd_sb, KERN_ERR,
497 "corruption in group %u "
498 "at byte %u(%u): %x in copy != %x "
499 "on disk/prealloc",
500 e4b->bd_group, i, i * 8, b1[i], b2[i]);
501 BUG();
502 }
503 }
504 }
505}
506
507#else
/* No-op stand-in used when DOUBLE_CHECK is not defined. */
static inline void mb_free_blocks_double(struct inode *inode,
					 struct ext4_buddy *e4b,
					 int first, int count)
{
}
/* No-op stand-in used when DOUBLE_CHECK is not defined. */
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
				       int first, int count)
{
}
/* No-op stand-in used when DOUBLE_CHECK is not defined. */
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
}
522#endif
523
524#ifdef AGGRESSIVE_CHECK
525
/*
 * Assertion used inside __mb_check_buddy().  It relies on the enclosing
 * function having variables named 'function', 'file' and 'line' in scope;
 * on failure it prints them together with the text of the failed
 * expression and then calls BUG().
 */
#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (unlikely(!(assert))) {					\
		printk(KERN_EMERG					\
		       "Assertion failure in %s() at %s:%d: \"%s\"\n",	\
		       function, file, line, # assert);			\
		BUG();							\
	}								\
} while (0)
535
536static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
537 const char *function, int line)
538{
539 struct super_block *sb = e4b->bd_sb;
540 int order = e4b->bd_blkbits + 1;
541 int max;
542 int max2;
543 int i;
544 int j;
545 int k;
546 int count;
547 struct ext4_group_info *grp;
548 int fragments = 0;
549 int fstart;
550 struct list_head *cur;
551 void *buddy;
552 void *buddy2;
553
554 {
555 static int mb_check_counter;
556 if (mb_check_counter++ % 100 != 0)
557 return 0;
558 }
559
560 while (order > 1) {
561 buddy = mb_find_buddy(e4b, order, &max);
562 MB_CHECK_ASSERT(buddy);
563 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
564 MB_CHECK_ASSERT(buddy2);
565 MB_CHECK_ASSERT(buddy != buddy2);
566 MB_CHECK_ASSERT(max * 2 == max2);
567
568 count = 0;
569 for (i = 0; i < max; i++) {
570
571 if (mb_test_bit(i, buddy)) {
572
573 if (!mb_test_bit(i << 1, buddy2)) {
574 MB_CHECK_ASSERT(
575 mb_test_bit((i<<1)+1, buddy2));
576 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
577 MB_CHECK_ASSERT(
578 mb_test_bit(i << 1, buddy2));
579 }
580 continue;
581 }
582
583
584 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
585 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
586
587 for (j = 0; j < (1 << order); j++) {
588 k = (i * (1 << order)) + j;
589 MB_CHECK_ASSERT(
590 !mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
591 }
592 count++;
593 }
594 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
595 order--;
596 }
597
598 fstart = -1;
599 buddy = mb_find_buddy(e4b, 0, &max);
600 for (i = 0; i < max; i++) {
601 if (!mb_test_bit(i, buddy)) {
602 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
603 if (fstart == -1) {
604 fragments++;
605 fstart = i;
606 }
607 continue;
608 }
609 fstart = -1;
610
611 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
612 buddy2 = mb_find_buddy(e4b, j, &max2);
613 k = i >> j;
614 MB_CHECK_ASSERT(k < max2);
615 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
616 }
617 }
618 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
619 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
620
621 grp = ext4_get_group_info(sb, e4b->bd_group);
622 list_for_each(cur, &grp->bb_prealloc_list) {
623 ext4_group_t groupnr;
624 struct ext4_prealloc_space *pa;
625 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
626 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
627 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
628 for (i = 0; i < pa->pa_len; i++)
629 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
630 }
631 return 0;
632}
633#undef MB_CHECK_ASSERT
634#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
635 __FILE__, __func__, __LINE__)
636#else
637#define mb_check_buddy(e4b)
638#endif
639
640
641
642
643
644
645
646static void ext4_mb_mark_free_simple(struct super_block *sb,
647 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
648 struct ext4_group_info *grp)
649{
650 struct ext4_sb_info *sbi = EXT4_SB(sb);
651 ext4_grpblk_t min;
652 ext4_grpblk_t max;
653 ext4_grpblk_t chunk;
654 unsigned short border;
655
656 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
657
658 border = 2 << sb->s_blocksize_bits;
659
660 while (len > 0) {
661
662 max = ffs(first | border) - 1;
663
664
665 min = fls(len) - 1;
666
667 if (max < min)
668 min = max;
669 chunk = 1 << min;
670
671
672 grp->bb_counters[min]++;
673 if (min > 0)
674 mb_clear_bit(first >> min,
675 buddy + sbi->s_mb_offsets[min]);
676
677 len -= chunk;
678 first += chunk;
679 }
680}
681
682
683
684
685
686static void
687mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
688{
689 int i;
690 int bits;
691
692 grp->bb_largest_free_order = -1;
693
694 bits = sb->s_blocksize_bits + 1;
695 for (i = bits; i >= 0; i--) {
696 if (grp->bb_counters[i] > 0) {
697 grp->bb_largest_free_order = i;
698 break;
699 }
700 }
701}
702
703static noinline_for_stack
704void ext4_mb_generate_buddy(struct super_block *sb,
705 void *buddy, void *bitmap, ext4_group_t group)
706{
707 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
708 ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
709 ext4_grpblk_t i = 0;
710 ext4_grpblk_t first;
711 ext4_grpblk_t len;
712 unsigned free = 0;
713 unsigned fragments = 0;
714 unsigned long long period = get_cycles();
715
716
717
718 i = mb_find_next_zero_bit(bitmap, max, 0);
719 grp->bb_first_free = i;
720 while (i < max) {
721 fragments++;
722 first = i;
723 i = mb_find_next_bit(bitmap, max, i);
724 len = i - first;
725 free += len;
726 if (len > 1)
727 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
728 else
729 grp->bb_counters[0]++;
730 if (i < max)
731 i = mb_find_next_zero_bit(bitmap, max, i);
732 }
733 grp->bb_fragments = fragments;
734
735 if (free != grp->bb_free) {
736 ext4_grp_locked_error(sb, group, 0, 0,
737 "%u blocks in bitmap, %u in gd",
738 free, grp->bb_free);
739
740
741
742
743 grp->bb_free = free;
744 }
745 mb_set_largest_free_order(sb, grp);
746
747 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
748
749 period = get_cycles() - period;
750 spin_lock(&EXT4_SB(sb)->s_bal_lock);
751 EXT4_SB(sb)->s_mb_buddies_generated++;
752 EXT4_SB(sb)->s_mb_generation_time += period;
753 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
754}
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776static int ext4_mb_init_cache(struct page *page, char *incore)
777{
778 ext4_group_t ngroups;
779 int blocksize;
780 int blocks_per_page;
781 int groups_per_page;
782 int err = 0;
783 int i;
784 ext4_group_t first_group;
785 int first_block;
786 struct super_block *sb;
787 struct buffer_head *bhs;
788 struct buffer_head **bh;
789 struct inode *inode;
790 char *data;
791 char *bitmap;
792 struct ext4_group_info *grinfo;
793
794 mb_debug(1, "init page %lu\n", page->index);
795
796 inode = page->mapping->host;
797 sb = inode->i_sb;
798 ngroups = ext4_get_groups_count(sb);
799 blocksize = 1 << inode->i_blkbits;
800 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
801
802 groups_per_page = blocks_per_page >> 1;
803 if (groups_per_page == 0)
804 groups_per_page = 1;
805
806
807 if (groups_per_page > 1) {
808 err = -ENOMEM;
809 i = sizeof(struct buffer_head *) * groups_per_page;
810 bh = kzalloc(i, GFP_NOFS);
811 if (bh == NULL)
812 goto out;
813 } else
814 bh = &bhs;
815
816 first_group = page->index * blocks_per_page / 2;
817
818
819 for (i = 0; i < groups_per_page; i++) {
820 struct ext4_group_desc *desc;
821
822 if (first_group + i >= ngroups)
823 break;
824
825 grinfo = ext4_get_group_info(sb, first_group + i);
826
827
828
829
830
831
832 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
833 bh[i] = NULL;
834 continue;
835 }
836
837 err = -EIO;
838 desc = ext4_get_group_desc(sb, first_group + i, NULL);
839 if (desc == NULL)
840 goto out;
841
842 err = -ENOMEM;
843 bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
844 if (bh[i] == NULL)
845 goto out;
846
847 if (bitmap_uptodate(bh[i]))
848 continue;
849
850 lock_buffer(bh[i]);
851 if (bitmap_uptodate(bh[i])) {
852 unlock_buffer(bh[i]);
853 continue;
854 }
855 ext4_lock_group(sb, first_group + i);
856 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
857 ext4_init_block_bitmap(sb, bh[i],
858 first_group + i, desc);
859 set_bitmap_uptodate(bh[i]);
860 set_buffer_uptodate(bh[i]);
861 ext4_unlock_group(sb, first_group + i);
862 unlock_buffer(bh[i]);
863 continue;
864 }
865 ext4_unlock_group(sb, first_group + i);
866 if (buffer_uptodate(bh[i])) {
867
868
869
870
871 set_bitmap_uptodate(bh[i]);
872 unlock_buffer(bh[i]);
873 continue;
874 }
875 get_bh(bh[i]);
876
877
878
879
880
881
882 set_bitmap_uptodate(bh[i]);
883 bh[i]->b_end_io = end_buffer_read_sync;
884 submit_bh(READ, bh[i]);
885 mb_debug(1, "read bitmap for group %u\n", first_group + i);
886 }
887
888
889 for (i = 0; i < groups_per_page; i++)
890 if (bh[i])
891 wait_on_buffer(bh[i]);
892
893 err = -EIO;
894 for (i = 0; i < groups_per_page; i++)
895 if (bh[i] && !buffer_uptodate(bh[i]))
896 goto out;
897
898 err = 0;
899 first_block = page->index * blocks_per_page;
900 for (i = 0; i < blocks_per_page; i++) {
901 int group;
902
903 group = (first_block + i) >> 1;
904 if (group >= ngroups)
905 break;
906
907 if (!bh[group - first_group])
908
909 continue;
910
911
912
913
914
915
916
917 data = page_address(page) + (i * blocksize);
918 bitmap = bh[group - first_group]->b_data;
919
920
921
922
923
924 if ((first_block + i) & 1) {
925
926 BUG_ON(incore == NULL);
927 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
928 group, page->index, i * blocksize);
929 trace_ext4_mb_buddy_bitmap_load(sb, group);
930 grinfo = ext4_get_group_info(sb, group);
931 grinfo->bb_fragments = 0;
932 memset(grinfo->bb_counters, 0,
933 sizeof(*grinfo->bb_counters) *
934 (sb->s_blocksize_bits+2));
935
936
937
938 ext4_lock_group(sb, group);
939
940 memset(data, 0xff, blocksize);
941 ext4_mb_generate_buddy(sb, data, incore, group);
942 ext4_unlock_group(sb, group);
943 incore = NULL;
944 } else {
945
946 BUG_ON(incore != NULL);
947 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
948 group, page->index, i * blocksize);
949 trace_ext4_mb_bitmap_load(sb, group);
950
951
952 ext4_lock_group(sb, group);
953 memcpy(data, bitmap, blocksize);
954
955
956 ext4_mb_generate_from_pa(sb, data, group);
957 ext4_mb_generate_from_freelist(sb, data, group);
958 ext4_unlock_group(sb, group);
959
960
961
962
963 incore = data;
964 }
965 }
966 SetPageUptodate(page);
967
968out:
969 if (bh) {
970 for (i = 0; i < groups_per_page; i++)
971 brelse(bh[i]);
972 if (bh != &bhs)
973 kfree(bh);
974 }
975 return err;
976}
977
978
979
980
981
982
983
984static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
985 ext4_group_t group, struct ext4_buddy *e4b)
986{
987 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
988 int block, pnum, poff;
989 int blocks_per_page;
990 struct page *page;
991
992 e4b->bd_buddy_page = NULL;
993 e4b->bd_bitmap_page = NULL;
994
995 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
996
997
998
999
1000
1001 block = group * 2;
1002 pnum = block / blocks_per_page;
1003 poff = block % blocks_per_page;
1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1005 if (!page)
1006 return -EIO;
1007 BUG_ON(page->mapping != inode->i_mapping);
1008 e4b->bd_bitmap_page = page;
1009 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1010
1011 if (blocks_per_page >= 2) {
1012
1013 return 0;
1014 }
1015
1016 block++;
1017 pnum = block / blocks_per_page;
1018 poff = block % blocks_per_page;
1019 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1020 if (!page)
1021 return -EIO;
1022 BUG_ON(page->mapping != inode->i_mapping);
1023 e4b->bd_buddy_page = page;
1024 return 0;
1025}
1026
1027static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1028{
1029 if (e4b->bd_bitmap_page) {
1030 unlock_page(e4b->bd_bitmap_page);
1031 page_cache_release(e4b->bd_bitmap_page);
1032 }
1033 if (e4b->bd_buddy_page) {
1034 unlock_page(e4b->bd_buddy_page);
1035 page_cache_release(e4b->bd_buddy_page);
1036 }
1037}
1038
1039
1040
1041
1042
1043
1044static noinline_for_stack
1045int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1046{
1047
1048 struct ext4_group_info *this_grp;
1049 struct ext4_buddy e4b;
1050 struct page *page;
1051 int ret = 0;
1052
1053 mb_debug(1, "init group %u\n", group);
1054 this_grp = ext4_get_group_info(sb, group);
1055
1056
1057
1058
1059
1060
1061
1062 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1063 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1064
1065
1066
1067
1068 goto err;
1069 }
1070
1071 page = e4b.bd_bitmap_page;
1072 ret = ext4_mb_init_cache(page, NULL);
1073 if (ret)
1074 goto err;
1075 if (!PageUptodate(page)) {
1076 ret = -EIO;
1077 goto err;
1078 }
1079 mark_page_accessed(page);
1080
1081 if (e4b.bd_buddy_page == NULL) {
1082
1083
1084
1085
1086
1087 ret = 0;
1088 goto err;
1089 }
1090
1091 page = e4b.bd_buddy_page;
1092 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1093 if (ret)
1094 goto err;
1095 if (!PageUptodate(page)) {
1096 ret = -EIO;
1097 goto err;
1098 }
1099 mark_page_accessed(page);
1100err:
1101 ext4_mb_put_buddy_page_lock(&e4b);
1102 return ret;
1103}
1104
1105
1106
1107
1108
1109
1110static noinline_for_stack int
1111ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1112 struct ext4_buddy *e4b)
1113{
1114 int blocks_per_page;
1115 int block;
1116 int pnum;
1117 int poff;
1118 struct page *page;
1119 int ret;
1120 struct ext4_group_info *grp;
1121 struct ext4_sb_info *sbi = EXT4_SB(sb);
1122 struct inode *inode = sbi->s_buddy_cache;
1123
1124 mb_debug(1, "load group %u\n", group);
1125
1126 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1127 grp = ext4_get_group_info(sb, group);
1128
1129 e4b->bd_blkbits = sb->s_blocksize_bits;
1130 e4b->bd_info = grp;
1131 e4b->bd_sb = sb;
1132 e4b->bd_group = group;
1133 e4b->bd_buddy_page = NULL;
1134 e4b->bd_bitmap_page = NULL;
1135
1136 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1137
1138
1139
1140
1141 ret = ext4_mb_init_group(sb, group);
1142 if (ret)
1143 return ret;
1144 }
1145
1146
1147
1148
1149
1150
1151 block = group * 2;
1152 pnum = block / blocks_per_page;
1153 poff = block % blocks_per_page;
1154
1155
1156
1157 page = find_get_page(inode->i_mapping, pnum);
1158 if (page == NULL || !PageUptodate(page)) {
1159 if (page)
1160
1161
1162
1163
1164
1165
1166
1167
1168 page_cache_release(page);
1169 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1170 if (page) {
1171 BUG_ON(page->mapping != inode->i_mapping);
1172 if (!PageUptodate(page)) {
1173 ret = ext4_mb_init_cache(page, NULL);
1174 if (ret) {
1175 unlock_page(page);
1176 goto err;
1177 }
1178 mb_cmp_bitmaps(e4b, page_address(page) +
1179 (poff * sb->s_blocksize));
1180 }
1181 unlock_page(page);
1182 }
1183 }
1184 if (page == NULL || !PageUptodate(page)) {
1185 ret = -EIO;
1186 goto err;
1187 }
1188 e4b->bd_bitmap_page = page;
1189 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1190 mark_page_accessed(page);
1191
1192 block++;
1193 pnum = block / blocks_per_page;
1194 poff = block % blocks_per_page;
1195
1196 page = find_get_page(inode->i_mapping, pnum);
1197 if (page == NULL || !PageUptodate(page)) {
1198 if (page)
1199 page_cache_release(page);
1200 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1201 if (page) {
1202 BUG_ON(page->mapping != inode->i_mapping);
1203 if (!PageUptodate(page)) {
1204 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
1205 if (ret) {
1206 unlock_page(page);
1207 goto err;
1208 }
1209 }
1210 unlock_page(page);
1211 }
1212 }
1213 if (page == NULL || !PageUptodate(page)) {
1214 ret = -EIO;
1215 goto err;
1216 }
1217 e4b->bd_buddy_page = page;
1218 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1219 mark_page_accessed(page);
1220
1221 BUG_ON(e4b->bd_bitmap_page == NULL);
1222 BUG_ON(e4b->bd_buddy_page == NULL);
1223
1224 return 0;
1225
1226err:
1227 if (page)
1228 page_cache_release(page);
1229 if (e4b->bd_bitmap_page)
1230 page_cache_release(e4b->bd_bitmap_page);
1231 if (e4b->bd_buddy_page)
1232 page_cache_release(e4b->bd_buddy_page);
1233 e4b->bd_buddy = NULL;
1234 e4b->bd_bitmap = NULL;
1235 return ret;
1236}
1237
1238static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1239{
1240 if (e4b->bd_bitmap_page)
1241 page_cache_release(e4b->bd_bitmap_page);
1242 if (e4b->bd_buddy_page)
1243 page_cache_release(e4b->bd_buddy_page);
1244}
1245
1246
1247static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1248{
1249 int order = 1;
1250 void *bb;
1251
1252 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
1253 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1254
1255 bb = EXT4_MB_BUDDY(e4b);
1256 while (order <= e4b->bd_blkbits + 1) {
1257 block = block >> 1;
1258 if (!mb_test_bit(block, bb)) {
1259
1260 return order;
1261 }
1262 bb += 1 << (e4b->bd_blkbits - order);
1263 order++;
1264 }
1265 return 0;
1266}
1267
1268static void mb_clear_bits(void *bm, int cur, int len)
1269{
1270 __u32 *addr;
1271
1272 len = cur + len;
1273 while (cur < len) {
1274 if ((cur & 31) == 0 && (len - cur) >= 32) {
1275
1276 addr = bm + (cur >> 3);
1277 *addr = 0;
1278 cur += 32;
1279 continue;
1280 }
1281 mb_clear_bit(cur, bm);
1282 cur++;
1283 }
1284}
1285
1286void ext4_set_bits(void *bm, int cur, int len)
1287{
1288 __u32 *addr;
1289
1290 len = cur + len;
1291 while (cur < len) {
1292 if ((cur & 31) == 0 && (len - cur) >= 32) {
1293
1294 addr = bm + (cur >> 3);
1295 *addr = 0xffffffff;
1296 cur += 32;
1297 continue;
1298 }
1299 mb_set_bit(cur, bm);
1300 cur++;
1301 }
1302}
1303
1304static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1305 int first, int count)
1306{
1307 int block = 0;
1308 int max = 0;
1309 int order;
1310 void *buddy;
1311 void *buddy2;
1312 struct super_block *sb = e4b->bd_sb;
1313
1314 BUG_ON(first + count > (sb->s_blocksize << 3));
1315 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1316 mb_check_buddy(e4b);
1317 mb_free_blocks_double(inode, e4b, first, count);
1318
1319 e4b->bd_info->bb_free += count;
1320 if (first < e4b->bd_info->bb_first_free)
1321 e4b->bd_info->bb_first_free = first;
1322
1323
1324 if (first != 0)
1325 block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
1326 if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
1327 max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
1328 if (block && max)
1329 e4b->bd_info->bb_fragments--;
1330 else if (!block && !max)
1331 e4b->bd_info->bb_fragments++;
1332
1333
1334 while (count-- > 0) {
1335 block = first++;
1336 order = 0;
1337
1338 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
1339 ext4_fsblk_t blocknr;
1340
1341 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1342 blocknr += block;
1343 ext4_grp_locked_error(sb, e4b->bd_group,
1344 inode ? inode->i_ino : 0,
1345 blocknr,
1346 "freeing already freed block "
1347 "(bit %u)", block);
1348 }
1349 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1350 e4b->bd_info->bb_counters[order]++;
1351
1352
1353 buddy = mb_find_buddy(e4b, order, &max);
1354
1355 do {
1356 block &= ~1UL;
1357 if (mb_test_bit(block, buddy) ||
1358 mb_test_bit(block + 1, buddy))
1359 break;
1360
1361
1362 buddy2 = mb_find_buddy(e4b, order + 1, &max);
1363
1364 if (!buddy2)
1365 break;
1366
1367 if (order > 0) {
1368
1369
1370 mb_set_bit(block, buddy);
1371 mb_set_bit(block + 1, buddy);
1372 }
1373 e4b->bd_info->bb_counters[order]--;
1374 e4b->bd_info->bb_counters[order]--;
1375
1376 block = block >> 1;
1377 order++;
1378 e4b->bd_info->bb_counters[order]++;
1379
1380 mb_clear_bit(block, buddy2);
1381 buddy = buddy2;
1382 } while (1);
1383 }
1384 mb_set_largest_free_order(sb, e4b->bd_info);
1385 mb_check_buddy(e4b);
1386}
1387
1388static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1389 int needed, struct ext4_free_extent *ex)
1390{
1391 int next = block;
1392 int max;
1393 int ord;
1394 void *buddy;
1395
1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1397 BUG_ON(ex == NULL);
1398
1399 buddy = mb_find_buddy(e4b, order, &max);
1400 BUG_ON(buddy == NULL);
1401 BUG_ON(block >= max);
1402 if (mb_test_bit(block, buddy)) {
1403 ex->fe_len = 0;
1404 ex->fe_start = 0;
1405 ex->fe_group = 0;
1406 return 0;
1407 }
1408
1409
1410 if (likely(order == 0)) {
1411
1412 order = mb_find_order_for_block(e4b, block);
1413 block = block >> order;
1414 }
1415
1416 ex->fe_len = 1 << order;
1417 ex->fe_start = block << order;
1418 ex->fe_group = e4b->bd_group;
1419
1420
1421 next = next - ex->fe_start;
1422 ex->fe_len -= next;
1423 ex->fe_start += next;
1424
1425 while (needed > ex->fe_len &&
1426 (buddy = mb_find_buddy(e4b, order, &max))) {
1427
1428 if (block + 1 >= max)
1429 break;
1430
1431 next = (block + 1) * (1 << order);
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
1433 break;
1434
1435 ord = mb_find_order_for_block(e4b, next);
1436
1437 order = ord;
1438 block = next >> order;
1439 ex->fe_len += 1 << order;
1440 }
1441
1442 BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
1443 return ex->fe_len;
1444}
1445
1446static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1447{
1448 int ord;
1449 int mlen = 0;
1450 int max = 0;
1451 int cur;
1452 int start = ex->fe_start;
1453 int len = ex->fe_len;
1454 unsigned ret = 0;
1455 int len0 = len;
1456 void *buddy;
1457
1458 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1459 BUG_ON(e4b->bd_group != ex->fe_group);
1460 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1461 mb_check_buddy(e4b);
1462 mb_mark_used_double(e4b, start, len);
1463
1464 e4b->bd_info->bb_free -= len;
1465 if (e4b->bd_info->bb_first_free == start)
1466 e4b->bd_info->bb_first_free += len;
1467
1468
1469 if (start != 0)
1470 mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
1471 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1472 max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
1473 if (mlen && max)
1474 e4b->bd_info->bb_fragments++;
1475 else if (!mlen && !max)
1476 e4b->bd_info->bb_fragments--;
1477
1478
1479 while (len) {
1480 ord = mb_find_order_for_block(e4b, start);
1481
1482 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1483
1484 mlen = 1 << ord;
1485 buddy = mb_find_buddy(e4b, ord, &max);
1486 BUG_ON((start >> ord) >= max);
1487 mb_set_bit(start >> ord, buddy);
1488 e4b->bd_info->bb_counters[ord]--;
1489 start += mlen;
1490 len -= mlen;
1491 BUG_ON(len < 0);
1492 continue;
1493 }
1494
1495
1496 if (ret == 0)
1497 ret = len | (ord << 16);
1498
1499
1500 BUG_ON(ord <= 0);
1501 buddy = mb_find_buddy(e4b, ord, &max);
1502 mb_set_bit(start >> ord, buddy);
1503 e4b->bd_info->bb_counters[ord]--;
1504
1505 ord--;
1506 cur = (start >> ord) & ~1U;
1507 buddy = mb_find_buddy(e4b, ord, &max);
1508 mb_clear_bit(cur, buddy);
1509 mb_clear_bit(cur + 1, buddy);
1510 e4b->bd_info->bb_counters[ord]++;
1511 e4b->bd_info->bb_counters[ord]++;
1512 }
1513 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1514
1515 ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1516 mb_check_buddy(e4b);
1517
1518 return ret;
1519}
1520
1521
1522
1523
1524static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1525 struct ext4_buddy *e4b)
1526{
1527 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1528 int ret;
1529
1530 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1531 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1532
1533 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1534 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1535 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1536
1537
1538
1539 ac->ac_f_ex = ac->ac_b_ex;
1540
1541 ac->ac_status = AC_STATUS_FOUND;
1542 ac->ac_tail = ret & 0xffff;
1543 ac->ac_buddy = ret >> 16;
1544
1545
1546
1547
1548
1549
1550
1551
1552 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1553 get_page(ac->ac_bitmap_page);
1554 ac->ac_buddy_page = e4b->bd_buddy_page;
1555 get_page(ac->ac_buddy_page);
1556
1557 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1558 spin_lock(&sbi->s_md_lock);
1559 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1560 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1561 spin_unlock(&sbi->s_md_lock);
1562 }
1563}
1564
1565
1566
1567
1568
1569static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1570 struct ext4_buddy *e4b,
1571 int finish_group)
1572{
1573 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1574 struct ext4_free_extent *bex = &ac->ac_b_ex;
1575 struct ext4_free_extent *gex = &ac->ac_g_ex;
1576 struct ext4_free_extent ex;
1577 int max;
1578
1579 if (ac->ac_status == AC_STATUS_FOUND)
1580 return;
1581
1582
1583
1584 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1585 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1586 ac->ac_status = AC_STATUS_BREAK;
1587 return;
1588 }
1589
1590
1591
1592
1593 if (bex->fe_len < gex->fe_len)
1594 return;
1595
1596 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1597 && bex->fe_group == e4b->bd_group) {
1598
1599
1600
1601 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
1602 if (max >= gex->fe_len) {
1603 ext4_mb_use_best_found(ac, e4b);
1604 return;
1605 }
1606 }
1607}
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1620 struct ext4_free_extent *ex,
1621 struct ext4_buddy *e4b)
1622{
1623 struct ext4_free_extent *bex = &ac->ac_b_ex;
1624 struct ext4_free_extent *gex = &ac->ac_g_ex;
1625
1626 BUG_ON(ex->fe_len <= 0);
1627 BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1628 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1629 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1630
1631 ac->ac_found++;
1632
1633
1634
1635
1636 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1637 *bex = *ex;
1638 ext4_mb_use_best_found(ac, e4b);
1639 return;
1640 }
1641
1642
1643
1644
1645 if (ex->fe_len == gex->fe_len) {
1646 *bex = *ex;
1647 ext4_mb_use_best_found(ac, e4b);
1648 return;
1649 }
1650
1651
1652
1653
1654 if (bex->fe_len == 0) {
1655 *bex = *ex;
1656 return;
1657 }
1658
1659
1660
1661
1662 if (bex->fe_len < gex->fe_len) {
1663
1664
1665 if (ex->fe_len > bex->fe_len)
1666 *bex = *ex;
1667 } else if (ex->fe_len > gex->fe_len) {
1668
1669
1670
1671 if (ex->fe_len < bex->fe_len)
1672 *bex = *ex;
1673 }
1674
1675 ext4_mb_check_limits(ac, e4b, 0);
1676}
1677
1678static noinline_for_stack
1679int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1680 struct ext4_buddy *e4b)
1681{
1682 struct ext4_free_extent ex = ac->ac_b_ex;
1683 ext4_group_t group = ex.fe_group;
1684 int max;
1685 int err;
1686
1687 BUG_ON(ex.fe_len <= 0);
1688 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1689 if (err)
1690 return err;
1691
1692 ext4_lock_group(ac->ac_sb, group);
1693 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
1694
1695 if (max > 0) {
1696 ac->ac_b_ex = ex;
1697 ext4_mb_use_best_found(ac, e4b);
1698 }
1699
1700 ext4_unlock_group(ac->ac_sb, group);
1701 ext4_mb_unload_buddy(e4b);
1702
1703 return 0;
1704}
1705
1706static noinline_for_stack
1707int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1708 struct ext4_buddy *e4b)
1709{
1710 ext4_group_t group = ac->ac_g_ex.fe_group;
1711 int max;
1712 int err;
1713 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1714 struct ext4_free_extent ex;
1715
1716 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1717 return 0;
1718
1719 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1720 if (err)
1721 return err;
1722
1723 ext4_lock_group(ac->ac_sb, group);
1724 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
1725 ac->ac_g_ex.fe_len, &ex);
1726
1727 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1728 ext4_fsblk_t start;
1729
1730 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1731 ex.fe_start;
1732
1733 if (do_div(start, sbi->s_stripe) == 0) {
1734 ac->ac_found++;
1735 ac->ac_b_ex = ex;
1736 ext4_mb_use_best_found(ac, e4b);
1737 }
1738 } else if (max >= ac->ac_g_ex.fe_len) {
1739 BUG_ON(ex.fe_len <= 0);
1740 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1741 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1742 ac->ac_found++;
1743 ac->ac_b_ex = ex;
1744 ext4_mb_use_best_found(ac, e4b);
1745 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1746
1747
1748 BUG_ON(ex.fe_len <= 0);
1749 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1750 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1751 ac->ac_found++;
1752 ac->ac_b_ex = ex;
1753 ext4_mb_use_best_found(ac, e4b);
1754 }
1755 ext4_unlock_group(ac->ac_sb, group);
1756 ext4_mb_unload_buddy(e4b);
1757
1758 return 0;
1759}
1760
1761
1762
1763
1764
1765static noinline_for_stack
1766void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1767 struct ext4_buddy *e4b)
1768{
1769 struct super_block *sb = ac->ac_sb;
1770 struct ext4_group_info *grp = e4b->bd_info;
1771 void *buddy;
1772 int i;
1773 int k;
1774 int max;
1775
1776 BUG_ON(ac->ac_2order <= 0);
1777 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1778 if (grp->bb_counters[i] == 0)
1779 continue;
1780
1781 buddy = mb_find_buddy(e4b, i, &max);
1782 BUG_ON(buddy == NULL);
1783
1784 k = mb_find_next_zero_bit(buddy, max, 0);
1785 BUG_ON(k >= max);
1786
1787 ac->ac_found++;
1788
1789 ac->ac_b_ex.fe_len = 1 << i;
1790 ac->ac_b_ex.fe_start = k << i;
1791 ac->ac_b_ex.fe_group = e4b->bd_group;
1792
1793 ext4_mb_use_best_found(ac, e4b);
1794
1795 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1796
1797 if (EXT4_SB(sb)->s_mb_stats)
1798 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1799
1800 break;
1801 }
1802}
1803
1804
1805
1806
1807
1808
1809static noinline_for_stack
1810void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1811 struct ext4_buddy *e4b)
1812{
1813 struct super_block *sb = ac->ac_sb;
1814 void *bitmap = EXT4_MB_BITMAP(e4b);
1815 struct ext4_free_extent ex;
1816 int i;
1817 int free;
1818
1819 free = e4b->bd_info->bb_free;
1820 BUG_ON(free <= 0);
1821
1822 i = e4b->bd_info->bb_first_free;
1823
1824 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1825 i = mb_find_next_zero_bit(bitmap,
1826 EXT4_BLOCKS_PER_GROUP(sb), i);
1827 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
1828
1829
1830
1831
1832
1833 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1834 "%d free blocks as per "
1835 "group info. But bitmap says 0",
1836 free);
1837 break;
1838 }
1839
1840 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1841 BUG_ON(ex.fe_len <= 0);
1842 if (free < ex.fe_len) {
1843 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1844 "%d free blocks as per "
1845 "group info. But got %d blocks",
1846 free, ex.fe_len);
1847
1848
1849
1850
1851
1852 break;
1853 }
1854
1855 ext4_mb_measure_extent(ac, &ex, e4b);
1856
1857 i += ex.fe_len;
1858 free -= ex.fe_len;
1859 }
1860
1861 ext4_mb_check_limits(ac, e4b, 1);
1862}
1863
1864
1865
1866
1867
1868static noinline_for_stack
1869void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1870 struct ext4_buddy *e4b)
1871{
1872 struct super_block *sb = ac->ac_sb;
1873 struct ext4_sb_info *sbi = EXT4_SB(sb);
1874 void *bitmap = EXT4_MB_BITMAP(e4b);
1875 struct ext4_free_extent ex;
1876 ext4_fsblk_t first_group_block;
1877 ext4_fsblk_t a;
1878 ext4_grpblk_t i;
1879 int max;
1880
1881 BUG_ON(sbi->s_stripe == 0);
1882
1883
1884 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1885
1886 a = first_group_block + sbi->s_stripe - 1;
1887 do_div(a, sbi->s_stripe);
1888 i = (a * sbi->s_stripe) - first_group_block;
1889
1890 while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
1891 if (!mb_test_bit(i, bitmap)) {
1892 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1893 if (max >= sbi->s_stripe) {
1894 ac->ac_found++;
1895 ac->ac_b_ex = ex;
1896 ext4_mb_use_best_found(ac, e4b);
1897 break;
1898 }
1899 }
1900 i += sbi->s_stripe;
1901 }
1902}
1903
1904
1905static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1906 ext4_group_t group, int cr)
1907{
1908 unsigned free, fragments;
1909 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1910 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1911
1912 BUG_ON(cr < 0 || cr >= 4);
1913
1914
1915 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1916 int ret = ext4_mb_init_group(ac->ac_sb, group);
1917 if (ret)
1918 return 0;
1919 }
1920
1921 free = grp->bb_free;
1922 fragments = grp->bb_fragments;
1923 if (free == 0)
1924 return 0;
1925 if (fragments == 0)
1926 return 0;
1927
1928 switch (cr) {
1929 case 0:
1930 BUG_ON(ac->ac_2order == 0);
1931
1932 if (grp->bb_largest_free_order < ac->ac_2order)
1933 return 0;
1934
1935
1936 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1937 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1938 ((group % flex_size) == 0))
1939 return 0;
1940
1941 return 1;
1942 case 1:
1943 if ((free / fragments) >= ac->ac_g_ex.fe_len)
1944 return 1;
1945 break;
1946 case 2:
1947 if (free >= ac->ac_g_ex.fe_len)
1948 return 1;
1949 break;
1950 case 3:
1951 return 1;
1952 default:
1953 BUG();
1954 }
1955
1956 return 0;
1957}
1958
1959static noinline_for_stack int
1960ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1961{
1962 ext4_group_t ngroups, group, i;
1963 int cr;
1964 int err = 0;
1965 struct ext4_sb_info *sbi;
1966 struct super_block *sb;
1967 struct ext4_buddy e4b;
1968
1969 sb = ac->ac_sb;
1970 sbi = EXT4_SB(sb);
1971 ngroups = ext4_get_groups_count(sb);
1972
1973 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
1974 ngroups = sbi->s_blockfile_groups;
1975
1976 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1977
1978
1979 err = ext4_mb_find_by_goal(ac, &e4b);
1980 if (err || ac->ac_status == AC_STATUS_FOUND)
1981 goto out;
1982
1983 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
1984 goto out;
1985
1986
1987
1988
1989
1990
1991 i = fls(ac->ac_g_ex.fe_len);
1992 ac->ac_2order = 0;
1993
1994
1995
1996
1997
1998 if (i >= sbi->s_mb_order2_reqs) {
1999
2000
2001
2002 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2003 ac->ac_2order = i - 1;
2004 }
2005
2006
2007 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2008
2009 spin_lock(&sbi->s_md_lock);
2010 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2011 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2012 spin_unlock(&sbi->s_md_lock);
2013 }
2014
2015
2016 cr = ac->ac_2order ? 0 : 1;
2017
2018
2019
2020
2021repeat:
2022 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2023 ac->ac_criteria = cr;
2024
2025
2026
2027
2028 group = ac->ac_g_ex.fe_group;
2029
2030 for (i = 0; i < ngroups; group++, i++) {
2031 if (group == ngroups)
2032 group = 0;
2033
2034
2035 if (!ext4_mb_good_group(ac, group, cr))
2036 continue;
2037
2038 err = ext4_mb_load_buddy(sb, group, &e4b);
2039 if (err)
2040 goto out;
2041
2042 ext4_lock_group(sb, group);
2043
2044
2045
2046
2047
2048 if (!ext4_mb_good_group(ac, group, cr)) {
2049 ext4_unlock_group(sb, group);
2050 ext4_mb_unload_buddy(&e4b);
2051 continue;
2052 }
2053
2054 ac->ac_groups_scanned++;
2055 if (cr == 0)
2056 ext4_mb_simple_scan_group(ac, &e4b);
2057 else if (cr == 1 && sbi->s_stripe &&
2058 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2059 ext4_mb_scan_aligned(ac, &e4b);
2060 else
2061 ext4_mb_complex_scan_group(ac, &e4b);
2062
2063 ext4_unlock_group(sb, group);
2064 ext4_mb_unload_buddy(&e4b);
2065
2066 if (ac->ac_status != AC_STATUS_CONTINUE)
2067 break;
2068 }
2069 }
2070
2071 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2072 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2073
2074
2075
2076
2077
2078 ext4_mb_try_best_found(ac, &e4b);
2079 if (ac->ac_status != AC_STATUS_FOUND) {
2080
2081
2082
2083
2084
2085
2086 ac->ac_b_ex.fe_group = 0;
2087 ac->ac_b_ex.fe_start = 0;
2088 ac->ac_b_ex.fe_len = 0;
2089 ac->ac_status = AC_STATUS_CONTINUE;
2090 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2091 cr = 3;
2092 atomic_inc(&sbi->s_mb_lost_chunks);
2093 goto repeat;
2094 }
2095 }
2096out:
2097 return err;
2098}
2099
2100static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2101{
2102 struct super_block *sb = seq->private;
2103 ext4_group_t group;
2104
2105 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2106 return NULL;
2107 group = *pos + 1;
2108 return (void *) ((unsigned long) group);
2109}
2110
2111static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2112{
2113 struct super_block *sb = seq->private;
2114 ext4_group_t group;
2115
2116 ++*pos;
2117 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2118 return NULL;
2119 group = *pos + 1;
2120 return (void *) ((unsigned long) group);
2121}
2122
2123static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2124{
2125 struct super_block *sb = seq->private;
2126 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2127 int i;
2128 int err;
2129 struct ext4_buddy e4b;
2130 struct sg {
2131 struct ext4_group_info info;
2132 ext4_grpblk_t counters[16];
2133 } sg;
2134
2135 group--;
2136 if (group == 0)
2137 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2138 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2139 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2140 "group", "free", "frags", "first",
2141 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2142 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2143
2144 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2145 sizeof(struct ext4_group_info);
2146 err = ext4_mb_load_buddy(sb, group, &e4b);
2147 if (err) {
2148 seq_printf(seq, "#%-5u: I/O error\n", group);
2149 return 0;
2150 }
2151 ext4_lock_group(sb, group);
2152 memcpy(&sg, ext4_get_group_info(sb, group), i);
2153 ext4_unlock_group(sb, group);
2154 ext4_mb_unload_buddy(&e4b);
2155
2156 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2157 sg.info.bb_fragments, sg.info.bb_first_free);
2158 for (i = 0; i <= 13; i++)
2159 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2160 sg.info.bb_counters[i] : 0);
2161 seq_printf(seq, " ]\n");
2162
2163 return 0;
2164}
2165
/* seq_file ->stop: nothing to do, this function has an empty body. */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
2169
2170static const struct seq_operations ext4_mb_seq_groups_ops = {
2171 .start = ext4_mb_seq_groups_start,
2172 .next = ext4_mb_seq_groups_next,
2173 .stop = ext4_mb_seq_groups_stop,
2174 .show = ext4_mb_seq_groups_show,
2175};
2176
2177static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2178{
2179 struct super_block *sb = PDE(inode)->data;
2180 int rc;
2181
2182 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2183 if (rc == 0) {
2184 struct seq_file *m = file->private_data;
2185 m->private = sb;
2186 }
2187 return rc;
2188
2189}
2190
2191static const struct file_operations ext4_mb_seq_groups_fops = {
2192 .owner = THIS_MODULE,
2193 .open = ext4_mb_seq_groups_open,
2194 .read = seq_read,
2195 .llseek = seq_lseek,
2196 .release = seq_release,
2197};
2198
2199static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2200{
2201 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2202 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2203
2204 BUG_ON(!cachep);
2205 return cachep;
2206}
2207
2208
2209int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2210 struct ext4_group_desc *desc)
2211{
2212 int i;
2213 int metalen = 0;
2214 struct ext4_sb_info *sbi = EXT4_SB(sb);
2215 struct ext4_group_info **meta_group_info;
2216 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2217
2218
2219
2220
2221
2222
2223 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2224 metalen = sizeof(*meta_group_info) <<
2225 EXT4_DESC_PER_BLOCK_BITS(sb);
2226 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2227 if (meta_group_info == NULL) {
2228 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
2229 "for a buddy group");
2230 goto exit_meta_group_info;
2231 }
2232 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2233 meta_group_info;
2234 }
2235
2236 meta_group_info =
2237 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2238 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2239
2240 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2241 if (meta_group_info[i] == NULL) {
2242 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
2243 goto exit_group_info;
2244 }
2245 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2246 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2247 &(meta_group_info[i]->bb_state));
2248
2249
2250
2251
2252
2253 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2254 meta_group_info[i]->bb_free =
2255 ext4_free_blocks_after_init(sb, group, desc);
2256 } else {
2257 meta_group_info[i]->bb_free =
2258 ext4_free_blks_count(sb, desc);
2259 }
2260
2261 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2262 init_rwsem(&meta_group_info[i]->alloc_sem);
2263 meta_group_info[i]->bb_free_root = RB_ROOT;
2264 meta_group_info[i]->bb_largest_free_order = -1;
2265
2266#ifdef DOUBLE_CHECK
2267 {
2268 struct buffer_head *bh;
2269 meta_group_info[i]->bb_bitmap =
2270 kmalloc(sb->s_blocksize, GFP_KERNEL);
2271 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2272 bh = ext4_read_block_bitmap(sb, group);
2273 BUG_ON(bh == NULL);
2274 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2275 sb->s_blocksize);
2276 put_bh(bh);
2277 }
2278#endif
2279
2280 return 0;
2281
2282exit_group_info:
2283
2284 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2285 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2286 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2287 }
2288exit_meta_group_info:
2289 return -ENOMEM;
2290}
2291
2292static int ext4_mb_init_backend(struct super_block *sb)
2293{
2294 ext4_group_t ngroups = ext4_get_groups_count(sb);
2295 ext4_group_t i;
2296 struct ext4_sb_info *sbi = EXT4_SB(sb);
2297 struct ext4_super_block *es = sbi->s_es;
2298 int num_meta_group_infos;
2299 int num_meta_group_infos_max;
2300 int array_size;
2301 struct ext4_group_desc *desc;
2302 struct kmem_cache *cachep;
2303
2304
2305 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2306 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319 num_meta_group_infos_max = num_meta_group_infos +
2320 le16_to_cpu(es->s_reserved_gdt_blocks);
2321
2322
2323
2324
2325
2326
2327
2328 array_size = 1;
2329 while (array_size < sizeof(*sbi->s_group_info) *
2330 num_meta_group_infos_max)
2331 array_size = array_size << 1;
2332
2333
2334
2335 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2336 if (sbi->s_group_info == NULL) {
2337 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2338 return -ENOMEM;
2339 }
2340 sbi->s_buddy_cache = new_inode(sb);
2341 if (sbi->s_buddy_cache == NULL) {
2342 ext4_msg(sb, KERN_ERR, "can't get new inode");
2343 goto err_freesgi;
2344 }
2345
2346
2347
2348
2349 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2350 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2351 for (i = 0; i < ngroups; i++) {
2352 desc = ext4_get_group_desc(sb, i, NULL);
2353 if (desc == NULL) {
2354 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2355 goto err_freebuddy;
2356 }
2357 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2358 goto err_freebuddy;
2359 }
2360
2361 return 0;
2362
2363err_freebuddy:
2364 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2365 while (i-- > 0)
2366 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2367 i = num_meta_group_infos;
2368 while (i-- > 0)
2369 kfree(sbi->s_group_info[i]);
2370 iput(sbi->s_buddy_cache);
2371err_freesgi:
2372 ext4_kvfree(sbi->s_group_info);
2373 return -ENOMEM;
2374}
2375
2376static void ext4_groupinfo_destroy_slabs(void)
2377{
2378 int i;
2379
2380 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2381 if (ext4_groupinfo_caches[i])
2382 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2383 ext4_groupinfo_caches[i] = NULL;
2384 }
2385}
2386
2387static int ext4_groupinfo_create_slab(size_t size)
2388{
2389 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2390 int slab_size;
2391 int blocksize_bits = order_base_2(size);
2392 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2393 struct kmem_cache *cachep;
2394
2395 if (cache_index >= NR_GRPINFO_CACHES)
2396 return -EINVAL;
2397
2398 if (unlikely(cache_index < 0))
2399 cache_index = 0;
2400
2401 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2402 if (ext4_groupinfo_caches[cache_index]) {
2403 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2404 return 0;
2405 }
2406
2407 slab_size = offsetof(struct ext4_group_info,
2408 bb_counters[blocksize_bits + 2]);
2409
2410 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2411 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2412 NULL);
2413
2414 ext4_groupinfo_caches[cache_index] = cachep;
2415
2416 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2417 if (!cachep) {
2418 printk(KERN_EMERG
2419 "EXT4-fs: no memory for groupinfo slab cache\n");
2420 return -ENOMEM;
2421 }
2422
2423 return 0;
2424}
2425
2426int ext4_mb_init(struct super_block *sb, int needs_recovery)
2427{
2428 struct ext4_sb_info *sbi = EXT4_SB(sb);
2429 unsigned i, j;
2430 unsigned offset;
2431 unsigned max;
2432 int ret;
2433
2434 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2435
2436 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2437 if (sbi->s_mb_offsets == NULL) {
2438 ret = -ENOMEM;
2439 goto out;
2440 }
2441
2442 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2443 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2444 if (sbi->s_mb_maxs == NULL) {
2445 ret = -ENOMEM;
2446 goto out;
2447 }
2448
2449 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2450 if (ret < 0)
2451 goto out;
2452
2453
2454 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2455 sbi->s_mb_offsets[0] = 0;
2456
2457 i = 1;
2458 offset = 0;
2459 max = sb->s_blocksize << 2;
2460 do {
2461 sbi->s_mb_offsets[i] = offset;
2462 sbi->s_mb_maxs[i] = max;
2463 offset += 1 << (sb->s_blocksize_bits - i);
2464 max = max >> 1;
2465 i++;
2466 } while (i <= sb->s_blocksize_bits + 1);
2467
2468 spin_lock_init(&sbi->s_md_lock);
2469 spin_lock_init(&sbi->s_bal_lock);
2470
2471 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2472 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2473 sbi->s_mb_stats = MB_DEFAULT_STATS;
2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2477
2478
2479
2480
2481
2482
2483
2484
2485 if (sbi->s_stripe > 1) {
2486 sbi->s_mb_group_prealloc = roundup(
2487 sbi->s_mb_group_prealloc, sbi->s_stripe);
2488 }
2489
2490 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2491 if (sbi->s_locality_groups == NULL) {
2492 ret = -ENOMEM;
2493 goto out;
2494 }
2495 for_each_possible_cpu(i) {
2496 struct ext4_locality_group *lg;
2497 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2498 mutex_init(&lg->lg_mutex);
2499 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2500 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2501 spin_lock_init(&lg->lg_prealloc_lock);
2502 }
2503
2504
2505 ret = ext4_mb_init_backend(sb);
2506 if (ret != 0) {
2507 goto out;
2508 }
2509
2510 if (sbi->s_proc)
2511 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2512 &ext4_mb_seq_groups_fops, sb);
2513
2514 if (sbi->s_journal)
2515 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2516out:
2517 if (ret) {
2518 kfree(sbi->s_mb_offsets);
2519 kfree(sbi->s_mb_maxs);
2520 }
2521 return ret;
2522}
2523
2524
2525static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2526{
2527 struct ext4_prealloc_space *pa;
2528 struct list_head *cur, *tmp;
2529 int count = 0;
2530
2531 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2532 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2533 list_del(&pa->pa_group_list);
2534 count++;
2535 kmem_cache_free(ext4_pspace_cachep, pa);
2536 }
2537 if (count)
2538 mb_debug(1, "mballoc: %u PAs left\n", count);
2539
2540}
2541
2542int ext4_mb_release(struct super_block *sb)
2543{
2544 ext4_group_t ngroups = ext4_get_groups_count(sb);
2545 ext4_group_t i;
2546 int num_meta_group_infos;
2547 struct ext4_group_info *grinfo;
2548 struct ext4_sb_info *sbi = EXT4_SB(sb);
2549 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2550
2551 if (sbi->s_group_info) {
2552 for (i = 0; i < ngroups; i++) {
2553 grinfo = ext4_get_group_info(sb, i);
2554#ifdef DOUBLE_CHECK
2555 kfree(grinfo->bb_bitmap);
2556#endif
2557 ext4_lock_group(sb, i);
2558 ext4_mb_cleanup_pa(grinfo);
2559 ext4_unlock_group(sb, i);
2560 kmem_cache_free(cachep, grinfo);
2561 }
2562 num_meta_group_infos = (ngroups +
2563 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2564 EXT4_DESC_PER_BLOCK_BITS(sb);
2565 for (i = 0; i < num_meta_group_infos; i++)
2566 kfree(sbi->s_group_info[i]);
2567 ext4_kvfree(sbi->s_group_info);
2568 }
2569 kfree(sbi->s_mb_offsets);
2570 kfree(sbi->s_mb_maxs);
2571 if (sbi->s_buddy_cache)
2572 iput(sbi->s_buddy_cache);
2573 if (sbi->s_mb_stats) {
2574 ext4_msg(sb, KERN_INFO,
2575 "mballoc: %u blocks %u reqs (%u success)",
2576 atomic_read(&sbi->s_bal_allocated),
2577 atomic_read(&sbi->s_bal_reqs),
2578 atomic_read(&sbi->s_bal_success));
2579 ext4_msg(sb, KERN_INFO,
2580 "mballoc: %u extents scanned, %u goal hits, "
2581 "%u 2^N hits, %u breaks, %u lost",
2582 atomic_read(&sbi->s_bal_ex_scanned),
2583 atomic_read(&sbi->s_bal_goals),
2584 atomic_read(&sbi->s_bal_2orders),
2585 atomic_read(&sbi->s_bal_breaks),
2586 atomic_read(&sbi->s_mb_lost_chunks));
2587 ext4_msg(sb, KERN_INFO,
2588 "mballoc: %lu generated and it took %Lu",
2589 sbi->s_mb_buddies_generated,
2590 sbi->s_mb_generation_time);
2591 ext4_msg(sb, KERN_INFO,
2592 "mballoc: %u preallocated, %u discarded",
2593 atomic_read(&sbi->s_mb_preallocated),
2594 atomic_read(&sbi->s_mb_discarded));
2595 }
2596
2597 free_percpu(sbi->s_locality_groups);
2598 if (sbi->s_proc)
2599 remove_proc_entry("mb_groups", sbi->s_proc);
2600
2601 return 0;
2602}
2603
2604static inline int ext4_issue_discard(struct super_block *sb,
2605 ext4_group_t block_group, ext4_grpblk_t block, int count)
2606{
2607 ext4_fsblk_t discard_block;
2608
2609 discard_block = block + ext4_group_first_block_no(sb, block_group);
2610 trace_ext4_discard_blocks(sb,
2611 (unsigned long long) discard_block, count);
2612 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2613}
2614
2615
2616
2617
2618
2619static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2620{
2621 struct super_block *sb = journal->j_private;
2622 struct ext4_buddy e4b;
2623 struct ext4_group_info *db;
2624 int err, count = 0, count2 = 0;
2625 struct ext4_free_data *entry;
2626 struct list_head *l, *ltmp;
2627
2628 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2629 entry = list_entry(l, struct ext4_free_data, list);
2630
2631 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2632 entry->count, entry->group, entry);
2633
2634 if (test_opt(sb, DISCARD))
2635 ext4_issue_discard(sb, entry->group,
2636 entry->start_blk, entry->count);
2637
2638 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2639
2640 BUG_ON(err != 0);
2641
2642 db = e4b.bd_info;
2643
2644 count += entry->count;
2645 count2++;
2646 ext4_lock_group(sb, entry->group);
2647
2648 rb_erase(&entry->node, &(db->bb_free_root));
2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2650
2651
2652
2653
2654
2655
2656
2657 if (!test_opt(sb, DISCARD))
2658 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2659
2660 if (!db->bb_free_root.rb_node) {
2661
2662
2663
2664 page_cache_release(e4b.bd_buddy_page);
2665 page_cache_release(e4b.bd_bitmap_page);
2666 }
2667 ext4_unlock_group(sb, entry->group);
2668 kmem_cache_free(ext4_free_ext_cachep, entry);
2669 ext4_mb_unload_buddy(&e4b);
2670 }
2671
2672 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2673}
2674
2675#ifdef CONFIG_EXT4_DEBUG
2676u8 mb_enable_debug __read_mostly;
2677
2678static struct dentry *debugfs_dir;
2679static struct dentry *debugfs_debug;
2680
2681static void __init ext4_create_debugfs_entry(void)
2682{
2683 debugfs_dir = debugfs_create_dir("ext4", NULL);
2684 if (debugfs_dir)
2685 debugfs_debug = debugfs_create_u8("mballoc-debug",
2686 S_IRUGO | S_IWUSR,
2687 debugfs_dir,
2688 &mb_enable_debug);
2689}
2690
2691static void ext4_remove_debugfs_entry(void)
2692{
2693 debugfs_remove(debugfs_debug);
2694 debugfs_remove(debugfs_dir);
2695}
2696
2697#else
2698
2699static void __init ext4_create_debugfs_entry(void)
2700{
2701}
2702
2703static void ext4_remove_debugfs_entry(void)
2704{
2705}
2706
2707#endif
2708
2709int __init ext4_init_mballoc(void)
2710{
2711 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2712 SLAB_RECLAIM_ACCOUNT);
2713 if (ext4_pspace_cachep == NULL)
2714 return -ENOMEM;
2715
2716 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2717 SLAB_RECLAIM_ACCOUNT);
2718 if (ext4_ac_cachep == NULL) {
2719 kmem_cache_destroy(ext4_pspace_cachep);
2720 return -ENOMEM;
2721 }
2722
2723 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2724 SLAB_RECLAIM_ACCOUNT);
2725 if (ext4_free_ext_cachep == NULL) {
2726 kmem_cache_destroy(ext4_pspace_cachep);
2727 kmem_cache_destroy(ext4_ac_cachep);
2728 return -ENOMEM;
2729 }
2730 ext4_create_debugfs_entry();
2731 return 0;
2732}
2733
2734void ext4_exit_mballoc(void)
2735{
2736
2737
2738
2739
2740 rcu_barrier();
2741 kmem_cache_destroy(ext4_pspace_cachep);
2742 kmem_cache_destroy(ext4_ac_cachep);
2743 kmem_cache_destroy(ext4_free_ext_cachep);
2744 ext4_groupinfo_destroy_slabs();
2745 ext4_remove_debugfs_entry();
2746}
2747
2748
2749
2750
2751
2752
2753static noinline_for_stack int
2754ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 handle_t *handle, unsigned int reserv_blks)
2756{
2757 struct buffer_head *bitmap_bh = NULL;
2758 struct ext4_group_desc *gdp;
2759 struct buffer_head *gdp_bh;
2760 struct ext4_sb_info *sbi;
2761 struct super_block *sb;
2762 ext4_fsblk_t block;
2763 int err, len;
2764
2765 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2766 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2767
2768 sb = ac->ac_sb;
2769 sbi = EXT4_SB(sb);
2770
2771 err = -EIO;
2772 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2773 if (!bitmap_bh)
2774 goto out_err;
2775
2776 err = ext4_journal_get_write_access(handle, bitmap_bh);
2777 if (err)
2778 goto out_err;
2779
2780 err = -EIO;
2781 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2782 if (!gdp)
2783 goto out_err;
2784
2785 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2786 ext4_free_blks_count(sb, gdp));
2787
2788 err = ext4_journal_get_write_access(handle, gdp_bh);
2789 if (err)
2790 goto out_err;
2791
2792 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2793
2794 len = ac->ac_b_ex.fe_len;
2795 if (!ext4_data_block_valid(sbi, block, len)) {
2796 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2797 "fs metadata\n", block, block+len);
2798
2799
2800
2801
2802 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2803 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2804 ac->ac_b_ex.fe_len);
2805 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2806 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2807 if (!err)
2808 err = -EAGAIN;
2809 goto out_err;
2810 }
2811
2812 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2813#ifdef AGGRESSIVE_CHECK
2814 {
2815 int i;
2816 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2817 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2818 bitmap_bh->b_data));
2819 }
2820 }
2821#endif
2822 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2823 ac->ac_b_ex.fe_len);
2824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2826 ext4_free_blks_set(sb, gdp,
2827 ext4_free_blocks_after_init(sb,
2828 ac->ac_b_ex.fe_group, gdp));
2829 }
2830 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
2831 ext4_free_blks_set(sb, gdp, len);
2832 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2833
2834 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2835 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2836
2837
2838
2839 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2840
2841 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2842
2843 if (sbi->s_log_groups_per_flex) {
2844 ext4_group_t flex_group = ext4_flex_group(sbi,
2845 ac->ac_b_ex.fe_group);
2846 atomic_sub(ac->ac_b_ex.fe_len,
2847 &sbi->s_flex_groups[flex_group].free_blocks);
2848 }
2849
2850 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2851 if (err)
2852 goto out_err;
2853 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2854
2855out_err:
2856 ext4_mark_super_dirty(sb);
2857 brelse(bitmap_bh);
2858 return err;
2859}
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2871{
2872 struct super_block *sb = ac->ac_sb;
2873 struct ext4_locality_group *lg = ac->ac_lg;
2874
2875 BUG_ON(lg == NULL);
2876 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2877 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2878 current->pid, ac->ac_g_ex.fe_len);
2879}
2880
2881
2882
2883
2884
2885static noinline_for_stack void
2886ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2887 struct ext4_allocation_request *ar)
2888{
2889 int bsbits, max;
2890 ext4_lblk_t end;
2891 loff_t size, orig_size, start_off;
2892 ext4_lblk_t start;
2893 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2894 struct ext4_prealloc_space *pa;
2895
2896
2897
2898 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2899 return;
2900
2901
2902 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2903 return;
2904
2905
2906
2907 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2908 return;
2909
2910 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2911 ext4_mb_normalize_group_request(ac);
2912 return ;
2913 }
2914
2915 bsbits = ac->ac_sb->s_blocksize_bits;
2916
2917
2918
2919 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
2920 size = size << bsbits;
2921 if (size < i_size_read(ac->ac_inode))
2922 size = i_size_read(ac->ac_inode);
2923 orig_size = size;
2924
2925
2926 max = 2 << bsbits;
2927
2928#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2929 (req <= (size) || max <= (chunk_size))
2930
2931
2932
2933 start_off = 0;
2934 if (size <= 16 * 1024) {
2935 size = 16 * 1024;
2936 } else if (size <= 32 * 1024) {
2937 size = 32 * 1024;
2938 } else if (size <= 64 * 1024) {
2939 size = 64 * 1024;
2940 } else if (size <= 128 * 1024) {
2941 size = 128 * 1024;
2942 } else if (size <= 256 * 1024) {
2943 size = 256 * 1024;
2944 } else if (size <= 512 * 1024) {
2945 size = 512 * 1024;
2946 } else if (size <= 1024 * 1024) {
2947 size = 1024 * 1024;
2948 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
2949 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2950 (21 - bsbits)) << 21;
2951 size = 2 * 1024 * 1024;
2952 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
2953 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2954 (22 - bsbits)) << 22;
2955 size = 4 * 1024 * 1024;
2956 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
2957 (8<<20)>>bsbits, max, 8 * 1024)) {
2958 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2959 (23 - bsbits)) << 23;
2960 size = 8 * 1024 * 1024;
2961 } else {
2962 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
2963 size = ac->ac_o_ex.fe_len << bsbits;
2964 }
2965 size = size >> bsbits;
2966 start = start_off >> bsbits;
2967
2968
2969 if (ar->pleft && start <= ar->lleft) {
2970 size -= ar->lleft + 1 - start;
2971 start = ar->lleft + 1;
2972 }
2973 if (ar->pright && start + size - 1 >= ar->lright)
2974 size -= start + size - ar->lright;
2975
2976 end = start + size;
2977
2978
2979 rcu_read_lock();
2980 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
2981 ext4_lblk_t pa_end;
2982
2983 if (pa->pa_deleted)
2984 continue;
2985 spin_lock(&pa->pa_lock);
2986 if (pa->pa_deleted) {
2987 spin_unlock(&pa->pa_lock);
2988 continue;
2989 }
2990
2991 pa_end = pa->pa_lstart + pa->pa_len;
2992
2993
2994 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
2995 ac->ac_o_ex.fe_logical < pa->pa_lstart));
2996
2997
2998 if (pa->pa_lstart >= end || pa_end <= start) {
2999 spin_unlock(&pa->pa_lock);
3000 continue;
3001 }
3002 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3003
3004
3005 if (pa_end <= ac->ac_o_ex.fe_logical) {
3006 BUG_ON(pa_end < start);
3007 start = pa_end;
3008 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3009 BUG_ON(pa->pa_lstart > end);
3010 end = pa->pa_lstart;
3011 }
3012 spin_unlock(&pa->pa_lock);
3013 }
3014 rcu_read_unlock();
3015 size = end - start;
3016
3017
3018 rcu_read_lock();
3019 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3020 ext4_lblk_t pa_end;
3021 spin_lock(&pa->pa_lock);
3022 if (pa->pa_deleted == 0) {
3023 pa_end = pa->pa_lstart + pa->pa_len;
3024 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3025 }
3026 spin_unlock(&pa->pa_lock);
3027 }
3028 rcu_read_unlock();
3029
3030 if (start + size <= ac->ac_o_ex.fe_logical &&
3031 start > ac->ac_o_ex.fe_logical) {
3032 ext4_msg(ac->ac_sb, KERN_ERR,
3033 "start %lu, size %lu, fe_logical %lu",
3034 (unsigned long) start, (unsigned long) size,
3035 (unsigned long) ac->ac_o_ex.fe_logical);
3036 }
3037 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3038 start > ac->ac_o_ex.fe_logical);
3039 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3040
3041
3042
3043
3044
3045 ac->ac_g_ex.fe_logical = start;
3046 ac->ac_g_ex.fe_len = size;
3047
3048
3049 if (ar->pright && (ar->lright == (start + size))) {
3050
3051 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3052 &ac->ac_f_ex.fe_group,
3053 &ac->ac_f_ex.fe_start);
3054 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3055 }
3056 if (ar->pleft && (ar->lleft + 1 == start)) {
3057
3058 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3059 &ac->ac_f_ex.fe_group,
3060 &ac->ac_f_ex.fe_start);
3061 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3062 }
3063
3064 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3065 (unsigned) orig_size, (unsigned) start);
3066}
3067
3068static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3069{
3070 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3071
3072 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3073 atomic_inc(&sbi->s_bal_reqs);
3074 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3075 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3076 atomic_inc(&sbi->s_bal_success);
3077 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3078 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3079 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3080 atomic_inc(&sbi->s_bal_goals);
3081 if (ac->ac_found > sbi->s_mb_max_to_scan)
3082 atomic_inc(&sbi->s_bal_breaks);
3083 }
3084
3085 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3086 trace_ext4_mballoc_alloc(ac);
3087 else
3088 trace_ext4_mballoc_prealloc(ac);
3089}
3090
3091
3092
3093
3094
3095
3096
3097static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3098{
3099 struct ext4_prealloc_space *pa = ac->ac_pa;
3100 int len;
3101
3102 if (pa && pa->pa_type == MB_INODE_PA) {
3103 len = ac->ac_b_ex.fe_len;
3104 pa->pa_free += len;
3105 }
3106
3107}
3108
3109
3110
3111
3112static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3113 struct ext4_prealloc_space *pa)
3114{
3115 ext4_fsblk_t start;
3116 ext4_fsblk_t end;
3117 int len;
3118
3119
3120 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3121 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
3122 len = end - start;
3123 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3124 &ac->ac_b_ex.fe_start);
3125 ac->ac_b_ex.fe_len = len;
3126 ac->ac_status = AC_STATUS_FOUND;
3127 ac->ac_pa = pa;
3128
3129 BUG_ON(start < pa->pa_pstart);
3130 BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
3131 BUG_ON(pa->pa_free < len);
3132 pa->pa_free -= len;
3133
3134 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3135}
3136
3137
3138
3139
3140static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3141 struct ext4_prealloc_space *pa)
3142{
3143 unsigned int len = ac->ac_o_ex.fe_len;
3144
3145 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3146 &ac->ac_b_ex.fe_group,
3147 &ac->ac_b_ex.fe_start);
3148 ac->ac_b_ex.fe_len = len;
3149 ac->ac_status = AC_STATUS_FOUND;
3150 ac->ac_pa = pa;
3151
3152
3153
3154
3155
3156
3157
3158 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3159}
3160
3161
3162
3163
3164
3165
3166
3167static struct ext4_prealloc_space *
3168ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3169 struct ext4_prealloc_space *pa,
3170 struct ext4_prealloc_space *cpa)
3171{
3172 ext4_fsblk_t cur_distance, new_distance;
3173
3174 if (cpa == NULL) {
3175 atomic_inc(&pa->pa_count);
3176 return pa;
3177 }
3178 cur_distance = abs(goal_block - cpa->pa_pstart);
3179 new_distance = abs(goal_block - pa->pa_pstart);
3180
3181 if (cur_distance <= new_distance)
3182 return cpa;
3183
3184
3185 atomic_dec(&cpa->pa_count);
3186 atomic_inc(&pa->pa_count);
3187 return pa;
3188}
3189
3190
3191
3192
3193static noinline_for_stack int
3194ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3195{
3196 int order, i;
3197 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3198 struct ext4_locality_group *lg;
3199 struct ext4_prealloc_space *pa, *cpa = NULL;
3200 ext4_fsblk_t goal_block;
3201
3202
3203 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3204 return 0;
3205
3206
3207 rcu_read_lock();
3208 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3209
3210
3211
3212 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3213 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
3214 continue;
3215
3216
3217 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3218 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
3219 continue;
3220
3221
3222 spin_lock(&pa->pa_lock);
3223 if (pa->pa_deleted == 0 && pa->pa_free) {
3224 atomic_inc(&pa->pa_count);
3225 ext4_mb_use_inode_pa(ac, pa);
3226 spin_unlock(&pa->pa_lock);
3227 ac->ac_criteria = 10;
3228 rcu_read_unlock();
3229 return 1;
3230 }
3231 spin_unlock(&pa->pa_lock);
3232 }
3233 rcu_read_unlock();
3234
3235
3236 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3237 return 0;
3238
3239
3240 lg = ac->ac_lg;
3241 if (lg == NULL)
3242 return 0;
3243 order = fls(ac->ac_o_ex.fe_len) - 1;
3244 if (order > PREALLOC_TB_SIZE - 1)
3245
3246 order = PREALLOC_TB_SIZE - 1;
3247
3248 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3249
3250
3251
3252
3253 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3254 rcu_read_lock();
3255 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3256 pa_inode_list) {
3257 spin_lock(&pa->pa_lock);
3258 if (pa->pa_deleted == 0 &&
3259 pa->pa_free >= ac->ac_o_ex.fe_len) {
3260
3261 cpa = ext4_mb_check_group_pa(goal_block,
3262 pa, cpa);
3263 }
3264 spin_unlock(&pa->pa_lock);
3265 }
3266 rcu_read_unlock();
3267 }
3268 if (cpa) {
3269 ext4_mb_use_group_pa(ac, cpa);
3270 ac->ac_criteria = 20;
3271 return 1;
3272 }
3273 return 0;
3274}
3275
3276
3277
3278
3279
3280
3281
3282static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3283 ext4_group_t group)
3284{
3285 struct rb_node *n;
3286 struct ext4_group_info *grp;
3287 struct ext4_free_data *entry;
3288
3289 grp = ext4_get_group_info(sb, group);
3290 n = rb_first(&(grp->bb_free_root));
3291
3292 while (n) {
3293 entry = rb_entry(n, struct ext4_free_data, node);
3294 ext4_set_bits(bitmap, entry->start_blk, entry->count);
3295 n = rb_next(n);
3296 }
3297 return;
3298}
3299
3300
3301
3302
3303
3304
3305static noinline_for_stack
3306void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3307 ext4_group_t group)
3308{
3309 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3310 struct ext4_prealloc_space *pa;
3311 struct list_head *cur;
3312 ext4_group_t groupnr;
3313 ext4_grpblk_t start;
3314 int preallocated = 0;
3315 int count = 0;
3316 int len;
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326 list_for_each(cur, &grp->bb_prealloc_list) {
3327 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3328 spin_lock(&pa->pa_lock);
3329 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3330 &groupnr, &start);
3331 len = pa->pa_len;
3332 spin_unlock(&pa->pa_lock);
3333 if (unlikely(len == 0))
3334 continue;
3335 BUG_ON(groupnr != group);
3336 ext4_set_bits(bitmap, start, len);
3337 preallocated += len;
3338 count++;
3339 }
3340 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3341}
3342
3343static void ext4_mb_pa_callback(struct rcu_head *head)
3344{
3345 struct ext4_prealloc_space *pa;
3346 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3347 kmem_cache_free(ext4_pspace_cachep, pa);
3348}
3349
3350
3351
3352
3353
3354static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3355 struct super_block *sb, struct ext4_prealloc_space *pa)
3356{
3357 ext4_group_t grp;
3358 ext4_fsblk_t grp_blk;
3359
3360 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3361 return;
3362
3363
3364 spin_lock(&pa->pa_lock);
3365 if (pa->pa_deleted == 1) {
3366 spin_unlock(&pa->pa_lock);
3367 return;
3368 }
3369
3370 pa->pa_deleted = 1;
3371 spin_unlock(&pa->pa_lock);
3372
3373 grp_blk = pa->pa_pstart;
3374
3375
3376
3377
3378 if (pa->pa_type == MB_GROUP_PA)
3379 grp_blk--;
3380
3381 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397 ext4_lock_group(sb, grp);
3398 list_del(&pa->pa_group_list);
3399 ext4_unlock_group(sb, grp);
3400
3401 spin_lock(pa->pa_obj_lock);
3402 list_del_rcu(&pa->pa_inode_list);
3403 spin_unlock(pa->pa_obj_lock);
3404
3405 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3406}
3407
3408
3409
3410
3411static noinline_for_stack int
3412ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3413{
3414 struct super_block *sb = ac->ac_sb;
3415 struct ext4_prealloc_space *pa;
3416 struct ext4_group_info *grp;
3417 struct ext4_inode_info *ei;
3418
3419
3420 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3421 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3422 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3423
3424 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3425 if (pa == NULL)
3426 return -ENOMEM;
3427
3428 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3429 int winl;
3430 int wins;
3431 int win;
3432 int offs;
3433
3434
3435
3436
3437 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3438 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3439
3440
3441
3442
3443 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3444
3445
3446 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
3447
3448
3449 win = min(winl, wins);
3450
3451 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
3452 if (offs && offs < win)
3453 win = offs;
3454
3455 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
3456 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3457 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3458 }
3459
3460
3461
3462 ac->ac_f_ex = ac->ac_b_ex;
3463
3464 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3465 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3466 pa->pa_len = ac->ac_b_ex.fe_len;
3467 pa->pa_free = pa->pa_len;
3468 atomic_set(&pa->pa_count, 1);
3469 spin_lock_init(&pa->pa_lock);
3470 INIT_LIST_HEAD(&pa->pa_inode_list);
3471 INIT_LIST_HEAD(&pa->pa_group_list);
3472 pa->pa_deleted = 0;
3473 pa->pa_type = MB_INODE_PA;
3474
3475 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3476 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3477 trace_ext4_mb_new_inode_pa(ac, pa);
3478
3479 ext4_mb_use_inode_pa(ac, pa);
3480 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3481
3482 ei = EXT4_I(ac->ac_inode);
3483 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3484
3485 pa->pa_obj_lock = &ei->i_prealloc_lock;
3486 pa->pa_inode = ac->ac_inode;
3487
3488 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3489 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3490 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3491
3492 spin_lock(pa->pa_obj_lock);
3493 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3494 spin_unlock(pa->pa_obj_lock);
3495
3496 return 0;
3497}
3498
3499
3500
3501
3502static noinline_for_stack int
3503ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3504{
3505 struct super_block *sb = ac->ac_sb;
3506 struct ext4_locality_group *lg;
3507 struct ext4_prealloc_space *pa;
3508 struct ext4_group_info *grp;
3509
3510
3511 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3512 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3513 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3514
3515 BUG_ON(ext4_pspace_cachep == NULL);
3516 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3517 if (pa == NULL)
3518 return -ENOMEM;
3519
3520
3521
3522 ac->ac_f_ex = ac->ac_b_ex;
3523
3524 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3525 pa->pa_lstart = pa->pa_pstart;
3526 pa->pa_len = ac->ac_b_ex.fe_len;
3527 pa->pa_free = pa->pa_len;
3528 atomic_set(&pa->pa_count, 1);
3529 spin_lock_init(&pa->pa_lock);
3530 INIT_LIST_HEAD(&pa->pa_inode_list);
3531 INIT_LIST_HEAD(&pa->pa_group_list);
3532 pa->pa_deleted = 0;
3533 pa->pa_type = MB_GROUP_PA;
3534
3535 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3536 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3537 trace_ext4_mb_new_group_pa(ac, pa);
3538
3539 ext4_mb_use_group_pa(ac, pa);
3540 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3541
3542 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3543 lg = ac->ac_lg;
3544 BUG_ON(lg == NULL);
3545
3546 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3547 pa->pa_inode = NULL;
3548
3549 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3550 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3551 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3552
3553
3554
3555
3556
3557 return 0;
3558}
3559
3560static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3561{
3562 int err;
3563
3564 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3565 err = ext4_mb_new_group_pa(ac);
3566 else
3567 err = ext4_mb_new_inode_pa(ac);
3568 return err;
3569}
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579static noinline_for_stack int
3580ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3581 struct ext4_prealloc_space *pa)
3582{
3583 struct super_block *sb = e4b->bd_sb;
3584 struct ext4_sb_info *sbi = EXT4_SB(sb);
3585 unsigned int end;
3586 unsigned int next;
3587 ext4_group_t group;
3588 ext4_grpblk_t bit;
3589 unsigned long long grp_blk_start;
3590 int err = 0;
3591 int free = 0;
3592
3593 BUG_ON(pa->pa_deleted == 0);
3594 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3595 grp_blk_start = pa->pa_pstart - bit;
3596 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3597 end = bit + pa->pa_len;
3598
3599 while (bit < end) {
3600 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3601 if (bit >= end)
3602 break;
3603 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3604 mb_debug(1, " free preallocated %u/%u in group %u\n",
3605 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3606 (unsigned) next - bit, (unsigned) group);
3607 free += next - bit;
3608
3609 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3610 trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
3611 next - bit);
3612 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3613 bit = next + 1;
3614 }
3615 if (free != pa->pa_free) {
3616 ext4_msg(e4b->bd_sb, KERN_CRIT,
3617 "pa %p: logic %lu, phys. %lu, len %lu",
3618 pa, (unsigned long) pa->pa_lstart,
3619 (unsigned long) pa->pa_pstart,
3620 (unsigned long) pa->pa_len);
3621 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3622 free, pa->pa_free);
3623
3624
3625
3626
3627 }
3628 atomic_add(free, &sbi->s_mb_discarded);
3629
3630 return err;
3631}
3632
3633static noinline_for_stack int
3634ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3635 struct ext4_prealloc_space *pa)
3636{
3637 struct super_block *sb = e4b->bd_sb;
3638 ext4_group_t group;
3639 ext4_grpblk_t bit;
3640
3641 trace_ext4_mb_release_group_pa(pa);
3642 BUG_ON(pa->pa_deleted == 0);
3643 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3644 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3645 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3646 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3647 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3648
3649 return 0;
3650}
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661static noinline_for_stack int
3662ext4_mb_discard_group_preallocations(struct super_block *sb,
3663 ext4_group_t group, int needed)
3664{
3665 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3666 struct buffer_head *bitmap_bh = NULL;
3667 struct ext4_prealloc_space *pa, *tmp;
3668 struct list_head list;
3669 struct ext4_buddy e4b;
3670 int err;
3671 int busy = 0;
3672 int free = 0;
3673
3674 mb_debug(1, "discard preallocation for group %u\n", group);
3675
3676 if (list_empty(&grp->bb_prealloc_list))
3677 return 0;
3678
3679 bitmap_bh = ext4_read_block_bitmap(sb, group);
3680 if (bitmap_bh == NULL) {
3681 ext4_error(sb, "Error reading block bitmap for %u", group);
3682 return 0;
3683 }
3684
3685 err = ext4_mb_load_buddy(sb, group, &e4b);
3686 if (err) {
3687 ext4_error(sb, "Error loading buddy information for %u", group);
3688 put_bh(bitmap_bh);
3689 return 0;
3690 }
3691
3692 if (needed == 0)
3693 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3694
3695 INIT_LIST_HEAD(&list);
3696repeat:
3697 ext4_lock_group(sb, group);
3698 list_for_each_entry_safe(pa, tmp,
3699 &grp->bb_prealloc_list, pa_group_list) {
3700 spin_lock(&pa->pa_lock);
3701 if (atomic_read(&pa->pa_count)) {
3702 spin_unlock(&pa->pa_lock);
3703 busy = 1;
3704 continue;
3705 }
3706 if (pa->pa_deleted) {
3707 spin_unlock(&pa->pa_lock);
3708 continue;
3709 }
3710
3711
3712 pa->pa_deleted = 1;
3713
3714
3715 free += pa->pa_free;
3716
3717 spin_unlock(&pa->pa_lock);
3718
3719 list_del(&pa->pa_group_list);
3720 list_add(&pa->u.pa_tmp_list, &list);
3721 }
3722
3723
3724 if (free < needed && busy) {
3725 busy = 0;
3726 ext4_unlock_group(sb, group);
3727
3728
3729
3730
3731 yield();
3732 goto repeat;
3733 }
3734
3735
3736 if (list_empty(&list)) {
3737 BUG_ON(free != 0);
3738 goto out;
3739 }
3740
3741
3742 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3743
3744
3745 spin_lock(pa->pa_obj_lock);
3746 list_del_rcu(&pa->pa_inode_list);
3747 spin_unlock(pa->pa_obj_lock);
3748
3749 if (pa->pa_type == MB_GROUP_PA)
3750 ext4_mb_release_group_pa(&e4b, pa);
3751 else
3752 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3753
3754 list_del(&pa->u.pa_tmp_list);
3755 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3756 }
3757
3758out:
3759 ext4_unlock_group(sb, group);
3760 ext4_mb_unload_buddy(&e4b);
3761 put_bh(bitmap_bh);
3762 return free;
3763}
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774void ext4_discard_preallocations(struct inode *inode)
3775{
3776 struct ext4_inode_info *ei = EXT4_I(inode);
3777 struct super_block *sb = inode->i_sb;
3778 struct buffer_head *bitmap_bh = NULL;
3779 struct ext4_prealloc_space *pa, *tmp;
3780 ext4_group_t group = 0;
3781 struct list_head list;
3782 struct ext4_buddy e4b;
3783 int err;
3784
3785 if (!S_ISREG(inode->i_mode)) {
3786
3787 return;
3788 }
3789
3790 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3791 trace_ext4_discard_preallocations(inode);
3792
3793 INIT_LIST_HEAD(&list);
3794
3795repeat:
3796
3797 spin_lock(&ei->i_prealloc_lock);
3798 while (!list_empty(&ei->i_prealloc_list)) {
3799 pa = list_entry(ei->i_prealloc_list.next,
3800 struct ext4_prealloc_space, pa_inode_list);
3801 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3802 spin_lock(&pa->pa_lock);
3803 if (atomic_read(&pa->pa_count)) {
3804
3805
3806 spin_unlock(&pa->pa_lock);
3807 spin_unlock(&ei->i_prealloc_lock);
3808 ext4_msg(sb, KERN_ERR,
3809 "uh-oh! used pa while discarding");
3810 WARN_ON(1);
3811 schedule_timeout_uninterruptible(HZ);
3812 goto repeat;
3813
3814 }
3815 if (pa->pa_deleted == 0) {
3816 pa->pa_deleted = 1;
3817 spin_unlock(&pa->pa_lock);
3818 list_del_rcu(&pa->pa_inode_list);
3819 list_add(&pa->u.pa_tmp_list, &list);
3820 continue;
3821 }
3822
3823
3824 spin_unlock(&pa->pa_lock);
3825 spin_unlock(&ei->i_prealloc_lock);
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839 schedule_timeout_uninterruptible(HZ);
3840 goto repeat;
3841 }
3842 spin_unlock(&ei->i_prealloc_lock);
3843
3844 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3845 BUG_ON(pa->pa_type != MB_INODE_PA);
3846 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3847
3848 err = ext4_mb_load_buddy(sb, group, &e4b);
3849 if (err) {
3850 ext4_error(sb, "Error loading buddy information for %u",
3851 group);
3852 continue;
3853 }
3854
3855 bitmap_bh = ext4_read_block_bitmap(sb, group);
3856 if (bitmap_bh == NULL) {
3857 ext4_error(sb, "Error reading block bitmap for %u",
3858 group);
3859 ext4_mb_unload_buddy(&e4b);
3860 continue;
3861 }
3862
3863 ext4_lock_group(sb, group);
3864 list_del(&pa->pa_group_list);
3865 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3866 ext4_unlock_group(sb, group);
3867
3868 ext4_mb_unload_buddy(&e4b);
3869 put_bh(bitmap_bh);
3870
3871 list_del(&pa->u.pa_tmp_list);
3872 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3873 }
3874}
3875
#ifdef CONFIG_EXT4_DEBUG
/*
 * Debug helper: dump the allocation context and, for every block group,
 * its preallocations and free-space summary to the kernel log.  Nothing is
 * printed when mb_enable_debug is off or the filesystem has been aborted.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t ngroups, i;

	if (!mb_enable_debug)
		return;
	if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
		return;

	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
			" Allocation context details:");
	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
			ac->ac_status, ac->ac_flags);
	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
		 	"goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
		 ac->ac_ex_scanned, ac->ac_found);
	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");

	ngroups = ext4_get_groups_count(sb);
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;

		/* one line per preallocation in this group */
		ext4_lock_group(sb, i);
		list_for_each_entry(pa, &grp->bb_prealloc_list,
				    pa_group_list) {
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%u:%d:%u \n", i,
			       start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		/* groups without free blocks get no summary line */
		if (grp->bb_free == 0)
			continue;
		printk(KERN_ERR "%u: %d/%d \n",
		       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
/* Debugging disabled: dumping the allocation context is a no-op. */
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
}
#endif
3941
3942
3943
3944
3945
3946
3947
3948
3949static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3950{
3951 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3952 int bsbits = ac->ac_sb->s_blocksize_bits;
3953 loff_t size, isize;
3954
3955 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3956 return;
3957
3958 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3959 return;
3960
3961 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
3962 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3963 >> bsbits;
3964
3965 if ((size == isize) &&
3966 !ext4_fs_is_busy(sbi) &&
3967 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
3968 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
3969 return;
3970 }
3971
3972
3973 size = max(size, isize);
3974 if (size > sbi->s_mb_stream_request) {
3975 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3976 return;
3977 }
3978
3979 BUG_ON(ac->ac_lg != NULL);
3980
3981
3982
3983
3984
3985 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3986
3987
3988 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
3989
3990
3991 mutex_lock(&ac->ac_lg->lg_mutex);
3992}
3993
3994static noinline_for_stack int
3995ext4_mb_initialize_context(struct ext4_allocation_context *ac,
3996 struct ext4_allocation_request *ar)
3997{
3998 struct super_block *sb = ar->inode->i_sb;
3999 struct ext4_sb_info *sbi = EXT4_SB(sb);
4000 struct ext4_super_block *es = sbi->s_es;
4001 ext4_group_t group;
4002 unsigned int len;
4003 ext4_fsblk_t goal;
4004 ext4_grpblk_t block;
4005
4006
4007 len = ar->len;
4008
4009
4010 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
4011 len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
4012
4013
4014 goal = ar->goal;
4015 if (goal < le32_to_cpu(es->s_first_data_block) ||
4016 goal >= ext4_blocks_count(es))
4017 goal = le32_to_cpu(es->s_first_data_block);
4018 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4019
4020
4021 memset(ac, 0, sizeof(struct ext4_allocation_context));
4022 ac->ac_b_ex.fe_logical = ar->logical;
4023 ac->ac_status = AC_STATUS_CONTINUE;
4024 ac->ac_sb = sb;
4025 ac->ac_inode = ar->inode;
4026 ac->ac_o_ex.fe_logical = ar->logical;
4027 ac->ac_o_ex.fe_group = group;
4028 ac->ac_o_ex.fe_start = block;
4029 ac->ac_o_ex.fe_len = len;
4030 ac->ac_g_ex.fe_logical = ar->logical;
4031 ac->ac_g_ex.fe_group = group;
4032 ac->ac_g_ex.fe_start = block;
4033 ac->ac_g_ex.fe_len = len;
4034 ac->ac_flags = ar->flags;
4035
4036
4037
4038 ext4_mb_group_or_file(ac);
4039
4040 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4041 "left: %u/%u, right %u/%u to %swritable\n",
4042 (unsigned) ar->len, (unsigned) ar->logical,
4043 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4044 (unsigned) ar->lleft, (unsigned) ar->pleft,
4045 (unsigned) ar->lright, (unsigned) ar->pright,
4046 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4047 return 0;
4048
4049}
4050
4051static noinline_for_stack void
4052ext4_mb_discard_lg_preallocations(struct super_block *sb,
4053 struct ext4_locality_group *lg,
4054 int order, int total_entries)
4055{
4056 ext4_group_t group = 0;
4057 struct ext4_buddy e4b;
4058 struct list_head discard_list;
4059 struct ext4_prealloc_space *pa, *tmp;
4060
4061 mb_debug(1, "discard locality group preallocation\n");
4062
4063 INIT_LIST_HEAD(&discard_list);
4064
4065 spin_lock(&lg->lg_prealloc_lock);
4066 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4067 pa_inode_list) {
4068 spin_lock(&pa->pa_lock);
4069 if (atomic_read(&pa->pa_count)) {
4070
4071
4072
4073
4074
4075 spin_unlock(&pa->pa_lock);
4076 continue;
4077 }
4078 if (pa->pa_deleted) {
4079 spin_unlock(&pa->pa_lock);
4080 continue;
4081 }
4082
4083 BUG_ON(pa->pa_type != MB_GROUP_PA);
4084
4085
4086 pa->pa_deleted = 1;
4087 spin_unlock(&pa->pa_lock);
4088
4089 list_del_rcu(&pa->pa_inode_list);
4090 list_add(&pa->u.pa_tmp_list, &discard_list);
4091
4092 total_entries--;
4093 if (total_entries <= 5) {
4094
4095
4096
4097
4098
4099
4100 break;
4101 }
4102 }
4103 spin_unlock(&lg->lg_prealloc_lock);
4104
4105 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4106
4107 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4108 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4109 ext4_error(sb, "Error loading buddy information for %u",
4110 group);
4111 continue;
4112 }
4113 ext4_lock_group(sb, group);
4114 list_del(&pa->pa_group_list);
4115 ext4_mb_release_group_pa(&e4b, pa);
4116 ext4_unlock_group(sb, group);
4117
4118 ext4_mb_unload_buddy(&e4b);
4119 list_del(&pa->u.pa_tmp_list);
4120 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4121 }
4122}
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4134{
4135 int order, added = 0, lg_prealloc_count = 1;
4136 struct super_block *sb = ac->ac_sb;
4137 struct ext4_locality_group *lg = ac->ac_lg;
4138 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4139
4140 order = fls(pa->pa_free) - 1;
4141 if (order > PREALLOC_TB_SIZE - 1)
4142
4143 order = PREALLOC_TB_SIZE - 1;
4144
4145 rcu_read_lock();
4146 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4147 pa_inode_list) {
4148 spin_lock(&tmp_pa->pa_lock);
4149 if (tmp_pa->pa_deleted) {
4150 spin_unlock(&tmp_pa->pa_lock);
4151 continue;
4152 }
4153 if (!added && pa->pa_free < tmp_pa->pa_free) {
4154
4155 list_add_tail_rcu(&pa->pa_inode_list,
4156 &tmp_pa->pa_inode_list);
4157 added = 1;
4158
4159
4160
4161
4162 }
4163 spin_unlock(&tmp_pa->pa_lock);
4164 lg_prealloc_count++;
4165 }
4166 if (!added)
4167 list_add_tail_rcu(&pa->pa_inode_list,
4168 &lg->lg_prealloc_list[order]);
4169 rcu_read_unlock();
4170
4171
4172 if (lg_prealloc_count > 8) {
4173 ext4_mb_discard_lg_preallocations(sb, lg,
4174 order, lg_prealloc_count);
4175 return;
4176 }
4177 return ;
4178}
4179
4180
4181
4182
4183static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4184{
4185 struct ext4_prealloc_space *pa = ac->ac_pa;
4186 if (pa) {
4187 if (pa->pa_type == MB_GROUP_PA) {
4188
4189 spin_lock(&pa->pa_lock);
4190 pa->pa_pstart += ac->ac_b_ex.fe_len;
4191 pa->pa_lstart += ac->ac_b_ex.fe_len;
4192 pa->pa_free -= ac->ac_b_ex.fe_len;
4193 pa->pa_len -= ac->ac_b_ex.fe_len;
4194 spin_unlock(&pa->pa_lock);
4195 }
4196 }
4197 if (pa) {
4198
4199
4200
4201
4202
4203
4204 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4205 spin_lock(pa->pa_obj_lock);
4206 list_del_rcu(&pa->pa_inode_list);
4207 spin_unlock(pa->pa_obj_lock);
4208 ext4_mb_add_n_trim(ac);
4209 }
4210 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4211 }
4212 if (ac->ac_bitmap_page)
4213 page_cache_release(ac->ac_bitmap_page);
4214 if (ac->ac_buddy_page)
4215 page_cache_release(ac->ac_buddy_page);
4216 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4217 mutex_unlock(&ac->ac_lg->lg_mutex);
4218 ext4_mb_collect_stats(ac);
4219 return 0;
4220}
4221
4222static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4223{
4224 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4225 int ret;
4226 int freed = 0;
4227
4228 trace_ext4_mb_discard_preallocations(sb, needed);
4229 for (i = 0; i < ngroups && needed > 0; i++) {
4230 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4231 freed += ret;
4232 needed -= ret;
4233 }
4234
4235 return freed;
4236}
4237
4238
4239
4240
4241
4242
4243ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4244 struct ext4_allocation_request *ar, int *errp)
4245{
4246 int freed;
4247 struct ext4_allocation_context *ac = NULL;
4248 struct ext4_sb_info *sbi;
4249 struct super_block *sb;
4250 ext4_fsblk_t block = 0;
4251 unsigned int inquota = 0;
4252 unsigned int reserv_blks = 0;
4253
4254 sb = ar->inode->i_sb;
4255 sbi = EXT4_SB(sb);
4256
4257 trace_ext4_request_blocks(ar);
4258
4259
4260
4261
4262
4263
4264 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4265 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4266 else {
4267
4268
4269
4270
4271 while (ar->len &&
4272 ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
4273
4274
4275 yield();
4276 ar->len = ar->len >> 1;
4277 }
4278 if (!ar->len) {
4279 *errp = -ENOSPC;
4280 return 0;
4281 }
4282 reserv_blks = ar->len;
4283 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4284 dquot_alloc_block_nofail(ar->inode, ar->len);
4285 } else {
4286 while (ar->len &&
4287 dquot_alloc_block(ar->inode, ar->len)) {
4288
4289 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4290 ar->len--;
4291 }
4292 }
4293 inquota = ar->len;
4294 if (ar->len == 0) {
4295 *errp = -EDQUOT;
4296 goto out;
4297 }
4298 }
4299
4300 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4301 if (!ac) {
4302 ar->len = 0;
4303 *errp = -ENOMEM;
4304 goto out;
4305 }
4306
4307 *errp = ext4_mb_initialize_context(ac, ar);
4308 if (*errp) {
4309 ar->len = 0;
4310 goto out;
4311 }
4312
4313 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4314 if (!ext4_mb_use_preallocated(ac)) {
4315 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4316 ext4_mb_normalize_request(ac, ar);
4317repeat:
4318
4319 *errp = ext4_mb_regular_allocator(ac);
4320 if (*errp)
4321 goto errout;
4322
4323
4324
4325
4326 if (ac->ac_status == AC_STATUS_FOUND &&
4327 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4328 ext4_mb_new_preallocation(ac);
4329 }
4330 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4331 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4332 if (*errp == -EAGAIN) {
4333
4334
4335
4336
4337 ext4_mb_release_context(ac);
4338 ac->ac_b_ex.fe_group = 0;
4339 ac->ac_b_ex.fe_start = 0;
4340 ac->ac_b_ex.fe_len = 0;
4341 ac->ac_status = AC_STATUS_CONTINUE;
4342 goto repeat;
4343 } else if (*errp)
4344 errout:
4345 ext4_discard_allocated_blocks(ac);
4346 else {
4347 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4348 ar->len = ac->ac_b_ex.fe_len;
4349 }
4350 } else {
4351 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4352 if (freed)
4353 goto repeat;
4354 *errp = -ENOSPC;
4355 }
4356
4357 if (*errp) {
4358 ac->ac_b_ex.fe_len = 0;
4359 ar->len = 0;
4360 ext4_mb_show_ac(ac);
4361 }
4362 ext4_mb_release_context(ac);
4363out:
4364 if (ac)
4365 kmem_cache_free(ext4_ac_cachep, ac);
4366 if (inquota && ar->len < inquota)
4367 dquot_free_block(ar->inode, inquota - ar->len);
4368 if (!ar->len) {
4369 if (!ext4_test_inode_state(ar->inode,
4370 EXT4_STATE_DELALLOC_RESERVED))
4371
4372 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
4373 reserv_blks);
4374 }
4375
4376 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4377
4378 return block;
4379}
4380
4381
4382
4383
4384
4385
4386static int can_merge(struct ext4_free_data *entry1,
4387 struct ext4_free_data *entry2)
4388{
4389 if ((entry1->t_tid == entry2->t_tid) &&
4390 (entry1->group == entry2->group) &&
4391 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4392 return 1;
4393 return 0;
4394}
4395
4396static noinline_for_stack int
4397ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4398 struct ext4_free_data *new_entry)
4399{
4400 ext4_group_t group = e4b->bd_group;
4401 ext4_grpblk_t block;
4402 struct ext4_free_data *entry;
4403 struct ext4_group_info *db = e4b->bd_info;
4404 struct super_block *sb = e4b->bd_sb;
4405 struct ext4_sb_info *sbi = EXT4_SB(sb);
4406 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4407 struct rb_node *parent = NULL, *new_node;
4408
4409 BUG_ON(!ext4_handle_valid(handle));
4410 BUG_ON(e4b->bd_bitmap_page == NULL);
4411 BUG_ON(e4b->bd_buddy_page == NULL);
4412
4413 new_node = &new_entry->node;
4414 block = new_entry->start_blk;
4415
4416 if (!*n) {
4417
4418
4419
4420
4421
4422 page_cache_get(e4b->bd_buddy_page);
4423 page_cache_get(e4b->bd_bitmap_page);
4424 }
4425 while (*n) {
4426 parent = *n;
4427 entry = rb_entry(parent, struct ext4_free_data, node);
4428 if (block < entry->start_blk)
4429 n = &(*n)->rb_left;
4430 else if (block >= (entry->start_blk + entry->count))
4431 n = &(*n)->rb_right;
4432 else {
4433 ext4_grp_locked_error(sb, group, 0,
4434 ext4_group_first_block_no(sb, group) + block,
4435 "Block already on to-be-freed list");
4436 return 0;
4437 }
4438 }
4439
4440 rb_link_node(new_node, parent, n);
4441 rb_insert_color(new_node, &db->bb_free_root);
4442
4443
4444 node = rb_prev(new_node);
4445 if (node) {
4446 entry = rb_entry(node, struct ext4_free_data, node);
4447 if (can_merge(entry, new_entry)) {
4448 new_entry->start_blk = entry->start_blk;
4449 new_entry->count += entry->count;
4450 rb_erase(node, &(db->bb_free_root));
4451 spin_lock(&sbi->s_md_lock);
4452 list_del(&entry->list);
4453 spin_unlock(&sbi->s_md_lock);
4454 kmem_cache_free(ext4_free_ext_cachep, entry);
4455 }
4456 }
4457
4458 node = rb_next(new_node);
4459 if (node) {
4460 entry = rb_entry(node, struct ext4_free_data, node);
4461 if (can_merge(new_entry, entry)) {
4462 new_entry->count += entry->count;
4463 rb_erase(node, &(db->bb_free_root));
4464 spin_lock(&sbi->s_md_lock);
4465 list_del(&entry->list);
4466 spin_unlock(&sbi->s_md_lock);
4467 kmem_cache_free(ext4_free_ext_cachep, entry);
4468 }
4469 }
4470
4471 spin_lock(&sbi->s_md_lock);
4472 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4473 spin_unlock(&sbi->s_md_lock);
4474 return 0;
4475}
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485void ext4_free_blocks(handle_t *handle, struct inode *inode,
4486 struct buffer_head *bh, ext4_fsblk_t block,
4487 unsigned long count, int flags)
4488{
4489 struct buffer_head *bitmap_bh = NULL;
4490 struct super_block *sb = inode->i_sb;
4491 struct ext4_group_desc *gdp;
4492 unsigned long freed = 0;
4493 unsigned int overflow;
4494 ext4_grpblk_t bit;
4495 struct buffer_head *gd_bh;
4496 ext4_group_t block_group;
4497 struct ext4_sb_info *sbi;
4498 struct ext4_buddy e4b;
4499 int err = 0;
4500 int ret;
4501
4502 if (bh) {
4503 if (block)
4504 BUG_ON(block != bh->b_blocknr);
4505 else
4506 block = bh->b_blocknr;
4507 }
4508
4509 sbi = EXT4_SB(sb);
4510 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4511 !ext4_data_block_valid(sbi, block, count)) {
4512 ext4_error(sb, "Freeing blocks not in datazone - "
4513 "block = %llu, count = %lu", block, count);
4514 goto error_return;
4515 }
4516
4517 ext4_debug("freeing block %llu\n", block);
4518 trace_ext4_free_blocks(inode, block, count, flags);
4519
4520 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4521 struct buffer_head *tbh = bh;
4522 int i;
4523
4524 BUG_ON(bh && (count > 1));
4525
4526 for (i = 0; i < count; i++) {
4527 if (!bh)
4528 tbh = sb_find_get_block(inode->i_sb,
4529 block + i);
4530 if (unlikely(!tbh))
4531 continue;
4532 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4533 inode, tbh, block + i);
4534 }
4535 }
4536
4537
4538
4539
4540
4541
4542
4543
4544 if (!ext4_should_writeback_data(inode))
4545 flags |= EXT4_FREE_BLOCKS_METADATA;
4546
4547do_more:
4548 overflow = 0;
4549 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4550
4551
4552
4553
4554
4555 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4556 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
4557 count -= overflow;
4558 }
4559 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4560 if (!bitmap_bh) {
4561 err = -EIO;
4562 goto error_return;
4563 }
4564 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4565 if (!gdp) {
4566 err = -EIO;
4567 goto error_return;
4568 }
4569
4570 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4571 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4572 in_range(block, ext4_inode_table(sb, gdp),
4573 EXT4_SB(sb)->s_itb_per_group) ||
4574 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4575 EXT4_SB(sb)->s_itb_per_group)) {
4576
4577 ext4_error(sb, "Freeing blocks in system zone - "
4578 "Block = %llu, count = %lu", block, count);
4579
4580 goto error_return;
4581 }
4582
4583 BUFFER_TRACE(bitmap_bh, "getting write access");
4584 err = ext4_journal_get_write_access(handle, bitmap_bh);
4585 if (err)
4586 goto error_return;
4587
4588
4589
4590
4591
4592
4593 BUFFER_TRACE(gd_bh, "get_write_access");
4594 err = ext4_journal_get_write_access(handle, gd_bh);
4595 if (err)
4596 goto error_return;
4597#ifdef AGGRESSIVE_CHECK
4598 {
4599 int i;
4600 for (i = 0; i < count; i++)
4601 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4602 }
4603#endif
4604 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4605
4606 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4607 if (err)
4608 goto error_return;
4609
4610 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4611 struct ext4_free_data *new_entry;
4612
4613
4614
4615
4616 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4617 if (!new_entry) {
4618 err = -ENOMEM;
4619 goto error_return;
4620 }
4621 new_entry->start_blk = bit;
4622 new_entry->group = block_group;
4623 new_entry->count = count;
4624 new_entry->t_tid = handle->h_transaction->t_tid;
4625
4626 ext4_lock_group(sb, block_group);
4627 mb_clear_bits(bitmap_bh->b_data, bit, count);
4628 ext4_mb_free_metadata(handle, &e4b, new_entry);
4629 } else {
4630
4631
4632
4633
4634 ext4_lock_group(sb, block_group);
4635 mb_clear_bits(bitmap_bh->b_data, bit, count);
4636 mb_free_blocks(inode, &e4b, bit, count);
4637 }
4638
4639 ret = ext4_free_blks_count(sb, gdp) + count;
4640 ext4_free_blks_set(sb, gdp, ret);
4641 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4642 ext4_unlock_group(sb, block_group);
4643 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4644
4645 if (sbi->s_log_groups_per_flex) {
4646 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4647 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
4648 }
4649
4650 ext4_mb_unload_buddy(&e4b);
4651
4652 freed += count;
4653
4654
4655 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4656 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4657
4658
4659 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4660 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4661 if (!err)
4662 err = ret;
4663
4664 if (overflow && !err) {
4665 block += count;
4666 count = overflow;
4667 put_bh(bitmap_bh);
4668 goto do_more;
4669 }
4670 ext4_mark_super_dirty(sb);
4671error_return:
4672 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4673 dquot_free_block(inode, freed);
4674 brelse(bitmap_bh);
4675 ext4_std_error(sb, err);
4676 return;
4677}
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4689 ext4_fsblk_t block, unsigned long count)
4690{
4691 struct buffer_head *bitmap_bh = NULL;
4692 struct buffer_head *gd_bh;
4693 ext4_group_t block_group;
4694 ext4_grpblk_t bit;
4695 unsigned int i;
4696 struct ext4_group_desc *desc;
4697 struct ext4_sb_info *sbi = EXT4_SB(sb);
4698 struct ext4_buddy e4b;
4699 int err = 0, ret, blk_free_count;
4700 ext4_grpblk_t blocks_freed;
4701
4702 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4703
4704 if (count == 0)
4705 return 0;
4706
4707 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4708
4709
4710
4711
4712 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4713 ext4_warning(sb, "too much blocks added to group %u\n",
4714 block_group);
4715 err = -EINVAL;
4716 goto error_return;
4717 }
4718
4719 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4720 if (!bitmap_bh) {
4721 err = -EIO;
4722 goto error_return;
4723 }
4724
4725 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4726 if (!desc) {
4727 err = -EIO;
4728 goto error_return;
4729 }
4730
4731 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4732 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4733 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4734 in_range(block + count - 1, ext4_inode_table(sb, desc),
4735 sbi->s_itb_per_group)) {
4736 ext4_error(sb, "Adding blocks in system zones - "
4737 "Block = %llu, count = %lu",
4738 block, count);
4739 err = -EINVAL;
4740 goto error_return;
4741 }
4742
4743 BUFFER_TRACE(bitmap_bh, "getting write access");
4744 err = ext4_journal_get_write_access(handle, bitmap_bh);
4745 if (err)
4746 goto error_return;
4747
4748
4749
4750
4751
4752
4753 BUFFER_TRACE(gd_bh, "get_write_access");
4754 err = ext4_journal_get_write_access(handle, gd_bh);
4755 if (err)
4756 goto error_return;
4757
4758 for (i = 0, blocks_freed = 0; i < count; i++) {
4759 BUFFER_TRACE(bitmap_bh, "clear bit");
4760 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4761 ext4_error(sb, "bit already cleared for block %llu",
4762 (ext4_fsblk_t)(block + i));
4763 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4764 } else {
4765 blocks_freed++;
4766 }
4767 }
4768
4769 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4770 if (err)
4771 goto error_return;
4772
4773
4774
4775
4776
4777
4778 ext4_lock_group(sb, block_group);
4779 mb_clear_bits(bitmap_bh->b_data, bit, count);
4780 mb_free_blocks(NULL, &e4b, bit, count);
4781 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
4782 ext4_free_blks_set(sb, desc, blk_free_count);
4783 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4784 ext4_unlock_group(sb, block_group);
4785 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
4786
4787 if (sbi->s_log_groups_per_flex) {
4788 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4789 atomic_add(blocks_freed,
4790 &sbi->s_flex_groups[flex_group].free_blocks);
4791 }
4792
4793 ext4_mb_unload_buddy(&e4b);
4794
4795
4796 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4797 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4798
4799
4800 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4801 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4802 if (!err)
4803 err = ret;
4804
4805error_return:
4806 brelse(bitmap_bh);
4807 ext4_std_error(sb, err);
4808 return err;
4809}
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823static void ext4_trim_extent(struct super_block *sb, int start, int count,
4824 ext4_group_t group, struct ext4_buddy *e4b)
4825{
4826 struct ext4_free_extent ex;
4827
4828 trace_ext4_trim_extent(sb, group, start, count);
4829
4830 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4831
4832 ex.fe_start = start;
4833 ex.fe_group = group;
4834 ex.fe_len = count;
4835
4836
4837
4838
4839
4840 mb_mark_used(e4b, &ex);
4841 ext4_unlock_group(sb, group);
4842 ext4_issue_discard(sb, group, start, count);
4843 ext4_lock_group(sb, group);
4844 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4845}
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865static ext4_grpblk_t
4866ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4867 ext4_grpblk_t start, ext4_grpblk_t max,
4868 ext4_grpblk_t minblocks)
4869{
4870 void *bitmap;
4871 ext4_grpblk_t next, count = 0, free_count = 0;
4872 struct ext4_buddy e4b;
4873 int ret;
4874
4875 trace_ext4_trim_all_free(sb, group, start, max);
4876
4877 ret = ext4_mb_load_buddy(sb, group, &e4b);
4878 if (ret) {
4879 ext4_error(sb, "Error in loading buddy "
4880 "information for %u", group);
4881 return ret;
4882 }
4883 bitmap = e4b.bd_bitmap;
4884
4885 ext4_lock_group(sb, group);
4886 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
4887 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
4888 goto out;
4889
4890 start = (e4b.bd_info->bb_first_free > start) ?
4891 e4b.bd_info->bb_first_free : start;
4892
4893 while (start < max) {
4894 start = mb_find_next_zero_bit(bitmap, max, start);
4895 if (start >= max)
4896 break;
4897 next = mb_find_next_bit(bitmap, max, start);
4898
4899 if ((next - start) >= minblocks) {
4900 ext4_trim_extent(sb, start,
4901 next - start, group, &e4b);
4902 count += next - start;
4903 }
4904 free_count += next - start;
4905 start = next + 1;
4906
4907 if (fatal_signal_pending(current)) {
4908 count = -ERESTARTSYS;
4909 break;
4910 }
4911
4912 if (need_resched()) {
4913 ext4_unlock_group(sb, group);
4914 cond_resched();
4915 ext4_lock_group(sb, group);
4916 }
4917
4918 if ((e4b.bd_info->bb_free - free_count) < minblocks)
4919 break;
4920 }
4921
4922 if (!ret)
4923 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
4924out:
4925 ext4_unlock_group(sb, group);
4926 ext4_mb_unload_buddy(&e4b);
4927
4928 ext4_debug("trimmed %d blocks in the group %d\n",
4929 count, group);
4930
4931 return count;
4932}
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4947{
4948 struct ext4_group_info *grp;
4949 ext4_group_t first_group, last_group;
4950 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4951 ext4_grpblk_t cnt = 0, first_block, last_block;
4952 uint64_t start, len, minlen, trimmed = 0;
4953 ext4_fsblk_t first_data_blk =
4954 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4955 int ret = 0;
4956
4957 start = range->start >> sb->s_blocksize_bits;
4958 len = range->len >> sb->s_blocksize_bits;
4959 minlen = range->minlen >> sb->s_blocksize_bits;
4960
4961 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4962 return -EINVAL;
4963 if (start + len <= first_data_blk)
4964 goto out;
4965 if (start < first_data_blk) {
4966 len -= first_data_blk - start;
4967 start = first_data_blk;
4968 }
4969
4970
4971 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4972 &first_group, &first_block);
4973 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4974 &last_group, &last_block);
4975 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4976 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4977
4978 if (first_group > last_group)
4979 return -EINVAL;
4980
4981 for (group = first_group; group <= last_group; group++) {
4982 grp = ext4_get_group_info(sb, group);
4983
4984 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
4985 ret = ext4_mb_init_group(sb, group);
4986 if (ret)
4987 break;
4988 }
4989
4990
4991
4992
4993
4994
4995
4996 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
4997 last_block = first_block + len;
4998 len -= last_block - first_block;
4999
5000 if (grp->bb_free >= minlen) {
5001 cnt = ext4_trim_all_free(sb, group, first_block,
5002 last_block, minlen);
5003 if (cnt < 0) {
5004 ret = cnt;
5005 break;
5006 }
5007 }
5008 trimmed += cnt;
5009 first_block = 0;
5010 }
5011 range->len = trimmed * sb->s_blocksize;
5012
5013 if (!ret)
5014 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5015
5016out:
5017 return ret;
5018}
5019