/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */

/*
 * mballoc.c contains the multiblock allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/ext4.h>

#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;

module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif
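
/*
 * Multiblock allocator overview:
 *
 * Free-space information for every block group is cached in memory as
 * a "buddy" structure.  The cache lives in the page cache of a private
 * in-memory inode (sbi->s_buddy_cache); each group owns two blocks in
 * that inode's address space: an in-core copy of the on-disk block
 * bitmap and a buddy block holding one bitmap per order from 1 to
 * s_blocksize_bits + 1.  Per-order free-chunk counts, the fragment
 * count and the largest free order are kept in struct ext4_group_info.
 *
 * ext4_mb_regular_allocator() services a request by first trying the
 * goal block (ext4_mb_find_by_goal) and then scanning the groups under
 * four criteria of decreasing strictness (cr 0..3, see
 * ext4_mb_good_group).  Power-of-two requests are served directly from
 * the order bitmaps (ext4_mb_simple_scan_group); other requests walk
 * the block bitmap (ext4_mb_complex_scan_group) or, when a stripe size
 * is configured, only stripe-aligned offsets (ext4_mb_scan_aligned).
 *
 * Defaults for the allocator tunables (s_mb_min_to_scan,
 * s_mb_max_to_scan, s_mb_order2_reqs, s_mb_stream_request and
 * s_mb_group_prealloc) are set up in ext4_mb_init().
 */
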
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/* We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each of the possible block sizes */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_free_data_callback(struct super_block *sb,
				struct ext4_journal_cb_entry *jce, int rc);
371
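/*
 * The mb_*_bit() helpers below wrap the ext4 bit operations.  Buddy
 * bitmap pointers are not guaranteed to be long-aligned, so
 * mb_correct_addr_and_bit() rounds the address down to a long boundary
 * and compensates by biasing the bit number accordingly.
 */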
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "Unsupported BITS_PER_LONG"
#endif
	return addr;
}
385
386static inline int mb_test_bit(int bit, void *addr)
387{
392 addr = mb_correct_addr_and_bit(&bit, addr);
393 return ext4_test_bit(bit, addr);
394}
395
396static inline void mb_set_bit(int bit, void *addr)
397{
398 addr = mb_correct_addr_and_bit(&bit, addr);
399 ext4_set_bit(bit, addr);
400}
401
402static inline void mb_clear_bit(int bit, void *addr)
403{
404 addr = mb_correct_addr_and_bit(&bit, addr);
405 ext4_clear_bit(bit, addr);
406}
407
408static inline int mb_test_and_clear_bit(int bit, void *addr)
409{
410 addr = mb_correct_addr_and_bit(&bit, addr);
411 return ext4_test_and_clear_bit(bit, addr);
412}
413
414static inline int mb_find_next_zero_bit(void *addr, int max, int start)
415{
416 int fix = 0, ret, tmpmax;
417 addr = mb_correct_addr_and_bit(&fix, addr);
418 tmpmax = max + fix;
419 start += fix;
420
421 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
422 if (ret > max)
423 return max;
424 return ret;
425}
426
427static inline int mb_find_next_bit(void *addr, int max, int start)
428{
429 int fix = 0, ret, tmpmax;
430 addr = mb_correct_addr_and_bit(&fix, addr);
431 tmpmax = max + fix;
432 start += fix;
433
434 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
435 if (ret > max)
436 return max;
437 return ret;
438}
439
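/*
 * Return the bitmap tracking free chunks of size (1 << order) for this
 * group and, via *max, the number of bits in it.  Order 0 is the block
 * bitmap itself; higher orders live at s_mb_offsets[order] inside the
 * buddy block.
 */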
440static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
441{
442 char *bb;
443
444 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
445 BUG_ON(max == NULL);
446
447 if (order > e4b->bd_blkbits + 1) {
448 *max = 0;
449 return NULL;
450 }
451
452
453 if (order == 0) {
454 *max = 1 << (e4b->bd_blkbits + 3);
455 return e4b->bd_bitmap;
456 }
457
458 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
459 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
460
461 return bb;
462}
463
464#ifdef DOUBLE_CHECK
465static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
466 int first, int count)
467{
468 int i;
469 struct super_block *sb = e4b->bd_sb;
470
471 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
472 return;
473 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
474 for (i = 0; i < count; i++) {
475 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
476 ext4_fsblk_t blocknr;
477
478 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
479 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
480 ext4_grp_locked_error(sb, e4b->bd_group,
481 inode ? inode->i_ino : 0,
482 blocknr,
483 "freeing block already freed "
484 "(bit %u)",
485 first + i);
486 }
487 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
488 }
489}
490
491static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
492{
493 int i;
494
495 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
496 return;
497 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
498 for (i = 0; i < count; i++) {
499 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
500 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
501 }
502}
503
504static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
505{
506 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
507 unsigned char *b1, *b2;
508 int i;
509 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
510 b2 = (unsigned char *) bitmap;
511 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
512 if (b1[i] != b2[i]) {
513 ext4_msg(e4b->bd_sb, KERN_ERR,
514 "corruption in group %u "
515 "at byte %u(%u): %x in copy != %x "
516 "on disk/prealloc",
517 e4b->bd_group, i, i * 8, b1[i], b2[i]);
518 BUG();
519 }
520 }
521 }
522}
523
524#else
525static inline void mb_free_blocks_double(struct inode *inode,
526 struct ext4_buddy *e4b, int first, int count)
527{
528 return;
529}
530static inline void mb_mark_used_double(struct ext4_buddy *e4b,
531 int first, int count)
532{
533 return;
534}
535static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
536{
537 return;
538}
539#endif
540
541#ifdef AGGRESSIVE_CHECK
542
543#define MB_CHECK_ASSERT(assert) \
544do { \
545 if (!(assert)) { \
546 printk(KERN_EMERG \
547 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
548 function, file, line, # assert); \
549 BUG(); \
550 } \
551} while (0)
552
553static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
554 const char *function, int line)
555{
556 struct super_block *sb = e4b->bd_sb;
557 int order = e4b->bd_blkbits + 1;
558 int max;
559 int max2;
560 int i;
561 int j;
562 int k;
563 int count;
564 struct ext4_group_info *grp;
565 int fragments = 0;
566 int fstart;
567 struct list_head *cur;
568 void *buddy;
569 void *buddy2;
570
571 {
572 static int mb_check_counter;
573 if (mb_check_counter++ % 100 != 0)
574 return 0;
575 }
576
577 while (order > 1) {
578 buddy = mb_find_buddy(e4b, order, &max);
579 MB_CHECK_ASSERT(buddy);
580 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
581 MB_CHECK_ASSERT(buddy2);
582 MB_CHECK_ASSERT(buddy != buddy2);
583 MB_CHECK_ASSERT(max * 2 == max2);
584
585 count = 0;
586 for (i = 0; i < max; i++) {
587
588 if (mb_test_bit(i, buddy)) {
589
590 if (!mb_test_bit(i << 1, buddy2)) {
591 MB_CHECK_ASSERT(
592 mb_test_bit((i<<1)+1, buddy2));
593 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
594 MB_CHECK_ASSERT(
595 mb_test_bit(i << 1, buddy2));
596 }
597 continue;
598 }
599
600
601 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
602 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
603
604 for (j = 0; j < (1 << order); j++) {
605 k = (i * (1 << order)) + j;
606 MB_CHECK_ASSERT(
607 !mb_test_bit(k, e4b->bd_bitmap));
608 }
609 count++;
610 }
611 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
612 order--;
613 }
614
615 fstart = -1;
616 buddy = mb_find_buddy(e4b, 0, &max);
617 for (i = 0; i < max; i++) {
618 if (!mb_test_bit(i, buddy)) {
619 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
620 if (fstart == -1) {
621 fragments++;
622 fstart = i;
623 }
624 continue;
625 }
626 fstart = -1;
627
628 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
629 buddy2 = mb_find_buddy(e4b, j, &max2);
630 k = i >> j;
631 MB_CHECK_ASSERT(k < max2);
632 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
633 }
634 }
635 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
636 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
637
638 grp = ext4_get_group_info(sb, e4b->bd_group);
639 list_for_each(cur, &grp->bb_prealloc_list) {
640 ext4_group_t groupnr;
641 struct ext4_prealloc_space *pa;
642 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
643 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
644 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
645 for (i = 0; i < pa->pa_len; i++)
646 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
647 }
648 return 0;
649}
650#undef MB_CHECK_ASSERT
651#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
652 __FILE__, __func__, __LINE__)
653#else
654#define mb_check_buddy(e4b)
655#endif
656
657
658
659
660
661
662
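/*
 * Mark the free range [first, first + len) in the buddy bitmaps: split
 * it into maximal power-of-two chunks aligned on their own size, clear
 * the corresponding order bit and bump bb_counters[] for each chunk.
 * Used while generating a group's buddy.
 */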
663static void ext4_mb_mark_free_simple(struct super_block *sb,
664 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
665 struct ext4_group_info *grp)
666{
667 struct ext4_sb_info *sbi = EXT4_SB(sb);
668 ext4_grpblk_t min;
669 ext4_grpblk_t max;
670 ext4_grpblk_t chunk;
671 unsigned short border;
672
673 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
674
675 border = 2 << sb->s_blocksize_bits;
676
677 while (len > 0) {
678
679 max = ffs(first | border) - 1;
680
681
682 min = fls(len) - 1;
683
684 if (max < min)
685 min = max;
686 chunk = 1 << min;
687
688
689 grp->bb_counters[min]++;
690 if (min > 0)
691 mb_clear_bit(first >> min,
692 buddy + sbi->s_mb_offsets[min]);
693
694 len -= chunk;
695 first += chunk;
696 }
697}
698
699
700
701
702
703static void
704mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
705{
706 int i;
707 int bits;
708
709 grp->bb_largest_free_order = -1;
710
711 bits = sb->s_blocksize_bits + 1;
712 for (i = bits; i >= 0; i--) {
713 if (grp->bb_counters[i] > 0) {
714 grp->bb_largest_free_order = i;
715 break;
716 }
717 }
718}
719
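/*
 * Build the buddy bitmaps for a group from the in-core copy of its
 * block bitmap: walk the free extents, feed them to
 * ext4_mb_mark_free_simple() and record bb_first_free, bb_fragments and
 * the number of free clusters.  If that count disagrees with the group
 * descriptor, an error is logged and bb_free is corrected to match the
 * bitmap.
 */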
720static noinline_for_stack
721void ext4_mb_generate_buddy(struct super_block *sb,
722 void *buddy, void *bitmap, ext4_group_t group)
723{
724 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
725 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
726 ext4_grpblk_t i = 0;
727 ext4_grpblk_t first;
728 ext4_grpblk_t len;
729 unsigned free = 0;
730 unsigned fragments = 0;
731 unsigned long long period = get_cycles();
732
733
734
735 i = mb_find_next_zero_bit(bitmap, max, 0);
736 grp->bb_first_free = i;
737 while (i < max) {
738 fragments++;
739 first = i;
740 i = mb_find_next_bit(bitmap, max, i);
741 len = i - first;
742 free += len;
743 if (len > 1)
744 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
745 else
746 grp->bb_counters[0]++;
747 if (i < max)
748 i = mb_find_next_zero_bit(bitmap, max, i);
749 }
750 grp->bb_fragments = fragments;
751
752 if (free != grp->bb_free) {
753 ext4_grp_locked_error(sb, group, 0, 0,
754 "%u clusters in bitmap, %u in gd",
755 free, grp->bb_free);
756
757
758
759
760 grp->bb_free = free;
761 }
762 mb_set_largest_free_order(sb, grp);
763
764 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
765
766 period = get_cycles() - period;
767 spin_lock(&EXT4_SB(sb)->s_bal_lock);
768 EXT4_SB(sb)->s_mb_buddies_generated++;
769 EXT4_SB(sb)->s_mb_generation_time += period;
770 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
771}
772
773static void mb_regenerate_buddy(struct ext4_buddy *e4b)
774{
775 int count;
776 int order = 1;
777 void *buddy;
778
779 while ((buddy = mb_find_buddy(e4b, order++, &count))) {
780 ext4_set_bits(buddy, 0, count);
781 }
782 e4b->bd_info->bb_fragments = 0;
783 memset(e4b->bd_info->bb_counters, 0,
784 sizeof(*e4b->bd_info->bb_counters) *
785 (e4b->bd_sb->s_blocksize_bits + 2));
786
787 ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
788 e4b->bd_bitmap, e4b->bd_group);
789}
790
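/*
 * Fill one page of the buddy cache.  Blocks in the buddy-cache inode
 * alternate between bitmap and buddy data: block 2 * N holds the bitmap
 * of group N and block 2 * N + 1 its buddy.  For each group covered by
 * @page the on-disk bitmap is read and either copied into the bitmap
 * block (with preallocated and not-yet-committed-free blocks re-marked
 * as in use) or used to generate the buddy block.  When bitmap and
 * buddy land in different pages, @incore carries the bitmap data over
 * from the caller so the buddy half can still be generated.
 */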
811static int ext4_mb_init_cache(struct page *page, char *incore)
812{
813 ext4_group_t ngroups;
814 int blocksize;
815 int blocks_per_page;
816 int groups_per_page;
817 int err = 0;
818 int i;
819 ext4_group_t first_group, group;
820 int first_block;
821 struct super_block *sb;
822 struct buffer_head *bhs;
823 struct buffer_head **bh = NULL;
824 struct inode *inode;
825 char *data;
826 char *bitmap;
827 struct ext4_group_info *grinfo;
828
829 mb_debug(1, "init page %lu\n", page->index);
830
831 inode = page->mapping->host;
832 sb = inode->i_sb;
833 ngroups = ext4_get_groups_count(sb);
834 blocksize = 1 << inode->i_blkbits;
835 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
836
837 groups_per_page = blocks_per_page >> 1;
838 if (groups_per_page == 0)
839 groups_per_page = 1;
840
841
842 if (groups_per_page > 1) {
843 i = sizeof(struct buffer_head *) * groups_per_page;
844 bh = kzalloc(i, GFP_NOFS);
845 if (bh == NULL) {
846 err = -ENOMEM;
847 goto out;
848 }
849 } else
850 bh = &bhs;
851
852 first_group = page->index * blocks_per_page / 2;
853
854
855 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
856 if (group >= ngroups)
857 break;
858
859 grinfo = ext4_get_group_info(sb, group);
860
861
862
863
864
865
866 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
867 bh[i] = NULL;
868 continue;
869 }
870 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
871 err = -ENOMEM;
872 goto out;
873 }
874 mb_debug(1, "read bitmap for group %u\n", group);
875 }
876
877
878 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
879 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
880 err = -EIO;
881 goto out;
882 }
883 }
884
885 first_block = page->index * blocks_per_page;
886 for (i = 0; i < blocks_per_page; i++) {
887 group = (first_block + i) >> 1;
888 if (group >= ngroups)
889 break;
890
891 if (!bh[group - first_group])
892
893 continue;
894
895
896
897
898
899
900
901 data = page_address(page) + (i * blocksize);
902 bitmap = bh[group - first_group]->b_data;
903
904
905
906
907
908 if ((first_block + i) & 1) {
909
910 BUG_ON(incore == NULL);
911 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
912 group, page->index, i * blocksize);
913 trace_ext4_mb_buddy_bitmap_load(sb, group);
914 grinfo = ext4_get_group_info(sb, group);
915 grinfo->bb_fragments = 0;
916 memset(grinfo->bb_counters, 0,
917 sizeof(*grinfo->bb_counters) *
918 (sb->s_blocksize_bits+2));
919
920
921
922 ext4_lock_group(sb, group);
923
924 memset(data, 0xff, blocksize);
925 ext4_mb_generate_buddy(sb, data, incore, group);
926 ext4_unlock_group(sb, group);
927 incore = NULL;
928 } else {
929
930 BUG_ON(incore != NULL);
931 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
932 group, page->index, i * blocksize);
933 trace_ext4_mb_bitmap_load(sb, group);
934
935
936 ext4_lock_group(sb, group);
937 memcpy(data, bitmap, blocksize);
938
939
940 ext4_mb_generate_from_pa(sb, data, group);
941 ext4_mb_generate_from_freelist(sb, data, group);
942 ext4_unlock_group(sb, group);
943
944
945
946
947 incore = data;
948 }
949 }
950 SetPageUptodate(page);
951
952out:
953 if (bh) {
954 for (i = 0; i < groups_per_page; i++)
955 brelse(bh[i]);
956 if (bh != &bhs)
957 kfree(bh);
958 }
959 return err;
960}
961
962
963
964
965
966
967
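/*
 * Lock (creating if necessary) the page(s) that hold this group's
 * bitmap and buddy blocks, so concurrent initializers of the same group
 * serialize on the page lock.  If both blocks share one page, only that
 * page is taken.  Released by ext4_mb_put_buddy_page_lock().
 */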
968static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
969 ext4_group_t group, struct ext4_buddy *e4b)
970{
971 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
972 int block, pnum, poff;
973 int blocks_per_page;
974 struct page *page;
975
976 e4b->bd_buddy_page = NULL;
977 e4b->bd_bitmap_page = NULL;
978
979 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
980
981
982
983
984
985 block = group * 2;
986 pnum = block / blocks_per_page;
987 poff = block % blocks_per_page;
988 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
989 if (!page)
990 return -EIO;
991 BUG_ON(page->mapping != inode->i_mapping);
992 e4b->bd_bitmap_page = page;
993 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
994
995 if (blocks_per_page >= 2) {
996
997 return 0;
998 }
999
1000 block++;
1001 pnum = block / blocks_per_page;
1002 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1003 if (!page)
1004 return -EIO;
1005 BUG_ON(page->mapping != inode->i_mapping);
1006 e4b->bd_buddy_page = page;
1007 return 0;
1008}
1009
1010static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1011{
1012 if (e4b->bd_bitmap_page) {
1013 unlock_page(e4b->bd_bitmap_page);
1014 page_cache_release(e4b->bd_bitmap_page);
1015 }
1016 if (e4b->bd_buddy_page) {
1017 unlock_page(e4b->bd_buddy_page);
1018 page_cache_release(e4b->bd_buddy_page);
1019 }
1020}
1021
1022
1023
1024
1025
1026
1027static noinline_for_stack
1028int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1029{
1030
1031 struct ext4_group_info *this_grp;
1032 struct ext4_buddy e4b;
1033 struct page *page;
1034 int ret = 0;
1035
1036 might_sleep();
1037 mb_debug(1, "init group %u\n", group);
1038 this_grp = ext4_get_group_info(sb, group);
1039
1040
1041
1042
1043
1044
1045
1046 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1047 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1048
1049
1050
1051
1052 goto err;
1053 }
1054
1055 page = e4b.bd_bitmap_page;
1056 ret = ext4_mb_init_cache(page, NULL);
1057 if (ret)
1058 goto err;
1059 if (!PageUptodate(page)) {
1060 ret = -EIO;
1061 goto err;
1062 }
1063 mark_page_accessed(page);
1064
1065 if (e4b.bd_buddy_page == NULL) {
1066
1067
1068
1069
1070
1071 ret = 0;
1072 goto err;
1073 }
1074
1075 page = e4b.bd_buddy_page;
1076 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1077 if (ret)
1078 goto err;
1079 if (!PageUptodate(page)) {
1080 ret = -EIO;
1081 goto err;
1082 }
1083 mark_page_accessed(page);
1084err:
1085 ext4_mb_put_buddy_page_lock(&e4b);
1086 return ret;
1087}
1088
1089
1090
1091
1092
1093
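/*
 * Pin the bitmap and buddy pages for @group into @e4b, initializing the
 * buddy cache on demand (ext4_mb_init_group) and filling any page that
 * is not yet uptodate.  Callers drop the references with
 * ext4_mb_unload_buddy().
 */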
1094static noinline_for_stack int
1095ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1096 struct ext4_buddy *e4b)
1097{
1098 int blocks_per_page;
1099 int block;
1100 int pnum;
1101 int poff;
1102 struct page *page;
1103 int ret;
1104 struct ext4_group_info *grp;
1105 struct ext4_sb_info *sbi = EXT4_SB(sb);
1106 struct inode *inode = sbi->s_buddy_cache;
1107
1108 might_sleep();
1109 mb_debug(1, "load group %u\n", group);
1110
1111 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1112 grp = ext4_get_group_info(sb, group);
1113
1114 e4b->bd_blkbits = sb->s_blocksize_bits;
1115 e4b->bd_info = grp;
1116 e4b->bd_sb = sb;
1117 e4b->bd_group = group;
1118 e4b->bd_buddy_page = NULL;
1119 e4b->bd_bitmap_page = NULL;
1120
1121 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1122
1123
1124
1125
1126 ret = ext4_mb_init_group(sb, group);
1127 if (ret)
1128 return ret;
1129 }
1130
1131
1132
1133
1134
1135
1136 block = group * 2;
1137 pnum = block / blocks_per_page;
1138 poff = block % blocks_per_page;
1139
1140
1141
1142 page = find_get_page(inode->i_mapping, pnum);
1143 if (page == NULL || !PageUptodate(page)) {
1144 if (page)
1145
1146
1147
1148
1149
1150
1151
1152
1153 page_cache_release(page);
1154 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1155 if (page) {
1156 BUG_ON(page->mapping != inode->i_mapping);
1157 if (!PageUptodate(page)) {
1158 ret = ext4_mb_init_cache(page, NULL);
1159 if (ret) {
1160 unlock_page(page);
1161 goto err;
1162 }
1163 mb_cmp_bitmaps(e4b, page_address(page) +
1164 (poff * sb->s_blocksize));
1165 }
1166 unlock_page(page);
1167 }
1168 }
1169 if (page == NULL || !PageUptodate(page)) {
1170 ret = -EIO;
1171 goto err;
1172 }
1173 e4b->bd_bitmap_page = page;
1174 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1175 mark_page_accessed(page);
1176
1177 block++;
1178 pnum = block / blocks_per_page;
1179 poff = block % blocks_per_page;
1180
1181 page = find_get_page(inode->i_mapping, pnum);
1182 if (page == NULL || !PageUptodate(page)) {
1183 if (page)
1184 page_cache_release(page);
1185 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1186 if (page) {
1187 BUG_ON(page->mapping != inode->i_mapping);
1188 if (!PageUptodate(page)) {
1189 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
1190 if (ret) {
1191 unlock_page(page);
1192 goto err;
1193 }
1194 }
1195 unlock_page(page);
1196 }
1197 }
1198 if (page == NULL || !PageUptodate(page)) {
1199 ret = -EIO;
1200 goto err;
1201 }
1202 e4b->bd_buddy_page = page;
1203 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1204 mark_page_accessed(page);
1205
1206 BUG_ON(e4b->bd_bitmap_page == NULL);
1207 BUG_ON(e4b->bd_buddy_page == NULL);
1208
1209 return 0;
1210
1211err:
1212 if (page)
1213 page_cache_release(page);
1214 if (e4b->bd_bitmap_page)
1215 page_cache_release(e4b->bd_bitmap_page);
1216 if (e4b->bd_buddy_page)
1217 page_cache_release(e4b->bd_buddy_page);
1218 e4b->bd_buddy = NULL;
1219 e4b->bd_bitmap = NULL;
1220 return ret;
1221}
1222
1223static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1224{
1225 if (e4b->bd_bitmap_page)
1226 page_cache_release(e4b->bd_bitmap_page);
1227 if (e4b->bd_buddy_page)
1228 page_cache_release(e4b->bd_buddy_page);
1229}
1230
1231
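/*
 * Return the order of the free buddy chunk containing @block: the first
 * order >= 1 whose bitmap shows the covering chunk as free, or 0 if no
 * such chunk exists.
 */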
1232static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1233{
1234 int order = 1;
1235 void *bb;
1236
1237 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1238 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1239
1240 bb = e4b->bd_buddy;
1241 while (order <= e4b->bd_blkbits + 1) {
1242 block = block >> 1;
1243 if (!mb_test_bit(block, bb)) {
1244
1245 return order;
1246 }
1247 bb += 1 << (e4b->bd_blkbits - order);
1248 order++;
1249 }
1250 return 0;
1251}
1252
1253static void mb_clear_bits(void *bm, int cur, int len)
1254{
1255 __u32 *addr;
1256
1257 len = cur + len;
1258 while (cur < len) {
1259 if ((cur & 31) == 0 && (len - cur) >= 32) {
1260
1261 addr = bm + (cur >> 3);
1262 *addr = 0;
1263 cur += 32;
1264 continue;
1265 }
1266 mb_clear_bit(cur, bm);
1267 cur++;
1268 }
1269}
1270
1271
1272
1273
1274static int mb_test_and_clear_bits(void *bm, int cur, int len)
1275{
1276 __u32 *addr;
1277 int zero_bit = -1;
1278
1279 len = cur + len;
1280 while (cur < len) {
1281 if ((cur & 31) == 0 && (len - cur) >= 32) {
1282
1283 addr = bm + (cur >> 3);
1284 if (*addr != (__u32)(-1) && zero_bit == -1)
1285 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1286 *addr = 0;
1287 cur += 32;
1288 continue;
1289 }
1290 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1291 zero_bit = cur;
1292 cur++;
1293 }
1294
1295 return zero_bit;
1296}
1297
1298void ext4_set_bits(void *bm, int cur, int len)
1299{
1300 __u32 *addr;
1301
1302 len = cur + len;
1303 while (cur < len) {
1304 if ((cur & 31) == 0 && (len - cur) >= 32) {
1305
1306 addr = bm + (cur >> 3);
1307 *addr = 0xffffffff;
1308 cur += 32;
1309 continue;
1310 }
1311 mb_set_bit(cur, bm);
1312 cur++;
1313 }
1314}
1315
1316
1317
1318
1319static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1320{
1321 if (mb_test_bit(*bit + side, bitmap)) {
1322 mb_clear_bit(*bit, bitmap);
1323 (*bit) -= side;
1324 return 1;
1325 }
1326 else {
1327 (*bit) += side;
1328 mb_set_bit(*bit, bitmap);
1329 return -1;
1330 }
1331}
1332
1333static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1334{
1335 int max;
1336 int order = 1;
1337 void *buddy = mb_find_buddy(e4b, order, &max);
1338
1339 while (buddy) {
1340 void *buddy2;
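		/*
		 * At this order, bits [first, last] of @buddy cover the range
		 * being freed.  An odd @first or an even @last means the chunk
		 * at that end pairs with a neighbour outside the range, so
		 * whether it can merge upward depends on that neighbour;
		 * mb_buddy_adjust_border() resolves this and fixes up
		 * bb_counters[].  The chunks left in [first, last] merge in
		 * pairs, so the range is halved and carried to the next order;
		 * once it shrinks to a single chunk or no higher order exists,
		 * the remaining bits are cleared (marked free) at the current
		 * order.
		 */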
1371 if (first & 1)
1372 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1373 if (!(last & 1))
1374 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1375 if (first > last)
1376 break;
1377 order++;
1378
1379 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1380 mb_clear_bits(buddy, first, last - first + 1);
1381 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1382 break;
1383 }
1384 first >>= 1;
1385 last >>= 1;
1386 buddy = buddy2;
1387 }
1388}
1389
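/*
 * Free @count clusters starting at @first in the in-core bitmap and
 * propagate the result up the buddy bitmaps, updating bb_free,
 * bb_first_free, bb_fragments and bb_counters[].  A bit that was
 * already clear indicates a double free: the error is reported and the
 * buddy is rebuilt from the bitmap via mb_regenerate_buddy().
 */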
1390static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1391 int first, int count)
1392{
1393 int left_is_free = 0;
1394 int right_is_free = 0;
1395 int block;
1396 int last = first + count - 1;
1397 struct super_block *sb = e4b->bd_sb;
1398
1399 BUG_ON(last >= (sb->s_blocksize << 3));
1400 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1401 mb_check_buddy(e4b);
1402 mb_free_blocks_double(inode, e4b, first, count);
1403
1404 e4b->bd_info->bb_free += count;
1405 if (first < e4b->bd_info->bb_first_free)
1406 e4b->bd_info->bb_first_free = first;
1407
1408
1409
1410
1411 if (first != 0)
1412 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1413 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1414 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1415 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1416
1417 if (unlikely(block != -1)) {
1418 ext4_fsblk_t blocknr;
1419
1420 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1421 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1422 ext4_grp_locked_error(sb, e4b->bd_group,
1423 inode ? inode->i_ino : 0,
1424 blocknr,
1425 "freeing already freed block "
1426 "(bit %u)", block);
1427 mb_regenerate_buddy(e4b);
1428 goto done;
1429 }
1430
1431
1432 if (left_is_free && right_is_free)
1433 e4b->bd_info->bb_fragments--;
1434 else if (!left_is_free && !right_is_free)
1435 e4b->bd_info->bb_fragments++;
1436
1437
1438
1439
1440
1441
1442
1443 if (first & 1) {
1444 first += !left_is_free;
1445 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1446 }
1447 if (!(last & 1)) {
1448 last -= !right_is_free;
1449 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1450 }
1451
1452 if (first <= last)
1453 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1454
1455done:
1456 mb_set_largest_free_order(sb, e4b->bd_info);
1457 mb_check_buddy(e4b);
1458}
1459
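/*
 * Describe the free extent containing @block: fill @ex with its group,
 * start and length, growing it through successive buddy chunks until at
 * least @needed clusters are covered or the free region ends.  Returns
 * the resulting length, or 0 if @block is in use.
 */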
1460static int mb_find_extent(struct ext4_buddy *e4b, int block,
1461 int needed, struct ext4_free_extent *ex)
1462{
1463 int next = block;
1464 int max, order;
1465 void *buddy;
1466
1467 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1468 BUG_ON(ex == NULL);
1469
1470 buddy = mb_find_buddy(e4b, 0, &max);
1471 BUG_ON(buddy == NULL);
1472 BUG_ON(block >= max);
1473 if (mb_test_bit(block, buddy)) {
1474 ex->fe_len = 0;
1475 ex->fe_start = 0;
1476 ex->fe_group = 0;
1477 return 0;
1478 }
1479
1480
1481 order = mb_find_order_for_block(e4b, block);
1482 block = block >> order;
1483
1484 ex->fe_len = 1 << order;
1485 ex->fe_start = block << order;
1486 ex->fe_group = e4b->bd_group;
1487
1488
1489 next = next - ex->fe_start;
1490 ex->fe_len -= next;
1491 ex->fe_start += next;
1492
1493 while (needed > ex->fe_len &&
1494 mb_find_buddy(e4b, order, &max)) {
1495
1496 if (block + 1 >= max)
1497 break;
1498
1499 next = (block + 1) * (1 << order);
1500 if (mb_test_bit(next, e4b->bd_bitmap))
1501 break;
1502
1503 order = mb_find_order_for_block(e4b, next);
1504
1505 block = next >> order;
1506 ex->fe_len += 1 << order;
1507 }
1508
1509 BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
1510 return ex->fe_len;
1511}
1512
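/*
 * Mark extent @ex allocated: set the range in the block bitmap, update
 * bb_free/bb_fragments/bb_first_free and carve the range out of the
 * buddy bitmaps, splitting larger chunks where the extent is not
 * aligned.  The return value encodes the first split as
 * len | (order << 16); ext4_mb_use_best_found() keeps it in
 * ac_tail/ac_buddy for statistics.
 */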
1513static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1514{
1515 int ord;
1516 int mlen = 0;
1517 int max = 0;
1518 int cur;
1519 int start = ex->fe_start;
1520 int len = ex->fe_len;
1521 unsigned ret = 0;
1522 int len0 = len;
1523 void *buddy;
1524
1525 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1526 BUG_ON(e4b->bd_group != ex->fe_group);
1527 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1528 mb_check_buddy(e4b);
1529 mb_mark_used_double(e4b, start, len);
1530
1531 e4b->bd_info->bb_free -= len;
1532 if (e4b->bd_info->bb_first_free == start)
1533 e4b->bd_info->bb_first_free += len;
1534
1535
1536 if (start != 0)
1537 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1538 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1539 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1540 if (mlen && max)
1541 e4b->bd_info->bb_fragments++;
1542 else if (!mlen && !max)
1543 e4b->bd_info->bb_fragments--;
1544
1545
1546 while (len) {
1547 ord = mb_find_order_for_block(e4b, start);
1548
1549 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1550
1551 mlen = 1 << ord;
1552 buddy = mb_find_buddy(e4b, ord, &max);
1553 BUG_ON((start >> ord) >= max);
1554 mb_set_bit(start >> ord, buddy);
1555 e4b->bd_info->bb_counters[ord]--;
1556 start += mlen;
1557 len -= mlen;
1558 BUG_ON(len < 0);
1559 continue;
1560 }
1561
1562
1563 if (ret == 0)
1564 ret = len | (ord << 16);
1565
1566
1567 BUG_ON(ord <= 0);
1568 buddy = mb_find_buddy(e4b, ord, &max);
1569 mb_set_bit(start >> ord, buddy);
1570 e4b->bd_info->bb_counters[ord]--;
1571
1572 ord--;
1573 cur = (start >> ord) & ~1U;
1574 buddy = mb_find_buddy(e4b, ord, &max);
1575 mb_clear_bit(cur, buddy);
1576 mb_clear_bit(cur + 1, buddy);
1577 e4b->bd_info->bb_counters[ord]++;
1578 e4b->bd_info->bb_counters[ord]++;
1579 }
1580 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1581
1582 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1583 mb_check_buddy(e4b);
1584
1585 return ret;
1586}
1587
1588
1589
1590
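/*
 * Commit ac_b_ex as the allocation result: trim it to the goal length,
 * mark it used in the buddy and switch the context to AC_STATUS_FOUND.
 * References on the bitmap and buddy pages are taken so they stay
 * pinned until the allocation context is released; for stream
 * allocations the group and offset are remembered as the next goal.
 */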
1591static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1592 struct ext4_buddy *e4b)
1593{
1594 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1595 int ret;
1596
1597 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1598 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1599
1600 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1601 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1602 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1603
1604
1605
1606 ac->ac_f_ex = ac->ac_b_ex;
1607
1608 ac->ac_status = AC_STATUS_FOUND;
1609 ac->ac_tail = ret & 0xffff;
1610 ac->ac_buddy = ret >> 16;
1611
1619 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1620 get_page(ac->ac_bitmap_page);
1621 ac->ac_buddy_page = e4b->bd_buddy_page;
1622 get_page(ac->ac_buddy_page);
1623
1624 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1625 spin_lock(&sbi->s_md_lock);
1626 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1627 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1628 spin_unlock(&sbi->s_md_lock);
1629 }
1630}
1631
1632
1633
1634
1635
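/*
 * Decide whether scanning should stop: give up after s_mb_max_to_scan
 * extents unless EXT4_MB_HINT_FIRST is set, and once the best-found
 * extent reaches the goal length and either the group is finished or
 * s_mb_min_to_scan extents have been examined, try to claim it via
 * mb_find_extent()/ext4_mb_use_best_found().
 */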
1636static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1637 struct ext4_buddy *e4b,
1638 int finish_group)
1639{
1640 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1641 struct ext4_free_extent *bex = &ac->ac_b_ex;
1642 struct ext4_free_extent *gex = &ac->ac_g_ex;
1643 struct ext4_free_extent ex;
1644 int max;
1645
1646 if (ac->ac_status == AC_STATUS_FOUND)
1647 return;
1648
1649
1650
1651 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1652 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1653 ac->ac_status = AC_STATUS_BREAK;
1654 return;
1655 }
1656
1657
1658
1659
1660 if (bex->fe_len < gex->fe_len)
1661 return;
1662
1663 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1664 && bex->fe_group == e4b->bd_group) {
1665
1666
1667
1668 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1669 if (max >= gex->fe_len) {
1670 ext4_mb_use_best_found(ac, e4b);
1671 return;
1672 }
1673 }
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1687 struct ext4_free_extent *ex,
1688 struct ext4_buddy *e4b)
1689{
1690 struct ext4_free_extent *bex = &ac->ac_b_ex;
1691 struct ext4_free_extent *gex = &ac->ac_g_ex;
1692
1693 BUG_ON(ex->fe_len <= 0);
1694 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1695 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1696 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1697
1698 ac->ac_found++;
1699
1700
1701
1702
1703 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1704 *bex = *ex;
1705 ext4_mb_use_best_found(ac, e4b);
1706 return;
1707 }
1708
1709
1710
1711
1712 if (ex->fe_len == gex->fe_len) {
1713 *bex = *ex;
1714 ext4_mb_use_best_found(ac, e4b);
1715 return;
1716 }
1717
1718
1719
1720
1721 if (bex->fe_len == 0) {
1722 *bex = *ex;
1723 return;
1724 }
1725
1726
1727
1728
1729 if (bex->fe_len < gex->fe_len) {
1730
1731
1732 if (ex->fe_len > bex->fe_len)
1733 *bex = *ex;
1734 } else if (ex->fe_len > gex->fe_len) {
1735
1736
1737
1738 if (ex->fe_len < bex->fe_len)
1739 *bex = *ex;
1740 }
1741
1742 ext4_mb_check_limits(ac, e4b, 0);
1743}
1744
1745static noinline_for_stack
1746int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1747 struct ext4_buddy *e4b)
1748{
1749 struct ext4_free_extent ex = ac->ac_b_ex;
1750 ext4_group_t group = ex.fe_group;
1751 int max;
1752 int err;
1753
1754 BUG_ON(ex.fe_len <= 0);
1755 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1756 if (err)
1757 return err;
1758
1759 ext4_lock_group(ac->ac_sb, group);
1760 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1761
1762 if (max > 0) {
1763 ac->ac_b_ex = ex;
1764 ext4_mb_use_best_found(ac, e4b);
1765 }
1766
1767 ext4_unlock_group(ac->ac_sb, group);
1768 ext4_mb_unload_buddy(e4b);
1769
1770 return 0;
1771}
1772
1773static noinline_for_stack
1774int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1775 struct ext4_buddy *e4b)
1776{
1777 ext4_group_t group = ac->ac_g_ex.fe_group;
1778 int max;
1779 int err;
1780 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1781 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1782 struct ext4_free_extent ex;
1783
1784 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1785 return 0;
1786 if (grp->bb_free == 0)
1787 return 0;
1788
1789 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1790 if (err)
1791 return err;
1792
1793 ext4_lock_group(ac->ac_sb, group);
1794 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1795 ac->ac_g_ex.fe_len, &ex);
1796
1797 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1798 ext4_fsblk_t start;
1799
1800 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1801 ex.fe_start;
1802
1803 if (do_div(start, sbi->s_stripe) == 0) {
1804 ac->ac_found++;
1805 ac->ac_b_ex = ex;
1806 ext4_mb_use_best_found(ac, e4b);
1807 }
1808 } else if (max >= ac->ac_g_ex.fe_len) {
1809 BUG_ON(ex.fe_len <= 0);
1810 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1811 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1812 ac->ac_found++;
1813 ac->ac_b_ex = ex;
1814 ext4_mb_use_best_found(ac, e4b);
1815 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1816
1817
1818 BUG_ON(ex.fe_len <= 0);
1819 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1820 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1821 ac->ac_found++;
1822 ac->ac_b_ex = ex;
1823 ext4_mb_use_best_found(ac, e4b);
1824 }
1825 ext4_unlock_group(ac->ac_sb, group);
1826 ext4_mb_unload_buddy(e4b);
1827
1828 return 0;
1829}
1830
1831
1832
1833
1834
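/*
 * cr == 0 scan for power-of-two requests: look only at order bitmaps of
 * order >= ac_2order and take the first free chunk found;
 * ext4_mb_use_best_found() trims it to the requested length.
 */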
1835static noinline_for_stack
1836void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1837 struct ext4_buddy *e4b)
1838{
1839 struct super_block *sb = ac->ac_sb;
1840 struct ext4_group_info *grp = e4b->bd_info;
1841 void *buddy;
1842 int i;
1843 int k;
1844 int max;
1845
1846 BUG_ON(ac->ac_2order <= 0);
1847 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1848 if (grp->bb_counters[i] == 0)
1849 continue;
1850
1851 buddy = mb_find_buddy(e4b, i, &max);
1852 BUG_ON(buddy == NULL);
1853
1854 k = mb_find_next_zero_bit(buddy, max, 0);
1855 BUG_ON(k >= max);
1856
1857 ac->ac_found++;
1858
1859 ac->ac_b_ex.fe_len = 1 << i;
1860 ac->ac_b_ex.fe_start = k << i;
1861 ac->ac_b_ex.fe_group = e4b->bd_group;
1862
1863 ext4_mb_use_best_found(ac, e4b);
1864
1865 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1866
1867 if (EXT4_SB(sb)->s_mb_stats)
1868 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1869
1870 break;
1871 }
1872}
1873
1874
1875
1876
1877
1878
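/*
 * General scan: walk the block bitmap from bb_first_free, measure every
 * free extent with mb_find_extent()/ext4_mb_measure_extent() and stop
 * once an acceptable extent is found or the group's free clusters are
 * exhausted.  Disagreement between bb_free and the bitmap is reported
 * as a filesystem error.
 */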
1879static noinline_for_stack
1880void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1881 struct ext4_buddy *e4b)
1882{
1883 struct super_block *sb = ac->ac_sb;
1884 void *bitmap = e4b->bd_bitmap;
1885 struct ext4_free_extent ex;
1886 int i;
1887 int free;
1888
1889 free = e4b->bd_info->bb_free;
1890 BUG_ON(free <= 0);
1891
1892 i = e4b->bd_info->bb_first_free;
1893
1894 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1895 i = mb_find_next_zero_bit(bitmap,
1896 EXT4_CLUSTERS_PER_GROUP(sb), i);
1897 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1898
1899
1900
1901
1902
1903 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1904 "%d free clusters as per "
1905 "group info. But bitmap says 0",
1906 free);
1907 break;
1908 }
1909
1910 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1911 BUG_ON(ex.fe_len <= 0);
1912 if (free < ex.fe_len) {
1913 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1914 "%d free clusters as per "
1915 "group info. But got %d blocks",
1916 free, ex.fe_len);
1917
1918
1919
1920
1921
1922 break;
1923 }
1924
1925 ext4_mb_measure_extent(ac, &ex, e4b);
1926
1927 i += ex.fe_len;
1928 free -= ex.fe_len;
1929 }
1930
1931 ext4_mb_check_limits(ac, e4b, 1);
1932}
1933
1934
1935
1936
1937
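/*
 * Scan used when a stripe size is configured (e.g. RAID): probe only
 * stripe-aligned offsets within the group and accept the first free
 * extent spanning at least one full stripe.
 */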
1938static noinline_for_stack
1939void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1940 struct ext4_buddy *e4b)
1941{
1942 struct super_block *sb = ac->ac_sb;
1943 struct ext4_sb_info *sbi = EXT4_SB(sb);
1944 void *bitmap = e4b->bd_bitmap;
1945 struct ext4_free_extent ex;
1946 ext4_fsblk_t first_group_block;
1947 ext4_fsblk_t a;
1948 ext4_grpblk_t i;
1949 int max;
1950
1951 BUG_ON(sbi->s_stripe == 0);
1952
1953
1954 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1955
1956 a = first_group_block + sbi->s_stripe - 1;
1957 do_div(a, sbi->s_stripe);
1958 i = (a * sbi->s_stripe) - first_group_block;
1959
1960 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1961 if (!mb_test_bit(i, bitmap)) {
1962 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1963 if (max >= sbi->s_stripe) {
1964 ac->ac_found++;
1965 ac->ac_b_ex = ex;
1966 ext4_mb_use_best_found(ac, e4b);
1967 break;
1968 }
1969 }
1970 i += sbi->s_stripe;
1971 }
1972}
1973
1974
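/*
 * Cheap check whether a group is worth scanning under criterion @cr:
 *   cr 0 - a power-of-two request must fit in a free chunk of
 *          sufficient order (the first group of a flex_bg is skipped
 *          for data allocations);
 *   cr 1 - the average free-fragment size must cover the request;
 *   cr 2 - the group only needs enough free clusters in total;
 *   cr 3 - any group with free space will do.
 */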
1975static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1976 ext4_group_t group, int cr)
1977{
1978 unsigned free, fragments;
1979 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1980 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1981
1982 BUG_ON(cr < 0 || cr >= 4);
1983
1984 free = grp->bb_free;
1985 if (free == 0)
1986 return 0;
1987 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1988 return 0;
1989
1990
1991 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1992 int ret = ext4_mb_init_group(ac->ac_sb, group);
1993 if (ret)
1994 return 0;
1995 }
1996
1997 fragments = grp->bb_fragments;
1998 if (fragments == 0)
1999 return 0;
2000
2001 switch (cr) {
2002 case 0:
2003 BUG_ON(ac->ac_2order == 0);
2004
2005
2006 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2007 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2008 ((group % flex_size) == 0))
2009 return 0;
2010
2011 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
2012 (free / fragments) >= ac->ac_g_ex.fe_len)
2013 return 1;
2014
2015 if (grp->bb_largest_free_order < ac->ac_2order)
2016 return 0;
2017
2018 return 1;
2019 case 1:
2020 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2021 return 1;
2022 break;
2023 case 2:
2024 if (free >= ac->ac_g_ex.fe_len)
2025 return 1;
2026 break;
2027 case 3:
2028 return 1;
2029 default:
2030 BUG();
2031 }
2032
2033 return 0;
2034}
2035
2036static noinline_for_stack int
2037ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2038{
2039 ext4_group_t ngroups, group, i;
2040 int cr;
2041 int err = 0;
2042 struct ext4_sb_info *sbi;
2043 struct super_block *sb;
2044 struct ext4_buddy e4b;
2045
2046 sb = ac->ac_sb;
2047 sbi = EXT4_SB(sb);
2048 ngroups = ext4_get_groups_count(sb);
2049
2050 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2051 ngroups = sbi->s_blockfile_groups;
2052
2053 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2054
2055
2056 err = ext4_mb_find_by_goal(ac, &e4b);
2057 if (err || ac->ac_status == AC_STATUS_FOUND)
2058 goto out;
2059
2060 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2061 goto out;
2062
2063
2064
2065
2066
2067
2068 i = fls(ac->ac_g_ex.fe_len);
2069 ac->ac_2order = 0;
2070
2071
2072
2073
2074
2075 if (i >= sbi->s_mb_order2_reqs) {
2076
2077
2078
2079 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2080 ac->ac_2order = i - 1;
2081 }
2082
2083
2084 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2085
2086 spin_lock(&sbi->s_md_lock);
2087 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2088 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2089 spin_unlock(&sbi->s_md_lock);
2090 }
2091
2092
2093 cr = ac->ac_2order ? 0 : 1;
2094
2095
2096
2097
2098repeat:
2099 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2100 ac->ac_criteria = cr;
2101
2102
2103
2104
2105 group = ac->ac_g_ex.fe_group;
2106
2107 for (i = 0; i < ngroups; group++, i++) {
2108 cond_resched();
2109
2110
2111
2112
2113 if (group >= ngroups)
2114 group = 0;
2115
2116
2117 if (!ext4_mb_good_group(ac, group, cr))
2118 continue;
2119
2120 err = ext4_mb_load_buddy(sb, group, &e4b);
2121 if (err)
2122 goto out;
2123
2124 ext4_lock_group(sb, group);
2125
2126
2127
2128
2129
2130 if (!ext4_mb_good_group(ac, group, cr)) {
2131 ext4_unlock_group(sb, group);
2132 ext4_mb_unload_buddy(&e4b);
2133 continue;
2134 }
2135
2136 ac->ac_groups_scanned++;
2137 if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
2138 ext4_mb_simple_scan_group(ac, &e4b);
2139 else if (cr == 1 && sbi->s_stripe &&
2140 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2141 ext4_mb_scan_aligned(ac, &e4b);
2142 else
2143 ext4_mb_complex_scan_group(ac, &e4b);
2144
2145 ext4_unlock_group(sb, group);
2146 ext4_mb_unload_buddy(&e4b);
2147
2148 if (ac->ac_status != AC_STATUS_CONTINUE)
2149 break;
2150 }
2151 }
2152
2153 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2154 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2155
2156
2157
2158
2159
2160 ext4_mb_try_best_found(ac, &e4b);
2161 if (ac->ac_status != AC_STATUS_FOUND) {
2162
2163
2164
2165
2166
2167
2168 ac->ac_b_ex.fe_group = 0;
2169 ac->ac_b_ex.fe_start = 0;
2170 ac->ac_b_ex.fe_len = 0;
2171 ac->ac_status = AC_STATUS_CONTINUE;
2172 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2173 cr = 3;
2174 atomic_inc(&sbi->s_mb_lost_chunks);
2175 goto repeat;
2176 }
2177 }
2178out:
2179 return err;
2180}
2181
2182static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2183{
2184 struct super_block *sb = seq->private;
2185 ext4_group_t group;
2186
2187 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2188 return NULL;
2189 group = *pos + 1;
2190 return (void *) ((unsigned long) group);
2191}
2192
2193static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2194{
2195 struct super_block *sb = seq->private;
2196 ext4_group_t group;
2197
2198 ++*pos;
2199 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2200 return NULL;
2201 group = *pos + 1;
2202 return (void *) ((unsigned long) group);
2203}
2204
2205static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2206{
2207 struct super_block *sb = seq->private;
2208 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2209 int i;
2210 int err, buddy_loaded = 0;
2211 struct ext4_buddy e4b;
2212 struct ext4_group_info *grinfo;
2213 struct sg {
2214 struct ext4_group_info info;
2215 ext4_grpblk_t counters[16];
2216 } sg;
2217
2218 group--;
2219 if (group == 0)
2220 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2221 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2222 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2223 "group", "free", "frags", "first",
2224 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2225 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2226
2227 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2228 sizeof(struct ext4_group_info);
2229 grinfo = ext4_get_group_info(sb, group);
2230
2231 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2232 err = ext4_mb_load_buddy(sb, group, &e4b);
2233 if (err) {
2234 seq_printf(seq, "#%-5u: I/O error\n", group);
2235 return 0;
2236 }
2237 buddy_loaded = 1;
2238 }
2239
2240 memcpy(&sg, ext4_get_group_info(sb, group), i);
2241
2242 if (buddy_loaded)
2243 ext4_mb_unload_buddy(&e4b);
2244
2245 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2246 sg.info.bb_fragments, sg.info.bb_first_free);
2247 for (i = 0; i <= 13; i++)
2248 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2249 sg.info.bb_counters[i] : 0);
2250 seq_printf(seq, " ]\n");
2251
2252 return 0;
2253}
2254
2255static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2256{
2257}
2258
2259static const struct seq_operations ext4_mb_seq_groups_ops = {
2260 .start = ext4_mb_seq_groups_start,
2261 .next = ext4_mb_seq_groups_next,
2262 .stop = ext4_mb_seq_groups_stop,
2263 .show = ext4_mb_seq_groups_show,
2264};
2265
2266static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2267{
2268 struct super_block *sb = PDE_DATA(inode);
2269 int rc;
2270
2271 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2272 if (rc == 0) {
2273 struct seq_file *m = file->private_data;
2274 m->private = sb;
2275 }
2276 return rc;
2277
2278}
2279
2280static const struct file_operations ext4_mb_seq_groups_fops = {
2281 .owner = THIS_MODULE,
2282 .open = ext4_mb_seq_groups_open,
2283 .read = seq_read,
2284 .llseek = seq_lseek,
2285 .release = seq_release,
2286};
2287
2288static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2289{
2290 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2291 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2292
2293 BUG_ON(!cachep);
2294 return cachep;
2295}
2296
2297
2298
2299
2300
2301int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2302{
2303 struct ext4_sb_info *sbi = EXT4_SB(sb);
2304 unsigned size;
2305 struct ext4_group_info ***new_groupinfo;
2306
2307 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2308 EXT4_DESC_PER_BLOCK_BITS(sb);
2309 if (size <= sbi->s_group_info_size)
2310 return 0;
2311
2312 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2313 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
2314 if (!new_groupinfo) {
2315 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2316 return -ENOMEM;
2317 }
2318 if (sbi->s_group_info) {
2319 memcpy(new_groupinfo, sbi->s_group_info,
2320 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2321 ext4_kvfree(sbi->s_group_info);
2322 }
2323 sbi->s_group_info = new_groupinfo;
2324 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2325 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2326 sbi->s_group_info_size);
2327 return 0;
2328}
2329
2330
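/* Create and initialize ext4_group_info data for the given group. */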
2331int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2332 struct ext4_group_desc *desc)
2333{
2334 int i;
2335 int metalen = 0;
2336 struct ext4_sb_info *sbi = EXT4_SB(sb);
2337 struct ext4_group_info **meta_group_info;
2338 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2339
2340
2341
2342
2343
2344
2345 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2346 metalen = sizeof(*meta_group_info) <<
2347 EXT4_DESC_PER_BLOCK_BITS(sb);
2348 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2349 if (meta_group_info == NULL) {
2350 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2351 "for a buddy group");
2352 goto exit_meta_group_info;
2353 }
2354 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2355 meta_group_info;
2356 }
2357
2358 meta_group_info =
2359 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2360 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2361
2362 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
2363 if (meta_group_info[i] == NULL) {
2364 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2365 goto exit_group_info;
2366 }
2367 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2368 &(meta_group_info[i]->bb_state));
2369
2370
2371
2372
2373
2374 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2375 meta_group_info[i]->bb_free =
2376 ext4_free_clusters_after_init(sb, group, desc);
2377 } else {
2378 meta_group_info[i]->bb_free =
2379 ext4_free_group_clusters(sb, desc);
2380 }
2381
2382 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2383 init_rwsem(&meta_group_info[i]->alloc_sem);
2384 meta_group_info[i]->bb_free_root = RB_ROOT;
2385 meta_group_info[i]->bb_largest_free_order = -1;
2386
2387#ifdef DOUBLE_CHECK
2388 {
2389 struct buffer_head *bh;
2390 meta_group_info[i]->bb_bitmap =
2391 kmalloc(sb->s_blocksize, GFP_KERNEL);
2392 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2393 bh = ext4_read_block_bitmap(sb, group);
2394 BUG_ON(bh == NULL);
2395 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2396 sb->s_blocksize);
2397 put_bh(bh);
2398 }
2399#endif
2400
2401 return 0;
2402
2403exit_group_info:
2404
2405 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2406 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2407 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2408 }
2409exit_meta_group_info:
2410 return -ENOMEM;
2411}
2412
2413static int ext4_mb_init_backend(struct super_block *sb)
2414{
2415 ext4_group_t ngroups = ext4_get_groups_count(sb);
2416 ext4_group_t i;
2417 struct ext4_sb_info *sbi = EXT4_SB(sb);
2418 int err;
2419 struct ext4_group_desc *desc;
2420 struct kmem_cache *cachep;
2421
2422 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2423 if (err)
2424 return err;
2425
2426 sbi->s_buddy_cache = new_inode(sb);
2427 if (sbi->s_buddy_cache == NULL) {
2428 ext4_msg(sb, KERN_ERR, "can't get new inode");
2429 goto err_freesgi;
2430 }
2431
2432
2433
2434
2435 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2436 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2437 for (i = 0; i < ngroups; i++) {
2438 desc = ext4_get_group_desc(sb, i, NULL);
2439 if (desc == NULL) {
2440 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2441 goto err_freebuddy;
2442 }
2443 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2444 goto err_freebuddy;
2445 }
2446
2447 return 0;
2448
2449err_freebuddy:
2450 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2451 while (i-- > 0)
2452 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2453 i = sbi->s_group_info_size;
2454 while (i-- > 0)
2455 kfree(sbi->s_group_info[i]);
2456 iput(sbi->s_buddy_cache);
2457err_freesgi:
2458 ext4_kvfree(sbi->s_group_info);
2459 return -ENOMEM;
2460}
2461
2462static void ext4_groupinfo_destroy_slabs(void)
2463{
2464 int i;
2465
2466 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2467 if (ext4_groupinfo_caches[i])
2468 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2469 ext4_groupinfo_caches[i] = NULL;
2470 }
2471}
2472
2473static int ext4_groupinfo_create_slab(size_t size)
2474{
2475 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2476 int slab_size;
2477 int blocksize_bits = order_base_2(size);
2478 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2479 struct kmem_cache *cachep;
2480
2481 if (cache_index >= NR_GRPINFO_CACHES)
2482 return -EINVAL;
2483
2484 if (unlikely(cache_index < 0))
2485 cache_index = 0;
2486
2487 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2488 if (ext4_groupinfo_caches[cache_index]) {
2489 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2490 return 0;
2491 }
2492
2493 slab_size = offsetof(struct ext4_group_info,
2494 bb_counters[blocksize_bits + 2]);
2495
2496 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2497 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2498 NULL);
2499
2500 ext4_groupinfo_caches[cache_index] = cachep;
2501
2502 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2503 if (!cachep) {
2504 printk(KERN_EMERG
2505 "EXT4-fs: no memory for groupinfo slab cache\n");
2506 return -ENOMEM;
2507 }
2508
2509 return 0;
2510}
2511
2512int ext4_mb_init(struct super_block *sb)
2513{
2514 struct ext4_sb_info *sbi = EXT4_SB(sb);
2515 unsigned i, j;
2516 unsigned offset;
2517 unsigned max;
2518 int ret;
2519
2520 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2521
2522 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2523 if (sbi->s_mb_offsets == NULL) {
2524 ret = -ENOMEM;
2525 goto out;
2526 }
2527
2528 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2529 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2530 if (sbi->s_mb_maxs == NULL) {
2531 ret = -ENOMEM;
2532 goto out;
2533 }
2534
2535 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2536 if (ret < 0)
2537 goto out;
2538
2539
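	/*
	 * Precompute, for each order, the byte offset of its bitmap inside
	 * the buddy block and the number of bits it holds.  As an
	 * illustration, with a 4k block size (s_blocksize_bits == 12):
	 * order 0 is the block bitmap itself (32768 bits), order 1 starts
	 * at byte 0 of the buddy block with 16384 bits, order 2 at byte
	 * 2048 with 8192 bits, order 3 at byte 3072 with 4096 bits, and so
	 * on down to order 13 with 4 bits; the whole table fits in one
	 * 4096-byte block.
	 */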
2540 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2541 sbi->s_mb_offsets[0] = 0;
2542
2543 i = 1;
2544 offset = 0;
2545 max = sb->s_blocksize << 2;
2546 do {
2547 sbi->s_mb_offsets[i] = offset;
2548 sbi->s_mb_maxs[i] = max;
2549 offset += 1 << (sb->s_blocksize_bits - i);
2550 max = max >> 1;
2551 i++;
2552 } while (i <= sb->s_blocksize_bits + 1);
2553
2554 spin_lock_init(&sbi->s_md_lock);
2555 spin_lock_init(&sbi->s_bal_lock);
2556
2557 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2558 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2559 sbi->s_mb_stats = MB_DEFAULT_STATS;
2560 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2561 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2562
2574 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2575 sbi->s_cluster_bits, 32);
2576
2584 if (sbi->s_stripe > 1) {
2585 sbi->s_mb_group_prealloc = roundup(
2586 sbi->s_mb_group_prealloc, sbi->s_stripe);
2587 }
2588
2589 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2590 if (sbi->s_locality_groups == NULL) {
2591 ret = -ENOMEM;
2592 goto out_free_groupinfo_slab;
2593 }
2594 for_each_possible_cpu(i) {
2595 struct ext4_locality_group *lg;
2596 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2597 mutex_init(&lg->lg_mutex);
2598 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2599 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2600 spin_lock_init(&lg->lg_prealloc_lock);
2601 }
2602
2603
2604 ret = ext4_mb_init_backend(sb);
2605 if (ret != 0)
2606 goto out_free_locality_groups;
2607
2608 if (sbi->s_proc)
2609 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2610 &ext4_mb_seq_groups_fops, sb);
2611
2612 return 0;
2613
2614out_free_locality_groups:
2615 free_percpu(sbi->s_locality_groups);
2616 sbi->s_locality_groups = NULL;
2617out_free_groupinfo_slab:
2618 ext4_groupinfo_destroy_slabs();
2619out:
2620 kfree(sbi->s_mb_offsets);
2621 sbi->s_mb_offsets = NULL;
2622 kfree(sbi->s_mb_maxs);
2623 sbi->s_mb_maxs = NULL;
2624 return ret;
2625}
2626
2627
2628static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2629{
2630 struct ext4_prealloc_space *pa;
2631 struct list_head *cur, *tmp;
2632 int count = 0;
2633
2634 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2635 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2636 list_del(&pa->pa_group_list);
2637 count++;
2638 kmem_cache_free(ext4_pspace_cachep, pa);
2639 }
2640 if (count)
2641 mb_debug(1, "mballoc: %u PAs left\n", count);
2642
2643}
2644
2645int ext4_mb_release(struct super_block *sb)
2646{
2647 ext4_group_t ngroups = ext4_get_groups_count(sb);
2648 ext4_group_t i;
2649 int num_meta_group_infos;
2650 struct ext4_group_info *grinfo;
2651 struct ext4_sb_info *sbi = EXT4_SB(sb);
2652 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2653
2654 if (sbi->s_proc)
2655 remove_proc_entry("mb_groups", sbi->s_proc);
2656
2657 if (sbi->s_group_info) {
2658 for (i = 0; i < ngroups; i++) {
2659 grinfo = ext4_get_group_info(sb, i);
2660#ifdef DOUBLE_CHECK
2661 kfree(grinfo->bb_bitmap);
2662#endif
2663 ext4_lock_group(sb, i);
2664 ext4_mb_cleanup_pa(grinfo);
2665 ext4_unlock_group(sb, i);
2666 kmem_cache_free(cachep, grinfo);
2667 }
2668 num_meta_group_infos = (ngroups +
2669 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2670 EXT4_DESC_PER_BLOCK_BITS(sb);
2671 for (i = 0; i < num_meta_group_infos; i++)
2672 kfree(sbi->s_group_info[i]);
2673 ext4_kvfree(sbi->s_group_info);
2674 }
2675 kfree(sbi->s_mb_offsets);
2676 kfree(sbi->s_mb_maxs);
2677 if (sbi->s_buddy_cache)
2678 iput(sbi->s_buddy_cache);
2679 if (sbi->s_mb_stats) {
2680 ext4_msg(sb, KERN_INFO,
2681 "mballoc: %u blocks %u reqs (%u success)",
2682 atomic_read(&sbi->s_bal_allocated),
2683 atomic_read(&sbi->s_bal_reqs),
2684 atomic_read(&sbi->s_bal_success));
2685 ext4_msg(sb, KERN_INFO,
2686 "mballoc: %u extents scanned, %u goal hits, "
2687 "%u 2^N hits, %u breaks, %u lost",
2688 atomic_read(&sbi->s_bal_ex_scanned),
2689 atomic_read(&sbi->s_bal_goals),
2690 atomic_read(&sbi->s_bal_2orders),
2691 atomic_read(&sbi->s_bal_breaks),
2692 atomic_read(&sbi->s_mb_lost_chunks));
2693 ext4_msg(sb, KERN_INFO,
2694 "mballoc: %lu generated and it took %Lu",
2695 sbi->s_mb_buddies_generated,
2696 sbi->s_mb_generation_time);
2697 ext4_msg(sb, KERN_INFO,
2698 "mballoc: %u preallocated, %u discarded",
2699 atomic_read(&sbi->s_mb_preallocated),
2700 atomic_read(&sbi->s_mb_discarded));
2701 }
2702
2703 free_percpu(sbi->s_locality_groups);
2704
2705 return 0;
2706}
2707
2708static inline int ext4_issue_discard(struct super_block *sb,
2709 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2710{
2711 ext4_fsblk_t discard_block;
2712
2713 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2714 ext4_group_first_block_no(sb, block_group));
2715 count = EXT4_C2B(EXT4_SB(sb), count);
2716 trace_ext4_discard_blocks(sb,
2717 (unsigned long long) discard_block, count);
2718 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2719}
2720
2721
2722
2723
2724
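/*
 * This is the jbd2 commit callback registered by ext4_mb_free_metadata():
 * the transaction that freed these clusters has committed, so they can now
 * be returned to the buddy cache (and discarded, if requested).
 */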
2725static void ext4_free_data_callback(struct super_block *sb,
2726 struct ext4_journal_cb_entry *jce,
2727 int rc)
2728{
2729 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2730 struct ext4_buddy e4b;
2731 struct ext4_group_info *db;
2732 int err, count = 0, count2 = 0;
2733
2734 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2735 entry->efd_count, entry->efd_group, entry);
2736
2737 if (test_opt(sb, DISCARD)) {
2738 err = ext4_issue_discard(sb, entry->efd_group,
2739 entry->efd_start_cluster,
2740 entry->efd_count);
2741 if (err && err != -EOPNOTSUPP)
2742 ext4_msg(sb, KERN_WARNING, "discard request in"
2743 " group:%d block:%d count:%d failed"
2744 " with %d", entry->efd_group,
2745 entry->efd_start_cluster,
2746 entry->efd_count, err);
2747 }
2748
2749 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2750
2751 BUG_ON(err != 0);
2752
2753
2754 db = e4b.bd_info;
2755
2756 count += entry->efd_count;
2757 count2++;
2758 ext4_lock_group(sb, entry->efd_group);
2759
2760 rb_erase(&entry->efd_node, &(db->bb_free_root));
2761 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2762
2763
2764
2765
2766
2767
2768
2769 if (!test_opt(sb, DISCARD))
2770 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2771
2772 if (!db->bb_free_root.rb_node) {
2773
2774
2775
2776 page_cache_release(e4b.bd_buddy_page);
2777 page_cache_release(e4b.bd_bitmap_page);
2778 }
2779 ext4_unlock_group(sb, entry->efd_group);
2780 kmem_cache_free(ext4_free_data_cachep, entry);
2781 ext4_mb_unload_buddy(&e4b);
2782
2783 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2784}
2785
2786int __init ext4_init_mballoc(void)
2787{
2788 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2789 SLAB_RECLAIM_ACCOUNT);
2790 if (ext4_pspace_cachep == NULL)
2791 return -ENOMEM;
2792
2793 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2794 SLAB_RECLAIM_ACCOUNT);
2795 if (ext4_ac_cachep == NULL) {
2796 kmem_cache_destroy(ext4_pspace_cachep);
2797 return -ENOMEM;
2798 }
2799
2800 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2801 SLAB_RECLAIM_ACCOUNT);
2802 if (ext4_free_data_cachep == NULL) {
2803 kmem_cache_destroy(ext4_pspace_cachep);
2804 kmem_cache_destroy(ext4_ac_cachep);
2805 return -ENOMEM;
2806 }
2807 return 0;
2808}
2809
2810void ext4_exit_mballoc(void)
2811{
2812
2813
2814
2815
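	/*
	 * Wait for pending call_rcu() callbacks (ext4_mb_pa_callback frees
	 * into ext4_pspace_cachep) to finish before the slabs go away.
	 */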
2816 rcu_barrier();
2817 kmem_cache_destroy(ext4_pspace_cachep);
2818 kmem_cache_destroy(ext4_ac_cachep);
2819 kmem_cache_destroy(ext4_free_data_cachep);
2820 ext4_groupinfo_destroy_slabs();
2821}
2822
2823
2824
2825
2826
2827
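/*
 * Mark the clusters chosen by the allocator (ac->ac_b_ex) as used in the
 * on-disk block bitmap and group descriptor, and update the free-cluster
 * accounting.
 */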
2828static noinline_for_stack int
2829ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2830 handle_t *handle, unsigned int reserv_clstrs)
2831{
2832 struct buffer_head *bitmap_bh = NULL;
2833 struct ext4_group_desc *gdp;
2834 struct buffer_head *gdp_bh;
2835 struct ext4_sb_info *sbi;
2836 struct super_block *sb;
2837 ext4_fsblk_t block;
2838 int err, len;
2839
2840 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2841 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2842
2843 sb = ac->ac_sb;
2844 sbi = EXT4_SB(sb);
2845
2846 err = -EIO;
2847 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2848 if (!bitmap_bh)
2849 goto out_err;
2850
2851 err = ext4_journal_get_write_access(handle, bitmap_bh);
2852 if (err)
2853 goto out_err;
2854
2855 err = -EIO;
2856 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2857 if (!gdp)
2858 goto out_err;
2859
2860 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2861 ext4_free_group_clusters(sb, gdp));
2862
2863 err = ext4_journal_get_write_access(handle, gdp_bh);
2864 if (err)
2865 goto out_err;
2866
2867 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2868
2869 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2870 if (!ext4_data_block_valid(sbi, block, len)) {
2871 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2872 "fs metadata", block, block+len);
2873
2874
2875
2876
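		/*
		 * The selected range overlaps fs metadata; mark it in the
		 * bitmap anyway so it can never be allocated again, then
		 * return -EAGAIN so the caller retries the allocation.
		 */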
2877 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2878 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2879 ac->ac_b_ex.fe_len);
2880 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2881 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2882 if (!err)
2883 err = -EAGAIN;
2884 goto out_err;
2885 }
2886
2887 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2888#ifdef AGGRESSIVE_CHECK
2889 {
2890 int i;
2891 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2892 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2893 bitmap_bh->b_data));
2894 }
2895 }
2896#endif
2897 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2898 ac->ac_b_ex.fe_len);
2899 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2900 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2901 ext4_free_group_clusters_set(sb, gdp,
2902 ext4_free_clusters_after_init(sb,
2903 ac->ac_b_ex.fe_group, gdp));
2904 }
2905 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2906 ext4_free_group_clusters_set(sb, gdp, len);
2907 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
2908 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
2909
2910 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2911 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2912
2913
2914
2915 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2916
2917 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2918 reserv_clstrs);
2919
2920 if (sbi->s_log_groups_per_flex) {
2921 ext4_group_t flex_group = ext4_flex_group(sbi,
2922 ac->ac_b_ex.fe_group);
2923 atomic64_sub(ac->ac_b_ex.fe_len,
2924 &sbi->s_flex_groups[flex_group].free_clusters);
2925 }
2926
2927 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2928 if (err)
2929 goto out_err;
2930 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2931
2932out_err:
2933 brelse(bitmap_bh);
2934 return err;
2935}
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
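/*
 * Normalize a locality-group request: the goal length is simply the
 * per-group preallocation size (s_mb_group_prealloc).
 */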
2946static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2947{
2948 struct super_block *sb = ac->ac_sb;
2949 struct ext4_locality_group *lg = ac->ac_lg;
2950
2951 BUG_ON(lg == NULL);
2952 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2953 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2954 current->pid, ac->ac_g_ex.fe_len);
2955}
2956
2957
2958
2959
2960
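/*
 * Normalize the request for an inode allocation: predict a sensible chunk
 * size from the file size, align the start, and trim the resulting range
 * so it does not overlap the inode's existing preallocations.
 */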
2961static noinline_for_stack void
2962ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2963 struct ext4_allocation_request *ar)
2964{
2965 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2966 int bsbits, max;
2967 ext4_lblk_t end;
2968 loff_t size, start_off;
2969 loff_t orig_size __maybe_unused;
2970 ext4_lblk_t start;
2971 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2972 struct ext4_prealloc_space *pa;
2973
2974
2975
2976 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2977 return;
2978
2979
2980 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2981 return;
2982
2983
2984
2985 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2986 return;
2987
2988 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2989 ext4_mb_normalize_group_request(ac);
		return;
2991 }
2992
2993 bsbits = ac->ac_sb->s_blocksize_bits;
2994
2995
2996
2997 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2998 size = size << bsbits;
2999 if (size < i_size_read(ac->ac_inode))
3000 size = i_size_read(ac->ac_inode);
3001 orig_size = size;
3002
3003
3004 max = 2 << bsbits;
3005
3006#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3007 (req <= (size) || max <= (chunk_size))
3008
3009
3010
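	/*
	 * Predict the file size and pick a preallocation chunk accordingly:
	 * small files get small power-of-two chunks, larger files get
	 * 2/4/8 MB aligned chunks, anything bigger keeps the original size.
	 */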
3011 start_off = 0;
3012 if (size <= 16 * 1024) {
3013 size = 16 * 1024;
3014 } else if (size <= 32 * 1024) {
3015 size = 32 * 1024;
3016 } else if (size <= 64 * 1024) {
3017 size = 64 * 1024;
3018 } else if (size <= 128 * 1024) {
3019 size = 128 * 1024;
3020 } else if (size <= 256 * 1024) {
3021 size = 256 * 1024;
3022 } else if (size <= 512 * 1024) {
3023 size = 512 * 1024;
3024 } else if (size <= 1024 * 1024) {
3025 size = 1024 * 1024;
3026 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3027 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3028 (21 - bsbits)) << 21;
3029 size = 2 * 1024 * 1024;
3030 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3031 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3032 (22 - bsbits)) << 22;
3033 size = 4 * 1024 * 1024;
3034 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3035 (8<<20)>>bsbits, max, 8 * 1024)) {
3036 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3037 (23 - bsbits)) << 23;
3038 size = 8 * 1024 * 1024;
3039 } else {
3040 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
3041 size = ac->ac_o_ex.fe_len << bsbits;
3042 }
3043 size = size >> bsbits;
3044 start = start_off >> bsbits;
3045
3046
3047 if (ar->pleft && start <= ar->lleft) {
3048 size -= ar->lleft + 1 - start;
3049 start = ar->lleft + 1;
3050 }
3051 if (ar->pright && start + size - 1 >= ar->lright)
3052 size -= start + size - ar->lright;
3053
3054 end = start + size;
3055
3056
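	/*
	 * Shrink [start, end) so it does not overlap any existing inode
	 * preallocation; the original request itself must already lie
	 * outside every PA.
	 */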
3057 rcu_read_lock();
3058 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3059 ext4_lblk_t pa_end;
3060
3061 if (pa->pa_deleted)
3062 continue;
3063 spin_lock(&pa->pa_lock);
3064 if (pa->pa_deleted) {
3065 spin_unlock(&pa->pa_lock);
3066 continue;
3067 }
3068
3069 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3070 pa->pa_len);
3071
3072
3073 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3074 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3075
3076
3077 if (pa->pa_lstart >= end || pa_end <= start) {
3078 spin_unlock(&pa->pa_lock);
3079 continue;
3080 }
3081 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3082
3083
3084 if (pa_end <= ac->ac_o_ex.fe_logical) {
3085 BUG_ON(pa_end < start);
3086 start = pa_end;
3087 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3088 BUG_ON(pa->pa_lstart > end);
3089 end = pa->pa_lstart;
3090 }
3091 spin_unlock(&pa->pa_lock);
3092 }
3093 rcu_read_unlock();
3094 size = end - start;
3095
3096
3097 rcu_read_lock();
3098 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3099 ext4_lblk_t pa_end;
3100
3101 spin_lock(&pa->pa_lock);
3102 if (pa->pa_deleted == 0) {
3103 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3104 pa->pa_len);
3105 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3106 }
3107 spin_unlock(&pa->pa_lock);
3108 }
3109 rcu_read_unlock();
3110
3111 if (start + size <= ac->ac_o_ex.fe_logical &&
3112 start > ac->ac_o_ex.fe_logical) {
3113 ext4_msg(ac->ac_sb, KERN_ERR,
3114 "start %lu, size %lu, fe_logical %lu",
3115 (unsigned long) start, (unsigned long) size,
3116 (unsigned long) ac->ac_o_ex.fe_logical);
3117 }
3118 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3119 start > ac->ac_o_ex.fe_logical);
3120 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3121
3122
3123
3124
3125
3126 ac->ac_g_ex.fe_logical = start;
3127 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3128
3129
3130 if (ar->pright && (ar->lright == (start + size))) {
3131
3132 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3133 &ac->ac_f_ex.fe_group,
3134 &ac->ac_f_ex.fe_start);
3135 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3136 }
3137 if (ar->pleft && (ar->lleft + 1 == start)) {
3138
3139 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3140 &ac->ac_f_ex.fe_group,
3141 &ac->ac_f_ex.fe_start);
3142 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3143 }
3144
3145 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3146 (unsigned) orig_size, (unsigned) start);
3147}
3148
3149static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3150{
3151 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3152
3153 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3154 atomic_inc(&sbi->s_bal_reqs);
3155 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3156 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3157 atomic_inc(&sbi->s_bal_success);
3158 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3159 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3160 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3161 atomic_inc(&sbi->s_bal_goals);
3162 if (ac->ac_found > sbi->s_mb_max_to_scan)
3163 atomic_inc(&sbi->s_bal_breaks);
3164 }
3165
3166 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3167 trace_ext4_mballoc_alloc(ac);
3168 else
3169 trace_ext4_mballoc_prealloc(ac);
3170}
3171
3172
3173
3174
3175
3176
3177
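/*
 * Called when an allocation that used an inode PA fails later on: return
 * the clusters taken from the PA so they are not leaked.
 */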
3178static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3179{
3180 struct ext4_prealloc_space *pa = ac->ac_pa;
3181
3182 if (pa && pa->pa_type == MB_INODE_PA)
3183 pa->pa_free += ac->ac_b_ex.fe_len;
3184}
3185
3186
3187
3188
3189static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3190 struct ext4_prealloc_space *pa)
3191{
3192 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3193 ext4_fsblk_t start;
3194 ext4_fsblk_t end;
3195 int len;
3196
3197
3198 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3199 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3200 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3201 len = EXT4_NUM_B2C(sbi, end - start);
3202 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3203 &ac->ac_b_ex.fe_start);
3204 ac->ac_b_ex.fe_len = len;
3205 ac->ac_status = AC_STATUS_FOUND;
3206 ac->ac_pa = pa;
3207
3208 BUG_ON(start < pa->pa_pstart);
3209 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3210 BUG_ON(pa->pa_free < len);
3211 pa->pa_free -= len;
3212
3213 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3214}
3215
3216
3217
3218
3219static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3220 struct ext4_prealloc_space *pa)
3221{
3222 unsigned int len = ac->ac_o_ex.fe_len;
3223
3224 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3225 &ac->ac_b_ex.fe_group,
3226 &ac->ac_b_ex.fe_start);
3227 ac->ac_b_ex.fe_len = len;
3228 ac->ac_status = AC_STATUS_FOUND;
3229 ac->ac_pa = pa;
3230
3231
3232
3233
3234
3235
3236
3237 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3238}
3239
3240
3241
3242
3243
3244
3245
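/*
 * Choose between @pa and the current best candidate @cpa: keep whichever
 * preallocation is physically closer to @goal_block, adjusting the
 * reference counts to match.
 */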
3246static struct ext4_prealloc_space *
3247ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3248 struct ext4_prealloc_space *pa,
3249 struct ext4_prealloc_space *cpa)
3250{
3251 ext4_fsblk_t cur_distance, new_distance;
3252
3253 if (cpa == NULL) {
3254 atomic_inc(&pa->pa_count);
3255 return pa;
3256 }
3257 cur_distance = abs(goal_block - cpa->pa_pstart);
3258 new_distance = abs(goal_block - pa->pa_pstart);
3259
3260 if (cur_distance <= new_distance)
3261 return cpa;
3262
3263
3264 atomic_dec(&cpa->pa_count);
3265 atomic_inc(&pa->pa_count);
3266 return pa;
3267}
3268
3269
3270
3271
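/*
 * Try to satisfy the request from preallocated space: first the inode's
 * own PAs, then (for group allocations) the per-CPU locality group lists.
 * Returns 1 if a preallocation was used, 0 otherwise.
 */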
3272static noinline_for_stack int
3273ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3274{
3275 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3276 int order, i;
3277 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3278 struct ext4_locality_group *lg;
3279 struct ext4_prealloc_space *pa, *cpa = NULL;
3280 ext4_fsblk_t goal_block;
3281
3282
3283 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3284 return 0;
3285
3286
3287 rcu_read_lock();
3288 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3289
3290
3291
3292 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3293 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3294 EXT4_C2B(sbi, pa->pa_len)))
3295 continue;
3296
3297
3298 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3299 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3300 EXT4_MAX_BLOCK_FILE_PHYS))
3301 continue;
3302
3303
3304 spin_lock(&pa->pa_lock);
3305 if (pa->pa_deleted == 0 && pa->pa_free) {
3306 atomic_inc(&pa->pa_count);
3307 ext4_mb_use_inode_pa(ac, pa);
3308 spin_unlock(&pa->pa_lock);
3309 ac->ac_criteria = 10;
3310 rcu_read_unlock();
3311 return 1;
3312 }
3313 spin_unlock(&pa->pa_lock);
3314 }
3315 rcu_read_unlock();
3316
3317
3318 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3319 return 0;
3320
3321
3322 lg = ac->ac_lg;
3323 if (lg == NULL)
3324 return 0;
3325 order = fls(ac->ac_o_ex.fe_len) - 1;
3326 if (order > PREALLOC_TB_SIZE - 1)
3327
3328 order = PREALLOC_TB_SIZE - 1;
3329
3330 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3331
3332
3333
3334
3335 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3336 rcu_read_lock();
3337 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3338 pa_inode_list) {
3339 spin_lock(&pa->pa_lock);
3340 if (pa->pa_deleted == 0 &&
3341 pa->pa_free >= ac->ac_o_ex.fe_len) {
3342
3343 cpa = ext4_mb_check_group_pa(goal_block,
3344 pa, cpa);
3345 }
3346 spin_unlock(&pa->pa_lock);
3347 }
3348 rcu_read_unlock();
3349 }
3350 if (cpa) {
3351 ext4_mb_use_group_pa(ac, cpa);
3352 ac->ac_criteria = 20;
3353 return 1;
3354 }
3355 return 0;
3356}
3357
3358
3359
3360
3361
3362
3363
3364static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3365 ext4_group_t group)
3366{
3367 struct rb_node *n;
3368 struct ext4_group_info *grp;
3369 struct ext4_free_data *entry;
3370
3371 grp = ext4_get_group_info(sb, group);
3372 n = rb_first(&(grp->bb_free_root));
3373
3374 while (n) {
3375 entry = rb_entry(n, struct ext4_free_data, efd_node);
3376 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3377 n = rb_next(n);
3378 }
3379 return;
3380}
3381
3382
3383
3384
3385
3386
3387static noinline_for_stack
3388void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3389 ext4_group_t group)
3390{
3391 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3392 struct ext4_prealloc_space *pa;
3393 struct list_head *cur;
3394 ext4_group_t groupnr;
3395 ext4_grpblk_t start;
3396 int preallocated = 0;
3397 int len;
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407 list_for_each(cur, &grp->bb_prealloc_list) {
3408 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3409 spin_lock(&pa->pa_lock);
3410 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3411 &groupnr, &start);
3412 len = pa->pa_len;
3413 spin_unlock(&pa->pa_lock);
3414 if (unlikely(len == 0))
3415 continue;
3416 BUG_ON(groupnr != group);
3417 ext4_set_bits(bitmap, start, len);
3418 preallocated += len;
3419 }
	mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3421}
3422
3423static void ext4_mb_pa_callback(struct rcu_head *head)
3424{
3425 struct ext4_prealloc_space *pa;
3426 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3427 kmem_cache_free(ext4_pspace_cachep, pa);
3428}
3429
3430
3431
3432
3433
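/*
 * Drop a reference on a preallocation descriptor. When the last reference
 * is gone and the PA is fully consumed, unlink it from the group and owner
 * lists and free it via RCU.
 */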
3434static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3435 struct super_block *sb, struct ext4_prealloc_space *pa)
3436{
3437 ext4_group_t grp;
3438 ext4_fsblk_t grp_blk;
3439
3440 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3441 return;
3442
3443
3444 spin_lock(&pa->pa_lock);
3445 if (pa->pa_deleted == 1) {
3446 spin_unlock(&pa->pa_lock);
3447 return;
3448 }
3449
3450 pa->pa_deleted = 1;
3451 spin_unlock(&pa->pa_lock);
3452
3453 grp_blk = pa->pa_pstart;
3454
3455
3456
3457
3458 if (pa->pa_type == MB_GROUP_PA)
3459 grp_blk--;
3460
3461 grp = ext4_get_group_number(sb, grp_blk);
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477 ext4_lock_group(sb, grp);
3478 list_del(&pa->pa_group_list);
3479 ext4_unlock_group(sb, grp);
3480
3481 spin_lock(pa->pa_obj_lock);
3482 list_del_rcu(&pa->pa_inode_list);
3483 spin_unlock(pa->pa_obj_lock);
3484
3485 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3486}
3487
3488
3489
3490
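/*
 * The allocator found more space than the original request needed: turn
 * the whole allocation into a new per-inode preallocation that covers the
 * original request.
 */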
3491static noinline_for_stack int
3492ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3493{
3494 struct super_block *sb = ac->ac_sb;
3495 struct ext4_sb_info *sbi = EXT4_SB(sb);
3496 struct ext4_prealloc_space *pa;
3497 struct ext4_group_info *grp;
3498 struct ext4_inode_info *ei;
3499
3500
3501 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3502 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3503 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3504
3505 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3506 if (pa == NULL)
3507 return -ENOMEM;
3508
3509 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3510 int winl;
3511 int wins;
3512 int win;
3513 int offs;
3514
3515
3516
3517
3518 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3519 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3520
3521
3522
3523
3524 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3525
3526
3527 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3528
3529
3530 win = min(winl, wins);
3531
3532 offs = ac->ac_o_ex.fe_logical %
3533 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3534 if (offs && offs < win)
3535 win = offs;
3536
3537 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3538 EXT4_NUM_B2C(sbi, win);
3539 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3540 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3541 }
3542
3543
3544
3545 ac->ac_f_ex = ac->ac_b_ex;
3546
3547 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3548 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3549 pa->pa_len = ac->ac_b_ex.fe_len;
3550 pa->pa_free = pa->pa_len;
3551 atomic_set(&pa->pa_count, 1);
3552 spin_lock_init(&pa->pa_lock);
3553 INIT_LIST_HEAD(&pa->pa_inode_list);
3554 INIT_LIST_HEAD(&pa->pa_group_list);
3555 pa->pa_deleted = 0;
3556 pa->pa_type = MB_INODE_PA;
3557
3558 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3559 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3560 trace_ext4_mb_new_inode_pa(ac, pa);
3561
3562 ext4_mb_use_inode_pa(ac, pa);
3563 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3564
3565 ei = EXT4_I(ac->ac_inode);
3566 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3567
3568 pa->pa_obj_lock = &ei->i_prealloc_lock;
3569 pa->pa_inode = ac->ac_inode;
3570
3571 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3572 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3573 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3574
3575 spin_lock(pa->pa_obj_lock);
3576 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3577 spin_unlock(pa->pa_obj_lock);
3578
3579 return 0;
3580}
3581
3582
3583
3584
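/*
 * Like ext4_mb_new_inode_pa(), but the allocation becomes a locality-group
 * preallocation to be shared by small allocations on this CPU.
 */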
3585static noinline_for_stack int
3586ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3587{
3588 struct super_block *sb = ac->ac_sb;
3589 struct ext4_locality_group *lg;
3590 struct ext4_prealloc_space *pa;
3591 struct ext4_group_info *grp;
3592
3593
3594 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3595 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3596 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3597
3598 BUG_ON(ext4_pspace_cachep == NULL);
3599 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3600 if (pa == NULL)
3601 return -ENOMEM;
3602
3603
3604
3605 ac->ac_f_ex = ac->ac_b_ex;
3606
3607 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3608 pa->pa_lstart = pa->pa_pstart;
3609 pa->pa_len = ac->ac_b_ex.fe_len;
3610 pa->pa_free = pa->pa_len;
3611 atomic_set(&pa->pa_count, 1);
3612 spin_lock_init(&pa->pa_lock);
3613 INIT_LIST_HEAD(&pa->pa_inode_list);
3614 INIT_LIST_HEAD(&pa->pa_group_list);
3615 pa->pa_deleted = 0;
3616 pa->pa_type = MB_GROUP_PA;
3617
3618 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3619 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3620 trace_ext4_mb_new_group_pa(ac, pa);
3621
3622 ext4_mb_use_group_pa(ac, pa);
3623 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3624
3625 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3626 lg = ac->ac_lg;
3627 BUG_ON(lg == NULL);
3628
3629 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3630 pa->pa_inode = NULL;
3631
3632 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3633 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3634 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3635
3636
3637
3638
3639
3640 return 0;
3641}
3642
3643static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3644{
3645 int err;
3646
3647 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3648 err = ext4_mb_new_group_pa(ac);
3649 else
3650 err = ext4_mb_new_inode_pa(ac);
3651 return err;
3652}
3653
3654
3655
3656
3657
3658
3659
3660
3661
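/*
 * Release an inode preallocation: walk the on-disk bitmap across the PA's
 * range and return every still-free cluster to the buddy, complaining if
 * the freed count does not match pa_free.
 */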
3662static noinline_for_stack int
3663ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3664 struct ext4_prealloc_space *pa)
3665{
3666 struct super_block *sb = e4b->bd_sb;
3667 struct ext4_sb_info *sbi = EXT4_SB(sb);
3668 unsigned int end;
3669 unsigned int next;
3670 ext4_group_t group;
3671 ext4_grpblk_t bit;
3672 unsigned long long grp_blk_start;
3673 int err = 0;
3674 int free = 0;
3675
3676 BUG_ON(pa->pa_deleted == 0);
3677 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3678 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3679 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3680 end = bit + pa->pa_len;
3681
3682 while (bit < end) {
3683 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3684 if (bit >= end)
3685 break;
3686 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3687 mb_debug(1, " free preallocated %u/%u in group %u\n",
3688 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3689 (unsigned) next - bit, (unsigned) group);
3690 free += next - bit;
3691
3692 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3693 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3694 EXT4_C2B(sbi, bit)),
3695 next - bit);
3696 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3697 bit = next + 1;
3698 }
3699 if (free != pa->pa_free) {
3700 ext4_msg(e4b->bd_sb, KERN_CRIT,
			 "pa %p: logical %lu, phys. %lu, len %lu",
3702 pa, (unsigned long) pa->pa_lstart,
3703 (unsigned long) pa->pa_pstart,
3704 (unsigned long) pa->pa_len);
3705 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3706 free, pa->pa_free);
3707
3708
3709
3710
3711 }
3712 atomic_add(free, &sbi->s_mb_discarded);
3713
3714 return err;
3715}
3716
3717static noinline_for_stack int
3718ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3719 struct ext4_prealloc_space *pa)
3720{
3721 struct super_block *sb = e4b->bd_sb;
3722 ext4_group_t group;
3723 ext4_grpblk_t bit;
3724
3725 trace_ext4_mb_release_group_pa(sb, pa);
3726 BUG_ON(pa->pa_deleted == 0);
3727 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3728 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3729 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3730 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3731 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3732
3733 return 0;
3734}
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
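/*
 * Discard unused preallocations in @group, trying to reclaim at least
 * @needed clusters; returns the number of clusters actually freed.
 */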
3745static noinline_for_stack int
3746ext4_mb_discard_group_preallocations(struct super_block *sb,
3747 ext4_group_t group, int needed)
3748{
3749 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3750 struct buffer_head *bitmap_bh = NULL;
3751 struct ext4_prealloc_space *pa, *tmp;
3752 struct list_head list;
3753 struct ext4_buddy e4b;
3754 int err;
3755 int busy = 0;
3756 int free = 0;
3757
3758 mb_debug(1, "discard preallocation for group %u\n", group);
3759
3760 if (list_empty(&grp->bb_prealloc_list))
3761 return 0;
3762
3763 bitmap_bh = ext4_read_block_bitmap(sb, group);
3764 if (bitmap_bh == NULL) {
3765 ext4_error(sb, "Error reading block bitmap for %u", group);
3766 return 0;
3767 }
3768
3769 err = ext4_mb_load_buddy(sb, group, &e4b);
3770 if (err) {
3771 ext4_error(sb, "Error loading buddy information for %u", group);
3772 put_bh(bitmap_bh);
3773 return 0;
3774 }
3775
3776 if (needed == 0)
3777 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3778
3779 INIT_LIST_HEAD(&list);
3780repeat:
3781 ext4_lock_group(sb, group);
3782 list_for_each_entry_safe(pa, tmp,
3783 &grp->bb_prealloc_list, pa_group_list) {
3784 spin_lock(&pa->pa_lock);
3785 if (atomic_read(&pa->pa_count)) {
3786 spin_unlock(&pa->pa_lock);
3787 busy = 1;
3788 continue;
3789 }
3790 if (pa->pa_deleted) {
3791 spin_unlock(&pa->pa_lock);
3792 continue;
3793 }
3794
3795
3796 pa->pa_deleted = 1;
3797
3798
3799 free += pa->pa_free;
3800
3801 spin_unlock(&pa->pa_lock);
3802
3803 list_del(&pa->pa_group_list);
3804 list_add(&pa->u.pa_tmp_list, &list);
3805 }
3806
3807
3808 if (free < needed && busy) {
3809 busy = 0;
3810 ext4_unlock_group(sb, group);
3811 cond_resched();
3812 goto repeat;
3813 }
3814
3815
3816 if (list_empty(&list)) {
3817 BUG_ON(free != 0);
3818 goto out;
3819 }
3820
3821
3822 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3823
3824
3825 spin_lock(pa->pa_obj_lock);
3826 list_del_rcu(&pa->pa_inode_list);
3827 spin_unlock(pa->pa_obj_lock);
3828
3829 if (pa->pa_type == MB_GROUP_PA)
3830 ext4_mb_release_group_pa(&e4b, pa);
3831 else
3832 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3833
3834 list_del(&pa->u.pa_tmp_list);
3835 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3836 }
3837
3838out:
3839 ext4_unlock_group(sb, group);
3840 ext4_mb_unload_buddy(&e4b);
3841 put_bh(bitmap_bh);
3842 return free;
3843}
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
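/*
 * Release every unused preallocation owned by @inode.
 */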
3854void ext4_discard_preallocations(struct inode *inode)
3855{
3856 struct ext4_inode_info *ei = EXT4_I(inode);
3857 struct super_block *sb = inode->i_sb;
3858 struct buffer_head *bitmap_bh = NULL;
3859 struct ext4_prealloc_space *pa, *tmp;
3860 ext4_group_t group = 0;
3861 struct list_head list;
3862 struct ext4_buddy e4b;
3863 int err;
3864
3865 if (!S_ISREG(inode->i_mode)) {
3866
3867 return;
3868 }
3869
3870 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3871 trace_ext4_discard_preallocations(inode);
3872
3873 INIT_LIST_HEAD(&list);
3874
3875repeat:
3876
3877 spin_lock(&ei->i_prealloc_lock);
3878 while (!list_empty(&ei->i_prealloc_list)) {
3879 pa = list_entry(ei->i_prealloc_list.next,
3880 struct ext4_prealloc_space, pa_inode_list);
3881 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3882 spin_lock(&pa->pa_lock);
3883 if (atomic_read(&pa->pa_count)) {
3884
3885
3886 spin_unlock(&pa->pa_lock);
3887 spin_unlock(&ei->i_prealloc_lock);
3888 ext4_msg(sb, KERN_ERR,
3889 "uh-oh! used pa while discarding");
3890 WARN_ON(1);
3891 schedule_timeout_uninterruptible(HZ);
3892 goto repeat;
3893
3894 }
3895 if (pa->pa_deleted == 0) {
3896 pa->pa_deleted = 1;
3897 spin_unlock(&pa->pa_lock);
3898 list_del_rcu(&pa->pa_inode_list);
3899 list_add(&pa->u.pa_tmp_list, &list);
3900 continue;
3901 }
3902
3903
3904 spin_unlock(&pa->pa_lock);
3905 spin_unlock(&ei->i_prealloc_lock);
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919 schedule_timeout_uninterruptible(HZ);
3920 goto repeat;
3921 }
3922 spin_unlock(&ei->i_prealloc_lock);
3923
3924 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3925 BUG_ON(pa->pa_type != MB_INODE_PA);
3926 group = ext4_get_group_number(sb, pa->pa_pstart);
3927
3928 err = ext4_mb_load_buddy(sb, group, &e4b);
3929 if (err) {
3930 ext4_error(sb, "Error loading buddy information for %u",
3931 group);
3932 continue;
3933 }
3934
3935 bitmap_bh = ext4_read_block_bitmap(sb, group);
3936 if (bitmap_bh == NULL) {
3937 ext4_error(sb, "Error reading block bitmap for %u",
3938 group);
3939 ext4_mb_unload_buddy(&e4b);
3940 continue;
3941 }
3942
3943 ext4_lock_group(sb, group);
3944 list_del(&pa->pa_group_list);
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3946 ext4_unlock_group(sb, group);
3947
3948 ext4_mb_unload_buddy(&e4b);
3949 put_bh(bitmap_bh);
3950
3951 list_del(&pa->u.pa_tmp_list);
3952 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3953 }
3954}
3955
3956#ifdef CONFIG_EXT4_DEBUG
3957static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3958{
3959 struct super_block *sb = ac->ac_sb;
3960 ext4_group_t ngroups, i;
3961
3962 if (!ext4_mballoc_debug ||
3963 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3964 return;
3965
3966 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
3967 " Allocation context details:");
3968 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
3969 ac->ac_status, ac->ac_flags);
3970 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
3971 "goal %lu/%lu/%lu@%lu, "
3972 "best %lu/%lu/%lu@%lu cr %d",
3973 (unsigned long)ac->ac_o_ex.fe_group,
3974 (unsigned long)ac->ac_o_ex.fe_start,
3975 (unsigned long)ac->ac_o_ex.fe_len,
3976 (unsigned long)ac->ac_o_ex.fe_logical,
3977 (unsigned long)ac->ac_g_ex.fe_group,
3978 (unsigned long)ac->ac_g_ex.fe_start,
3979 (unsigned long)ac->ac_g_ex.fe_len,
3980 (unsigned long)ac->ac_g_ex.fe_logical,
3981 (unsigned long)ac->ac_b_ex.fe_group,
3982 (unsigned long)ac->ac_b_ex.fe_start,
3983 (unsigned long)ac->ac_b_ex.fe_len,
3984 (unsigned long)ac->ac_b_ex.fe_logical,
3985 (int)ac->ac_criteria);
3986 ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
3987 ac->ac_ex_scanned, ac->ac_found);
3988 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
3989 ngroups = ext4_get_groups_count(sb);
3990 for (i = 0; i < ngroups; i++) {
3991 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
3992 struct ext4_prealloc_space *pa;
3993 ext4_grpblk_t start;
3994 struct list_head *cur;
3995 ext4_lock_group(sb, i);
3996 list_for_each(cur, &grp->bb_prealloc_list) {
3997 pa = list_entry(cur, struct ext4_prealloc_space,
3998 pa_group_list);
3999 spin_lock(&pa->pa_lock);
4000 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4001 NULL, &start);
4002 spin_unlock(&pa->pa_lock);
4003 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4004 start, pa->pa_len);
4005 }
4006 ext4_unlock_group(sb, i);
4007
4008 if (grp->bb_free == 0)
4009 continue;
4010 printk(KERN_ERR "%u: %d/%d \n",
4011 i, grp->bb_free, grp->bb_fragments);
4012 }
4013 printk(KERN_ERR "\n");
4014}
4015#else
4016static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4017{
4018 return;
4019}
4020#endif
4021
4022
4023
4024
4025
4026
4027
4028
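/*
 * Decide whether this allocation should go through the per-CPU locality
 * group (small files) or use inode/stream allocation (large files), based
 * on the file size and the s_mb_stream_request threshold.
 */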
4029static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4030{
4031 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4032 int bsbits = ac->ac_sb->s_blocksize_bits;
4033 loff_t size, isize;
4034
4035 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4036 return;
4037
4038 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4039 return;
4040
4041 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4042 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4043 >> bsbits;
4044
4045 if ((size == isize) &&
4046 !ext4_fs_is_busy(sbi) &&
4047 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4048 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4049 return;
4050 }
4051
4052 if (sbi->s_mb_group_prealloc <= 0) {
4053 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4054 return;
4055 }
4056
4057
4058 size = max(size, isize);
4059 if (size > sbi->s_mb_stream_request) {
4060 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4061 return;
4062 }
4063
4064 BUG_ON(ac->ac_lg != NULL);
4065
4066
4067
4068
4069
4070 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
4071
4072
4073 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4074
4075
4076 mutex_lock(&ac->ac_lg->lg_mutex);
4077}
4078
4079static noinline_for_stack int
4080ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4081 struct ext4_allocation_request *ar)
4082{
4083 struct super_block *sb = ar->inode->i_sb;
4084 struct ext4_sb_info *sbi = EXT4_SB(sb);
4085 struct ext4_super_block *es = sbi->s_es;
4086 ext4_group_t group;
4087 unsigned int len;
4088 ext4_fsblk_t goal;
4089 ext4_grpblk_t block;
4090
4091
4092 len = ar->len;
4093
4094
4095 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4096 len = EXT4_CLUSTERS_PER_GROUP(sb);
4097
4098
4099 goal = ar->goal;
4100 if (goal < le32_to_cpu(es->s_first_data_block) ||
4101 goal >= ext4_blocks_count(es))
4102 goal = le32_to_cpu(es->s_first_data_block);
4103 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4104
4105
4106 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4107 ac->ac_status = AC_STATUS_CONTINUE;
4108 ac->ac_sb = sb;
4109 ac->ac_inode = ar->inode;
4110 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4111 ac->ac_o_ex.fe_group = group;
4112 ac->ac_o_ex.fe_start = block;
4113 ac->ac_o_ex.fe_len = len;
4114 ac->ac_g_ex = ac->ac_o_ex;
4115 ac->ac_flags = ar->flags;
4116
4117
4118
4119 ext4_mb_group_or_file(ac);
4120
4121 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4122 "left: %u/%u, right %u/%u to %swritable\n",
4123 (unsigned) ar->len, (unsigned) ar->logical,
4124 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4125 (unsigned) ar->lleft, (unsigned) ar->pleft,
4126 (unsigned) ar->lright, (unsigned) ar->pright,
4127 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4128 return 0;
4129
4130}
4131
4132static noinline_for_stack void
4133ext4_mb_discard_lg_preallocations(struct super_block *sb,
4134 struct ext4_locality_group *lg,
4135 int order, int total_entries)
4136{
4137 ext4_group_t group = 0;
4138 struct ext4_buddy e4b;
4139 struct list_head discard_list;
4140 struct ext4_prealloc_space *pa, *tmp;
4141
4142 mb_debug(1, "discard locality group preallocation\n");
4143
4144 INIT_LIST_HEAD(&discard_list);
4145
4146 spin_lock(&lg->lg_prealloc_lock);
4147 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4148 pa_inode_list) {
4149 spin_lock(&pa->pa_lock);
4150 if (atomic_read(&pa->pa_count)) {
4151
4152
4153
4154
4155
4156 spin_unlock(&pa->pa_lock);
4157 continue;
4158 }
4159 if (pa->pa_deleted) {
4160 spin_unlock(&pa->pa_lock);
4161 continue;
4162 }
4163
4164 BUG_ON(pa->pa_type != MB_GROUP_PA);
4165
4166
4167 pa->pa_deleted = 1;
4168 spin_unlock(&pa->pa_lock);
4169
4170 list_del_rcu(&pa->pa_inode_list);
4171 list_add(&pa->u.pa_tmp_list, &discard_list);
4172
4173 total_entries--;
4174 if (total_entries <= 5) {
4175
4176
4177
4178
4179
4180
4181 break;
4182 }
4183 }
4184 spin_unlock(&lg->lg_prealloc_lock);
4185
4186 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4187
4188 group = ext4_get_group_number(sb, pa->pa_pstart);
4189 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4190 ext4_error(sb, "Error loading buddy information for %u",
4191 group);
4192 continue;
4193 }
4194 ext4_lock_group(sb, group);
4195 list_del(&pa->pa_group_list);
4196 ext4_mb_release_group_pa(&e4b, pa);
4197 ext4_unlock_group(sb, group);
4198
4199 ext4_mb_unload_buddy(&e4b);
4200 list_del(&pa->u.pa_tmp_list);
4201 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4202 }
4203}
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
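/*
 * Return a partially used group PA to the locality group: keep the
 * per-order list roughly sorted by pa_free and, once it grows past eight
 * entries, discard the older preallocations.
 */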
4214static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4215{
4216 int order, added = 0, lg_prealloc_count = 1;
4217 struct super_block *sb = ac->ac_sb;
4218 struct ext4_locality_group *lg = ac->ac_lg;
4219 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4220
4221 order = fls(pa->pa_free) - 1;
4222 if (order > PREALLOC_TB_SIZE - 1)
4223
4224 order = PREALLOC_TB_SIZE - 1;
4225
4226 spin_lock(&lg->lg_prealloc_lock);
4227 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4228 pa_inode_list) {
4229 spin_lock(&tmp_pa->pa_lock);
4230 if (tmp_pa->pa_deleted) {
4231 spin_unlock(&tmp_pa->pa_lock);
4232 continue;
4233 }
4234 if (!added && pa->pa_free < tmp_pa->pa_free) {
4235
4236 list_add_tail_rcu(&pa->pa_inode_list,
4237 &tmp_pa->pa_inode_list);
4238 added = 1;
4239
4240
4241
4242
4243 }
4244 spin_unlock(&tmp_pa->pa_lock);
4245 lg_prealloc_count++;
4246 }
4247 if (!added)
4248 list_add_tail_rcu(&pa->pa_inode_list,
4249 &lg->lg_prealloc_list[order]);
4250 spin_unlock(&lg->lg_prealloc_lock);
4251
4252
4253 if (lg_prealloc_count > 8) {
4254 ext4_mb_discard_lg_preallocations(sb, lg,
4255 order, lg_prealloc_count);
4256 return;
4257 }
	return;
4259}
4260
4261
4262
4263
4264static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4265{
4266 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4267 struct ext4_prealloc_space *pa = ac->ac_pa;
4268 if (pa) {
4269 if (pa->pa_type == MB_GROUP_PA) {
4270
4271 spin_lock(&pa->pa_lock);
4272 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4273 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4274 pa->pa_free -= ac->ac_b_ex.fe_len;
4275 pa->pa_len -= ac->ac_b_ex.fe_len;
4276 spin_unlock(&pa->pa_lock);
4277 }
4278 }
4279 if (pa) {
4280
4281
4282
4283
4284
4285
4286 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4287 spin_lock(pa->pa_obj_lock);
4288 list_del_rcu(&pa->pa_inode_list);
4289 spin_unlock(pa->pa_obj_lock);
4290 ext4_mb_add_n_trim(ac);
4291 }
4292 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4293 }
4294 if (ac->ac_bitmap_page)
4295 page_cache_release(ac->ac_bitmap_page);
4296 if (ac->ac_buddy_page)
4297 page_cache_release(ac->ac_buddy_page);
4298 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4299 mutex_unlock(&ac->ac_lg->lg_mutex);
4300 ext4_mb_collect_stats(ac);
4301 return 0;
4302}
4303
4304static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4305{
4306 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4307 int ret;
4308 int freed = 0;
4309
4310 trace_ext4_mb_discard_preallocations(sb, needed);
4311 for (i = 0; i < ngroups && needed > 0; i++) {
4312 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4313 freed += ret;
4314 needed -= ret;
4315 }
4316
4317 return freed;
4318}
4319
4320
4321
4322
4323
4324
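/*
 * Main entry point for multiblock allocation: reserve space and quota, try
 * preallocated space first, fall back to the regular allocator, and retry
 * after discarding preallocations when the file system looks full.
 */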
4325ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4326 struct ext4_allocation_request *ar, int *errp)
4327{
4328 int freed;
4329 struct ext4_allocation_context *ac = NULL;
4330 struct ext4_sb_info *sbi;
4331 struct super_block *sb;
4332 ext4_fsblk_t block = 0;
4333 unsigned int inquota = 0;
4334 unsigned int reserv_clstrs = 0;
4335
4336 might_sleep();
4337 sb = ar->inode->i_sb;
4338 sbi = EXT4_SB(sb);
4339
4340 trace_ext4_request_blocks(ar);
4341
4342
4343 if (IS_NOQUOTA(ar->inode))
4344 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4345
4346
4347
4348
4349
4350
4351 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4352 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4353 else {
4354
4355
4356
4357
4358 while (ar->len &&
4359 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4360
4361
4362 cond_resched();
4363 ar->len = ar->len >> 1;
4364 }
4365 if (!ar->len) {
4366 *errp = -ENOSPC;
4367 return 0;
4368 }
4369 reserv_clstrs = ar->len;
4370 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4371 dquot_alloc_block_nofail(ar->inode,
4372 EXT4_C2B(sbi, ar->len));
4373 } else {
4374 while (ar->len &&
4375 dquot_alloc_block(ar->inode,
4376 EXT4_C2B(sbi, ar->len))) {
4377
4378 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4379 ar->len--;
4380 }
4381 }
4382 inquota = ar->len;
4383 if (ar->len == 0) {
4384 *errp = -EDQUOT;
4385 goto out;
4386 }
4387 }
4388
4389 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4390 if (!ac) {
4391 ar->len = 0;
4392 *errp = -ENOMEM;
4393 goto out;
4394 }
4395
4396 *errp = ext4_mb_initialize_context(ac, ar);
4397 if (*errp) {
4398 ar->len = 0;
4399 goto out;
4400 }
4401
4402 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4403 if (!ext4_mb_use_preallocated(ac)) {
4404 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4405 ext4_mb_normalize_request(ac, ar);
4406repeat:
4407
4408 *errp = ext4_mb_regular_allocator(ac);
4409 if (*errp)
4410 goto discard_and_exit;
4411
4412
4413
4414
4415 if (ac->ac_status == AC_STATUS_FOUND &&
4416 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4417 *errp = ext4_mb_new_preallocation(ac);
4418 if (*errp) {
4419 discard_and_exit:
4420 ext4_discard_allocated_blocks(ac);
4421 goto errout;
4422 }
4423 }
4424 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4425 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4426 if (*errp == -EAGAIN) {
4427
4428
4429
4430
4431 ext4_mb_release_context(ac);
4432 ac->ac_b_ex.fe_group = 0;
4433 ac->ac_b_ex.fe_start = 0;
4434 ac->ac_b_ex.fe_len = 0;
4435 ac->ac_status = AC_STATUS_CONTINUE;
4436 goto repeat;
4437 } else if (*errp) {
4438 ext4_discard_allocated_blocks(ac);
4439 goto errout;
4440 } else {
4441 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4442 ar->len = ac->ac_b_ex.fe_len;
4443 }
4444 } else {
4445 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4446 if (freed)
4447 goto repeat;
4448 *errp = -ENOSPC;
4449 }
4450
4451errout:
4452 if (*errp) {
4453 ac->ac_b_ex.fe_len = 0;
4454 ar->len = 0;
4455 ext4_mb_show_ac(ac);
4456 }
4457 ext4_mb_release_context(ac);
4458out:
4459 if (ac)
4460 kmem_cache_free(ext4_ac_cachep, ac);
4461 if (inquota && ar->len < inquota)
4462 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4463 if (!ar->len) {
4464 if (!ext4_test_inode_state(ar->inode,
4465 EXT4_STATE_DELALLOC_RESERVED))
4466
4467 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4468 reserv_clstrs);
4469 }
4470
4471 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4472
4473 return block;
4474}
4475
4476
4477
4478
4479
4480
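/*
 * Two deferred-free extents can be merged only if they were freed in the
 * same transaction, belong to the same group, and are physically adjacent.
 */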
4481static int can_merge(struct ext4_free_data *entry1,
4482 struct ext4_free_data *entry2)
4483{
4484 if ((entry1->efd_tid == entry2->efd_tid) &&
4485 (entry1->efd_group == entry2->efd_group) &&
4486 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4487 return 1;
4488 return 0;
4489}
4490
4491static noinline_for_stack int
4492ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4493 struct ext4_free_data *new_entry)
4494{
4495 ext4_group_t group = e4b->bd_group;
4496 ext4_grpblk_t cluster;
4497 struct ext4_free_data *entry;
4498 struct ext4_group_info *db = e4b->bd_info;
4499 struct super_block *sb = e4b->bd_sb;
4500 struct ext4_sb_info *sbi = EXT4_SB(sb);
4501 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4502 struct rb_node *parent = NULL, *new_node;
4503
4504 BUG_ON(!ext4_handle_valid(handle));
4505 BUG_ON(e4b->bd_bitmap_page == NULL);
4506 BUG_ON(e4b->bd_buddy_page == NULL);
4507
4508 new_node = &new_entry->efd_node;
4509 cluster = new_entry->efd_start_cluster;
4510
4511 if (!*n) {
4512
4513
4514
4515
4516
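		/*
		 * First deferred-free entry for this group: pin the bitmap
		 * and buddy pages so the buddy cache stays valid until the
		 * commit callback (ext4_free_data_callback) drops them.
		 */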
4517 page_cache_get(e4b->bd_buddy_page);
4518 page_cache_get(e4b->bd_bitmap_page);
4519 }
4520 while (*n) {
4521 parent = *n;
4522 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4523 if (cluster < entry->efd_start_cluster)
4524 n = &(*n)->rb_left;
4525 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4526 n = &(*n)->rb_right;
4527 else {
4528 ext4_grp_locked_error(sb, group, 0,
4529 ext4_group_first_block_no(sb, group) +
4530 EXT4_C2B(sbi, cluster),
4531 "Block already on to-be-freed list");
4532 return 0;
4533 }
4534 }
4535
4536 rb_link_node(new_node, parent, n);
4537 rb_insert_color(new_node, &db->bb_free_root);
4538
4539
4540 node = rb_prev(new_node);
4541 if (node) {
4542 entry = rb_entry(node, struct ext4_free_data, efd_node);
4543 if (can_merge(entry, new_entry) &&
4544 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4545 new_entry->efd_start_cluster = entry->efd_start_cluster;
4546 new_entry->efd_count += entry->efd_count;
4547 rb_erase(node, &(db->bb_free_root));
4548 kmem_cache_free(ext4_free_data_cachep, entry);
4549 }
4550 }
4551
4552 node = rb_next(new_node);
4553 if (node) {
4554 entry = rb_entry(node, struct ext4_free_data, efd_node);
4555 if (can_merge(new_entry, entry) &&
4556 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4557 new_entry->efd_count += entry->efd_count;
4558 rb_erase(node, &(db->bb_free_root));
4559 kmem_cache_free(ext4_free_data_cachep, entry);
4560 }
4561 }
4562
4563 ext4_journal_callback_add(handle, ext4_free_data_callback,
4564 &new_entry->efd_jce);
4565 return 0;
4566}
4567
4568
4569
4570
4571
4572
4573
4574
4575
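/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	journal handle
 * @inode:	inode that owns the blocks
 * @bh:		optional buffer head of the block being freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	EXT4_FREE_BLOCKS_* flags
 */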
4576void ext4_free_blocks(handle_t *handle, struct inode *inode,
4577 struct buffer_head *bh, ext4_fsblk_t block,
4578 unsigned long count, int flags)
4579{
4580 struct buffer_head *bitmap_bh = NULL;
4581 struct super_block *sb = inode->i_sb;
4582 struct ext4_group_desc *gdp;
4583 unsigned int overflow;
4584 ext4_grpblk_t bit;
4585 struct buffer_head *gd_bh;
4586 ext4_group_t block_group;
4587 struct ext4_sb_info *sbi;
4588 struct ext4_buddy e4b;
4589 unsigned int count_clusters;
4590 int err = 0;
4591 int ret;
4592
4593 might_sleep();
4594 if (bh) {
4595 if (block)
4596 BUG_ON(block != bh->b_blocknr);
4597 else
4598 block = bh->b_blocknr;
4599 }
4600
4601 sbi = EXT4_SB(sb);
4602 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4603 !ext4_data_block_valid(sbi, block, count)) {
4604 ext4_error(sb, "Freeing blocks not in datazone - "
4605 "block = %llu, count = %lu", block, count);
4606 goto error_return;
4607 }
4608
4609 ext4_debug("freeing block %llu\n", block);
4610 trace_ext4_free_blocks(inode, block, count, flags);
4611
4612 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4613 struct buffer_head *tbh = bh;
4614 int i;
4615
4616 BUG_ON(bh && (count > 1));
4617
4618 for (i = 0; i < count; i++) {
4619 cond_resched();
4620 if (!bh)
4621 tbh = sb_find_get_block(inode->i_sb,
4622 block + i);
4623 if (!tbh)
4624 continue;
4625 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4626 inode, tbh, block + i);
4627 }
4628 }
4629
4630
4631
4632
4633
4634
4635
4636
4637 if (!ext4_should_writeback_data(inode))
4638 flags |= EXT4_FREE_BLOCKS_METADATA;
4639
4640
4641
4642
4643
4644
4645
4646
4647 overflow = block & (sbi->s_cluster_ratio - 1);
4648 if (overflow) {
4649 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4650 overflow = sbi->s_cluster_ratio - overflow;
4651 block += overflow;
4652 if (count > overflow)
4653 count -= overflow;
4654 else
4655 return;
4656 } else {
4657 block -= overflow;
4658 count += overflow;
4659 }
4660 }
4661 overflow = count & (sbi->s_cluster_ratio - 1);
4662 if (overflow) {
4663 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4664 if (count > overflow)
4665 count -= overflow;
4666 else
4667 return;
4668 } else
4669 count += sbi->s_cluster_ratio - overflow;
4670 }
4671
4672do_more:
4673 overflow = 0;
4674 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4675
4676
4677
4678
4679
4680 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4681 overflow = EXT4_C2B(sbi, bit) + count -
4682 EXT4_BLOCKS_PER_GROUP(sb);
4683 count -= overflow;
4684 }
4685 count_clusters = EXT4_NUM_B2C(sbi, count);
4686 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4687 if (!bitmap_bh) {
4688 err = -EIO;
4689 goto error_return;
4690 }
4691 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4692 if (!gdp) {
4693 err = -EIO;
4694 goto error_return;
4695 }
4696
4697 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4698 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4699 in_range(block, ext4_inode_table(sb, gdp),
4700 EXT4_SB(sb)->s_itb_per_group) ||
4701 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4702 EXT4_SB(sb)->s_itb_per_group)) {
4703
4704 ext4_error(sb, "Freeing blocks in system zone - "
4705 "Block = %llu, count = %lu", block, count);
4706
4707 goto error_return;
4708 }
4709
4710 BUFFER_TRACE(bitmap_bh, "getting write access");
4711 err = ext4_journal_get_write_access(handle, bitmap_bh);
4712 if (err)
4713 goto error_return;
4714
4715
4716
4717
4718
4719
4720 BUFFER_TRACE(gd_bh, "get_write_access");
4721 err = ext4_journal_get_write_access(handle, gd_bh);
4722 if (err)
4723 goto error_return;
4724#ifdef AGGRESSIVE_CHECK
4725 {
4726 int i;
4727 for (i = 0; i < count_clusters; i++)
4728 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4729 }
4730#endif
4731 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4732
4733 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4734 if (err)
4735 goto error_return;
4736
4737 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4738 struct ext4_free_data *new_entry;
4739
4740
4741
4742
4743 retry:
4744 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4745 if (!new_entry) {
4746
4747
4748
4749
4750 cond_resched();
4751 congestion_wait(BLK_RW_ASYNC, HZ/50);
4752 goto retry;
4753 }
4754 new_entry->efd_start_cluster = bit;
4755 new_entry->efd_group = block_group;
4756 new_entry->efd_count = count_clusters;
4757 new_entry->efd_tid = handle->h_transaction->t_tid;
4758
4759 ext4_lock_group(sb, block_group);
4760 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4761 ext4_mb_free_metadata(handle, &e4b, new_entry);
4762 } else {
4763
4764
4765
4766
4767 if (test_opt(sb, DISCARD)) {
4768 err = ext4_issue_discard(sb, block_group, bit, count);
4769 if (err && err != -EOPNOTSUPP)
4770 ext4_msg(sb, KERN_WARNING, "discard request in"
4771 " group:%d block:%d count:%lu failed"
4772 " with %d", block_group, bit, count,
4773 err);
4774 }
4775
4776
4777 ext4_lock_group(sb, block_group);
4778 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4779 mb_free_blocks(inode, &e4b, bit, count_clusters);
4780 }
4781
4782 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4783 ext4_free_group_clusters_set(sb, gdp, ret);
4784 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4785 ext4_group_desc_csum_set(sb, block_group, gdp);
4786 ext4_unlock_group(sb, block_group);
4787 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4788
4789 if (sbi->s_log_groups_per_flex) {
4790 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4791 atomic64_add(count_clusters,
4792 &sbi->s_flex_groups[flex_group].free_clusters);
4793 }
4794
4795 ext4_mb_unload_buddy(&e4b);
4796
4797 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4798 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4799
4800
4801 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4802 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4803
4804
4805 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4806 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4807 if (!err)
4808 err = ret;
4809
4810 if (overflow && !err) {
4811 block += count;
4812 count = overflow;
4813 put_bh(bitmap_bh);
4814 goto do_more;
4815 }
4816error_return:
4817 brelse(bitmap_bh);
4818 ext4_std_error(sb, err);
4819 return;
4820}
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
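/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	journal handle
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */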
4831int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4832 ext4_fsblk_t block, unsigned long count)
4833{
4834 struct buffer_head *bitmap_bh = NULL;
4835 struct buffer_head *gd_bh;
4836 ext4_group_t block_group;
4837 ext4_grpblk_t bit;
4838 unsigned int i;
4839 struct ext4_group_desc *desc;
4840 struct ext4_sb_info *sbi = EXT4_SB(sb);
4841 struct ext4_buddy e4b;
4842 int err = 0, ret, blk_free_count;
4843 ext4_grpblk_t blocks_freed;
4844
4845 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4846
4847 if (count == 0)
4848 return 0;
4849
4850 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4851
4852
4853
4854
4855 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u\n",
			     block_group);
4858 err = -EINVAL;
4859 goto error_return;
4860 }
4861
4862 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4863 if (!bitmap_bh) {
4864 err = -EIO;
4865 goto error_return;
4866 }
4867
4868 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4869 if (!desc) {
4870 err = -EIO;
4871 goto error_return;
4872 }
4873
4874 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4875 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4876 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4877 in_range(block + count - 1, ext4_inode_table(sb, desc),
4878 sbi->s_itb_per_group)) {
4879 ext4_error(sb, "Adding blocks in system zones - "
4880 "Block = %llu, count = %lu",
4881 block, count);
4882 err = -EINVAL;
4883 goto error_return;
4884 }
4885
4886 BUFFER_TRACE(bitmap_bh, "getting write access");
4887 err = ext4_journal_get_write_access(handle, bitmap_bh);
4888 if (err)
4889 goto error_return;
4890
4891
4892
4893
4894
4895
4896 BUFFER_TRACE(gd_bh, "get_write_access");
4897 err = ext4_journal_get_write_access(handle, gd_bh);
4898 if (err)
4899 goto error_return;
4900
4901 for (i = 0, blocks_freed = 0; i < count; i++) {
4902 BUFFER_TRACE(bitmap_bh, "clear bit");
4903 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4904 ext4_error(sb, "bit already cleared for block %llu",
4905 (ext4_fsblk_t)(block + i));
4906 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4907 } else {
4908 blocks_freed++;
4909 }
4910 }
4911
4912 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4913 if (err)
4914 goto error_return;
4915
4916
4917
4918
4919
4920
4921 ext4_lock_group(sb, block_group);
4922 mb_clear_bits(bitmap_bh->b_data, bit, count);
4923 mb_free_blocks(NULL, &e4b, bit, count);
4924 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4925 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4926 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
4927 ext4_group_desc_csum_set(sb, block_group, desc);
4928 ext4_unlock_group(sb, block_group);
4929 percpu_counter_add(&sbi->s_freeclusters_counter,
4930 EXT4_NUM_B2C(sbi, blocks_freed));
4931
4932 if (sbi->s_log_groups_per_flex) {
4933 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4934 atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
4935 &sbi->s_flex_groups[flex_group].free_clusters);
4936 }
4937
4938 ext4_mb_unload_buddy(&e4b);
4939
4940
4941 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4942 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4943
4944
4945 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4946 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4947 if (!err)
4948 err = ret;
4949
4950error_return:
4951 brelse(bitmap_bh);
4952 ext4_std_error(sb, err);
4953 return err;
4954}
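/**
 * ext4_trim_extent -- trim one contiguous free extent in the group
 * @sb:		super block for the file system
 * @start:	first cluster of the free extent within the group
 * @count:	number of clusters to trim
 * @group:	allocation group being worked on
 * @e4b:	buddy for the group
 *
 * Mark the extent as in-use in the buddy so nobody can allocate it, drop
 * the group lock while the discard is issued, then free it again in the
 * buddy.  Must be called with the group lock held.
 */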
static int ext4_trim_extent(struct super_block *sb, int start, int count,
			     ext4_group_t group, struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex;
	int ret = 0;

	trace_ext4_trim_extent(sb, group, start, count);

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	ex.fe_start = start;
	ex.fe_group = group;
	ex.fe_len = count;

	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */
	mb_mark_used(e4b, &ex);
	ext4_unlock_group(sb, group);
	ret = ext4_issue_discard(sb, group, start, count);
	ext4_lock_group(sb, group);
	mb_free_blocks(NULL, e4b, start, ex.fe_len);
	return ret;
}
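/**
 * ext4_trim_all_free -- trim all free space in an allocation group
 * @sb:		super block for the file system
 * @group:	group to be trimmed
 * @start:	first cluster to examine
 * @max:	last cluster to examine
 * @minblocks:	minimum extent length to trim
 *
 * Walk the group's buddy bitmap looking for runs of free clusters.  Each
 * run of at least @minblocks clusters is handed to ext4_trim_extent(),
 * which issues the discard.  Returns the number of clusters trimmed, or a
 * negative error code.
 */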
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0, free_count = 0;
	struct ext4_buddy e4b;
	int ret = 0;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_error(sb, "Error in loading buddy "
			   "information for %u", group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
		goto out;

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

	/*
	 * Scan the buddy bitmap for runs of free clusters and trim each
	 * run that is at least minblocks long.
	 */
	while (start <= max) {
		start = mb_find_next_zero_bit(bitmap, max + 1, start);
		if (start > max)
			break;
		next = mb_find_next_bit(bitmap, max + 1, start);

		if ((next - start) >= minblocks) {
			ret = ext4_trim_extent(sb, start,
					       next - start, group, &e4b);
			if (ret && ret != -EOPNOTSUPP)
				break;
			ret = 0;
			count += next - start;
		}
		free_count += next - start;
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		if ((e4b.bd_info->bb_free - free_count) < minblocks)
			break;
	}

	if (!ret) {
		ret = count;
		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
	}
out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		   count, group);

	return ret;
}
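/**
 * ext4_trim_fs() -- handler for the FITRIM ioctl
 * @sb:		superblock for the file system
 * @range:	fstrim_range describing the byte range and minimum length
 *
 * Walks every allocation group that overlaps [range->start,
 * range->start + range->len) and calls ext4_trim_all_free() on each one.
 * On return, range->len is set to the number of bytes actually trimmed.
 */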
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_group_info *grp;
	ext4_group_t group, first_group, last_group;
	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
	uint64_t start, end, minlen, trimmed = 0;
	ext4_fsblk_t first_data_blk =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
	int ret = 0;

	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
			      range->minlen >> sb->s_blocksize_bits);

	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	/* Determine first and last group to examine based on start and end */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_cluster);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
				     &last_group, &last_cluster);

	/* end now represents the last cluster to discard in this group */
	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		grp = ext4_get_group_info(sb, group);
		/* We only do this if the grp has never been initialized */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			ret = ext4_mb_init_group(sb, group);
			if (ret)
				break;
		}

		/*
		 * For all the groups except the last one, last cluster will
		 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
		 * change it for the last group, note that last_cluster is
		 * already computed earlier by ext4_get_group_no_and_offset()
		 */
		if (group == last_group)
			end = last_cluster;

		if (grp->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, group, first_cluster,
						 end, minlen);
			if (cnt < 0) {
				ret = cnt;
				break;
			}
			trimmed += cnt;
		}

		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */
		first_cluster = 0;
	}

	if (!ret)
		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

out:
	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
	return ret;
}