1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41#define DEBUG_SUBSYSTEM S_FILTER
42
43#include <linux/init.h>
44#include <linux/module.h>
45#include <linux/fs.h>
46#include <linux/slab.h>
47#include <linux/pagemap.h>
48#include <ldiskfs/ldiskfs_config.h>
49#include <ext4/ext4.h>
50#include <ext4/ext4_jbd2.h>
51#include <linux/version.h>
52#include <linux/bitops.h>
53#include <linux/quota.h>
54
55#include <linux/libcfs/libcfs.h>
56#include <lustre_fsfilt.h>
57#include <obd.h>
58#include <linux/lustre_compat25.h>
59#include <linux/lprocfs_status.h>
60
61#include <ext4/ext4_extents.h>
62
/*
 * Compatibility aliases.  The code in this file is written against a set
 * of ext3-flavoured names; each macro below simply forwards one of those
 * names to the symbol on its right-hand side.
 */

/* Only defined when the build sets HAVE_EXT_PBLOCK. */
#ifdef HAVE_EXT_PBLOCK
# define ext3_ext_pblock(ex)	ext_pblock(ex)
#endif

/* Base journal credit count used by fsfilt_ext3_start(). */
#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) \
	EXT3_SINGLEDATA_TRANS_BLOCKS(sb)

/* Extent insertion as used by the extent-walk callback in this file. */
#define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
	ext3_ext_insert_extent(handle, inode, path, newext, flag)

/* Drop an inode's preallocations (used on the insert-failure path). */
#define ext3_mb_discard_inode_preallocations(inode) \
	ext3_discard_preallocations(inode)

/* Journal commit helpers, forwarded to their jbd2 counterparts. */
#define fsfilt_log_start_commit(journal, tid) \
	jbd2_log_start_commit(journal, tid)
#define fsfilt_log_wait_commit(journal, tid) \
	jbd2_log_wait_commit(journal, tid)
78
79static struct kmem_cache *fcb_cache;
80
81struct fsfilt_cb_data {
82 struct ext4_journal_cb_entry cb_jcb;
83 fsfilt_cb_t cb_func;
84 struct obd_device *cb_obd;
85 __u64 cb_last_rcvd;
86 void *cb_data;
87};
88
89static char *fsfilt_ext3_get_label(struct super_block *sb)
90{
91 return EXT3_SB(sb)->s_es->s_volume_name;
92}
93
94
95# include <ext4/truncate.h>
96
97
98
99
100
101
102
103static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
104 int logs)
105{
106
107 int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
108 journal_t *journal;
109 void *handle;
110
111 if (current->journal_info) {
112 CDEBUG(D_INODE, "increasing refcount on %p\n",
113 current->journal_info);
114 goto journal_start;
115 }
116
117 switch(op) {
118 case FSFILT_OP_UNLINK:
119
120 nblocks += EXT3_DELETE_TRANS_BLOCKS(inode->i_sb);
121 nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
122 FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
123 break;
124 case FSFILT_OP_CANCEL_UNLINK:
125 LASSERT(logs == 1);
126
127
128
129
130 nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
131 EXT3_DELETE_TRANS_BLOCKS(inode->i_sb) +
132 ext4_blocks_for_truncate(inode) + 3;
133 break;
134 default: CERROR("unknown transaction start op %d\n", op);
135 LBUG();
136 }
137
138 LASSERT(current->journal_info == desc_private);
139 journal = EXT3_SB(inode->i_sb)->s_journal;
140 if (nblocks > journal->j_max_transaction_buffers) {
141 CWARN("too many credits %d for op %ux%u using %d instead\n",
142 nblocks, op, logs, journal->j_max_transaction_buffers);
143 nblocks = journal->j_max_transaction_buffers;
144 }
145
146 journal_start:
147 LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
148 handle = ext3_journal_start(inode, nblocks);
149
150 if (!IS_ERR(handle))
151 LASSERT(current->journal_info == handle);
152 else
153 CERROR("error starting handle for op %u (%u credits): rc %ld\n",
154 op, nblocks, PTR_ERR(handle));
155 return handle;
156}
157
158static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
159{
160 int rc;
161 handle_t *handle = h;
162
163 LASSERT(current->journal_info == handle);
164 if (force_sync)
165 handle->h_sync = 1;
166
167 rc = ext3_journal_stop(handle);
168
169 return rc;
170}
171
/* Fallback value for the extents inode flag when the headers lack it. */
#ifndef EXT3_EXTENTS_FL
#define EXT3_EXTENTS_FL			0x00080000
#endif

/* Fallback assertion helper: BUG when @cond does not hold. */
#ifndef EXT_ASSERT
#define EXT_ASSERT(cond)		BUG_ON(!(cond))
#endif

/* Extent-tree generation counter kept in the in-core inode info. */
#define EXT_GENERATION(inode)		(EXT4_I(inode)->i_ext_generation)

/*
 * In this file the extent "base" object is the inode itself:
 * "struct ext3_ext_base" therefore reads as "struct inode".
 */
#define ext3_ext_base			inode
#define ext3_ext_base2inode(inode)	(inode)

#define EXT_DEPTH(inode)		ext_depth(inode)

/*
 * Forward to ext3_ext_walk_space().  The expansion deliberately carries no
 * trailing semicolon, so the macro behaves like an ordinary call: it can
 * be used as an expression and inside if/else without producing an empty
 * statement.
 */
#define fsfilt_ext3_ext_walk_space(inode, block, num, cb, cbdata) \
	ext3_ext_walk_space(inode, block, num, cb, cbdata)
186
/*
 * Cursor shared between fsfilt_map_nblocks() and the extent-walk callback
 * while a logical block range is being mapped.
 */
struct bpointers {
	/* next output slot; advanced once per reported block */
	unsigned long	*blocks;
	/* logical block that the next output slot corresponds to */
	unsigned long	start;
	/* output slots still to be filled */
	int		num;
	/* value of num when the walk started (used in diagnostics) */
	int		init_num;
	/* zero: report unmapped blocks as 0; non-zero: allocate them */
	int		create;
};
194
195static long ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path,
196 unsigned long block, int *aflags)
197{
198 struct ext3_inode_info *ei = EXT3_I(inode);
199 unsigned long bg_start;
200 unsigned long colour;
201 int depth;
202
203 if (path) {
204 struct ext3_extent *ex;
205 depth = path->p_depth;
206
207
208 if ((ex = path[depth].p_ext))
209 return ext4_ext_pblock(ex) + (block - le32_to_cpu(ex->ee_block));
210
211
212
213 if (path[depth].p_bh)
214 return path[depth].p_bh->b_blocknr;
215 }
216
217
218 bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
219 le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
220 colour = (current->pid % 16) *
221 (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
222 return bg_start + colour + block;
223}
224
/*
 * Call unmap_underlying_metadata() for block @blocknr on the block device
 * backing super block @sb.
 */
#define ll_unmap_underlying_metadata(sb, blocknr) \
	unmap_underlying_metadata((sb)->s_bdev, (blocknr))
227
228#ifndef EXT3_MB_HINT_GROUP_ALLOC
229static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
230 struct ext3_ext_path *path, unsigned long block,
231 unsigned long *count, int *err)
232{
233 unsigned long pblock, goal;
234 int aflags = 0;
235 struct inode *inode = ext3_ext_base2inode(base);
236
237 goal = ext3_ext_find_goal(inode, path, block, &aflags);
238 aflags |= 2;
239 pblock = ext3_mb_new_blocks(handle, inode, goal, count, aflags, err);
240 return pblock;
241
242}
243#else
244static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
245 struct ext3_ext_path *path, unsigned long block,
246 unsigned long *count, int *err)
247{
248 struct inode *inode = ext3_ext_base2inode(base);
249 struct ext3_allocation_request ar;
250 unsigned long pblock;
251 int aflags;
252
253
254 ar.lleft = block;
255 *err = ext3_ext_search_left(base, path, &ar.lleft, &ar.pleft);
256 if (*err)
257 return 0;
258 ar.lright = block;
259 *err = ext3_ext_search_right(base, path, &ar.lright, &ar.pright);
260 if (*err)
261 return 0;
262
263
264 ar.goal = ext3_ext_find_goal(inode, path, block, &aflags);
265 ar.inode = inode;
266 ar.logical = block;
267 ar.len = *count;
268 ar.flags = EXT3_MB_HINT_DATA;
269 pblock = ext3_mb_new_blocks(handle, &ar, err);
270 *count = ar.len;
271 return pblock;
272}
273#endif
274
275static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
276 struct ext3_ext_path *path,
277 struct ext3_ext_cache *cex,
278#ifdef HAVE_EXT_PREPARE_CB_EXTENT
279 struct ext3_extent *ex,
280#endif
281 void *cbdata)
282{
283 struct bpointers *bp = cbdata;
284 struct inode *inode = ext3_ext_base2inode(base);
285 struct ext3_extent nex;
286 unsigned long pblock;
287 unsigned long tgen;
288 int err, i;
289 unsigned long count;
290 handle_t *handle;
291
292#ifdef EXT3_EXT_CACHE_EXTENT
293 if (cex->ec_type == EXT3_EXT_CACHE_EXTENT)
294#else
295 if ((cex->ec_len != 0) && (cex->ec_start != 0))
296#endif
297 {
298 err = EXT_CONTINUE;
299 goto map;
300 }
301
302 if (bp->create == 0) {
303 i = 0;
304 if (cex->ec_block < bp->start)
305 i = bp->start - cex->ec_block;
306 if (i >= cex->ec_len)
307 CERROR("nothing to do?! i = %d, e_num = %u\n",
308 i, cex->ec_len);
309 for (; i < cex->ec_len && bp->num; i++) {
310 *(bp->blocks) = 0;
311 bp->blocks++;
312 bp->num--;
313 bp->start++;
314 }
315
316 return EXT_CONTINUE;
317 }
318
319 tgen = EXT_GENERATION(base);
320 count = ext3_ext_calc_credits_for_insert(base, path);
321
322 handle = ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1);
323 if (IS_ERR(handle)) {
324 return PTR_ERR(handle);
325 }
326
327 if (tgen != EXT_GENERATION(base)) {
328
329 ext3_journal_stop(handle);
330 return EXT_REPEAT;
331 }
332
333
334
335
336 down_write((&EXT4_I(inode)->i_data_sem));
337
338
339 if (EXT_GENERATION(base) != path[0].p_generation) {
340
341 up_write(&EXT4_I(inode)->i_data_sem);
342 ext3_journal_stop(handle);
343 return EXT_REPEAT;
344 }
345
346 count = cex->ec_len;
347 pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
348 if (!pblock)
349 goto out;
350 EXT_ASSERT(count <= cex->ec_len);
351
352
353 nex.ee_block = cpu_to_le32(cex->ec_block);
354 ext3_ext_store_pblock(&nex, pblock);
355 nex.ee_len = cpu_to_le16(count);
356 err = fsfilt_ext3_ext_insert_extent(handle, base, path, &nex, 0);
357 if (err) {
358
359
360
361#ifdef EXT3_MB_HINT_GROUP_ALLOC
362 ext3_mb_discard_inode_preallocations(inode);
363#endif
364#ifdef HAVE_EXT_FREE_BLOCK_WITH_BUFFER_HEAD
365 ext3_free_blocks(handle, inode, NULL, ext4_ext_pblock(&nex),
366 cpu_to_le16(nex.ee_len), 0);
367#else
368 ext3_free_blocks(handle, inode, ext4_ext_pblock(&nex),
369 cpu_to_le16(nex.ee_len), 0);
370#endif
371 goto out;
372 }
373
374
375
376
377
378
379 cex->ec_len = le16_to_cpu(nex.ee_len);
380 cex->ec_start = ext4_ext_pblock(&nex);
381 BUG_ON(le16_to_cpu(nex.ee_len) == 0);
382 BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
383
384out:
385 up_write((&EXT4_I(inode)->i_data_sem));
386 ext3_journal_stop(handle);
387map:
388 if (err >= 0) {
389
390 if (bp->num == 0) {
391 CERROR("hmm. why do we find this extent?\n");
392 CERROR("initial space: %lu:%u\n",
393 bp->start, bp->init_num);
394#ifdef EXT3_EXT_CACHE_EXTENT
395 CERROR("current extent: %u/%u/%llu %d\n",
396 cex->ec_block, cex->ec_len,
397 (unsigned long long)cex->ec_start,
398 cex->ec_type);
399#else
400 CERROR("current extent: %u/%u/%llu\n",
401 cex->ec_block, cex->ec_len,
402 (unsigned long long)cex->ec_start);
403#endif
404 }
405 i = 0;
406 if (cex->ec_block < bp->start)
407 i = bp->start - cex->ec_block;
408 if (i >= cex->ec_len)
409 CERROR("nothing to do?! i = %d, e_num = %u\n",
410 i, cex->ec_len);
411 for (; i < cex->ec_len && bp->num; i++) {
412 *(bp->blocks) = cex->ec_start + i;
413#ifdef EXT3_EXT_CACHE_EXTENT
414 if (cex->ec_type != EXT3_EXT_CACHE_EXTENT)
415#else
416 if ((cex->ec_len == 0) || (cex->ec_start == 0))
417#endif
418 {
419
420
421 ll_unmap_underlying_metadata(inode->i_sb,
422 *(bp->blocks));
423 }
424 bp->blocks++;
425 bp->num--;
426 bp->start++;
427 }
428 }
429 return err;
430}
431
432int fsfilt_map_nblocks(struct inode *inode, unsigned long block,
433 unsigned long num, unsigned long *blocks,
434 int create)
435{
436 struct ext3_ext_base *base = inode;
437 struct bpointers bp;
438 int err;
439
440 CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n",
441 block, block + num - 1, (unsigned) inode->i_ino);
442
443 bp.blocks = blocks;
444 bp.start = block;
445 bp.init_num = bp.num = num;
446 bp.create = create;
447
448 err = fsfilt_ext3_ext_walk_space(base, block, num,
449 ext3_ext_new_extent_cb, &bp);
450 ext3_ext_invalidate_cache(base);
451
452 return err;
453}
454
455int fsfilt_ext3_map_ext_inode_pages(struct inode *inode, struct page **page,
456 int pages, unsigned long *blocks,
457 int create)
458{
459 int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
460 int rc = 0, i = 0;
461 struct page *fp = NULL;
462 int clen = 0;
463
464 CDEBUG(D_OTHER, "inode %lu: map %d pages from %lu\n",
465 inode->i_ino, pages, (*page)->index);
466
467
468
469 while (i < pages) {
470 if (fp == NULL) {
471
472 fp = *page++;
473 clen = 1;
474 i++;
475 continue;
476 } else if (fp->index + clen == (*page)->index) {
477
478 page++;
479 clen++;
480 i++;
481 continue;
482 }
483
484
485 rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page,
486 clen * blocks_per_page, blocks,
487 create);
488 if (rc)
489 GOTO(cleanup, rc);
490
491
492 fp = NULL;
493 blocks += blocks_per_page * clen;
494 }
495
496 if (fp)
497 rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page,
498 clen * blocks_per_page, blocks,
499 create);
500cleanup:
501 return rc;
502}
503
504int fsfilt_ext3_map_bm_inode_pages(struct inode *inode, struct page **page,
505 int pages, unsigned long *blocks,
506 int create)
507{
508 int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
509 unsigned long *b;
510 int rc = 0, i;
511
512 for (i = 0, b = blocks; i < pages; i++, page++) {
513 rc = ext3_map_inode_page(inode, *page, b, create);
514 if (rc) {
515 CERROR("ino %lu, blk %lu create %d: rc %d\n",
516 inode->i_ino, *b, create, rc);
517 break;
518 }
519
520 b += blocks_per_page;
521 }
522 return rc;
523}
524
525int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
526 int pages, unsigned long *blocks,
527 int create, struct mutex *optional_mutex)
528{
529 int rc;
530
531 if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) {
532 rc = fsfilt_ext3_map_ext_inode_pages(inode, page, pages,
533 blocks, create);
534 return rc;
535 }
536 if (optional_mutex != NULL)
537 mutex_lock(optional_mutex);
538 rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks, create);
539 if (optional_mutex != NULL)
540 mutex_unlock(optional_mutex);
541
542 return rc;
543}
544
545int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
546{
547 unsigned long block;
548 struct buffer_head *bh;
549 int err, blocksize, csize, boffs, osize = size;
550
551
552 spin_lock(&inode->i_lock);
553 if (i_size_read(inode) < *offs + size) {
554 size = i_size_read(inode) - *offs;
555 spin_unlock(&inode->i_lock);
556 if (size < 0) {
557 CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
558 i_size_read(inode), *offs);
559 return -EBADR;
560 } else if (size == 0) {
561 return 0;
562 }
563 } else {
564 spin_unlock(&inode->i_lock);
565 }
566
567 blocksize = 1 << inode->i_blkbits;
568
569 while (size > 0) {
570 block = *offs >> inode->i_blkbits;
571 boffs = *offs & (blocksize - 1);
572 csize = min(blocksize - boffs, size);
573 bh = ext3_bread(NULL, inode, block, 0, &err);
574 if (!bh) {
575 CERROR("can't read block: %d\n", err);
576 return err;
577 }
578
579 memcpy(buf, bh->b_data + boffs, csize);
580 brelse(bh);
581
582 *offs += csize;
583 buf += csize;
584 size -= csize;
585 }
586 return osize;
587}
588EXPORT_SYMBOL(fsfilt_ext3_read);
589
590static int fsfilt_ext3_read_record(struct file * file, void *buf,
591 int size, loff_t *offs)
592{
593 int rc;
594 rc = fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs);
595 if (rc > 0)
596 rc = 0;
597 return rc;
598}
599
600int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
601 loff_t *offs, handle_t *handle)
602{
603 struct buffer_head *bh = NULL;
604 loff_t old_size = i_size_read(inode), offset = *offs;
605 loff_t new_size = i_size_read(inode);
606 unsigned long block;
607 int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs;
608
609 while (bufsize > 0) {
610 if (bh != NULL)
611 brelse(bh);
612
613 block = offset >> inode->i_blkbits;
614 boffs = offset & (blocksize - 1);
615 size = min(blocksize - boffs, bufsize);
616 bh = ext3_bread(handle, inode, block, 1, &err);
617 if (!bh) {
618 CERROR("can't read/create block: %d\n", err);
619 break;
620 }
621
622 err = ext3_journal_get_write_access(handle, bh);
623 if (err) {
624 CERROR("journal_get_write_access() returned error %d\n",
625 err);
626 break;
627 }
628 LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
629 memcpy(bh->b_data + boffs, buf, size);
630 err = ext3_journal_dirty_metadata(handle, bh);
631 if (err) {
632 CERROR("journal_dirty_metadata() returned error %d\n",
633 err);
634 break;
635 }
636 if (offset + size > new_size)
637 new_size = offset + size;
638 offset += size;
639 bufsize -= size;
640 buf += size;
641 }
642 if (bh)
643 brelse(bh);
644
645
646 if (new_size > i_size_read(inode)) {
647 spin_lock(&inode->i_lock);
648 if (new_size > i_size_read(inode))
649 i_size_write(inode, new_size);
650 if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
651 EXT3_I(inode)->i_disksize = i_size_read(inode);
652 if (i_size_read(inode) > old_size) {
653 spin_unlock(&inode->i_lock);
654 mark_inode_dirty(inode);
655 } else {
656 spin_unlock(&inode->i_lock);
657 }
658 }
659
660 if (err == 0)
661 *offs = offset;
662 return err;
663}
664EXPORT_SYMBOL(fsfilt_ext3_write_handle);
665
666static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
667 loff_t *offs, int force_sync)
668{
669 struct inode *inode = file->f_dentry->d_inode;
670 handle_t *handle;
671 int err, block_count = 0, blocksize;
672
673
674 blocksize = 1 << inode->i_blkbits;
675 block_count = (*offs & (blocksize - 1)) + bufsize;
676 block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
677
678 handle = ext3_journal_start(inode,
679 block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
680 if (IS_ERR(handle)) {
681 CERROR("can't start transaction for %d blocks (%d bytes)\n",
682 block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2,
683 bufsize);
684 return PTR_ERR(handle);
685 }
686
687 err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle);
688
689 if (!err && force_sync)
690 handle->h_sync = 1;
691
692 ext3_journal_stop(handle);
693
694 return err;
695}
696
697static int fsfilt_ext3_setup(struct super_block *sb)
698{
699 if (!EXT3_HAS_COMPAT_FEATURE(sb,
700 EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
701 CERROR("ext3 mounted without journal\n");
702 return -EINVAL;
703 }
704
705#ifdef S_PDIROPS
706 CWARN("Enabling PDIROPS\n");
707 set_opt(EXT3_SB(sb)->s_mount_opt, PDIROPS);
708 sb->s_flags |= S_PDIROPS;
709#endif
710 if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
711 CWARN("filesystem doesn't have dir_index feature enabled\n");
712 return 0;
713}
714static struct fsfilt_operations fsfilt_ext3_ops = {
715 .fs_type = "ext3",
716 .fs_owner = THIS_MODULE,
717 .fs_getlabel = fsfilt_ext3_get_label,
718 .fs_start = fsfilt_ext3_start,
719 .fs_commit = fsfilt_ext3_commit,
720 .fs_map_inode_pages = fsfilt_ext3_map_inode_pages,
721 .fs_write_record = fsfilt_ext3_write_record,
722 .fs_read_record = fsfilt_ext3_read_record,
723 .fs_setup = fsfilt_ext3_setup,
724};
725
726static int __init fsfilt_ext3_init(void)
727{
728 int rc;
729
730 fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
731 sizeof(struct fsfilt_cb_data), 0, 0);
732 if (!fcb_cache) {
733 CERROR("error allocating fsfilt journal callback cache\n");
734 GOTO(out, rc = -ENOMEM);
735 }
736
737 rc = fsfilt_register_ops(&fsfilt_ext3_ops);
738
739 if (rc) {
740 int err = kmem_cache_destroy(fcb_cache);
741 LASSERTF(err == 0, "error destroying new cache: rc %d\n", err);
742 }
743out:
744 return rc;
745}
746
747static void __exit fsfilt_ext3_exit(void)
748{
749 int rc;
750
751 fsfilt_unregister_ops(&fsfilt_ext3_ops);
752 rc = kmem_cache_destroy(fcb_cache);
753 LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n");
754}
755
756module_init(fsfilt_ext3_init);
757module_exit(fsfilt_ext3_exit);
758
759MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
760MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
761MODULE_LICENSE("GPL");
762