linux/fs/ext3/super.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext3/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/blkdev.h>
  21#include <linux/parser.h>
  22#include <linux/exportfs.h>
  23#include <linux/statfs.h>
  24#include <linux/random.h>
  25#include <linux/mount.h>
  26#include <linux/quotaops.h>
  27#include <linux/seq_file.h>
  28#include <linux/log2.h>
  29#include <linux/cleancache.h>
  30
  31#include <asm/uaccess.h>
  32
  33#define CREATE_TRACE_POINTS
  34
  35#include "ext3.h"
  36#include "xattr.h"
  37#include "acl.h"
  38#include "namei.h"
  39
  40#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
  41  #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
  42#else
  43  #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
  44#endif
  45
  46static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
  47                             unsigned long journal_devnum);
  48static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
  49                               unsigned int);
  50static int ext3_commit_super(struct super_block *sb,
  51                               struct ext3_super_block *es,
  52                               int sync);
  53static void ext3_mark_recovery_complete(struct super_block * sb,
  54                                        struct ext3_super_block * es);
  55static void ext3_clear_journal_err(struct super_block * sb,
  56                                   struct ext3_super_block * es);
  57static int ext3_sync_fs(struct super_block *sb, int wait);
  58static const char *ext3_decode_error(struct super_block * sb, int errno,
  59                                     char nbuf[16]);
  60static int ext3_remount (struct super_block * sb, int * flags, char * data);
  61static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
  62static int ext3_unfreeze(struct super_block *sb);
  63static int ext3_freeze(struct super_block *sb);
  64
  65/*
  66 * Wrappers for journal_start/end.
  67 */
  68handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
  69{
  70        journal_t *journal;
  71
  72        if (sb->s_flags & MS_RDONLY)
  73                return ERR_PTR(-EROFS);
  74
  75        /* Special case here: if the journal has aborted behind our
  76         * backs (eg. EIO in the commit thread), then we still need to
  77         * take the FS itself readonly cleanly. */
  78        journal = EXT3_SB(sb)->s_journal;
  79        if (is_journal_aborted(journal)) {
  80                ext3_abort(sb, __func__,
  81                           "Detected aborted journal");
  82                return ERR_PTR(-EROFS);
  83        }
  84
  85        return journal_start(journal, nblocks);
  86}
  87
  88int __ext3_journal_stop(const char *where, handle_t *handle)
  89{
  90        struct super_block *sb;
  91        int err;
  92        int rc;
  93
  94        sb = handle->h_transaction->t_journal->j_private;
  95        err = handle->h_err;
  96        rc = journal_stop(handle);
  97
  98        if (!err)
  99                err = rc;
 100        if (err)
 101                __ext3_std_error(sb, where, err);
 102        return err;
 103}
 104
 105void ext3_journal_abort_handle(const char *caller, const char *err_fn,
 106                struct buffer_head *bh, handle_t *handle, int err)
 107{
 108        char nbuf[16];
 109        const char *errstr = ext3_decode_error(NULL, err, nbuf);
 110
 111        if (bh)
 112                BUFFER_TRACE(bh, "abort");
 113
 114        if (!handle->h_err)
 115                handle->h_err = err;
 116
 117        if (is_handle_aborted(handle))
 118                return;
 119
 120        printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
 121                caller, errstr, err_fn);
 122
 123        journal_abort_handle(handle);
 124}
 125
 126void ext3_msg(struct super_block *sb, const char *prefix,
 127                const char *fmt, ...)
 128{
 129        struct va_format vaf;
 130        va_list args;
 131
 132        va_start(args, fmt);
 133
 134        vaf.fmt = fmt;
 135        vaf.va = &args;
 136
 137        printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 138
 139        va_end(args);
 140}
 141
 142/* Deal with the reporting of failure conditions on a filesystem such as
 143 * inconsistencies detected or read IO failures.
 144 *
 145 * On ext2, we can store the error state of the filesystem in the
 146 * superblock.  That is not possible on ext3, because we may have other
 147 * write ordering constraints on the superblock which prevent us from
 148 * writing it out straight away; and given that the journal is about to
 149 * be aborted, we can't rely on the current, or future, transactions to
 150 * write out the superblock safely.
 151 *
 152 * We'll just use the journal_abort() error code to record an error in
 153 * the journal instead.  On recovery, the journal will complain about
 154 * that error until we've noted it down and cleared it.
 155 */
 156
 157static void ext3_handle_error(struct super_block *sb)
 158{
 159        struct ext3_super_block *es = EXT3_SB(sb)->s_es;
 160
 161        EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 162        es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
 163
 164        if (sb->s_flags & MS_RDONLY)
 165                return;
 166
 167        if (!test_opt (sb, ERRORS_CONT)) {
 168                journal_t *journal = EXT3_SB(sb)->s_journal;
 169
 170                set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 171                if (journal)
 172                        journal_abort(journal, -EIO);
 173        }
 174        if (test_opt (sb, ERRORS_RO)) {
 175                ext3_msg(sb, KERN_CRIT,
 176                        "error: remounting filesystem read-only");
 177                /*
 178                 * Make sure updated value of ->s_mount_state will be visible
 179                 * before ->s_flags update.
 180                 */
 181                smp_wmb();
 182                sb->s_flags |= MS_RDONLY;
 183        }
 184        ext3_commit_super(sb, es, 1);
 185        if (test_opt(sb, ERRORS_PANIC))
 186                panic("EXT3-fs (%s): panic forced after error\n",
 187                        sb->s_id);
 188}
 189
 190void ext3_error(struct super_block *sb, const char *function,
 191                const char *fmt, ...)
 192{
 193        struct va_format vaf;
 194        va_list args;
 195
 196        va_start(args, fmt);
 197
 198        vaf.fmt = fmt;
 199        vaf.va = &args;
 200
 201        printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
 202               sb->s_id, function, &vaf);
 203
 204        va_end(args);
 205
 206        ext3_handle_error(sb);
 207}
 208
 209static const char *ext3_decode_error(struct super_block * sb, int errno,
 210                                     char nbuf[16])
 211{
 212        char *errstr = NULL;
 213
 214        switch (errno) {
 215        case -EIO:
 216                errstr = "IO failure";
 217                break;
 218        case -ENOMEM:
 219                errstr = "Out of memory";
 220                break;
 221        case -EROFS:
 222                if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
 223                        errstr = "Journal has aborted";
 224                else
 225                        errstr = "Readonly filesystem";
 226                break;
 227        default:
 228                /* If the caller passed in an extra buffer for unknown
 229                 * errors, textualise them now.  Else we just return
 230                 * NULL. */
 231                if (nbuf) {
 232                        /* Check for truncated error codes... */
 233                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 234                                errstr = nbuf;
 235                }
 236                break;
 237        }
 238
 239        return errstr;
 240}
 241
 242/* __ext3_std_error decodes expected errors from journaling functions
 243 * automatically and invokes the appropriate error response.  */
 244
 245void __ext3_std_error (struct super_block * sb, const char * function,
 246                       int errno)
 247{
 248        char nbuf[16];
 249        const char *errstr;
 250
 251        /* Special case: if the error is EROFS, and we're not already
 252         * inside a transaction, then there's really no point in logging
 253         * an error. */
 254        if (errno == -EROFS && journal_current_handle() == NULL &&
 255            (sb->s_flags & MS_RDONLY))
 256                return;
 257
 258        errstr = ext3_decode_error(sb, errno, nbuf);
 259        ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
 260
 261        ext3_handle_error(sb);
 262}
 263
 264/*
 265 * ext3_abort is a much stronger failure handler than ext3_error.  The
 266 * abort function may be used to deal with unrecoverable failures such
 267 * as journal IO errors or ENOMEM at a critical moment in log management.
 268 *
 269 * We unconditionally force the filesystem into an ABORT|READONLY state,
 270 * unless the error response on the fs has been set to panic in which
 271 * case we take the easy way out and panic immediately.
 272 */
 273
 274void ext3_abort(struct super_block *sb, const char *function,
 275                 const char *fmt, ...)
 276{
 277        struct va_format vaf;
 278        va_list args;
 279
 280        va_start(args, fmt);
 281
 282        vaf.fmt = fmt;
 283        vaf.va = &args;
 284
 285        printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
 286               sb->s_id, function, &vaf);
 287
 288        va_end(args);
 289
 290        if (test_opt(sb, ERRORS_PANIC))
 291                panic("EXT3-fs: panic from previous error\n");
 292
 293        if (sb->s_flags & MS_RDONLY)
 294                return;
 295
 296        ext3_msg(sb, KERN_CRIT,
 297                "error: remounting filesystem read-only");
 298        EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 299        set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 300        /*
 301         * Make sure updated value of ->s_mount_state will be visible
 302         * before ->s_flags update.
 303         */
 304        smp_wmb();
 305        sb->s_flags |= MS_RDONLY;
 306
 307        if (EXT3_SB(sb)->s_journal)
 308                journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 309}
 310
 311void ext3_warning(struct super_block *sb, const char *function,
 312                  const char *fmt, ...)
 313{
 314        struct va_format vaf;
 315        va_list args;
 316
 317        va_start(args, fmt);
 318
 319        vaf.fmt = fmt;
 320        vaf.va = &args;
 321
 322        printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
 323               sb->s_id, function, &vaf);
 324
 325        va_end(args);
 326}
 327
 328void ext3_update_dynamic_rev(struct super_block *sb)
 329{
 330        struct ext3_super_block *es = EXT3_SB(sb)->s_es;
 331
 332        if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
 333                return;
 334
 335        ext3_msg(sb, KERN_WARNING,
 336                "warning: updating to rev %d because of "
 337                "new feature flag, running e2fsck is recommended",
 338                EXT3_DYNAMIC_REV);
 339
 340        es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
 341        es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
 342        es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
 343        /* leave es->s_feature_*compat flags alone */
 344        /* es->s_uuid will be set by e2fsck if empty */
 345
 346        /*
 347         * The rest of the superblock fields should be zero, and if not it
 348         * means they are likely already in use, so leave them alone.  We
 349         * can leave it up to e2fsck to clean up any inconsistencies there.
 350         */
 351}
 352
 353/*
 354 * Open the external journal device
 355 */
 356static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
 357{
 358        struct block_device *bdev;
 359        char b[BDEVNAME_SIZE];
 360
 361        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 362        if (IS_ERR(bdev))
 363                goto fail;
 364        return bdev;
 365
 366fail:
 367        ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
 368                __bdevname(dev, b), PTR_ERR(bdev));
 369
 370        return NULL;
 371}
 372
 373/*
 374 * Release the journal device
 375 */
 376static void ext3_blkdev_put(struct block_device *bdev)
 377{
 378        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 379}
 380
 381static void ext3_blkdev_remove(struct ext3_sb_info *sbi)
 382{
 383        struct block_device *bdev;
 384        bdev = sbi->journal_bdev;
 385        if (bdev) {
 386                ext3_blkdev_put(bdev);
 387                sbi->journal_bdev = NULL;
 388        }
 389}
 390
 391static inline struct inode *orphan_list_entry(struct list_head *l)
 392{
 393        return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
 394}
 395
 396static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
 397{
 398        struct list_head *l;
 399
 400        ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
 401               le32_to_cpu(sbi->s_es->s_last_orphan));
 402
 403        ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
 404        list_for_each(l, &sbi->s_orphan) {
 405                struct inode *inode = orphan_list_entry(l);
 406                ext3_msg(sb, KERN_ERR, "  "
 407                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 408                       inode->i_sb->s_id, inode->i_ino, inode,
 409                       inode->i_mode, inode->i_nlink,
 410                       NEXT_ORPHAN(inode));
 411        }
 412}
 413
 414static void ext3_put_super (struct super_block * sb)
 415{
 416        struct ext3_sb_info *sbi = EXT3_SB(sb);
 417        struct ext3_super_block *es = sbi->s_es;
 418        int i, err;
 419
 420        dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 421        ext3_xattr_put_super(sb);
 422        err = journal_destroy(sbi->s_journal);
 423        sbi->s_journal = NULL;
 424        if (err < 0)
 425                ext3_abort(sb, __func__, "Couldn't clean up the journal");
 426
 427        if (!(sb->s_flags & MS_RDONLY)) {
 428                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
 429                es->s_state = cpu_to_le16(sbi->s_mount_state);
 430                BUFFER_TRACE(sbi->s_sbh, "marking dirty");
 431                mark_buffer_dirty(sbi->s_sbh);
 432                ext3_commit_super(sb, es, 1);
 433        }
 434
 435        for (i = 0; i < sbi->s_gdb_count; i++)
 436                brelse(sbi->s_group_desc[i]);
 437        kfree(sbi->s_group_desc);
 438        percpu_counter_destroy(&sbi->s_freeblocks_counter);
 439        percpu_counter_destroy(&sbi->s_freeinodes_counter);
 440        percpu_counter_destroy(&sbi->s_dirs_counter);
 441        brelse(sbi->s_sbh);
 442#ifdef CONFIG_QUOTA
 443        for (i = 0; i < MAXQUOTAS; i++)
 444                kfree(sbi->s_qf_names[i]);
 445#endif
 446
 447        /* Debugging code just in case the in-memory inode orphan list
 448         * isn't empty.  The on-disk one can be non-empty if we've
 449         * detected an error and taken the fs readonly, but the
 450         * in-memory list had better be clean by this point. */
 451        if (!list_empty(&sbi->s_orphan))
 452                dump_orphan_list(sb, sbi);
 453        J_ASSERT(list_empty(&sbi->s_orphan));
 454
 455        invalidate_bdev(sb->s_bdev);
 456        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 457                /*
 458                 * Invalidate the journal device's buffers.  We don't want them
 459                 * floating about in memory - the physical journal device may
 460                 * hotswapped, and it breaks the `ro-after' testing code.
 461                 */
 462                sync_blockdev(sbi->journal_bdev);
 463                invalidate_bdev(sbi->journal_bdev);
 464                ext3_blkdev_remove(sbi);
 465        }
 466        sb->s_fs_info = NULL;
 467        kfree(sbi->s_blockgroup_lock);
 468        kfree(sbi);
 469}
 470
 471static struct kmem_cache *ext3_inode_cachep;
 472
 473/*
 474 * Called inside transaction, so use GFP_NOFS
 475 */
 476static struct inode *ext3_alloc_inode(struct super_block *sb)
 477{
 478        struct ext3_inode_info *ei;
 479
 480        ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 481        if (!ei)
 482                return NULL;
 483        ei->i_block_alloc_info = NULL;
 484        ei->vfs_inode.i_version = 1;
 485        atomic_set(&ei->i_datasync_tid, 0);
 486        atomic_set(&ei->i_sync_tid, 0);
 487        return &ei->vfs_inode;
 488}
 489
 490static int ext3_drop_inode(struct inode *inode)
 491{
 492        int drop = generic_drop_inode(inode);
 493
 494        trace_ext3_drop_inode(inode, drop);
 495        return drop;
 496}
 497
 498static void ext3_i_callback(struct rcu_head *head)
 499{
 500        struct inode *inode = container_of(head, struct inode, i_rcu);
 501        kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 502}
 503
 504static void ext3_destroy_inode(struct inode *inode)
 505{
 506        if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
 507                printk("EXT3 Inode %p: orphan list check failed!\n",
 508                        EXT3_I(inode));
 509                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 510                                EXT3_I(inode), sizeof(struct ext3_inode_info),
 511                                false);
 512                dump_stack();
 513        }
 514        call_rcu(&inode->i_rcu, ext3_i_callback);
 515}
 516
 517static void init_once(void *foo)
 518{
 519        struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 520
 521        INIT_LIST_HEAD(&ei->i_orphan);
 522#ifdef CONFIG_EXT3_FS_XATTR
 523        init_rwsem(&ei->xattr_sem);
 524#endif
 525        mutex_init(&ei->truncate_mutex);
 526        inode_init_once(&ei->vfs_inode);
 527}
 528
 529static int init_inodecache(void)
 530{
 531        ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
 532                                             sizeof(struct ext3_inode_info),
 533                                             0, (SLAB_RECLAIM_ACCOUNT|
 534                                                SLAB_MEM_SPREAD),
 535                                             init_once);
 536        if (ext3_inode_cachep == NULL)
 537                return -ENOMEM;
 538        return 0;
 539}
 540
 541static void destroy_inodecache(void)
 542{
 543        /*
 544         * Make sure all delayed rcu free inodes are flushed before we
 545         * destroy cache.
 546         */
 547        rcu_barrier();
 548        kmem_cache_destroy(ext3_inode_cachep);
 549}
 550
 551static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
 552{
 553#if defined(CONFIG_QUOTA)
 554        struct ext3_sb_info *sbi = EXT3_SB(sb);
 555
 556        if (sbi->s_jquota_fmt) {
 557                char *fmtname = "";
 558
 559                switch (sbi->s_jquota_fmt) {
 560                case QFMT_VFS_OLD:
 561                        fmtname = "vfsold";
 562                        break;
 563                case QFMT_VFS_V0:
 564                        fmtname = "vfsv0";
 565                        break;
 566                case QFMT_VFS_V1:
 567                        fmtname = "vfsv1";
 568                        break;
 569                }
 570                seq_printf(seq, ",jqfmt=%s", fmtname);
 571        }
 572
 573        if (sbi->s_qf_names[USRQUOTA])
 574                seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 575
 576        if (sbi->s_qf_names[GRPQUOTA])
 577                seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 578
 579        if (test_opt(sb, USRQUOTA))
 580                seq_puts(seq, ",usrquota");
 581
 582        if (test_opt(sb, GRPQUOTA))
 583                seq_puts(seq, ",grpquota");
 584#endif
 585}
 586
 587static char *data_mode_string(unsigned long mode)
 588{
 589        switch (mode) {
 590        case EXT3_MOUNT_JOURNAL_DATA:
 591                return "journal";
 592        case EXT3_MOUNT_ORDERED_DATA:
 593                return "ordered";
 594        case EXT3_MOUNT_WRITEBACK_DATA:
 595                return "writeback";
 596        }
 597        return "unknown";
 598}
 599
 600/*
 601 * Show an option if
 602 *  - it's set to a non-default value OR
 603 *  - if the per-sb default is different from the global default
 604 */
 605static int ext3_show_options(struct seq_file *seq, struct dentry *root)
 606{
 607        struct super_block *sb = root->d_sb;
 608        struct ext3_sb_info *sbi = EXT3_SB(sb);
 609        struct ext3_super_block *es = sbi->s_es;
 610        unsigned long def_mount_opts;
 611
 612        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 613
 614        if (sbi->s_sb_block != 1)
 615                seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
 616        if (test_opt(sb, MINIX_DF))
 617                seq_puts(seq, ",minixdf");
 618        if (test_opt(sb, GRPID))
 619                seq_puts(seq, ",grpid");
 620        if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
 621                seq_puts(seq, ",nogrpid");
 622        if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) ||
 623            le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
 624                seq_printf(seq, ",resuid=%u",
 625                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
 626        }
 627        if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) ||
 628            le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
 629                seq_printf(seq, ",resgid=%u",
 630                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
 631        }
 632        if (test_opt(sb, ERRORS_RO)) {
 633                int def_errors = le16_to_cpu(es->s_errors);
 634
 635                if (def_errors == EXT3_ERRORS_PANIC ||
 636                    def_errors == EXT3_ERRORS_CONTINUE) {
 637                        seq_puts(seq, ",errors=remount-ro");
 638                }
 639        }
 640        if (test_opt(sb, ERRORS_CONT))
 641                seq_puts(seq, ",errors=continue");
 642        if (test_opt(sb, ERRORS_PANIC))
 643                seq_puts(seq, ",errors=panic");
 644        if (test_opt(sb, NO_UID32))
 645                seq_puts(seq, ",nouid32");
 646        if (test_opt(sb, DEBUG))
 647                seq_puts(seq, ",debug");
 648#ifdef CONFIG_EXT3_FS_XATTR
 649        if (test_opt(sb, XATTR_USER))
 650                seq_puts(seq, ",user_xattr");
 651        if (!test_opt(sb, XATTR_USER) &&
 652            (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
 653                seq_puts(seq, ",nouser_xattr");
 654        }
 655#endif
 656#ifdef CONFIG_EXT3_FS_POSIX_ACL
 657        if (test_opt(sb, POSIX_ACL))
 658                seq_puts(seq, ",acl");
 659        if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
 660                seq_puts(seq, ",noacl");
 661#endif
 662        if (!test_opt(sb, RESERVATION))
 663                seq_puts(seq, ",noreservation");
 664        if (sbi->s_commit_interval) {
 665                seq_printf(seq, ",commit=%u",
 666                           (unsigned) (sbi->s_commit_interval / HZ));
 667        }
 668
 669        /*
 670         * Always display barrier state so it's clear what the status is.
 671         */
 672        seq_puts(seq, ",barrier=");
 673        seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 674        seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
 675        if (test_opt(sb, DATA_ERR_ABORT))
 676                seq_puts(seq, ",data_err=abort");
 677
 678        if (test_opt(sb, NOLOAD))
 679                seq_puts(seq, ",norecovery");
 680
 681        ext3_show_quota_options(seq, sb);
 682
 683        return 0;
 684}
 685
 686
 687static struct inode *ext3_nfs_get_inode(struct super_block *sb,
 688                u64 ino, u32 generation)
 689{
 690        struct inode *inode;
 691
 692        if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
 693                return ERR_PTR(-ESTALE);
 694        if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
 695                return ERR_PTR(-ESTALE);
 696
 697        /* iget isn't really right if the inode is currently unallocated!!
 698         *
 699         * ext3_read_inode will return a bad_inode if the inode had been
 700         * deleted, so we should be safe.
 701         *
 702         * Currently we don't know the generation for parent directory, so
 703         * a generation of 0 means "accept any"
 704         */
 705        inode = ext3_iget(sb, ino);
 706        if (IS_ERR(inode))
 707                return ERR_CAST(inode);
 708        if (generation && inode->i_generation != generation) {
 709                iput(inode);
 710                return ERR_PTR(-ESTALE);
 711        }
 712
 713        return inode;
 714}
 715
 716static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
 717                int fh_len, int fh_type)
 718{
 719        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 720                                    ext3_nfs_get_inode);
 721}
 722
 723static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
 724                int fh_len, int fh_type)
 725{
 726        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 727                                    ext3_nfs_get_inode);
 728}
 729
 730/*
 731 * Try to release metadata pages (indirect blocks, directories) which are
 732 * mapped via the block device.  Since these pages could have journal heads
 733 * which would prevent try_to_free_buffers() from freeing them, we must use
 734 * jbd layer's try_to_free_buffers() function to release them.
 735 */
 736static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 737                                 gfp_t wait)
 738{
 739        journal_t *journal = EXT3_SB(sb)->s_journal;
 740
 741        WARN_ON(PageChecked(page));
 742        if (!page_has_buffers(page))
 743                return 0;
 744        if (journal)
 745                return journal_try_to_free_buffers(journal, page, 
 746                                                   wait & ~__GFP_WAIT);
 747        return try_to_free_buffers(page);
 748}
 749
 750#ifdef CONFIG_QUOTA
 751#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
 752#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 753
 754static int ext3_write_dquot(struct dquot *dquot);
 755static int ext3_acquire_dquot(struct dquot *dquot);
 756static int ext3_release_dquot(struct dquot *dquot);
 757static int ext3_mark_dquot_dirty(struct dquot *dquot);
 758static int ext3_write_info(struct super_block *sb, int type);
 759static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 760                         struct path *path);
 761static int ext3_quota_on_mount(struct super_block *sb, int type);
 762static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 763                               size_t len, loff_t off);
 764static ssize_t ext3_quota_write(struct super_block *sb, int type,
 765                                const char *data, size_t len, loff_t off);
 766
 767static const struct dquot_operations ext3_quota_operations = {
 768        .write_dquot    = ext3_write_dquot,
 769        .acquire_dquot  = ext3_acquire_dquot,
 770        .release_dquot  = ext3_release_dquot,
 771        .mark_dirty     = ext3_mark_dquot_dirty,
 772        .write_info     = ext3_write_info,
 773        .alloc_dquot    = dquot_alloc,
 774        .destroy_dquot  = dquot_destroy,
 775};
 776
 777static const struct quotactl_ops ext3_qctl_operations = {
 778        .quota_on       = ext3_quota_on,
 779        .quota_off      = dquot_quota_off,
 780        .quota_sync     = dquot_quota_sync,
 781        .get_info       = dquot_get_dqinfo,
 782        .set_info       = dquot_set_dqinfo,
 783        .get_dqblk      = dquot_get_dqblk,
 784        .set_dqblk      = dquot_set_dqblk
 785};
 786#endif
 787
 788static const struct super_operations ext3_sops = {
 789        .alloc_inode    = ext3_alloc_inode,
 790        .destroy_inode  = ext3_destroy_inode,
 791        .write_inode    = ext3_write_inode,
 792        .dirty_inode    = ext3_dirty_inode,
 793        .drop_inode     = ext3_drop_inode,
 794        .evict_inode    = ext3_evict_inode,
 795        .put_super      = ext3_put_super,
 796        .sync_fs        = ext3_sync_fs,
 797        .freeze_fs      = ext3_freeze,
 798        .unfreeze_fs    = ext3_unfreeze,
 799        .statfs         = ext3_statfs,
 800        .remount_fs     = ext3_remount,
 801        .show_options   = ext3_show_options,
 802#ifdef CONFIG_QUOTA
 803        .quota_read     = ext3_quota_read,
 804        .quota_write    = ext3_quota_write,
 805#endif
 806        .bdev_try_to_free_page = bdev_try_to_free_page,
 807};
 808
 809static const struct export_operations ext3_export_ops = {
 810        .fh_to_dentry = ext3_fh_to_dentry,
 811        .fh_to_parent = ext3_fh_to_parent,
 812        .get_parent = ext3_get_parent,
 813};
 814
 815enum {
 816        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 817        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 818        Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 819        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
 820        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 821        Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 822        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 823        Opt_data_err_abort, Opt_data_err_ignore,
 824        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 825        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
 826        Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
 827        Opt_resize, Opt_usrquota, Opt_grpquota
 828};
 829
 830static const match_table_t tokens = {
 831        {Opt_bsd_df, "bsddf"},
 832        {Opt_minix_df, "minixdf"},
 833        {Opt_grpid, "grpid"},
 834        {Opt_grpid, "bsdgroups"},
 835        {Opt_nogrpid, "nogrpid"},
 836        {Opt_nogrpid, "sysvgroups"},
 837        {Opt_resgid, "resgid=%u"},
 838        {Opt_resuid, "resuid=%u"},
 839        {Opt_sb, "sb=%u"},
 840        {Opt_err_cont, "errors=continue"},
 841        {Opt_err_panic, "errors=panic"},
 842        {Opt_err_ro, "errors=remount-ro"},
 843        {Opt_nouid32, "nouid32"},
 844        {Opt_nocheck, "nocheck"},
 845        {Opt_nocheck, "check=none"},
 846        {Opt_debug, "debug"},
 847        {Opt_oldalloc, "oldalloc"},
 848        {Opt_orlov, "orlov"},
 849        {Opt_user_xattr, "user_xattr"},
 850        {Opt_nouser_xattr, "nouser_xattr"},
 851        {Opt_acl, "acl"},
 852        {Opt_noacl, "noacl"},
 853        {Opt_reservation, "reservation"},
 854        {Opt_noreservation, "noreservation"},
 855        {Opt_noload, "noload"},
 856        {Opt_noload, "norecovery"},
 857        {Opt_nobh, "nobh"},
 858        {Opt_bh, "bh"},
 859        {Opt_commit, "commit=%u"},
 860        {Opt_journal_update, "journal=update"},
 861        {Opt_journal_inum, "journal=%u"},
 862        {Opt_journal_dev, "journal_dev=%u"},
 863        {Opt_abort, "abort"},
 864        {Opt_data_journal, "data=journal"},
 865        {Opt_data_ordered, "data=ordered"},
 866        {Opt_data_writeback, "data=writeback"},
 867        {Opt_data_err_abort, "data_err=abort"},
 868        {Opt_data_err_ignore, "data_err=ignore"},
 869        {Opt_offusrjquota, "usrjquota="},
 870        {Opt_usrjquota, "usrjquota=%s"},
 871        {Opt_offgrpjquota, "grpjquota="},
 872        {Opt_grpjquota, "grpjquota=%s"},
 873        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 874        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
 875        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
 876        {Opt_grpquota, "grpquota"},
 877        {Opt_noquota, "noquota"},
 878        {Opt_quota, "quota"},
 879        {Opt_usrquota, "usrquota"},
 880        {Opt_barrier, "barrier=%u"},
 881        {Opt_barrier, "barrier"},
 882        {Opt_nobarrier, "nobarrier"},
 883        {Opt_resize, "resize"},
 884        {Opt_err, NULL},
 885};
 886
 887static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
 888{
 889        ext3_fsblk_t    sb_block;
 890        char            *options = (char *) *data;
 891
 892        if (!options || strncmp(options, "sb=", 3) != 0)
 893                return 1;       /* Default location */
 894        options += 3;
 895        /*todo: use simple_strtoll with >32bit ext3 */
 896        sb_block = simple_strtoul(options, &options, 0);
 897        if (*options && *options != ',') {
 898                ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
 899                       (char *) *data);
 900                return 1;
 901        }
 902        if (*options == ',')
 903                options++;
 904        *data = (void *) options;
 905        return sb_block;
 906}
 907
 908#ifdef CONFIG_QUOTA
 909static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 910{
 911        struct ext3_sb_info *sbi = EXT3_SB(sb);
 912        char *qname;
 913
 914        if (sb_any_quota_loaded(sb) &&
 915                !sbi->s_qf_names[qtype]) {
 916                ext3_msg(sb, KERN_ERR,
 917                        "Cannot change journaled "
 918                        "quota options when quota turned on");
 919                return 0;
 920        }
 921        qname = match_strdup(args);
 922        if (!qname) {
 923                ext3_msg(sb, KERN_ERR,
 924                        "Not enough memory for storing quotafile name");
 925                return 0;
 926        }
 927        if (sbi->s_qf_names[qtype]) {
 928                int same = !strcmp(sbi->s_qf_names[qtype], qname);
 929
 930                kfree(qname);
 931                if (!same) {
 932                        ext3_msg(sb, KERN_ERR,
 933                                 "%s quota file already specified",
 934                                 QTYPE2NAME(qtype));
 935                }
 936                return same;
 937        }
 938        if (strchr(qname, '/')) {
 939                ext3_msg(sb, KERN_ERR,
 940                        "quotafile must be on filesystem root");
 941                kfree(qname);
 942                return 0;
 943        }
 944        sbi->s_qf_names[qtype] = qname;
 945        set_opt(sbi->s_mount_opt, QUOTA);
 946        return 1;
 947}
 948
 949static int clear_qf_name(struct super_block *sb, int qtype) {
 950
 951        struct ext3_sb_info *sbi = EXT3_SB(sb);
 952
 953        if (sb_any_quota_loaded(sb) &&
 954                sbi->s_qf_names[qtype]) {
 955                ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
 956                        " when quota turned on");
 957                return 0;
 958        }
 959        if (sbi->s_qf_names[qtype]) {
 960                kfree(sbi->s_qf_names[qtype]);
 961                sbi->s_qf_names[qtype] = NULL;
 962        }
 963        return 1;
 964}
 965#endif
 966
 967static int parse_options (char *options, struct super_block *sb,
 968                          unsigned int *inum, unsigned long *journal_devnum,
 969                          ext3_fsblk_t *n_blocks_count, int is_remount)
 970{
 971        struct ext3_sb_info *sbi = EXT3_SB(sb);
 972        char * p;
 973        substring_t args[MAX_OPT_ARGS];
 974        int data_opt = 0;
 975        int option;
 976        kuid_t uid;
 977        kgid_t gid;
 978#ifdef CONFIG_QUOTA
 979        int qfmt;
 980#endif
 981
 982        if (!options)
 983                return 1;
 984
 985        while ((p = strsep (&options, ",")) != NULL) {
 986                int token;
 987                if (!*p)
 988                        continue;
 989                /*
 990                 * Initialize args struct so we know whether arg was
 991                 * found; some options take optional arguments.
 992                 */
 993                args[0].to = args[0].from = NULL;
 994                token = match_token(p, tokens, args);
 995                switch (token) {
 996                case Opt_bsd_df:
 997                        clear_opt (sbi->s_mount_opt, MINIX_DF);
 998                        break;
 999                case Opt_minix_df:
1000                        set_opt (sbi->s_mount_opt, MINIX_DF);
1001                        break;
1002                case Opt_grpid:
1003                        set_opt (sbi->s_mount_opt, GRPID);
1004                        break;
1005                case Opt_nogrpid:
1006                        clear_opt (sbi->s_mount_opt, GRPID);
1007                        break;
1008                case Opt_resuid:
1009                        if (match_int(&args[0], &option))
1010                                return 0;
1011                        uid = make_kuid(current_user_ns(), option);
1012                        if (!uid_valid(uid)) {
1013                                ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
1014                                return 0;
1015
1016                        }
1017                        sbi->s_resuid = uid;
1018                        break;
1019                case Opt_resgid:
1020                        if (match_int(&args[0], &option))
1021                                return 0;
1022                        gid = make_kgid(current_user_ns(), option);
1023                        if (!gid_valid(gid)) {
1024                                ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
1025                                return 0;
1026                        }
1027                        sbi->s_resgid = gid;
1028                        break;
1029                case Opt_sb:
1030                        /* handled by get_sb_block() instead of here */
1031                        /* *sb_block = match_int(&args[0]); */
1032                        break;
1033                case Opt_err_panic:
1034                        clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1035                        clear_opt (sbi->s_mount_opt, ERRORS_RO);
1036                        set_opt (sbi->s_mount_opt, ERRORS_PANIC);
1037                        break;
1038                case Opt_err_ro:
1039                        clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1040                        clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1041                        set_opt (sbi->s_mount_opt, ERRORS_RO);
1042                        break;
1043                case Opt_err_cont:
1044                        clear_opt (sbi->s_mount_opt, ERRORS_RO);
1045                        clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1046                        set_opt (sbi->s_mount_opt, ERRORS_CONT);
1047                        break;
1048                case Opt_nouid32:
1049                        set_opt (sbi->s_mount_opt, NO_UID32);
1050                        break;
1051                case Opt_nocheck:
1052                        clear_opt (sbi->s_mount_opt, CHECK);
1053                        break;
1054                case Opt_debug:
1055                        set_opt (sbi->s_mount_opt, DEBUG);
1056                        break;
1057                case Opt_oldalloc:
1058                        ext3_msg(sb, KERN_WARNING,
1059                                "Ignoring deprecated oldalloc option");
1060                        break;
1061                case Opt_orlov:
1062                        ext3_msg(sb, KERN_WARNING,
1063                                "Ignoring deprecated orlov option");
1064                        break;
1065#ifdef CONFIG_EXT3_FS_XATTR
1066                case Opt_user_xattr:
1067                        set_opt (sbi->s_mount_opt, XATTR_USER);
1068                        break;
1069                case Opt_nouser_xattr:
1070                        clear_opt (sbi->s_mount_opt, XATTR_USER);
1071                        break;
1072#else
1073                case Opt_user_xattr:
1074                case Opt_nouser_xattr:
1075                        ext3_msg(sb, KERN_INFO,
1076                                "(no)user_xattr options not supported");
1077                        break;
1078#endif
1079#ifdef CONFIG_EXT3_FS_POSIX_ACL
1080                case Opt_acl:
1081                        set_opt(sbi->s_mount_opt, POSIX_ACL);
1082                        break;
1083                case Opt_noacl:
1084                        clear_opt(sbi->s_mount_opt, POSIX_ACL);
1085                        break;
1086#else
1087                case Opt_acl:
1088                case Opt_noacl:
1089                        ext3_msg(sb, KERN_INFO,
1090                                "(no)acl options not supported");
1091                        break;
1092#endif
1093                case Opt_reservation:
1094                        set_opt(sbi->s_mount_opt, RESERVATION);
1095                        break;
1096                case Opt_noreservation:
1097                        clear_opt(sbi->s_mount_opt, RESERVATION);
1098                        break;
1099                case Opt_journal_update:
1100                        /* @@@ FIXME */
1101                        /* Eventually we will want to be able to create
1102                           a journal file here.  For now, only allow the
1103                           user to specify an existing inode to be the
1104                           journal file. */
1105                        if (is_remount) {
1106                                ext3_msg(sb, KERN_ERR, "error: cannot specify "
1107                                        "journal on remount");
1108                                return 0;
1109                        }
1110                        set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
1111                        break;
1112                case Opt_journal_inum:
1113                        if (is_remount) {
1114                                ext3_msg(sb, KERN_ERR, "error: cannot specify "
1115                                       "journal on remount");
1116                                return 0;
1117                        }
1118                        if (match_int(&args[0], &option))
1119                                return 0;
1120                        *inum = option;
1121                        break;
1122                case Opt_journal_dev:
1123                        if (is_remount) {
1124                                ext3_msg(sb, KERN_ERR, "error: cannot specify "
1125                                       "journal on remount");
1126                                return 0;
1127                        }
1128                        if (match_int(&args[0], &option))
1129                                return 0;
1130                        *journal_devnum = option;
1131                        break;
1132                case Opt_noload:
1133                        set_opt (sbi->s_mount_opt, NOLOAD);
1134                        break;
1135                case Opt_commit:
1136                        if (match_int(&args[0], &option))
1137                                return 0;
1138                        if (option < 0)
1139                                return 0;
1140                        if (option == 0)
1141                                option = JBD_DEFAULT_MAX_COMMIT_AGE;
1142                        sbi->s_commit_interval = HZ * option;
1143                        break;
1144                case Opt_data_journal:
1145                        data_opt = EXT3_MOUNT_JOURNAL_DATA;
1146                        goto datacheck;
1147                case Opt_data_ordered:
1148                        data_opt = EXT3_MOUNT_ORDERED_DATA;
1149                        goto datacheck;
1150                case Opt_data_writeback:
1151                        data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1152                datacheck:
1153                        if (is_remount) {
1154                                if (test_opt(sb, DATA_FLAGS) == data_opt)
1155                                        break;
1156                                ext3_msg(sb, KERN_ERR,
1157                                        "error: cannot change "
1158                                        "data mode on remount. The filesystem "
1159                                        "is mounted in data=%s mode and you "
1160                                        "try to remount it in data=%s mode.",
1161                                        data_mode_string(test_opt(sb,
1162                                                        DATA_FLAGS)),
1163                                        data_mode_string(data_opt));
1164                                return 0;
1165                        } else {
1166                                clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1167                                sbi->s_mount_opt |= data_opt;
1168                        }
1169                        break;
1170                case Opt_data_err_abort:
1171                        set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1172                        break;
1173                case Opt_data_err_ignore:
1174                        clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1175                        break;
1176#ifdef CONFIG_QUOTA
1177                case Opt_usrjquota:
1178                        if (!set_qf_name(sb, USRQUOTA, &args[0]))
1179                                return 0;
1180                        break;
1181                case Opt_grpjquota:
1182                        if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1183                                return 0;
1184                        break;
1185                case Opt_offusrjquota:
1186                        if (!clear_qf_name(sb, USRQUOTA))
1187                                return 0;
1188                        break;
1189                case Opt_offgrpjquota:
1190                        if (!clear_qf_name(sb, GRPQUOTA))
1191                                return 0;
1192                        break;
1193                case Opt_jqfmt_vfsold:
1194                        qfmt = QFMT_VFS_OLD;
1195                        goto set_qf_format;
1196                case Opt_jqfmt_vfsv0:
1197                        qfmt = QFMT_VFS_V0;
1198                        goto set_qf_format;
1199                case Opt_jqfmt_vfsv1:
1200                        qfmt = QFMT_VFS_V1;
1201set_qf_format:
1202                        if (sb_any_quota_loaded(sb) &&
1203                            sbi->s_jquota_fmt != qfmt) {
1204                                ext3_msg(sb, KERN_ERR, "error: cannot change "
1205                                        "journaled quota options when "
1206                                        "quota turned on.");
1207                                return 0;
1208                        }
1209                        sbi->s_jquota_fmt = qfmt;
1210                        break;
1211                case Opt_quota:
1212                case Opt_usrquota:
1213                        set_opt(sbi->s_mount_opt, QUOTA);
1214                        set_opt(sbi->s_mount_opt, USRQUOTA);
1215                        break;
1216                case Opt_grpquota:
1217                        set_opt(sbi->s_mount_opt, QUOTA);
1218                        set_opt(sbi->s_mount_opt, GRPQUOTA);
1219                        break;
1220                case Opt_noquota:
1221                        if (sb_any_quota_loaded(sb)) {
1222                                ext3_msg(sb, KERN_ERR, "error: cannot change "
1223                                        "quota options when quota turned on.");
1224                                return 0;
1225                        }
1226                        clear_opt(sbi->s_mount_opt, QUOTA);
1227                        clear_opt(sbi->s_mount_opt, USRQUOTA);
1228                        clear_opt(sbi->s_mount_opt, GRPQUOTA);
1229                        break;
1230#else
1231                case Opt_quota:
1232                case Opt_usrquota:
1233                case Opt_grpquota:
1234                        ext3_msg(sb, KERN_ERR,
1235                                "error: quota options not supported.");
1236                        break;
1237                case Opt_usrjquota:
1238                case Opt_grpjquota:
1239                case Opt_offusrjquota:
1240                case Opt_offgrpjquota:
1241                case Opt_jqfmt_vfsold:
1242                case Opt_jqfmt_vfsv0:
1243                case Opt_jqfmt_vfsv1:
1244                        ext3_msg(sb, KERN_ERR,
1245                                "error: journaled quota options not "
1246                                "supported.");
1247                        break;
1248                case Opt_noquota:
1249                        break;
1250#endif
1251                case Opt_abort:
1252                        set_opt(sbi->s_mount_opt, ABORT);
1253                        break;
1254                case Opt_nobarrier:
1255                        clear_opt(sbi->s_mount_opt, BARRIER);
1256                        break;
1257                case Opt_barrier:
1258                        if (args[0].from) {
1259                                if (match_int(&args[0], &option))
1260                                        return 0;
1261                        } else
1262                                option = 1;     /* No argument, default to 1 */
1263                        if (option)
1264                                set_opt(sbi->s_mount_opt, BARRIER);
1265                        else
1266                                clear_opt(sbi->s_mount_opt, BARRIER);
1267                        break;
1268                case Opt_ignore:
1269                        break;
1270                case Opt_resize:
1271                        if (!is_remount) {
1272                                ext3_msg(sb, KERN_ERR,
1273                                        "error: resize option only available "
1274                                        "for remount");
1275                                return 0;
1276                        }
1277                        if (match_int(&args[0], &option) != 0)
1278                                return 0;
1279                        *n_blocks_count = option;
1280                        break;
1281                case Opt_nobh:
1282                        ext3_msg(sb, KERN_WARNING,
1283                                "warning: ignoring deprecated nobh option");
1284                        break;
1285                case Opt_bh:
1286                        ext3_msg(sb, KERN_WARNING,
1287                                "warning: ignoring deprecated bh option");
1288                        break;
1289                default:
1290                        ext3_msg(sb, KERN_ERR,
1291                                "error: unrecognized mount option \"%s\" "
1292                                "or missing value", p);
1293                        return 0;
1294                }
1295        }
1296#ifdef CONFIG_QUOTA
1297        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1298                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1299                        clear_opt(sbi->s_mount_opt, USRQUOTA);
1300                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1301                        clear_opt(sbi->s_mount_opt, GRPQUOTA);
1302
1303                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1304                        ext3_msg(sb, KERN_ERR, "error: old and new quota "
1305                                        "format mixing.");
1306                        return 0;
1307                }
1308
1309                if (!sbi->s_jquota_fmt) {
1310                        ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1311                                        "not specified.");
1312                        return 0;
1313                }
1314        } else {
1315                if (sbi->s_jquota_fmt) {
1316                        ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1317                                        "specified with no journaling "
1318                                        "enabled.");
1319                        return 0;
1320                }
1321        }
1322#endif
1323        return 1;
1324}
1325
1326static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1327                            int read_only)
1328{
1329        struct ext3_sb_info *sbi = EXT3_SB(sb);
1330        int res = 0;
1331
1332        if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1333                ext3_msg(sb, KERN_ERR,
1334                        "error: revision level too high, "
1335                        "forcing read-only mode");
1336                res = MS_RDONLY;
1337        }
1338        if (read_only)
1339                return res;
1340        if (!(sbi->s_mount_state & EXT3_VALID_FS))
1341                ext3_msg(sb, KERN_WARNING,
1342                        "warning: mounting unchecked fs, "
1343                        "running e2fsck is recommended");
1344        else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1345                ext3_msg(sb, KERN_WARNING,
1346                        "warning: mounting fs with errors, "
1347                        "running e2fsck is recommended");
1348        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1349                 le16_to_cpu(es->s_mnt_count) >=
1350                        le16_to_cpu(es->s_max_mnt_count))
1351                ext3_msg(sb, KERN_WARNING,
1352                        "warning: maximal mount count reached, "
1353                        "running e2fsck is recommended");
1354        else if (le32_to_cpu(es->s_checkinterval) &&
1355                (le32_to_cpu(es->s_lastcheck) +
1356                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1357                ext3_msg(sb, KERN_WARNING,
1358                        "warning: checktime reached, "
1359                        "running e2fsck is recommended");
1360#if 0
1361                /* @@@ We _will_ want to clear the valid bit if we find
1362                   inconsistencies, to force a fsck at reboot.  But for
1363                   a plain journaled filesystem we can keep it set as
1364                   valid forever! :) */
1365        es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1366#endif
1367        if (!le16_to_cpu(es->s_max_mnt_count))
1368                es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1369        le16_add_cpu(&es->s_mnt_count, 1);
1370        es->s_mtime = cpu_to_le32(get_seconds());
1371        ext3_update_dynamic_rev(sb);
1372        EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1373
1374        ext3_commit_super(sb, es, 1);
1375        if (test_opt(sb, DEBUG))
1376                ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1377                                "bpg=%lu, ipg=%lu, mo=%04lx]",
1378                        sb->s_blocksize,
1379                        sbi->s_groups_count,
1380                        EXT3_BLOCKS_PER_GROUP(sb),
1381                        EXT3_INODES_PER_GROUP(sb),
1382                        sbi->s_mount_opt);
1383
1384        if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1385                char b[BDEVNAME_SIZE];
1386                ext3_msg(sb, KERN_INFO, "using external journal on %s",
1387                        bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1388        } else {
1389                ext3_msg(sb, KERN_INFO, "using internal journal");
1390        }
1391        cleancache_init_fs(sb);
1392        return res;
1393}
1394
1395/* Called at mount-time, super-block is locked */
1396static int ext3_check_descriptors(struct super_block *sb)
1397{
1398        struct ext3_sb_info *sbi = EXT3_SB(sb);
1399        int i;
1400
1401        ext3_debug ("Checking group descriptors");
1402
1403        for (i = 0; i < sbi->s_groups_count; i++) {
1404                struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1405                ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1406                ext3_fsblk_t last_block;
1407
1408                if (i == sbi->s_groups_count - 1)
1409                        last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1410                else
1411                        last_block = first_block +
1412                                (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1413
1414                if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1415                    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1416                {
1417                        ext3_error (sb, "ext3_check_descriptors",
1418                                    "Block bitmap for group %d"
1419                                    " not in group (block %lu)!",
1420                                    i, (unsigned long)
1421                                        le32_to_cpu(gdp->bg_block_bitmap));
1422                        return 0;
1423                }
1424                if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1425                    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1426                {
1427                        ext3_error (sb, "ext3_check_descriptors",
1428                                    "Inode bitmap for group %d"
1429                                    " not in group (block %lu)!",
1430                                    i, (unsigned long)
1431                                        le32_to_cpu(gdp->bg_inode_bitmap));
1432                        return 0;
1433                }
1434                if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1435                    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
1436                    last_block)
1437                {
1438                        ext3_error (sb, "ext3_check_descriptors",
1439                                    "Inode table for group %d"
1440                                    " not in group (block %lu)!",
1441                                    i, (unsigned long)
1442                                        le32_to_cpu(gdp->bg_inode_table));
1443                        return 0;
1444                }
1445        }
1446
1447        sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1448        sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1449        return 1;
1450}
1451
1452
1453/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
1454 * the superblock) which were deleted from all directories, but held open by
1455 * a process at the time of a crash.  We walk the list and try to delete these
1456 * inodes at recovery time (only with a read-write filesystem).
1457 *
1458 * In order to keep the orphan inode chain consistent during traversal (in
1459 * case of crash during recovery), we link each inode into the superblock
1460 * orphan list_head and handle it the same way as an inode deletion during
1461 * normal operation (which journals the operations for us).
1462 *
1463 * We only do an iget() and an iput() on each inode, which is very safe if we
1464 * accidentally point at an in-use or already deleted inode.  The worst that
1465 * can happen in this case is that we get a "bit already cleared" message from
1466 * ext3_free_inode().  The only reason we would point at a wrong inode is if
1467 * e2fsck was run on this filesystem, and it must have already done the orphan
1468 * inode cleanup for us, so we can safely abort without any further action.
1469 */
1470static void ext3_orphan_cleanup (struct super_block * sb,
1471                                 struct ext3_super_block * es)
1472{
1473        unsigned int s_flags = sb->s_flags;
1474        int nr_orphans = 0, nr_truncates = 0;
1475#ifdef CONFIG_QUOTA
1476        int i;
1477#endif
1478        if (!es->s_last_orphan) {
1479                jbd_debug(4, "no orphan inodes to clean up\n");
1480                return;
1481        }
1482
1483        if (bdev_read_only(sb->s_bdev)) {
1484                ext3_msg(sb, KERN_ERR, "error: write access "
1485                        "unavailable, skipping orphan cleanup.");
1486                return;
1487        }
1488
1489        /* Check if feature set allows readwrite operations */
1490        if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
1491                ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
1492                         "unknown ROCOMPAT features");
1493                return;
1494        }
1495
1496        if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1497                /* don't clear list on RO mount w/ errors */
1498                if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
1499                        jbd_debug(1, "Errors on filesystem, "
1500                                  "clearing orphan list.\n");
1501                        es->s_last_orphan = 0;
1502                }
1503                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1504                return;
1505        }
1506
1507        if (s_flags & MS_RDONLY) {
1508                ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1509                sb->s_flags &= ~MS_RDONLY;
1510        }
1511#ifdef CONFIG_QUOTA
1512        /* Needed for iput() to work correctly and not trash data */
1513        sb->s_flags |= MS_ACTIVE;
1514        /* Turn on quotas so that they are updated correctly */
1515        for (i = 0; i < MAXQUOTAS; i++) {
1516                if (EXT3_SB(sb)->s_qf_names[i]) {
1517                        int ret = ext3_quota_on_mount(sb, i);
1518                        if (ret < 0)
1519                                ext3_msg(sb, KERN_ERR,
1520                                        "error: cannot turn on journaled "
1521                                        "quota: %d", ret);
1522                }
1523        }
1524#endif
1525
1526        while (es->s_last_orphan) {
1527                struct inode *inode;
1528
1529                inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1530                if (IS_ERR(inode)) {
1531                        es->s_last_orphan = 0;
1532                        break;
1533                }
1534
1535                list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1536                dquot_initialize(inode);
1537                if (inode->i_nlink) {
1538                        printk(KERN_DEBUG
1539                                "%s: truncating inode %lu to %Ld bytes\n",
1540                                __func__, inode->i_ino, inode->i_size);
1541                        jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1542                                  inode->i_ino, inode->i_size);
1543                        ext3_truncate(inode);
1544                        nr_truncates++;
1545                } else {
1546                        printk(KERN_DEBUG
1547                                "%s: deleting unreferenced inode %lu\n",
1548                                __func__, inode->i_ino);
1549                        jbd_debug(2, "deleting unreferenced inode %lu\n",
1550                                  inode->i_ino);
1551                        nr_orphans++;
1552                }
1553                iput(inode);  /* The delete magic happens here! */
1554        }
1555
1556#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1557
1558        if (nr_orphans)
1559                ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1560                       PLURAL(nr_orphans));
1561        if (nr_truncates)
1562                ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1563                       PLURAL(nr_truncates));
1564#ifdef CONFIG_QUOTA
1565        /* Turn quotas off */
1566        for (i = 0; i < MAXQUOTAS; i++) {
1567                if (sb_dqopt(sb)->files[i])
1568                        dquot_quota_off(sb, i);
1569        }
1570#endif
1571        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1572}
1573
1574/*
1575 * Maximal file size.  There is a direct, and {,double-,triple-}indirect
1576 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1577 * We need to be 1 filesystem block less than the 2^32 sector limit.
1578 */
1579static loff_t ext3_max_size(int bits)
1580{
1581        loff_t res = EXT3_NDIR_BLOCKS;
1582        int meta_blocks;
1583        loff_t upper_limit;
1584
1585        /* This is calculated to be the largest file size for a
1586         * dense, file such that the total number of
1587         * sectors in the file, including data and all indirect blocks,
1588         * does not exceed 2^32 -1
1589         * __u32 i_blocks representing the total number of
1590         * 512 bytes blocks of the file
1591         */
1592        upper_limit = (1LL << 32) - 1;
1593
1594        /* total blocks in file system block size */
1595        upper_limit >>= (bits - 9);
1596
1597
1598        /* indirect blocks */
1599        meta_blocks = 1;
1600        /* double indirect blocks */
1601        meta_blocks += 1 + (1LL << (bits-2));
1602        /* tripple indirect blocks */
1603        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1604
1605        upper_limit -= meta_blocks;
1606        upper_limit <<= bits;
1607
1608        res += 1LL << (bits-2);
1609        res += 1LL << (2*(bits-2));
1610        res += 1LL << (3*(bits-2));
1611        res <<= bits;
1612        if (res > upper_limit)
1613                res = upper_limit;
1614
1615        if (res > MAX_LFS_FILESIZE)
1616                res = MAX_LFS_FILESIZE;
1617
1618        return res;
1619}
1620
1621static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1622                                    ext3_fsblk_t logic_sb_block,
1623                                    int nr)
1624{
1625        struct ext3_sb_info *sbi = EXT3_SB(sb);
1626        unsigned long bg, first_meta_bg;
1627        int has_super = 0;
1628
1629        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1630
1631        if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1632            nr < first_meta_bg)
1633                return (logic_sb_block + nr + 1);
1634        bg = sbi->s_desc_per_block * nr;
1635        if (ext3_bg_has_super(sb, bg))
1636                has_super = 1;
1637        return (has_super + ext3_group_first_block_no(sb, bg));
1638}
1639
1640
1641static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1642{
1643        struct buffer_head * bh;
1644        struct ext3_super_block *es = NULL;
1645        struct ext3_sb_info *sbi;
1646        ext3_fsblk_t block;
1647        ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1648        ext3_fsblk_t logic_sb_block;
1649        unsigned long offset = 0;
1650        unsigned int journal_inum = 0;
1651        unsigned long journal_devnum = 0;
1652        unsigned long def_mount_opts;
1653        struct inode *root;
1654        int blocksize;
1655        int hblock;
1656        int db_count;
1657        int i;
1658        int needs_recovery;
1659        int ret = -EINVAL;
1660        __le32 features;
1661        int err;
1662
1663        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1664        if (!sbi)
1665                return -ENOMEM;
1666
1667        sbi->s_blockgroup_lock =
1668                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1669        if (!sbi->s_blockgroup_lock) {
1670                kfree(sbi);
1671                return -ENOMEM;
1672        }
1673        sb->s_fs_info = sbi;
1674        sbi->s_sb_block = sb_block;
1675
1676        blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1677        if (!blocksize) {
1678                ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1679                goto out_fail;
1680        }
1681
1682        /*
1683         * The ext3 superblock will not be buffer aligned for other than 1kB
1684         * block sizes.  We need to calculate the offset from buffer start.
1685         */
1686        if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1687                logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1688                offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1689        } else {
1690                logic_sb_block = sb_block;
1691        }
1692
1693        if (!(bh = sb_bread(sb, logic_sb_block))) {
1694                ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1695                goto out_fail;
1696        }
1697        /*
1698         * Note: s_es must be initialized as soon as possible because
1699         *       some ext3 macro-instructions depend on its value
1700         */
1701        es = (struct ext3_super_block *) (bh->b_data + offset);
1702        sbi->s_es = es;
1703        sb->s_magic = le16_to_cpu(es->s_magic);
1704        if (sb->s_magic != EXT3_SUPER_MAGIC)
1705                goto cantfind_ext3;
1706
1707        /* Set defaults before we parse the mount options */
1708        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1709        if (def_mount_opts & EXT3_DEFM_DEBUG)
1710                set_opt(sbi->s_mount_opt, DEBUG);
1711        if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1712                set_opt(sbi->s_mount_opt, GRPID);
1713        if (def_mount_opts & EXT3_DEFM_UID16)
1714                set_opt(sbi->s_mount_opt, NO_UID32);
1715#ifdef CONFIG_EXT3_FS_XATTR
1716        if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1717                set_opt(sbi->s_mount_opt, XATTR_USER);
1718#endif
1719#ifdef CONFIG_EXT3_FS_POSIX_ACL
1720        if (def_mount_opts & EXT3_DEFM_ACL)
1721                set_opt(sbi->s_mount_opt, POSIX_ACL);
1722#endif
1723        if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1724                set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1725        else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1726                set_opt(sbi->s_mount_opt, ORDERED_DATA);
1727        else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1728                set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1729
1730        if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1731                set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1732        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
1733                set_opt(sbi->s_mount_opt, ERRORS_CONT);
1734        else
1735                set_opt(sbi->s_mount_opt, ERRORS_RO);
1736
1737        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
1738        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
1739
1740        /* enable barriers by default */
1741        set_opt(sbi->s_mount_opt, BARRIER);
1742        set_opt(sbi->s_mount_opt, RESERVATION);
1743
1744        if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1745                            NULL, 0))
1746                goto failed_mount;
1747
1748        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1749                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1750
1751        if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1752            (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1753             EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1754             EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1755                ext3_msg(sb, KERN_WARNING,
1756                        "warning: feature flags set on rev 0 fs, "
1757                        "running e2fsck is recommended");
1758        /*
1759         * Check feature flags regardless of the revision level, since we
1760         * previously didn't change the revision level when setting the flags,
1761         * so there is a chance incompat flags are set on a rev 0 filesystem.
1762         */
1763        features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1764        if (features) {
1765                ext3_msg(sb, KERN_ERR,
1766                        "error: couldn't mount because of unsupported "
1767                        "optional features (%x)", le32_to_cpu(features));
1768                goto failed_mount;
1769        }
1770        features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1771        if (!(sb->s_flags & MS_RDONLY) && features) {
1772                ext3_msg(sb, KERN_ERR,
1773                        "error: couldn't mount RDWR because of unsupported "
1774                        "optional features (%x)", le32_to_cpu(features));
1775                goto failed_mount;
1776        }
1777        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1778
1779        if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1780            blocksize > EXT3_MAX_BLOCK_SIZE) {
1781                ext3_msg(sb, KERN_ERR,
1782                        "error: couldn't mount because of unsupported "
1783                        "filesystem blocksize %d", blocksize);
1784                goto failed_mount;
1785        }
1786
1787        hblock = bdev_logical_block_size(sb->s_bdev);
1788        if (sb->s_blocksize != blocksize) {
1789                /*
1790                 * Make sure the blocksize for the filesystem is larger
1791                 * than the hardware sectorsize for the machine.
1792                 */
1793                if (blocksize < hblock) {
1794                        ext3_msg(sb, KERN_ERR,
1795                                "error: fsblocksize %d too small for "
1796                                "hardware sectorsize %d", blocksize, hblock);
1797                        goto failed_mount;
1798                }
1799
1800                brelse (bh);
1801                if (!sb_set_blocksize(sb, blocksize)) {
1802                        ext3_msg(sb, KERN_ERR,
1803                                "error: bad blocksize %d", blocksize);
1804                        goto out_fail;
1805                }
1806                logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1807                offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1808                bh = sb_bread(sb, logic_sb_block);
1809                if (!bh) {
1810                        ext3_msg(sb, KERN_ERR,
1811                               "error: can't read superblock on 2nd try");
1812                        goto failed_mount;
1813                }
1814                es = (struct ext3_super_block *)(bh->b_data + offset);
1815                sbi->s_es = es;
1816                if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1817                        ext3_msg(sb, KERN_ERR,
1818                                "error: magic mismatch");
1819                        goto failed_mount;
1820                }
1821        }
1822
1823        sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1824
1825        if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1826                sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1827                sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1828        } else {
1829                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1830                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1831                if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1832                    (!is_power_of_2(sbi->s_inode_size)) ||
1833                    (sbi->s_inode_size > blocksize)) {
1834                        ext3_msg(sb, KERN_ERR,
1835                                "error: unsupported inode size: %d",
1836                                sbi->s_inode_size);
1837                        goto failed_mount;
1838                }
1839        }
1840        sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1841                                   le32_to_cpu(es->s_log_frag_size);
1842        if (blocksize != sbi->s_frag_size) {
1843                ext3_msg(sb, KERN_ERR,
1844                       "error: fragsize %lu != blocksize %u (unsupported)",
1845                       sbi->s_frag_size, blocksize);
1846                goto failed_mount;
1847        }
1848        sbi->s_frags_per_block = 1;
1849        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1850        sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1851        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1852        if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
1853                goto cantfind_ext3;
1854        sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1855        if (sbi->s_inodes_per_block == 0)
1856                goto cantfind_ext3;
1857        sbi->s_itb_per_group = sbi->s_inodes_per_group /
1858                                        sbi->s_inodes_per_block;
1859        sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1860        sbi->s_sbh = bh;
1861        sbi->s_mount_state = le16_to_cpu(es->s_state);
1862        sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1863        sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1864        for (i=0; i < 4; i++)
1865                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1866        sbi->s_def_hash_version = es->s_def_hash_version;
1867        i = le32_to_cpu(es->s_flags);
1868        if (i & EXT2_FLAGS_UNSIGNED_HASH)
1869                sbi->s_hash_unsigned = 3;
1870        else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
1871#ifdef __CHAR_UNSIGNED__
1872                es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
1873                sbi->s_hash_unsigned = 3;
1874#else
1875                es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
1876#endif
1877        }
1878
1879        if (sbi->s_blocks_per_group > blocksize * 8) {
1880                ext3_msg(sb, KERN_ERR,
1881                        "#blocks per group too big: %lu",
1882                        sbi->s_blocks_per_group);
1883                goto failed_mount;
1884        }
1885        if (sbi->s_frags_per_group > blocksize * 8) {
1886                ext3_msg(sb, KERN_ERR,
1887                        "error: #fragments per group too big: %lu",
1888                        sbi->s_frags_per_group);
1889                goto failed_mount;
1890        }
1891        if (sbi->s_inodes_per_group > blocksize * 8) {
1892                ext3_msg(sb, KERN_ERR,
1893                        "error: #inodes per group too big: %lu",
1894                        sbi->s_inodes_per_group);
1895                goto failed_mount;
1896        }
1897
1898        err = generic_check_addressable(sb->s_blocksize_bits,
1899                                        le32_to_cpu(es->s_blocks_count));
1900        if (err) {
1901                ext3_msg(sb, KERN_ERR,
1902                        "error: filesystem is too large to mount safely");
1903                if (sizeof(sector_t) < 8)
1904                        ext3_msg(sb, KERN_ERR,
1905                                "error: CONFIG_LBDAF not enabled");
1906                ret = err;
1907                goto failed_mount;
1908        }
1909
1910        if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1911                goto cantfind_ext3;
1912        sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1913                               le32_to_cpu(es->s_first_data_block) - 1)
1914                                       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1915        db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1916        sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1917                                    GFP_KERNEL);
1918        if (sbi->s_group_desc == NULL) {
1919                ext3_msg(sb, KERN_ERR,
1920                        "error: not enough memory");
1921                ret = -ENOMEM;
1922                goto failed_mount;
1923        }
1924
1925        bgl_lock_init(sbi->s_blockgroup_lock);
1926
1927        for (i = 0; i < db_count; i++) {
1928                block = descriptor_loc(sb, logic_sb_block, i);
1929                sbi->s_group_desc[i] = sb_bread(sb, block);
1930                if (!sbi->s_group_desc[i]) {
1931                        ext3_msg(sb, KERN_ERR,
1932                                "error: can't read group descriptor %d", i);
1933                        db_count = i;
1934                        goto failed_mount2;
1935                }
1936        }
1937        if (!ext3_check_descriptors (sb)) {
1938                ext3_msg(sb, KERN_ERR,
1939                        "error: group descriptors corrupted");
1940                goto failed_mount2;
1941        }
1942        sbi->s_gdb_count = db_count;
1943        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1944        spin_lock_init(&sbi->s_next_gen_lock);
1945
1946        /* per fileystem reservation list head & lock */
1947        spin_lock_init(&sbi->s_rsv_window_lock);
1948        sbi->s_rsv_window_root = RB_ROOT;
1949        /* Add a single, static dummy reservation to the start of the
1950         * reservation window list --- it gives us a placeholder for
1951         * append-at-start-of-list which makes the allocation logic
1952         * _much_ simpler. */
1953        sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1954        sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1955        sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1956        sbi->s_rsv_window_head.rsv_goal_size = 0;
1957        ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
1958
1959        /*
1960         * set up enough so that it can read an inode
1961         */
1962        sb->s_op = &ext3_sops;
1963        sb->s_export_op = &ext3_export_ops;
1964        sb->s_xattr = ext3_xattr_handlers;
1965#ifdef CONFIG_QUOTA
1966        sb->s_qcop = &ext3_qctl_operations;
1967        sb->dq_op = &ext3_quota_operations;
1968#endif
1969        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1970        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1971        mutex_init(&sbi->s_orphan_lock);
1972        mutex_init(&sbi->s_resize_lock);
1973
1974        sb->s_root = NULL;
1975
1976        needs_recovery = (es->s_last_orphan != 0 ||
1977                          EXT3_HAS_INCOMPAT_FEATURE(sb,
1978                                    EXT3_FEATURE_INCOMPAT_RECOVER));
1979
1980        /*
1981         * The first inode we look at is the journal inode.  Don't try
1982         * root first: it may be modified in the journal!
1983         */
1984        if (!test_opt(sb, NOLOAD) &&
1985            EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1986                if (ext3_load_journal(sb, es, journal_devnum))
1987                        goto failed_mount2;
1988        } else if (journal_inum) {
1989                if (ext3_create_journal(sb, es, journal_inum))
1990                        goto failed_mount2;
1991        } else {
1992                if (!silent)
1993                        ext3_msg(sb, KERN_ERR,
1994                                "error: no journal found. "
1995                                "mounting ext3 over ext2?");
1996                goto failed_mount2;
1997        }
1998        err = percpu_counter_init(&sbi->s_freeblocks_counter,
1999                        ext3_count_free_blocks(sb));
2000        if (!err) {
2001                err = percpu_counter_init(&sbi->s_freeinodes_counter,
2002                                ext3_count_free_inodes(sb));
2003        }
2004        if (!err) {
2005                err = percpu_counter_init(&sbi->s_dirs_counter,
2006                                ext3_count_dirs(sb));
2007        }
2008        if (err) {
2009                ext3_msg(sb, KERN_ERR, "error: insufficient memory");
2010                ret = err;
2011                goto failed_mount3;
2012        }
2013
2014        /* We have now updated the journal if required, so we can
2015         * validate the data journaling mode. */
2016        switch (test_opt(sb, DATA_FLAGS)) {
2017        case 0:
2018                /* No mode set, assume a default based on the journal
2019                   capabilities: ORDERED_DATA if the journal can
2020                   cope, else JOURNAL_DATA */
2021                if (journal_check_available_features
2022                    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
2023                        set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
2024                else
2025                        set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2026                break;
2027
2028        case EXT3_MOUNT_ORDERED_DATA:
2029        case EXT3_MOUNT_WRITEBACK_DATA:
2030                if (!journal_check_available_features
2031                    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
2032                        ext3_msg(sb, KERN_ERR,
2033                                "error: journal does not support "
2034                                "requested data journaling mode");
2035                        goto failed_mount3;
2036                }
2037        default:
2038                break;
2039        }
2040
2041        /*
2042         * The journal_load will have done any necessary log recovery,
2043         * so we can safely mount the rest of the filesystem now.
2044         */
2045
2046        root = ext3_iget(sb, EXT3_ROOT_INO);
2047        if (IS_ERR(root)) {
2048                ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2049                ret = PTR_ERR(root);
2050                goto failed_mount3;
2051        }
2052        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2053                iput(root);
2054                ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2055                goto failed_mount3;
2056        }
2057        sb->s_root = d_make_root(root);
2058        if (!sb->s_root) {
2059                ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2060                ret = -ENOMEM;
2061                goto failed_mount3;
2062        }
2063
2064        if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
2065                sb->s_flags |= MS_RDONLY;
2066
2067        EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2068        ext3_orphan_cleanup(sb, es);
2069        EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2070        if (needs_recovery) {
2071                ext3_mark_recovery_complete(sb, es);
2072                ext3_msg(sb, KERN_INFO, "recovery complete");
2073        }
2074        ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2075                test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2076                test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2077                "writeback");
2078
2079        return 0;
2080
2081cantfind_ext3:
2082        if (!silent)
2083                ext3_msg(sb, KERN_INFO,
2084                        "error: can't find ext3 filesystem on dev %s.",
2085                       sb->s_id);
2086        goto failed_mount;
2087
2088failed_mount3:
2089        percpu_counter_destroy(&sbi->s_freeblocks_counter);
2090        percpu_counter_destroy(&sbi->s_freeinodes_counter);
2091        percpu_counter_destroy(&sbi->s_dirs_counter);
2092        journal_destroy(sbi->s_journal);
2093failed_mount2:
2094        for (i = 0; i < db_count; i++)
2095                brelse(sbi->s_group_desc[i]);
2096        kfree(sbi->s_group_desc);
2097failed_mount:
2098#ifdef CONFIG_QUOTA
2099        for (i = 0; i < MAXQUOTAS; i++)
2100                kfree(sbi->s_qf_names[i]);
2101#endif
2102        ext3_blkdev_remove(sbi);
2103        brelse(bh);
2104out_fail:
2105        sb->s_fs_info = NULL;
2106        kfree(sbi->s_blockgroup_lock);
2107        kfree(sbi);
2108        return ret;
2109}
2110
2111/*
2112 * Setup any per-fs journal parameters now.  We'll do this both on
2113 * initial mount, once the journal has been initialised but before we've
2114 * done any recovery; and again on any subsequent remount.
2115 */
2116static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
2117{
2118        struct ext3_sb_info *sbi = EXT3_SB(sb);
2119
2120        if (sbi->s_commit_interval)
2121                journal->j_commit_interval = sbi->s_commit_interval;
2122        /* We could also set up an ext3-specific default for the commit
2123         * interval here, but for now we'll just fall back to the jbd
2124         * default. */
2125
2126        spin_lock(&journal->j_state_lock);
2127        if (test_opt(sb, BARRIER))
2128                journal->j_flags |= JFS_BARRIER;
2129        else
2130                journal->j_flags &= ~JFS_BARRIER;
2131        if (test_opt(sb, DATA_ERR_ABORT))
2132                journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2133        else
2134                journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
2135        spin_unlock(&journal->j_state_lock);
2136}
2137
2138static journal_t *ext3_get_journal(struct super_block *sb,
2139                                   unsigned int journal_inum)
2140{
2141        struct inode *journal_inode;
2142        journal_t *journal;
2143
2144        /* First, test for the existence of a valid inode on disk.  Bad
2145         * things happen if we iget() an unused inode, as the subsequent
2146         * iput() will try to delete it. */
2147
2148        journal_inode = ext3_iget(sb, journal_inum);
2149        if (IS_ERR(journal_inode)) {
2150                ext3_msg(sb, KERN_ERR, "error: no journal found");
2151                return NULL;
2152        }
2153        if (!journal_inode->i_nlink) {
2154                make_bad_inode(journal_inode);
2155                iput(journal_inode);
2156                ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2157                return NULL;
2158        }
2159
2160        jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2161                  journal_inode, journal_inode->i_size);
2162        if (!S_ISREG(journal_inode->i_mode)) {
2163                ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2164                iput(journal_inode);
2165                return NULL;
2166        }
2167
2168        journal = journal_init_inode(journal_inode);
2169        if (!journal) {
2170                ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2171                iput(journal_inode);
2172                return NULL;
2173        }
2174        journal->j_private = sb;
2175        ext3_init_journal_params(sb, journal);
2176        return journal;
2177}
2178
2179static journal_t *ext3_get_dev_journal(struct super_block *sb,
2180                                       dev_t j_dev)
2181{
2182        struct buffer_head * bh;
2183        journal_t *journal;
2184        ext3_fsblk_t start;
2185        ext3_fsblk_t len;
2186        int hblock, blocksize;
2187        ext3_fsblk_t sb_block;
2188        unsigned long offset;
2189        struct ext3_super_block * es;
2190        struct block_device *bdev;
2191
2192        bdev = ext3_blkdev_get(j_dev, sb);
2193        if (bdev == NULL)
2194                return NULL;
2195
2196        blocksize = sb->s_blocksize;
2197        hblock = bdev_logical_block_size(bdev);
2198        if (blocksize < hblock) {
2199                ext3_msg(sb, KERN_ERR,
2200                        "error: blocksize too small for journal device");
2201                goto out_bdev;
2202        }
2203
2204        sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
2205        offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2206        set_blocksize(bdev, blocksize);
2207        if (!(bh = __bread(bdev, sb_block, blocksize))) {
2208                ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2209                        "external journal");
2210                goto out_bdev;
2211        }
2212
2213        es = (struct ext3_super_block *) (bh->b_data + offset);
2214        if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2215            !(le32_to_cpu(es->s_feature_incompat) &
2216              EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2217                ext3_msg(sb, KERN_ERR, "error: external journal has "
2218                        "bad superblock");
2219                brelse(bh);
2220                goto out_bdev;
2221        }
2222
2223        if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2224                ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2225                brelse(bh);
2226                goto out_bdev;
2227        }
2228
2229        len = le32_to_cpu(es->s_blocks_count);
2230        start = sb_block + 1;
2231        brelse(bh);     /* we're done with the superblock */
2232
2233        journal = journal_init_dev(bdev, sb->s_bdev,
2234                                        start, len, blocksize);
2235        if (!journal) {
2236                ext3_msg(sb, KERN_ERR,
2237                        "error: failed to create device journal");
2238                goto out_bdev;
2239        }
2240        journal->j_private = sb;
2241        if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
2242                if (bh_submit_read(journal->j_sb_buffer)) {
2243                        ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2244                        goto out_journal;
2245                }
2246        }
2247        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2248                ext3_msg(sb, KERN_ERR,
2249                        "error: external journal has more than one "
2250                        "user (unsupported) - %d",
2251                        be32_to_cpu(journal->j_superblock->s_nr_users));
2252                goto out_journal;
2253        }
2254        EXT3_SB(sb)->journal_bdev = bdev;
2255        ext3_init_journal_params(sb, journal);
2256        return journal;
2257out_journal:
2258        journal_destroy(journal);
2259out_bdev:
2260        ext3_blkdev_put(bdev);
2261        return NULL;
2262}
2263
2264static int ext3_load_journal(struct super_block *sb,
2265                             struct ext3_super_block *es,
2266                             unsigned long journal_devnum)
2267{
2268        journal_t *journal;
2269        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
2270        dev_t journal_dev;
2271        int err = 0;
2272        int really_read_only;
2273
2274        if (journal_devnum &&
2275            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2276                ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2277                        "numbers have changed");
2278                journal_dev = new_decode_dev(journal_devnum);
2279        } else
2280                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2281
2282        really_read_only = bdev_read_only(sb->s_bdev);
2283
2284        /*
2285         * Are we loading a blank journal or performing recovery after a
2286         * crash?  For recovery, we need to check in advance whether we
2287         * can get read-write access to the device.
2288         */
2289
2290        if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2291                if (sb->s_flags & MS_RDONLY) {
2292                        ext3_msg(sb, KERN_INFO,
2293                                "recovery required on readonly filesystem");
2294                        if (really_read_only) {
2295                                ext3_msg(sb, KERN_ERR, "error: write access "
2296                                        "unavailable, cannot proceed");
2297                                return -EROFS;
2298                        }
2299                        ext3_msg(sb, KERN_INFO,
2300                                "write access will be enabled during recovery");
2301                }
2302        }
2303
2304        if (journal_inum && journal_dev) {
2305                ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2306                       "and inode journals");
2307                return -EINVAL;
2308        }
2309
2310        if (journal_inum) {
2311                if (!(journal = ext3_get_journal(sb, journal_inum)))
2312                        return -EINVAL;
2313        } else {
2314                if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
2315                        return -EINVAL;
2316        }
2317
2318        if (!(journal->j_flags & JFS_BARRIER))
2319                printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
2320
2321        if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2322                err = journal_update_format(journal);
2323                if (err)  {
2324                        ext3_msg(sb, KERN_ERR, "error updating journal");
2325                        journal_destroy(journal);
2326                        return err;
2327                }
2328        }
2329
2330        if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
2331                err = journal_wipe(journal, !really_read_only);
2332        if (!err)
2333                err = journal_load(journal);
2334
2335        if (err) {
2336                ext3_msg(sb, KERN_ERR, "error loading journal");
2337                journal_destroy(journal);
2338                return err;
2339        }
2340
2341        EXT3_SB(sb)->s_journal = journal;
2342        ext3_clear_journal_err(sb, es);
2343
2344        if (!really_read_only && journal_devnum &&
2345            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2346                es->s_journal_dev = cpu_to_le32(journal_devnum);
2347
2348                /* Make sure we flush the recovery flag to disk. */
2349                ext3_commit_super(sb, es, 1);
2350        }
2351
2352        return 0;
2353}
2354
2355static int ext3_create_journal(struct super_block *sb,
2356                               struct ext3_super_block *es,
2357                               unsigned int journal_inum)
2358{
2359        journal_t *journal;
2360        int err;
2361
2362        if (sb->s_flags & MS_RDONLY) {
2363                ext3_msg(sb, KERN_ERR,
2364                        "error: readonly filesystem when trying to "
2365                        "create journal");
2366                return -EROFS;
2367        }
2368
2369        journal = ext3_get_journal(sb, journal_inum);
2370        if (!journal)
2371                return -EINVAL;
2372
2373        ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
2374               journal_inum);
2375
2376        err = journal_create(journal);
2377        if (err) {
2378                ext3_msg(sb, KERN_ERR, "error creating journal");
2379                journal_destroy(journal);
2380                return -EIO;
2381        }
2382
2383        EXT3_SB(sb)->s_journal = journal;
2384
2385        ext3_update_dynamic_rev(sb);
2386        EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2387        EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
2388
2389        es->s_journal_inum = cpu_to_le32(journal_inum);
2390
2391        /* Make sure we flush the recovery flag to disk. */
2392        ext3_commit_super(sb, es, 1);
2393
2394        return 0;
2395}
2396
2397static int ext3_commit_super(struct super_block *sb,
2398                               struct ext3_super_block *es,
2399                               int sync)
2400{
2401        struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
2402        int error = 0;
2403
2404        if (!sbh)
2405                return error;
2406
2407        if (buffer_write_io_error(sbh)) {
2408                /*
2409                 * Oh, dear.  A previous attempt to write the
2410                 * superblock failed.  This could happen because the
2411                 * USB device was yanked out.  Or it could happen to
2412                 * be a transient write error and maybe the block will
2413                 * be remapped.  Nothing we can do but to retry the
2414                 * write and hope for the best.
2415                 */
2416                ext3_msg(sb, KERN_ERR, "previous I/O error to "
2417                       "superblock detected");
2418                clear_buffer_write_io_error(sbh);
2419                set_buffer_uptodate(sbh);
2420        }
2421        /*
2422         * If the file system is mounted read-only, don't update the
2423         * superblock write time.  This avoids updating the superblock
2424         * write time when we are mounting the root file system
2425         * read/only but we need to replay the journal; at that point,
2426         * for people who are east of GMT and who make their clock
2427         * tick in localtime for Windows bug-for-bug compatibility,
2428         * the clock is set in the future, and this will cause e2fsck
2429         * to complain and force a full file system check.
2430         */
2431        if (!(sb->s_flags & MS_RDONLY))
2432                es->s_wtime = cpu_to_le32(get_seconds());
2433        es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
2434        es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2435        BUFFER_TRACE(sbh, "marking dirty");
2436        mark_buffer_dirty(sbh);
2437        if (sync) {
2438                error = sync_dirty_buffer(sbh);
2439                if (buffer_write_io_error(sbh)) {
2440                        ext3_msg(sb, KERN_ERR, "I/O error while writing "
2441                               "superblock");
2442                        clear_buffer_write_io_error(sbh);
2443                        set_buffer_uptodate(sbh);
2444                }
2445        }
2446        return error;
2447}
2448
2449
2450/*
2451 * Have we just finished recovery?  If so, and if we are mounting (or
2452 * remounting) the filesystem readonly, then we will end up with a
2453 * consistent fs on disk.  Record that fact.
2454 */
2455static void ext3_mark_recovery_complete(struct super_block * sb,
2456                                        struct ext3_super_block * es)
2457{
2458        journal_t *journal = EXT3_SB(sb)->s_journal;
2459
2460        journal_lock_updates(journal);
2461        if (journal_flush(journal) < 0)
2462                goto out;
2463
2464        if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2465            sb->s_flags & MS_RDONLY) {
2466                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2467                ext3_commit_super(sb, es, 1);
2468        }
2469
2470out:
2471        journal_unlock_updates(journal);
2472}
2473
2474/*
2475 * If we are mounting (or read-write remounting) a filesystem whose journal
2476 * has recorded an error from a previous lifetime, move that error to the
2477 * main filesystem now.
2478 */
2479static void ext3_clear_journal_err(struct super_block *sb,
2480                                   struct ext3_super_block *es)
2481{
2482        journal_t *journal;
2483        int j_errno;
2484        const char *errstr;
2485
2486        journal = EXT3_SB(sb)->s_journal;
2487
2488        /*
2489         * Now check for any error status which may have been recorded in the
2490         * journal by a prior ext3_error() or ext3_abort()
2491         */
2492
2493        j_errno = journal_errno(journal);
2494        if (j_errno) {
2495                char nbuf[16];
2496
2497                errstr = ext3_decode_error(sb, j_errno, nbuf);
2498                ext3_warning(sb, __func__, "Filesystem error recorded "
2499                             "from previous mount: %s", errstr);
2500                ext3_warning(sb, __func__, "Marking fs in need of "
2501                             "filesystem check.");
2502
2503                EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2504                es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2505                ext3_commit_super (sb, es, 1);
2506
2507                journal_clear_err(journal);
2508        }
2509}
2510
2511/*
2512 * Force the running and committing transactions to commit,
2513 * and wait on the commit.
2514 */
2515int ext3_force_commit(struct super_block *sb)
2516{
2517        journal_t *journal;
2518        int ret;
2519
2520        if (sb->s_flags & MS_RDONLY)
2521                return 0;
2522
2523        journal = EXT3_SB(sb)->s_journal;
2524        ret = ext3_journal_force_commit(journal);
2525        return ret;
2526}
2527
2528static int ext3_sync_fs(struct super_block *sb, int wait)
2529{
2530        tid_t target;
2531
2532        trace_ext3_sync_fs(sb, wait);
2533        /*
2534         * Writeback quota in non-journalled quota case - journalled quota has
2535         * no dirty dquots
2536         */
2537        dquot_writeback_dquots(sb, -1);
2538        if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2539                if (wait)
2540                        log_wait_commit(EXT3_SB(sb)->s_journal, target);
2541        }
2542        return 0;
2543}
2544
2545/*
2546 * LVM calls this function before a (read-only) snapshot is created.  This
2547 * gives us a chance to flush the journal completely and mark the fs clean.
2548 */
2549static int ext3_freeze(struct super_block *sb)
2550{
2551        int error = 0;
2552        journal_t *journal;
2553
2554        if (!(sb->s_flags & MS_RDONLY)) {
2555                journal = EXT3_SB(sb)->s_journal;
2556
2557                /* Now we set up the journal barrier. */
2558                journal_lock_updates(journal);
2559
2560                /*
2561                 * We don't want to clear needs_recovery flag when we failed
2562                 * to flush the journal.
2563                 */
2564                error = journal_flush(journal);
2565                if (error < 0)
2566                        goto out;
2567
2568                /* Journal blocked and flushed, clear needs_recovery flag. */
2569                EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2570                error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2571                if (error)
2572                        goto out;
2573        }
2574        return 0;
2575
2576out:
2577        journal_unlock_updates(journal);
2578        return error;
2579}
2580
2581/*
2582 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
2583 * flag here, even though the filesystem is not technically dirty yet.
2584 */
2585static int ext3_unfreeze(struct super_block *sb)
2586{
2587        if (!(sb->s_flags & MS_RDONLY)) {
2588                /* Reser the needs_recovery flag before the fs is unlocked. */
2589                EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2590                ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2591                journal_unlock_updates(EXT3_SB(sb)->s_journal);
2592        }
2593        return 0;
2594}
2595
2596static int ext3_remount (struct super_block * sb, int * flags, char * data)
2597{
2598        struct ext3_super_block * es;
2599        struct ext3_sb_info *sbi = EXT3_SB(sb);
2600        ext3_fsblk_t n_blocks_count = 0;
2601        unsigned long old_sb_flags;
2602        struct ext3_mount_options old_opts;
2603        int enable_quota = 0;
2604        int err;
2605#ifdef CONFIG_QUOTA
2606        int i;
2607#endif
2608
2609        /* Store the original options */
2610        old_sb_flags = sb->s_flags;
2611        old_opts.s_mount_opt = sbi->s_mount_opt;
2612        old_opts.s_resuid = sbi->s_resuid;
2613        old_opts.s_resgid = sbi->s_resgid;
2614        old_opts.s_commit_interval = sbi->s_commit_interval;
2615#ifdef CONFIG_QUOTA
2616        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2617        for (i = 0; i < MAXQUOTAS; i++)
2618                if (sbi->s_qf_names[i]) {
2619                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
2620                                                         GFP_KERNEL);
2621                        if (!old_opts.s_qf_names[i]) {
2622                                int j;
2623
2624                                for (j = 0; j < i; j++)
2625                                        kfree(old_opts.s_qf_names[j]);
2626                                return -ENOMEM;
2627                        }
2628                } else
2629                        old_opts.s_qf_names[i] = NULL;
2630#endif
2631
2632        /*
2633         * Allow the "check" option to be passed as a remount option.
2634         */
2635        if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
2636                err = -EINVAL;
2637                goto restore_opts;
2638        }
2639
2640        if (test_opt(sb, ABORT))
2641                ext3_abort(sb, __func__, "Abort forced by user");
2642
2643        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2644                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2645
2646        es = sbi->s_es;
2647
2648        ext3_init_journal_params(sb, sbi->s_journal);
2649
2650        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2651                n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2652                if (test_opt(sb, ABORT)) {
2653                        err = -EROFS;
2654                        goto restore_opts;
2655                }
2656
2657                if (*flags & MS_RDONLY) {
2658                        err = dquot_suspend(sb, -1);
2659                        if (err < 0)
2660                                goto restore_opts;
2661
2662                        /*
2663                         * First of all, the unconditional stuff we have to do
2664                         * to disable replay of the journal when we next remount
2665                         */
2666                        sb->s_flags |= MS_RDONLY;
2667
2668                        /*
2669                         * OK, test if we are remounting a valid rw partition
2670                         * readonly, and if so set the rdonly flag and then
2671                         * mark the partition as valid again.
2672                         */
2673                        if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
2674                            (sbi->s_mount_state & EXT3_VALID_FS))
2675                                es->s_state = cpu_to_le16(sbi->s_mount_state);
2676
2677                        ext3_mark_recovery_complete(sb, es);
2678                } else {
2679                        __le32 ret;
2680                        if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2681                                        ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2682                                ext3_msg(sb, KERN_WARNING,
2683                                        "warning: couldn't remount RDWR "
2684                                        "because of unsupported optional "
2685                                        "features (%x)", le32_to_cpu(ret));
2686                                err = -EROFS;
2687                                goto restore_opts;
2688                        }
2689
2690                        /*
2691                         * If we have an unprocessed orphan list hanging
2692                         * around from a previously readonly bdev mount,
2693                         * require a full umount & mount for now.
2694                         */
2695                        if (es->s_last_orphan) {
2696                                ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2697                                       "remount RDWR because of unprocessed "
2698                                       "orphan inode list.  Please "
2699                                       "umount & mount instead.");
2700                                err = -EINVAL;
2701                                goto restore_opts;
2702                        }
2703
2704                        /*
2705                         * Mounting a RDONLY partition read-write, so reread
2706                         * and store the current valid flag.  (It may have
2707                         * been changed by e2fsck since we originally mounted
2708                         * the partition.)
2709                         */
2710                        ext3_clear_journal_err(sb, es);
2711                        sbi->s_mount_state = le16_to_cpu(es->s_state);
2712                        if ((err = ext3_group_extend(sb, es, n_blocks_count)))
2713                                goto restore_opts;
2714                        if (!ext3_setup_super (sb, es, 0))
2715                                sb->s_flags &= ~MS_RDONLY;
2716                        enable_quota = 1;
2717                }
2718        }
2719#ifdef CONFIG_QUOTA
2720        /* Release old quota file names */
2721        for (i = 0; i < MAXQUOTAS; i++)
2722                kfree(old_opts.s_qf_names[i]);
2723#endif
2724        if (enable_quota)
2725                dquot_resume(sb, -1);
2726        return 0;
2727restore_opts:
2728        sb->s_flags = old_sb_flags;
2729        sbi->s_mount_opt = old_opts.s_mount_opt;
2730        sbi->s_resuid = old_opts.s_resuid;
2731        sbi->s_resgid = old_opts.s_resgid;
2732        sbi->s_commit_interval = old_opts.s_commit_interval;
2733#ifdef CONFIG_QUOTA
2734        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2735        for (i = 0; i < MAXQUOTAS; i++) {
2736                kfree(sbi->s_qf_names[i]);
2737                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2738        }
2739#endif
2740        return err;
2741}
2742
2743static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2744{
2745        struct super_block *sb = dentry->d_sb;
2746        struct ext3_sb_info *sbi = EXT3_SB(sb);
2747        struct ext3_super_block *es = sbi->s_es;
2748        u64 fsid;
2749
2750        if (test_opt(sb, MINIX_DF)) {
2751                sbi->s_overhead_last = 0;
2752        } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2753                unsigned long ngroups = sbi->s_groups_count, i;
2754                ext3_fsblk_t overhead = 0;
2755                smp_rmb();
2756
2757                /*
2758                 * Compute the overhead (FS structures).  This is constant
2759                 * for a given filesystem unless the number of block groups
2760                 * changes so we cache the previous value until it does.
2761                 */
2762
2763                /*
2764                 * All of the blocks before first_data_block are
2765                 * overhead
2766                 */
2767                overhead = le32_to_cpu(es->s_first_data_block);
2768
2769                /*
2770                 * Add the overhead attributed to the superblock and
2771                 * block group descriptors.  If the sparse superblocks
2772                 * feature is turned on, then not all groups have this.
2773                 */
2774                for (i = 0; i < ngroups; i++) {
2775                        overhead += ext3_bg_has_super(sb, i) +
2776                                ext3_bg_num_gdb(sb, i);
2777                        cond_resched();
2778                }
2779
2780                /*
2781                 * Every block group has an inode bitmap, a block
2782                 * bitmap, and an inode table.
2783                 */
2784                overhead += ngroups * (2 + sbi->s_itb_per_group);
2785                sbi->s_overhead_last = overhead;
2786                smp_wmb();
2787                sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
2788        }
2789
2790        buf->f_type = EXT3_SUPER_MAGIC;
2791        buf->f_bsize = sb->s_blocksize;
2792        buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2793        buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2794        buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2795        if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2796                buf->f_bavail = 0;
2797        buf->f_files = le32_to_cpu(es->s_inodes_count);
2798        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
2799        buf->f_namelen = EXT3_NAME_LEN;
2800        fsid = le64_to_cpup((void *)es->s_uuid) ^
2801               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
2802        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
2803        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
2804        return 0;
2805}
2806
/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
 * Process 1                         Process 2
 * ext3_create()                     quota_sync()
 *   journal_start()                   write_dquot()
 *   dquot_initialize()                       down(dqio_mutex)
 *     down(dqio_mutex)                    journal_start()
 *
 */
2816
2817#ifdef CONFIG_QUOTA
2818
2819static inline struct inode *dquot_to_inode(struct dquot *dquot)
2820{
2821        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
2822}
2823
2824static int ext3_write_dquot(struct dquot *dquot)
2825{
2826        int ret, err;
2827        handle_t *handle;
2828        struct inode *inode;
2829
2830        inode = dquot_to_inode(dquot);
2831        handle = ext3_journal_start(inode,
2832                                        EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2833        if (IS_ERR(handle))
2834                return PTR_ERR(handle);
2835        ret = dquot_commit(dquot);
2836        err = ext3_journal_stop(handle);
2837        if (!ret)
2838                ret = err;
2839        return ret;
2840}
2841
2842static int ext3_acquire_dquot(struct dquot *dquot)
2843{
2844        int ret, err;
2845        handle_t *handle;
2846
2847        handle = ext3_journal_start(dquot_to_inode(dquot),
2848                                        EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2849        if (IS_ERR(handle))
2850                return PTR_ERR(handle);
2851        ret = dquot_acquire(dquot);
2852        err = ext3_journal_stop(handle);
2853        if (!ret)
2854                ret = err;
2855        return ret;
2856}
2857
2858static int ext3_release_dquot(struct dquot *dquot)
2859{
2860        int ret, err;
2861        handle_t *handle;
2862
2863        handle = ext3_journal_start(dquot_to_inode(dquot),
2864                                        EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2865        if (IS_ERR(handle)) {
2866                /* Release dquot anyway to avoid endless cycle in dqput() */
2867                dquot_release(dquot);
2868                return PTR_ERR(handle);
2869        }
2870        ret = dquot_release(dquot);
2871        err = ext3_journal_stop(handle);
2872        if (!ret)
2873                ret = err;
2874        return ret;
2875}
2876
2877static int ext3_mark_dquot_dirty(struct dquot *dquot)
2878{
2879        /* Are we journaling quotas? */
2880        if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2881            EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2882                dquot_mark_dquot_dirty(dquot);
2883                return ext3_write_dquot(dquot);
2884        } else {
2885                return dquot_mark_dquot_dirty(dquot);
2886        }
2887}
2888
2889static int ext3_write_info(struct super_block *sb, int type)
2890{
2891        int ret, err;
2892        handle_t *handle;
2893
2894        /* Data block + inode block */
2895        handle = ext3_journal_start(sb->s_root->d_inode, 2);
2896        if (IS_ERR(handle))
2897                return PTR_ERR(handle);
2898        ret = dquot_commit_info(sb, type);
2899        err = ext3_journal_stop(handle);
2900        if (!ret)
2901                ret = err;
2902        return ret;
2903}
2904
2905/*
2906 * Turn on quotas during mount time - we need to find
2907 * the quota file and such...
2908 */
2909static int ext3_quota_on_mount(struct super_block *sb, int type)
2910{
2911        return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2912                                        EXT3_SB(sb)->s_jquota_fmt, type);
2913}
2914
2915/*
2916 * Standard function to be called on quota_on
2917 */
2918static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2919                         struct path *path)
2920{
2921        int err;
2922
2923        if (!test_opt(sb, QUOTA))
2924                return -EINVAL;
2925
2926        /* Quotafile not on the same filesystem? */
2927        if (path->dentry->d_sb != sb)
2928                return -EXDEV;
2929        /* Journaling quota? */
2930        if (EXT3_SB(sb)->s_qf_names[type]) {
2931                /* Quotafile not of fs root? */
2932                if (path->dentry->d_parent != sb->s_root)
2933                        ext3_msg(sb, KERN_WARNING,
2934                                "warning: Quota file not on filesystem root. "
2935                                "Journaled quota will not work.");
2936        }
2937
2938        /*
2939         * When we journal data on quota file, we have to flush journal to see
2940         * all updates to the file when we bypass pagecache...
2941         */
2942        if (ext3_should_journal_data(path->dentry->d_inode)) {
2943                /*
2944                 * We don't need to lock updates but journal_flush() could
2945                 * otherwise be livelocked...
2946                 */
2947                journal_lock_updates(EXT3_SB(sb)->s_journal);
2948                err = journal_flush(EXT3_SB(sb)->s_journal);
2949                journal_unlock_updates(EXT3_SB(sb)->s_journal);
2950                if (err)
2951                        return err;
2952        }
2953
2954        return dquot_quota_on(sb, type, format_id, path);
2955}
2956
2957/* Read data from quotafile - avoid pagecache and such because we cannot afford
2958 * acquiring the locks... As quota files are never truncated and quota code
2959 * itself serializes the operations (and no one else should touch the files)
2960 * we don't have to be afraid of races */
2961static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2962                               size_t len, loff_t off)
2963{
2964        struct inode *inode = sb_dqopt(sb)->files[type];
2965        sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2966        int err = 0;
2967        int offset = off & (sb->s_blocksize - 1);
2968        int tocopy;
2969        size_t toread;
2970        struct buffer_head *bh;
2971        loff_t i_size = i_size_read(inode);
2972
2973        if (off > i_size)
2974                return 0;
2975        if (off+len > i_size)
2976                len = i_size-off;
2977        toread = len;
2978        while (toread > 0) {
2979                tocopy = sb->s_blocksize - offset < toread ?
2980                                sb->s_blocksize - offset : toread;
2981                bh = ext3_bread(NULL, inode, blk, 0, &err);
2982                if (err)
2983                        return err;
2984                if (!bh)        /* A hole? */
2985                        memset(data, 0, tocopy);
2986                else
2987                        memcpy(data, bh->b_data+offset, tocopy);
2988                brelse(bh);
2989                offset = 0;
2990                toread -= tocopy;
2991                data += tocopy;
2992                blk++;
2993        }
2994        return len;
2995}
2996
2997/* Write to quotafile (we know the transaction is already started and has
2998 * enough credits) */
2999static ssize_t ext3_quota_write(struct super_block *sb, int type,
3000                                const char *data, size_t len, loff_t off)
3001{
3002        struct inode *inode = sb_dqopt(sb)->files[type];
3003        sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
3004        int err = 0;
3005        int offset = off & (sb->s_blocksize - 1);
3006        int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
3007        struct buffer_head *bh;
3008        handle_t *handle = journal_current_handle();
3009
3010        if (!handle) {
3011                ext3_msg(sb, KERN_WARNING,
3012                        "warning: quota write (off=%llu, len=%llu)"
3013                        " cancelled because transaction is not started.",
3014                        (unsigned long long)off, (unsigned long long)len);
3015                return -EIO;
3016        }
3017
3018        /*
3019         * Since we account only one data block in transaction credits,
3020         * then it is impossible to cross a block boundary.
3021         */
3022        if (sb->s_blocksize - offset < len) {
3023                ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3024                        " cancelled because not block aligned",
3025                        (unsigned long long)off, (unsigned long long)len);
3026                return -EIO;
3027        }
3028        bh = ext3_bread(handle, inode, blk, 1, &err);
3029        if (!bh)
3030                goto out;
3031        if (journal_quota) {
3032                err = ext3_journal_get_write_access(handle, bh);
3033                if (err) {
3034                        brelse(bh);
3035                        goto out;
3036                }
3037        }
3038        lock_buffer(bh);
3039        memcpy(bh->b_data+offset, data, len);
3040        flush_dcache_page(bh->b_page);
3041        unlock_buffer(bh);
3042        if (journal_quota)
3043                err = ext3_journal_dirty_metadata(handle, bh);
3044        else {
3045                /* Always do at least ordered writes for quotas */
3046                err = ext3_journal_dirty_data(handle, bh);
3047                mark_buffer_dirty(bh);
3048        }
3049        brelse(bh);
3050out:
3051        if (err)
3052                return err;
3053        if (inode->i_size < off + len) {
3054                i_size_write(inode, off + len);
3055                EXT3_I(inode)->i_disksize = inode->i_size;
3056        }
3057        inode->i_version++;
3058        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3059        ext3_mark_inode_dirty(handle, inode);
3060        return len;
3061}
3062
3063#endif
3064
3065static struct dentry *ext3_mount(struct file_system_type *fs_type,
3066        int flags, const char *dev_name, void *data)
3067{
3068        return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3069}
3070
3071static struct file_system_type ext3_fs_type = {
3072        .owner          = THIS_MODULE,
3073        .name           = "ext3",
3074        .mount          = ext3_mount,
3075        .kill_sb        = kill_block_super,
3076        .fs_flags       = FS_REQUIRES_DEV,
3077};
3078MODULE_ALIAS_FS("ext3");
3079
3080static int __init init_ext3_fs(void)
3081{
3082        int err = init_ext3_xattr();
3083        if (err)
3084                return err;
3085        err = init_inodecache();
3086        if (err)
3087                goto out1;
3088        err = register_filesystem(&ext3_fs_type);
3089        if (err)
3090                goto out;
3091        return 0;
3092out:
3093        destroy_inodecache();
3094out1:
3095        exit_ext3_xattr();
3096        return err;
3097}
3098
3099static void __exit exit_ext3_fs(void)
3100{
3101        unregister_filesystem(&ext3_fs_type);
3102        destroy_inodecache();
3103        exit_ext3_xattr();
3104}
3105
3106MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3107MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
3108MODULE_LICENSE("GPL");
3109module_init(init_ext3_fs)
3110module_exit(exit_ext3_fs)
3111