LXR linux/fs/ext4/super.c

   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/jbd2.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/parser.h>
  29#include <linux/smp_lock.h>
  30#include <linux/buffer_head.h>
  31#include <linux/exportfs.h>
  32#include <linux/vfs.h>
  33#include <linux/random.h>
  34#include <linux/mount.h>
  35#include <linux/namei.h>
  36#include <linux/quotaops.h>
  37#include <linux/seq_file.h>
  38#include <linux/proc_fs.h>
  39#include <linux/ctype.h>
  40#include <linux/log2.h>
  41#include <linux/crc16.h>
  42#include <asm/uaccess.h>
  43
  44#include "ext4.h"
  45#include "ext4_jbd2.h"
  46#include "xattr.h"
  47#include "acl.h"
  48#include "mballoc.h"
  49
  50#define CREATE_TRACE_POINTS
  51#include <trace/events/ext4.h>
  52
  53struct proc_dir_entry *ext4_proc_root;
  54static struct kset *ext4_kset;
  55
  56static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  57                             unsigned long journal_devnum);
  58static int ext4_commit_super(struct super_block *sb, int sync);
  59static void ext4_mark_recovery_complete(struct super_block *sb,
  60                                        struct ext4_super_block *es);
  61static void ext4_clear_journal_err(struct super_block *sb,
  62                                   struct ext4_super_block *es);
  63static int ext4_sync_fs(struct super_block *sb, int wait);
  64static const char *ext4_decode_error(struct super_block *sb, int errno,
  65                                     char nbuf[16]);
  66static int ext4_remount(struct super_block *sb, int *flags, char *data);
  67static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  68static int ext4_unfreeze(struct super_block *sb);
  69static void ext4_write_super(struct super_block *sb);
  70static int ext4_freeze(struct super_block *sb);
  71
  72
  73ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
  74                               struct ext4_group_desc *bg)
  75{
  76        return le32_to_cpu(bg->bg_block_bitmap_lo) |
  77                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  78                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
  79}
  80
  81ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
  82                               struct ext4_group_desc *bg)
  83{
  84        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
  85                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  86                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
  87}
  88
  89ext4_fsblk_t ext4_inode_table(struct super_block *sb,
  90                              struct ext4_group_desc *bg)
  91{
  92        return le32_to_cpu(bg->bg_inode_table_lo) |
  93                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  94                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  95}
  96
  97__u32 ext4_free_blks_count(struct super_block *sb,
  98                              struct ext4_group_desc *bg)
  99{
 100        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 101                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 102                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 103}
 104
 105__u32 ext4_free_inodes_count(struct super_block *sb,
 106                              struct ext4_group_desc *bg)
 107{
 108        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 109                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 110                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 111}
 112
 113__u32 ext4_used_dirs_count(struct super_block *sb,
 114                              struct ext4_group_desc *bg)
 115{
 116        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 117                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 118                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 119}
 120
 121__u32 ext4_itable_unused_count(struct super_block *sb,
 122                              struct ext4_group_desc *bg)
 123{
 124        return le16_to_cpu(bg->bg_itable_unused_lo) |
 125                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 126                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 127}
 128
 129void ext4_block_bitmap_set(struct super_block *sb,
 130                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 131{
 132        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 133        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 134                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 135}
 136
 137void ext4_inode_bitmap_set(struct super_block *sb,
 138                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 139{
 140        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 141        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 142                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 143}
 144
 145void ext4_inode_table_set(struct super_block *sb,
 146                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 147{
 148        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 149        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 150                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 151}
 152
 153void ext4_free_blks_set(struct super_block *sb,
 154                          struct ext4_group_desc *bg, __u32 count)
 155{
 156        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 157        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 158                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 159}
 160
 161void ext4_free_inodes_set(struct super_block *sb,
 162                          struct ext4_group_desc *bg, __u32 count)
 163{
 164        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 165        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 166                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 167}
 168
 169void ext4_used_dirs_set(struct super_block *sb,
 170                          struct ext4_group_desc *bg, __u32 count)
 171{
 172        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 173        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 174                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 175}
 176
 177void ext4_itable_unused_set(struct super_block *sb,
 178                          struct ext4_group_desc *bg, __u32 count)
 179{
 180        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 181        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 182                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 183}
 184
 185
 186/* Just increment the non-pointer handle value */
 187static handle_t *ext4_get_nojournal(void)
 188{
 189        handle_t *handle = current->journal_info;
 190        unsigned long ref_cnt = (unsigned long)handle;
 191
 192        BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
 193
 194        ref_cnt++;
 195        handle = (handle_t *)ref_cnt;
 196
 197        current->journal_info = handle;
 198        return handle;
 199}
 200
 201
 202/* Decrement the non-pointer handle value */
 203static void ext4_put_nojournal(handle_t *handle)
 204{
 205        unsigned long ref_cnt = (unsigned long)handle;
 206
 207        BUG_ON(ref_cnt == 0);
 208
 209        ref_cnt--;
 210        handle = (handle_t *)ref_cnt;
 211
 212        current->journal_info = handle;
 213}
 214
 215/*
 216 * Wrappers for jbd2_journal_start/end.
 217 *
 218 * The only special thing we need to do here is to make sure that all
 219 * journal_end calls result in the superblock being marked dirty, so
 220 * that sync() will call the filesystem's write_super callback if
 221 * appropriate.
 222 */
 223handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 224{
 225        journal_t *journal;
 226
 227        if (sb->s_flags & MS_RDONLY)
 228                return ERR_PTR(-EROFS);
 229
 230        /* Special case here: if the journal has aborted behind our
 231         * backs (eg. EIO in the commit thread), then we still need to
 232         * take the FS itself readonly cleanly. */
 233        journal = EXT4_SB(sb)->s_journal;
 234        if (journal) {
 235                if (is_journal_aborted(journal)) {
 236                        ext4_abort(sb, __func__, "Detected aborted journal");
 237                        return ERR_PTR(-EROFS);
 238                }
 239                return jbd2_journal_start(journal, nblocks);
 240        }
 241        return ext4_get_nojournal();
 242}
 243
 244/*
 245 * The only special thing we need to do here is to make sure that all
 246 * jbd2_journal_stop calls result in the superblock being marked dirty, so
 247 * that sync() will call the filesystem's write_super callback if
 248 * appropriate.
 249 */
 250int __ext4_journal_stop(const char *where, handle_t *handle)
 251{
 252        struct super_block *sb;
 253        int err;
 254        int rc;
 255
 256        if (!ext4_handle_valid(handle)) {
 257                ext4_put_nojournal(handle);
 258                return 0;
 259        }
 260        sb = handle->h_transaction->t_journal->j_private;
 261        err = handle->h_err;
 262        rc = jbd2_journal_stop(handle);
 263
 264        if (!err)
 265                err = rc;
 266        if (err)
 267                __ext4_std_error(sb, where, err);
 268        return err;
 269}
 270
 271void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 272                struct buffer_head *bh, handle_t *handle, int err)
 273{
 274        char nbuf[16];
 275        const char *errstr = ext4_decode_error(NULL, err, nbuf);
 276
 277        BUG_ON(!ext4_handle_valid(handle));
 278
 279        if (bh)
 280                BUFFER_TRACE(bh, "abort");
 281
 282        if (!handle->h_err)
 283                handle->h_err = err;
 284
 285        if (is_handle_aborted(handle))
 286                return;
 287
 288        printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
 289               caller, errstr, err_fn);
 290
 291        jbd2_journal_abort_handle(handle);
 292}
 293
 294/* Deal with the reporting of failure conditions on a filesystem such as
 295 * inconsistencies detected or read IO failures.
 296 *
 297 * On ext2, we can store the error state of the filesystem in the
 298 * superblock.  That is not possible on ext4, because we may have other
 299 * write ordering constraints on the superblock which prevent us from
 300 * writing it out straight away; and given that the journal is about to
 301 * be aborted, we can't rely on the current, or future, transactions to
 302 * write out the superblock safely.
 303 *
 304 * We'll just use the jbd2_journal_abort() error code to record an error in
 305 * the journal instead.  On recovery, the journal will compain about
 306 * that error until we've noted it down and cleared it.
 307 */
 308
 309static void ext4_handle_error(struct super_block *sb)
 310{
 311        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 312
 313        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 314        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 315
 316        if (sb->s_flags & MS_RDONLY)
 317                return;
 318
 319        if (!test_opt(sb, ERRORS_CONT)) {
 320                journal_t *journal = EXT4_SB(sb)->s_journal;
 321
 322                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 323                if (journal)
 324                        jbd2_journal_abort(journal, -EIO);
 325        }
 326        if (test_opt(sb, ERRORS_RO)) {
 327                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 328                sb->s_flags |= MS_RDONLY;
 329        }
 330        ext4_commit_super(sb, 1);
 331        if (test_opt(sb, ERRORS_PANIC))
 332                panic("EXT4-fs (device %s): panic forced after error\n",
 333                        sb->s_id);
 334}
 335
 336void ext4_error(struct super_block *sb, const char *function,
 337                const char *fmt, ...)
 338{
 339        va_list args;
 340
 341        va_start(args, fmt);
 342        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 343        vprintk(fmt, args);
 344        printk("\n");
 345        va_end(args);
 346
 347        ext4_handle_error(sb);
 348}
 349
 350static const char *ext4_decode_error(struct super_block *sb, int errno,
 351                                     char nbuf[16])
 352{
 353        char *errstr = NULL;
 354
 355        switch (errno) {
 356        case -EIO:
 357                errstr = "IO failure";
 358                break;
 359        case -ENOMEM:
 360                errstr = "Out of memory";
 361                break;
 362        case -EROFS:
 363                if (!sb || (EXT4_SB(sb)->s_journal &&
 364                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 365                        errstr = "Journal has aborted";
 366                else
 367                        errstr = "Readonly filesystem";
 368                break;
 369        default:
 370                /* If the caller passed in an extra buffer for unknown
 371                 * errors, textualise them now.  Else we just return
 372                 * NULL. */
 373                if (nbuf) {
 374                        /* Check for truncated error codes... */
 375                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 376                                errstr = nbuf;
 377                }
 378                break;
 379        }
 380
 381        return errstr;
 382}
 383
 384/* __ext4_std_error decodes expected errors from journaling functions
 385 * automatically and invokes the appropriate error response.  */
 386
 387void __ext4_std_error(struct super_block *sb, const char *function, int errno)
 388{
 389        char nbuf[16];
 390        const char *errstr;
 391
 392        /* Special case: if the error is EROFS, and we're not already
 393         * inside a transaction, then there's really no point in logging
 394         * an error. */
 395        if (errno == -EROFS && journal_current_handle() == NULL &&
 396            (sb->s_flags & MS_RDONLY))
 397                return;
 398
 399        errstr = ext4_decode_error(sb, errno, nbuf);
 400        printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
 401               sb->s_id, function, errstr);
 402
 403        ext4_handle_error(sb);
 404}
 405
 406/*
 407 * ext4_abort is a much stronger failure handler than ext4_error.  The
 408 * abort function may be used to deal with unrecoverable failures such
 409 * as journal IO errors or ENOMEM at a critical moment in log management.
 410 *
 411 * We unconditionally force the filesystem into an ABORT|READONLY state,
 412 * unless the error response on the fs has been set to panic in which
 413 * case we take the easy way out and panic immediately.
 414 */
 415
 416void ext4_abort(struct super_block *sb, const char *function,
 417                const char *fmt, ...)
 418{
 419        va_list args;
 420
 421        va_start(args, fmt);
 422        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 423        vprintk(fmt, args);
 424        printk("\n");
 425        va_end(args);
 426
 427        if (test_opt(sb, ERRORS_PANIC))
 428                panic("EXT4-fs panic from previous error\n");
 429
 430        if (sb->s_flags & MS_RDONLY)
 431                return;
 432
 433        ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 434        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 435        sb->s_flags |= MS_RDONLY;
 436        EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 437        if (EXT4_SB(sb)->s_journal)
 438                jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 439}
 440
 441void ext4_msg (struct super_block * sb, const char *prefix,
 442                   const char *fmt, ...)
 443{
 444        va_list args;
 445
 446        va_start(args, fmt);
 447        printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
 448        vprintk(fmt, args);
 449        printk("\n");
 450        va_end(args);
 451}
 452
 453void ext4_warning(struct super_block *sb, const char *function,
 454                  const char *fmt, ...)
 455{
 456        va_list args;
 457
 458        va_start(args, fmt);
 459        printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
 460               sb->s_id, function);
 461        vprintk(fmt, args);
 462        printk("\n");
 463        va_end(args);
 464}
 465
 466void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
 467                           const char *function, const char *fmt, ...)
 468__releases(bitlock)
 469__acquires(bitlock)
 470{
 471        va_list args;
 472        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 473
 474        va_start(args, fmt);
 475        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 476        vprintk(fmt, args);
 477        printk("\n");
 478        va_end(args);
 479
 480        if (test_opt(sb, ERRORS_CONT)) {
 481                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 482                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 483                ext4_commit_super(sb, 0);
 484                return;
 485        }
 486        ext4_unlock_group(sb, grp);
 487        ext4_handle_error(sb);
 488        /*
 489         * We only get here in the ERRORS_RO case; relocking the group
 490         * may be dangerous, but nothing bad will happen since the
 491         * filesystem will have already been marked read/only and the
 492         * journal has been aborted.  We return 1 as a hint to callers
 493         * who might what to use the return value from
 494         * ext4_grp_locked_error() to distinguish beween the
 495         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 496         * aggressively from the ext4 function in question, with a
 497         * more appropriate error code.
 498         */
 499        ext4_lock_group(sb, grp);
 500        return;
 501}
 502
 503void ext4_update_dynamic_rev(struct super_block *sb)
 504{
 505        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 506
 507        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 508                return;
 509
 510        ext4_warning(sb, __func__,
 511                     "updating to rev %d because of new feature flag, "
 512                     "running e2fsck is recommended",
 513                     EXT4_DYNAMIC_REV);
 514
 515        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 516        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 517        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 518        /* leave es->s_feature_*compat flags alone */
 519        /* es->s_uuid will be set by e2fsck if empty */
 520
 521        /*
 522         * The rest of the superblock fields should be zero, and if not it
 523         * means they are likely already in use, so leave them alone.  We
 524         * can leave it up to e2fsck to clean up any inconsistencies there.
 525         */
 526}
 527
 528/*
 529 * Open the external journal device
 530 */
 531static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 532{
 533        struct block_device *bdev;
 534        char b[BDEVNAME_SIZE];
 535
 536        bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 537        if (IS_ERR(bdev))
 538                goto fail;
 539        return bdev;
 540
 541fail:
 542        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 543                        __bdevname(dev, b), PTR_ERR(bdev));
 544        return NULL;
 545}
 546
 547/*
 548 * Release the journal device
 549 */
 550static int ext4_blkdev_put(struct block_device *bdev)
 551{
 552        bd_release(bdev);
 553        return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 554}
 555
 556static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 557{
 558        struct block_device *bdev;
 559        int ret = -ENODEV;
 560
 561        bdev = sbi->journal_bdev;
 562        if (bdev) {
 563                ret = ext4_blkdev_put(bdev);
 564                sbi->journal_bdev = NULL;
 565        }
 566        return ret;
 567}
 568
 569static inline struct inode *orphan_list_entry(struct list_head *l)
 570{
 571        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 572}
 573
 574static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 575{
 576        struct list_head *l;
 577
 578        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 579                 le32_to_cpu(sbi->s_es->s_last_orphan));
 580
 581        printk(KERN_ERR "sb_info orphan list:\n");
 582        list_for_each(l, &sbi->s_orphan) {
 583                struct inode *inode = orphan_list_entry(l);
 584                printk(KERN_ERR "  "
 585                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 586                       inode->i_sb->s_id, inode->i_ino, inode,
 587                       inode->i_mode, inode->i_nlink,
 588                       NEXT_ORPHAN(inode));
 589        }
 590}
 591
 592static void ext4_put_super(struct super_block *sb)
 593{
 594        struct ext4_sb_info *sbi = EXT4_SB(sb);
 595        struct ext4_super_block *es = sbi->s_es;
 596        int i, err;
 597
 598        flush_workqueue(sbi->dio_unwritten_wq);
 599        destroy_workqueue(sbi->dio_unwritten_wq);
 600
 601        lock_super(sb);
 602        lock_kernel();
 603        if (sb->s_dirt)
 604                ext4_commit_super(sb, 1);
 605
 606        ext4_release_system_zone(sb);
 607        ext4_mb_release(sb);
 608        ext4_ext_release(sb);
 609        ext4_xattr_put_super(sb);
 610        if (sbi->s_journal) {
 611                err = jbd2_journal_destroy(sbi->s_journal);
 612                sbi->s_journal = NULL;
 613                if (err < 0)
 614                        ext4_abort(sb, __func__,
 615                                   "Couldn't clean up the journal");
 616        }
 617        if (!(sb->s_flags & MS_RDONLY)) {
 618                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 619                es->s_state = cpu_to_le16(sbi->s_mount_state);
 620                ext4_commit_super(sb, 1);
 621        }
 622        if (sbi->s_proc) {
 623                remove_proc_entry(sb->s_id, ext4_proc_root);
 624        }
 625        kobject_del(&sbi->s_kobj);
 626
 627        for (i = 0; i < sbi->s_gdb_count; i++)
 628                brelse(sbi->s_group_desc[i]);
 629        kfree(sbi->s_group_desc);
 630        if (is_vmalloc_addr(sbi->s_flex_groups))
 631                vfree(sbi->s_flex_groups);
 632        else
 633                kfree(sbi->s_flex_groups);
 634        percpu_counter_destroy(&sbi->s_freeblocks_counter);
 635        percpu_counter_destroy(&sbi->s_freeinodes_counter);
 636        percpu_counter_destroy(&sbi->s_dirs_counter);
 637        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 638        brelse(sbi->s_sbh);
 639#ifdef CONFIG_QUOTA
 640        for (i = 0; i < MAXQUOTAS; i++)
 641                kfree(sbi->s_qf_names[i]);
 642#endif
 643
 644        /* Debugging code just in case the in-memory inode orphan list
 645         * isn't empty.  The on-disk one can be non-empty if we've
 646         * detected an error and taken the fs readonly, but the
 647         * in-memory list had better be clean by this point. */
 648        if (!list_empty(&sbi->s_orphan))
 649                dump_orphan_list(sb, sbi);
 650        J_ASSERT(list_empty(&sbi->s_orphan));
 651
 652        invalidate_bdev(sb->s_bdev);
 653        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 654                /*
 655                 * Invalidate the journal device's buffers.  We don't want them
 656                 * floating about in memory - the physical journal device may
 657                 * hotswapped, and it breaks the `ro-after' testing code.
 658                 */
 659                sync_blockdev(sbi->journal_bdev);
 660                invalidate_bdev(sbi->journal_bdev);
 661                ext4_blkdev_remove(sbi);
 662        }
 663        sb->s_fs_info = NULL;
 664        /*
 665         * Now that we are completely done shutting down the
 666         * superblock, we need to actually destroy the kobject.
 667         */
 668        unlock_kernel();
 669        unlock_super(sb);
 670        kobject_put(&sbi->s_kobj);
 671        wait_for_completion(&sbi->s_kobj_unregister);
 672        kfree(sbi->s_blockgroup_lock);
 673        kfree(sbi);
 674}
 675
 676static struct kmem_cache *ext4_inode_cachep;
 677
 678/*
 679 * Called inside transaction, so use GFP_NOFS
 680 */
 681static struct inode *ext4_alloc_inode(struct super_block *sb)
 682{
 683        struct ext4_inode_info *ei;
 684
 685        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 686        if (!ei)
 687                return NULL;
 688
 689        ei->vfs_inode.i_version = 1;
 690        ei->vfs_inode.i_data.writeback_index = 0;
 691        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 692        INIT_LIST_HEAD(&ei->i_prealloc_list);
 693        spin_lock_init(&ei->i_prealloc_lock);
 694        /*
 695         * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
 696         * therefore it can be null here.  Don't check it, just initialize
 697         * jinode.
 698         */
 699        jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 700        ei->i_reserved_data_blocks = 0;
 701        ei->i_reserved_meta_blocks = 0;
 702        ei->i_allocated_meta_blocks = 0;
 703        ei->i_delalloc_reserved_flag = 0;
 704        spin_lock_init(&(ei->i_block_reservation_lock));
 705        INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
 706        ei->cur_aio_dio = NULL;
 707
 708        return &ei->vfs_inode;
 709}
 710
 711static void ext4_destroy_inode(struct inode *inode)
 712{
 713        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 714                ext4_msg(inode->i_sb, KERN_ERR,
 715                         "Inode %lu (%p): orphan list check failed!",
 716                         inode->i_ino, EXT4_I(inode));
 717                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 718                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 719                                true);
 720                dump_stack();
 721        }
 722        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 723}
 724
 725static void init_once(void *foo)
 726{
 727        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 728
 729        INIT_LIST_HEAD(&ei->i_orphan);
 730#ifdef CONFIG_EXT4_FS_XATTR
 731        init_rwsem(&ei->xattr_sem);
 732#endif
 733        init_rwsem(&ei->i_data_sem);
 734        inode_init_once(&ei->vfs_inode);
 735}
 736
 737static int init_inodecache(void)
 738{
 739        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 740                                             sizeof(struct ext4_inode_info),
 741                                             0, (SLAB_RECLAIM_ACCOUNT|
 742                                                SLAB_MEM_SPREAD),
 743                                             init_once);
 744        if (ext4_inode_cachep == NULL)
 745                return -ENOMEM;
 746        return 0;
 747}
 748
 749static void destroy_inodecache(void)
 750{
 751        kmem_cache_destroy(ext4_inode_cachep);
 752}
 753
 754static void ext4_clear_inode(struct inode *inode)
 755{
 756        ext4_discard_preallocations(inode);
 757        if (EXT4_JOURNAL(inode))
 758                jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
 759                                       &EXT4_I(inode)->jinode);
 760}
 761
 762static inline void ext4_show_quota_options(struct seq_file *seq,
 763                                           struct super_block *sb)
 764{
 765#if defined(CONFIG_QUOTA)
 766        struct ext4_sb_info *sbi = EXT4_SB(sb);
 767
 768        if (sbi->s_jquota_fmt)
 769                seq_printf(seq, ",jqfmt=%s",
 770                (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
 771
 772        if (sbi->s_qf_names[USRQUOTA])
 773                seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 774
 775        if (sbi->s_qf_names[GRPQUOTA])
 776                seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 777
 778        if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
 779                seq_puts(seq, ",usrquota");
 780
 781        if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
 782                seq_puts(seq, ",grpquota");
 783#endif
 784}
 785
 786/*
 787 * Show an option if
 788 *  - it's set to a non-default value OR
 789 *  - if the per-sb default is different from the global default
 790 */
 791static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 792{
 793        int def_errors;
 794        unsigned long def_mount_opts;
 795        struct super_block *sb = vfs->mnt_sb;
 796        struct ext4_sb_info *sbi = EXT4_SB(sb);
 797        struct ext4_super_block *es = sbi->s_es;
 798
 799        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 800        def_errors     = le16_to_cpu(es->s_errors);
 801
 802        if (sbi->s_sb_block != 1)
 803                seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
 804        if (test_opt(sb, MINIX_DF))
 805                seq_puts(seq, ",minixdf");
 806        if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
 807                seq_puts(seq, ",grpid");
 808        if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
 809                seq_puts(seq, ",nogrpid");
 810        if (sbi->s_resuid != EXT4_DEF_RESUID ||
 811            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
 812                seq_printf(seq, ",resuid=%u", sbi->s_resuid);
 813        }
 814        if (sbi->s_resgid != EXT4_DEF_RESGID ||
 815            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
 816                seq_printf(seq, ",resgid=%u", sbi->s_resgid);
 817        }
 818        if (test_opt(sb, ERRORS_RO)) {
 819                if (def_errors == EXT4_ERRORS_PANIC ||
 820                    def_errors == EXT4_ERRORS_CONTINUE) {
 821                        seq_puts(seq, ",errors=remount-ro");
 822                }
 823        }
 824        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
 825                seq_puts(seq, ",errors=continue");
 826        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
 827                seq_puts(seq, ",errors=panic");
 828        if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
 829                seq_puts(seq, ",nouid32");
 830        if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
 831                seq_puts(seq, ",debug");
 832        if (test_opt(sb, OLDALLOC))
 833                seq_puts(seq, ",oldalloc");
 834#ifdef CONFIG_EXT4_FS_XATTR
 835        if (test_opt(sb, XATTR_USER) &&
 836                !(def_mount_opts & EXT4_DEFM_XATTR_USER))
 837                seq_puts(seq, ",user_xattr");
 838        if (!test_opt(sb, XATTR_USER) &&
 839            (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
 840                seq_puts(seq, ",nouser_xattr");
 841        }
 842#endif
 843#ifdef CONFIG_EXT4_FS_POSIX_ACL
 844        if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
 845                seq_puts(seq, ",acl");
 846        if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
 847                seq_puts(seq, ",noacl");
 848#endif
 849        if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
 850                seq_printf(seq, ",commit=%u",
 851                           (unsigned) (sbi->s_commit_interval / HZ));
 852        }
 853        if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
 854                seq_printf(seq, ",min_batch_time=%u",
 855                           (unsigned) sbi->s_min_batch_time);
 856        }
 857        if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
 858                seq_printf(seq, ",max_batch_time=%u",
 859                           (unsigned) sbi->s_min_batch_time);
 860        }
 861
 862        /*
 863         * We're changing the default of barrier mount option, so
 864         * let's always display its mount state so it's clear what its
 865         * status is.
 866         */
 867        seq_puts(seq, ",barrier=");
 868        seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 869        if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 870                seq_puts(seq, ",journal_async_commit");
 871        if (test_opt(sb, NOBH))
 872                seq_puts(seq, ",nobh");
 873        if (test_opt(sb, I_VERSION))
 874                seq_puts(seq, ",i_version");
 875        if (!test_opt(sb, DELALLOC))
 876                seq_puts(seq, ",nodelalloc");
 877
 878
 879        if (sbi->s_stripe)
 880                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 881        /*
 882         * journal mode get enabled in different ways
 883         * So just print the value even if we didn't specify it
 884         */
 885        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 886                seq_puts(seq, ",data=journal");
 887        else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 888                seq_puts(seq, ",data=ordered");
 889        else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 890                seq_puts(seq, ",data=writeback");
 891
 892        if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
 893                seq_printf(seq, ",inode_readahead_blks=%u",
 894                           sbi->s_inode_readahead_blks);
 895
 896        if (test_opt(sb, DATA_ERR_ABORT))
 897                seq_puts(seq, ",data_err=abort");
 898
 899        if (test_opt(sb, NO_AUTO_DA_ALLOC))
 900                seq_puts(seq, ",noauto_da_alloc");
 901
 902        ext4_show_quota_options(seq, sb);
 903
 904        return 0;
 905}
 906
 907static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 908                                        u64 ino, u32 generation)
 909{
 910        struct inode *inode;
 911
 912        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 913                return ERR_PTR(-ESTALE);
 914        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 915                return ERR_PTR(-ESTALE);
 916
 917        /* iget isn't really right if the inode is currently unallocated!!
 918         *
 919         * ext4_read_inode will return a bad_inode if the inode had been
 920         * deleted, so we should be safe.
 921         *
 922         * Currently we don't know the generation for parent directory, so
 923         * a generation of 0 means "accept any"
 924         */
 925        inode = ext4_iget(sb, ino);
 926        if (IS_ERR(inode))
 927                return ERR_CAST(inode);
 928        if (generation && inode->i_generation != generation) {
 929                iput(inode);
 930                return ERR_PTR(-ESTALE);
 931        }
 932
 933        return inode;
 934}
 935
 936static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
 937                                        int fh_len, int fh_type)
 938{
 939        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 940                                    ext4_nfs_get_inode);
 941}
 942
 943static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 944                                        int fh_len, int fh_type)
 945{
 946        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 947                                    ext4_nfs_get_inode);
 948}
 949
 950/*
 951 * Try to release metadata pages (indirect blocks, directories) which are
 952 * mapped via the block device.  Since these pages could have journal heads
 953 * which would prevent try_to_free_buffers() from freeing them, we must use
 954 * jbd2 layer's try_to_free_buffers() function to release them.
 955 */
 956static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 957                                 gfp_t wait)
 958{
 959        journal_t *journal = EXT4_SB(sb)->s_journal;
 960
 961        WARN_ON(PageChecked(page));
 962        if (!page_has_buffers(page))
 963                return 0;
 964        if (journal)
 965                return jbd2_journal_try_to_free_buffers(journal, page,
 966                                                        wait & ~__GFP_WAIT);
 967        return try_to_free_buffers(page);
 968}
 969
 970#ifdef CONFIG_QUOTA
 971#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
 972#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 973
 974static int ext4_write_dquot(struct dquot *dquot);
 975static int ext4_acquire_dquot(struct dquot *dquot);
 976static int ext4_release_dquot(struct dquot *dquot);
 977static int ext4_mark_dquot_dirty(struct dquot *dquot);
 978static int ext4_write_info(struct super_block *sb, int type);
 979static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 980                                char *path, int remount);
 981static int ext4_quota_on_mount(struct super_block *sb, int type);
 982static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 983                               size_t len, loff_t off);
 984static ssize_t ext4_quota_write(struct super_block *sb, int type,
 985                                const char *data, size_t len, loff_t off);
 986
 987static const struct dquot_operations ext4_quota_operations = {
 988        .initialize     = dquot_initialize,
 989        .drop           = dquot_drop,
 990        .alloc_space    = dquot_alloc_space,
 991        .reserve_space  = dquot_reserve_space,
 992        .claim_space    = dquot_claim_space,
 993        .release_rsv    = dquot_release_reserved_space,
 994        .get_reserved_space = ext4_get_reserved_space,
 995        .alloc_inode    = dquot_alloc_inode,
 996        .free_space     = dquot_free_space,
 997        .free_inode     = dquot_free_inode,
 998        .transfer       = dquot_transfer,
 999        .write_dquot    = ext4_write_dquot,
1000        .acquire_dquot  = ext4_acquire_dquot,

1001        .release_dquot  = ext4_release_dquot,
1002        .mark_dirty     = ext4_mark_dquot_dirty,
1003        .write_info     = ext4_write_info,
1004        .alloc_dquot    = dquot_alloc,
1005        .destroy_dquot  = dquot_destroy,
1006};
1007
1008static const struct quotactl_ops ext4_qctl_operations = {
1009        .quota_on       = ext4_quota_on,
1010        .quota_off      = vfs_quota_off,
1011        .quota_sync     = vfs_quota_sync,
1012        .get_info       = vfs_get_dqinfo,
1013        .set_info       = vfs_set_dqinfo,
1014        .get_dqblk      = vfs_get_dqblk,
1015        .set_dqblk      = vfs_set_dqblk
1016};
1017#endif
1018
1019static const struct super_operations ext4_sops = {
1020        .alloc_inode    = ext4_alloc_inode,
1021        .destroy_inode  = ext4_destroy_inode,
1022        .write_inode    = ext4_write_inode,
1023        .dirty_inode    = ext4_dirty_inode,
1024        .delete_inode   = ext4_delete_inode,
1025        .put_super      = ext4_put_super,
1026        .sync_fs        = ext4_sync_fs,
1027        .freeze_fs      = ext4_freeze,
1028        .unfreeze_fs    = ext4_unfreeze,
1029        .statfs         = ext4_statfs,
1030        .remount_fs     = ext4_remount,
1031        .clear_inode    = ext4_clear_inode,
1032        .show_options   = ext4_show_options,
1033#ifdef CONFIG_QUOTA
1034        .quota_read     = ext4_quota_read,
1035        .quota_write    = ext4_quota_write,
1036#endif
1037        .bdev_try_to_free_page = bdev_try_to_free_page,
1038};
1039
1040static const struct super_operations ext4_nojournal_sops = {
1041        .alloc_inode    = ext4_alloc_inode,
1042        .destroy_inode  = ext4_destroy_inode,
1043        .write_inode    = ext4_write_inode,
1044        .dirty_inode    = ext4_dirty_inode,
1045        .delete_inode   = ext4_delete_inode,
1046        .write_super    = ext4_write_super,
1047        .put_super      = ext4_put_super,
1048        .statfs         = ext4_statfs,
1049        .remount_fs     = ext4_remount,
1050        .clear_inode    = ext4_clear_inode,
1051        .show_options   = ext4_show_options,
1052#ifdef CONFIG_QUOTA
1053        .quota_read     = ext4_quota_read,
1054        .quota_write    = ext4_quota_write,
1055#endif
1056        .bdev_try_to_free_page = bdev_try_to_free_page,
1057};
1058
1059static const struct export_operations ext4_export_ops = {
1060        .fh_to_dentry = ext4_fh_to_dentry,
1061        .fh_to_parent = ext4_fh_to_parent,
1062        .get_parent = ext4_get_parent,
1063};
1064
/*
 * Mount option tokens.  These are the values stored in the `tokens` match
 * table and switched on in parse_options().  The enumerator order is kept
 * exactly as it was, so every numeric value is unchanged.
 */
enum {
        /* df flavour and group-id inheritance */
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,

        /* reserved-block owner, superblock location, error policy */
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,

        /* uid width, debugging, allocator choice */
        Opt_nouid32,
        Opt_debug,
        Opt_oldalloc,
        Opt_orlov,

        /* extended attributes and ACLs */
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,

        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_nobh,
        Opt_bh,

        /* commit interval and batching */
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,

        /* journal setup */
        Opt_journal_update,
        Opt_journal_dev,
        Opt_journal_checksum,
        Opt_journal_async_commit,

        /* abort flag and data mode */
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,

        /* quota */
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_quota,
        Opt_noquota,

        Opt_ignore,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,                /* paired with the NULL pattern that ends `tokens` */
        Opt_resize,

        Opt_usrquota,
        Opt_grpquota,
        Opt_i_version,

        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,

        Opt_block_validity,
        Opt_noblock_validity,

        Opt_inode_readahead_blks,
        Opt_journal_ioprio
};
1084
1085static const match_table_t tokens = {
1086        {Opt_bsd_df, "bsddf"},
1087        {Opt_minix_df, "minixdf"},
1088        {Opt_grpid, "grpid"},
1089        {Opt_grpid, "bsdgroups"},
1090        {Opt_nogrpid, "nogrpid"},
1091        {Opt_nogrpid, "sysvgroups"},
1092        {Opt_resgid, "resgid=%u"},
1093        {Opt_resuid, "resuid=%u"},
1094        {Opt_sb, "sb=%u"},
1095        {Opt_err_cont, "errors=continue"},
1096        {Opt_err_panic, "errors=panic"},
1097        {Opt_err_ro, "errors=remount-ro"},
1098        {Opt_nouid32, "nouid32"},
1099        {Opt_debug, "debug"},
1100        {Opt_oldalloc, "oldalloc"},
1101        {Opt_orlov, "orlov"},
1102        {Opt_user_xattr, "user_xattr"},
1103        {Opt_nouser_xattr, "nouser_xattr"},
1104        {Opt_acl, "acl"},
1105        {Opt_noacl, "noacl"},
1106        {Opt_noload, "noload"},
1107        {Opt_nobh, "nobh"},
1108        {Opt_bh, "bh"},
1109        {Opt_commit, "commit=%u"},
1110        {Opt_min_batch_time, "min_batch_time=%u"},
1111        {Opt_max_batch_time, "max_batch_time=%u"},
1112        {Opt_journal_update, "journal=update"},
1113        {Opt_journal_dev, "journal_dev=%u"},
1114        {Opt_journal_checksum, "journal_checksum"},
1115        {Opt_journal_async_commit, "journal_async_commit"},
1116        {Opt_abort, "abort"},
1117        {Opt_data_journal, "data=journal"},
1118        {Opt_data_ordered, "data=ordered"},
1119        {Opt_data_writeback, "data=writeback"},
1120        {Opt_data_err_abort, "data_err=abort"},
1121        {Opt_data_err_ignore, "data_err=ignore"},
1122        {Opt_offusrjquota, "usrjquota="},
1123        {Opt_usrjquota, "usrjquota=%s"},
1124        {Opt_offgrpjquota, "grpjquota="},
1125        {Opt_grpjquota, "grpjquota=%s"},
1126        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1127        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1128        {Opt_grpquota, "grpquota"},
1129        {Opt_noquota, "noquota"},
1130        {Opt_quota, "quota"},
1131        {Opt_usrquota, "usrquota"},
1132        {Opt_barrier, "barrier=%u"},
1133        {Opt_barrier, "barrier"},
1134        {Opt_nobarrier, "nobarrier"},
1135        {Opt_i_version, "i_version"},
1136        {Opt_stripe, "stripe=%u"},
1137        {Opt_resize, "resize"},
1138        {Opt_delalloc, "delalloc"},
1139        {Opt_nodelalloc, "nodelalloc"},
1140        {Opt_block_validity, "block_validity"},
1141        {Opt_noblock_validity, "noblock_validity"},
1142        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1143        {Opt_journal_ioprio, "journal_ioprio=%u"},
1144        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1145        {Opt_auto_da_alloc, "auto_da_alloc"},
1146        {Opt_noauto_da_alloc, "noauto_da_alloc"},
1147        {Opt_err, NULL},
1148};
1149
1150static ext4_fsblk_t get_sb_block(void **data)
1151{
1152        ext4_fsblk_t    sb_block;
1153        char            *options = (char *) *data;
1154
1155        if (!options || strncmp(options, "sb=", 3) != 0)
1156                return 1;       /* Default location */
1157
1158        options += 3;
1159        /* TODO: use simple_strtoll with >32bit ext4 */
1160        sb_block = simple_strtoul(options, &options, 0);
1161        if (*options && *options != ',') {
1162                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1163                       (char *) *data);
1164                return 1;
1165        }
1166        if (*options == ',')
1167                options++;
1168        *data = (void *) options;
1169
1170        return sb_block;
1171}
1172
/*
 * I/O priority value built from class IOPRIO_CLASS_BE and level 3, using
 * the same IOPRIO_PRIO_VALUE() encoding that parse_options() applies to
 * the journal_ioprio= option.
 * NOTE(review): presumably the fallback when that option is not given --
 * confirm at the use site.
 */
#define DEFAULT_JOURNAL_IOPRIO \
        (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1174
1175static int parse_options(char *options, struct super_block *sb,
1176                         unsigned long *journal_devnum,
1177                         unsigned int *journal_ioprio,
1178                         ext4_fsblk_t *n_blocks_count, int is_remount)
1179{
1180        struct ext4_sb_info *sbi = EXT4_SB(sb);
1181        char *p;
1182        substring_t args[MAX_OPT_ARGS];
1183        int data_opt = 0;
1184        int option;
1185#ifdef CONFIG_QUOTA
1186        int qtype, qfmt;
1187        char *qname;
1188#endif
1189
1190        if (!options)
1191                return 1;
1192
1193        while ((p = strsep(&options, ",")) != NULL) {
1194                int token;
1195                if (!*p)
1196                        continue;
1197
1198                token = match_token(p, tokens, args);
1199                switch (token) {
1200                case Opt_bsd_df:
1201                        clear_opt(sbi->s_mount_opt, MINIX_DF);
1202                        break;
1203                case Opt_minix_df:
1204                        set_opt(sbi->s_mount_opt, MINIX_DF);
1205                        break;
1206                case Opt_grpid:
1207                        set_opt(sbi->s_mount_opt, GRPID);
1208                        break;
1209                case Opt_nogrpid:
1210                        clear_opt(sbi->s_mount_opt, GRPID);
1211                        break;
1212                case Opt_resuid:
1213                        if (match_int(&args[0], &option))
1214                                return 0;
1215                        sbi->s_resuid = option;
1216                        break;
1217                case Opt_resgid:
1218                        if (match_int(&args[0], &option))
1219                                return 0;
1220                        sbi->s_resgid = option;
1221                        break;
1222                case Opt_sb:
1223                        /* handled by get_sb_block() instead of here */
1224                        /* *sb_block = match_int(&args[0]); */
1225                        break;
1226                case Opt_err_panic:
1227                        clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1228                        clear_opt(sbi->s_mount_opt, ERRORS_RO);
1229                        set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1230                        break;
1231                case Opt_err_ro:
1232                        clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1233                        clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1234                        set_opt(sbi->s_mount_opt, ERRORS_RO);
1235                        break;
1236                case Opt_err_cont:
1237                        clear_opt(sbi->s_mount_opt, ERRORS_RO);
1238                        clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1239                        set_opt(sbi->s_mount_opt, ERRORS_CONT);
1240                        break;
1241                case Opt_nouid32:
1242                        set_opt(sbi->s_mount_opt, NO_UID32);
1243                        break;
1244                case Opt_debug:
1245                        set_opt(sbi->s_mount_opt, DEBUG);
1246                        break;
1247                case Opt_oldalloc:
1248                        set_opt(sbi->s_mount_opt, OLDALLOC);
1249                        break;
1250                case Opt_orlov:
1251                        clear_opt(sbi->s_mount_opt, OLDALLOC);
1252                        break;
1253#ifdef CONFIG_EXT4_FS_XATTR
1254                case Opt_user_xattr:
1255                        set_opt(sbi->s_mount_opt, XATTR_USER);
1256                        break;
1257                case Opt_nouser_xattr:
1258                        clear_opt(sbi->s_mount_opt, XATTR_USER);
1259                        break;
1260#else
1261                case Opt_user_xattr:
1262                case Opt_nouser_xattr:
1263                        ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
1264                        break;
1265#endif
1266#ifdef CONFIG_EXT4_FS_POSIX_ACL
1267                case Opt_acl:
1268                        set_opt(sbi->s_mount_opt, POSIX_ACL);
1269                        break;
1270                case Opt_noacl:
1271                        clear_opt(sbi->s_mount_opt, POSIX_ACL);
1272                        break;
1273#else
1274                case Opt_acl:
1275                case Opt_noacl:
1276                        ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
1277                        break;
1278#endif
1279                case Opt_journal_update:
1280                        /* @@@ FIXME */
1281                        /* Eventually we will want to be able to create
1282                           a journal file here.  For now, only allow the
1283                           user to specify an existing inode to be the
1284                           journal file. */
1285                        if (is_remount) {
1286                                ext4_msg(sb, KERN_ERR,
1287                                         "Cannot specify journal on remount");
1288                                return 0;
1289                        }
1290                        set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1291                        break;
1292                case Opt_journal_dev:
1293                        if (is_remount) {
1294                                ext4_msg(sb, KERN_ERR,
1295                                        "Cannot specify journal on remount");
1296                                return 0;
1297                        }
1298                        if (match_int(&args[0], &option))
1299                                return 0;
1300                        *journal_devnum = option;
1301                        break;
1302                case Opt_journal_checksum:
1303                        set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1304                        break;
1305                case Opt_journal_async_commit:
1306                        set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1307                        set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1308                        break;
1309                case Opt_noload:
1310                        set_opt(sbi->s_mount_opt, NOLOAD);
1311                        break;
1312                case Opt_commit:
1313                        if (match_int(&args[0], &option))
1314                                return 0;
1315                        if (option < 0)
1316                                return 0;
1317                        if (option == 0)
1318                                option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1319                        sbi->s_commit_interval = HZ * option;
1320                        break;
1321                case Opt_max_batch_time:
1322                        if (match_int(&args[0], &option))
1323                                return 0;
1324                        if (option < 0)
1325                                return 0;
1326                        if (option == 0)
1327                                option = EXT4_DEF_MAX_BATCH_TIME;
1328                        sbi->s_max_batch_time = option;
1329                        break;
1330                case Opt_min_batch_time:
1331                        if (match_int(&args[0], &option))
1332                                return 0;
1333                        if (option < 0)
1334                                return 0;
1335                        sbi->s_min_batch_time = option;
1336                        break;
1337                case Opt_data_journal:
1338                        data_opt = EXT4_MOUNT_JOURNAL_DATA;
1339                        goto datacheck;
1340                case Opt_data_ordered:
1341                        data_opt = EXT4_MOUNT_ORDERED_DATA;
1342                        goto datacheck;
1343                case Opt_data_writeback:
1344                        data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1345                datacheck:
1346                        if (is_remount) {
1347                                if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1348                                                != data_opt) {
1349                                        ext4_msg(sb, KERN_ERR,
1350                                                "Cannot change data mode on remount");
1351                                        return 0;
1352                                }
1353                        } else {
1354                                sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1355                                sbi->s_mount_opt |= data_opt;
1356                        }
1357                        break;
1358                case Opt_data_err_abort:
1359                        set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1360                        break;
1361                case Opt_data_err_ignore:
1362                        clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1363                        break;
1364#ifdef CONFIG_QUOTA
1365                case Opt_usrjquota:
1366                        qtype = USRQUOTA;
1367                        goto set_qf_name;
1368                case Opt_grpjquota:
1369                        qtype = GRPQUOTA;
1370set_qf_name:
1371                        if (sb_any_quota_loaded(sb) &&
1372                            !sbi->s_qf_names[qtype]) {
1373                                ext4_msg(sb, KERN_ERR,
1374                                       "Cannot change journaled "
1375                                       "quota options when quota turned on");
1376                                return 0;
1377                        }
1378                        qname = match_strdup(&args[0]);
1379                        if (!qname) {
1380                                ext4_msg(sb, KERN_ERR,
1381                                        "Not enough memory for "
1382                                        "storing quotafile name");
1383                                return 0;
1384                        }
1385                        if (sbi->s_qf_names[qtype] &&
1386                            strcmp(sbi->s_qf_names[qtype], qname)) {
1387                                ext4_msg(sb, KERN_ERR,
1388                                        "%s quota file already "
1389                                        "specified", QTYPE2NAME(qtype));
1390                                kfree(qname);
1391                                return 0;
1392                        }
1393                        sbi->s_qf_names[qtype] = qname;
1394                        if (strchr(sbi->s_qf_names[qtype], '/')) {
1395                                ext4_msg(sb, KERN_ERR,
1396                                        "quotafile must be on "
1397                                        "filesystem root");
1398                                kfree(sbi->s_qf_names[qtype]);
1399                                sbi->s_qf_names[qtype] = NULL;
1400                                return 0;
1401                        }
1402                        set_opt(sbi->s_mount_opt, QUOTA);
1403                        break;
1404                case Opt_offusrjquota:
1405                        qtype = USRQUOTA;
1406                        goto clear_qf_name;
1407                case Opt_offgrpjquota:
1408                        qtype = GRPQUOTA;
1409clear_qf_name:
1410                        if (sb_any_quota_loaded(sb) &&
1411                            sbi->s_qf_names[qtype]) {
1412                                ext4_msg(sb, KERN_ERR, "Cannot change "
1413                                        "journaled quota options when "
1414                                        "quota turned on");
1415                                return 0;
1416                        }
1417                        /*
1418                         * The space will be released later when all options
1419                         * are confirmed to be correct
1420                         */
1421                        sbi->s_qf_names[qtype] = NULL;
1422                        break;
1423                case Opt_jqfmt_vfsold:
1424                        qfmt = QFMT_VFS_OLD;
1425                        goto set_qf_format;
1426                case Opt_jqfmt_vfsv0:
1427                        qfmt = QFMT_VFS_V0;
1428set_qf_format:
1429                        if (sb_any_quota_loaded(sb) &&
1430                            sbi->s_jquota_fmt != qfmt) {
1431                                ext4_msg(sb, KERN_ERR, "Cannot change "
1432                                        "journaled quota options when "
1433                                        "quota turned on");
1434                                return 0;
1435                        }
1436                        sbi->s_jquota_fmt = qfmt;
1437                        break;
1438                case Opt_quota:
1439                case Opt_usrquota:
1440                        set_opt(sbi->s_mount_opt, QUOTA);
1441                        set_opt(sbi->s_mount_opt, USRQUOTA);
1442                        break;
1443                case Opt_grpquota:
1444                        set_opt(sbi->s_mount_opt, QUOTA);
1445                        set_opt(sbi->s_mount_opt, GRPQUOTA);
1446                        break;
1447                case Opt_noquota:
1448                        if (sb_any_quota_loaded(sb)) {
1449                                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1450                                        "options when quota turned on");
1451                                return 0;
1452                        }
1453                        clear_opt(sbi->s_mount_opt, QUOTA);
1454                        clear_opt(sbi->s_mount_opt, USRQUOTA);
1455                        clear_opt(sbi->s_mount_opt, GRPQUOTA);
1456                        break;
1457#else
1458                case Opt_quota:
1459                case Opt_usrquota:
1460                case Opt_grpquota:
1461                        ext4_msg(sb, KERN_ERR,
1462                                "quota options not supported");
1463                        break;
1464                case Opt_usrjquota:
1465                case Opt_grpjquota:
1466                case Opt_offusrjquota:
1467                case Opt_offgrpjquota:
1468                case Opt_jqfmt_vfsold:
1469                case Opt_jqfmt_vfsv0:
1470                        ext4_msg(sb, KERN_ERR,
1471                                "journaled quota options not supported");
1472                        break;
1473                case Opt_noquota:
1474                        break;
1475#endif
1476                case Opt_abort:
1477                        sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1478                        break;
1479                case Opt_nobarrier:
1480                        clear_opt(sbi->s_mount_opt, BARRIER);
1481                        break;
1482                case Opt_barrier:
1483                        if (match_int(&args[0], &option)) {
1484                                set_opt(sbi->s_mount_opt, BARRIER);
1485                                break;
1486                        }
1487                        if (option)
1488                                set_opt(sbi->s_mount_opt, BARRIER);
1489                        else
1490                                clear_opt(sbi->s_mount_opt, BARRIER);
1491                        break;
1492                case Opt_ignore:
1493                        break;
1494                case Opt_resize:
1495                        if (!is_remount) {
1496                                ext4_msg(sb, KERN_ERR,
1497                                        "resize option only available "
1498                                        "for remount");
1499                                return 0;
1500                        }
1501                        if (match_int(&args[0], &option) != 0)
1502                                return 0;
1503                        *n_blocks_count = option;
1504                        break;
1505                case Opt_nobh:
1506                        set_opt(sbi->s_mount_opt, NOBH);
1507                        break;
1508                case Opt_bh:
1509                        clear_opt(sbi->s_mount_opt, NOBH);
1510                        break;
1511                case Opt_i_version:
1512                        set_opt(sbi->s_mount_opt, I_VERSION);
1513                        sb->s_flags |= MS_I_VERSION;
1514                        break;
1515                case Opt_nodelalloc:
1516                        clear_opt(sbi->s_mount_opt, DELALLOC);
1517                        break;
1518                case Opt_stripe:
1519                        if (match_int(&args[0], &option))
1520                                return 0;
1521                        if (option < 0)
1522                                return 0;
1523                        sbi->s_stripe = option;
1524                        break;
1525                case Opt_delalloc:
1526                        set_opt(sbi->s_mount_opt, DELALLOC);
1527                        break;
1528                case Opt_block_validity:
1529                        set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1530                        break;
1531                case Opt_noblock_validity:
1532                        clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1533                        break;
1534                case Opt_inode_readahead_blks:
1535                        if (match_int(&args[0], &option))
1536                                return 0;
1537                        if (option < 0 || option > (1 << 30))
1538                                return 0;
1539                        if (!is_power_of_2(option)) {
1540                                ext4_msg(sb, KERN_ERR,
1541                                         "EXT4-fs: inode_readahead_blks"
1542                                         " must be a power of 2");
1543                                return 0;
1544                        }
1545                        sbi->s_inode_readahead_blks = option;
1546                        break;
1547                case Opt_journal_ioprio:
1548                        if (match_int(&args[0], &option))
1549                                return 0;
1550                        if (option < 0 || option > 7)
1551                                break;
1552                        *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1553                                                            option);
1554                        break;
1555                case Opt_noauto_da_alloc:
1556                        set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1557                        break;
1558                case Opt_auto_da_alloc:
1559                        if (match_int(&args[0], &option)) {
1560                                clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1561                                break;
1562                        }
1563                        if (option)
1564                                clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1565                        else
1566                                set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1567                        break;
1568                default:
1569                        ext4_msg(sb, KERN_ERR,
1570                               "Unrecognized mount option \"%s\" "
1571                               "or missing value", p);
1572                        return 0;
1573                }
1574        }
1575#ifdef CONFIG_QUOTA
1576        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1577                if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1578                     sbi->s_qf_names[USRQUOTA])
1579                        clear_opt(sbi->s_mount_opt, USRQUOTA);
1580
1581                if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1582                     sbi->s_qf_names[GRPQUOTA])
1583                        clear_opt(sbi->s_mount_opt, GRPQUOTA);
1584
1585                if ((sbi->s_qf_names[USRQUOTA] &&
1586                                (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1587                    (sbi->s_qf_names[GRPQUOTA] &&
1588                                (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1589                        ext4_msg(sb, KERN_ERR, "old and new quota "
1590                                        "format mixing");
1591                        return 0;
1592                }
1593
1594                if (!sbi->s_jquota_fmt) {
1595                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1596                                        "not specified");
1597                        return 0;
1598                }
1599        } else {
1600                if (sbi->s_jquota_fmt) {
1601                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1602                                        "specified with no journaling "
1603                                        "enabled");
1604                        return 0;
1605                }
1606        }
1607#endif
1608        return 1;
1609}
1610
1611static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1612                            int read_only)
1613{
1614        struct ext4_sb_info *sbi = EXT4_SB(sb);
1615        int res = 0;
1616
1617        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1618                ext4_msg(sb, KERN_ERR, "revision level too high, "
1619                         "forcing read-only mode");
1620                res = MS_RDONLY;
1621        }
1622        if (read_only)
1623                return res;
1624        if (!(sbi->s_mount_state & EXT4_VALID_FS))
1625                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1626                         "running e2fsck is recommended");
1627        else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1628                ext4_msg(sb, KERN_WARNING,
1629                         "warning: mounting fs with errors, "
1630                         "running e2fsck is recommended");
1631        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1632                 le16_to_cpu(es->s_mnt_count) >=
1633                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1634                ext4_msg(sb, KERN_WARNING,
1635                         "warning: maximal mount count reached, "
1636                         "running e2fsck is recommended");
1637        else if (le32_to_cpu(es->s_checkinterval) &&
1638                (le32_to_cpu(es->s_lastcheck) +
1639                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1640                ext4_msg(sb, KERN_WARNING,
1641                         "warning: checktime reached, "
1642                         "running e2fsck is recommended");
1643        if (!sbi->s_journal)
1644                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1645        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1646                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1647        le16_add_cpu(&es->s_mnt_count, 1);
1648        es->s_mtime = cpu_to_le32(get_seconds());
1649        ext4_update_dynamic_rev(sb);
1650        if (sbi->s_journal)
1651                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1652
1653        ext4_commit_super(sb, 1);
1654        if (test_opt(sb, DEBUG))
1655                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1656                                "bpg=%lu, ipg=%lu, mo=%04x]\n",
1657                        sb->s_blocksize,
1658                        sbi->s_groups_count,
1659                        EXT4_BLOCKS_PER_GROUP(sb),
1660                        EXT4_INODES_PER_GROUP(sb),
1661                        sbi->s_mount_opt);
1662
1663        return res;
1664}
1665
1666static int ext4_fill_flex_info(struct super_block *sb)
1667{
1668        struct ext4_sb_info *sbi = EXT4_SB(sb);
1669        struct ext4_group_desc *gdp = NULL;
1670        ext4_group_t flex_group_count;
1671        ext4_group_t flex_group;
1672        int groups_per_flex = 0;
1673        size_t size;
1674        int i;
1675
1676        if (!sbi->s_es->s_log_groups_per_flex) {
1677                sbi->s_log_groups_per_flex = 0;
1678                return 1;
1679        }
1680
1681        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1682        groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1683
1684        /* We allocate both existing and potentially added groups */
1685        flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1686                        ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1687                              EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1688        size = flex_group_count * sizeof(struct flex_groups);
1689        sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1690        if (sbi->s_flex_groups == NULL) {
1691                sbi->s_flex_groups = vmalloc(size);
1692                if (sbi->s_flex_groups)
1693                        memset(sbi->s_flex_groups, 0, size);
1694        }
1695        if (sbi->s_flex_groups == NULL) {
1696                ext4_msg(sb, KERN_ERR, "not enough memory for "
1697                                "%u flex groups", flex_group_count);
1698                goto failed;
1699        }
1700
1701        for (i = 0; i < sbi->s_groups_count; i++) {
1702                gdp = ext4_get_group_desc(sb, i, NULL);
1703
1704                flex_group = ext4_flex_group(sbi, i);
1705                atomic_add(ext4_free_inodes_count(sb, gdp),
1706                           &sbi->s_flex_groups[flex_group].free_inodes);
1707                atomic_add(ext4_free_blks_count(sb, gdp),
1708                           &sbi->s_flex_groups[flex_group].free_blocks);
1709                atomic_add(ext4_used_dirs_count(sb, gdp),
1710                           &sbi->s_flex_groups[flex_group].used_dirs);
1711        }
1712
1713        return 1;
1714failed:
1715        return 0;
1716}
1717
1718__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1719                            struct ext4_group_desc *gdp)
1720{
1721        __u16 crc = 0;
1722
1723        if (sbi->s_es->s_feature_ro_compat &
1724            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1725                int offset = offsetof(struct ext4_group_desc, bg_checksum);
1726                __le32 le_group = cpu_to_le32(block_group);
1727
1728                crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1729                crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1730                crc = crc16(crc, (__u8 *)gdp, offset);
1731                offset += sizeof(gdp->bg_checksum); /* skip checksum */
1732                /* for checksum of struct ext4_group_desc do the rest...*/
1733                if ((sbi->s_es->s_feature_incompat &
1734                     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1735                    offset < le16_to_cpu(sbi->s_es->s_desc_size))
1736                        crc = crc16(crc, (__u8 *)gdp + offset,
1737                                    le16_to_cpu(sbi->s_es->s_desc_size) -
1738                                        offset);
1739        }
1740
1741        return cpu_to_le16(crc);
1742}
1743
1744int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1745                                struct ext4_group_desc *gdp)
1746{
1747        if ((sbi->s_es->s_feature_ro_compat &
1748             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1749            (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1750                return 0;
1751
1752        return 1;
1753}
1754
1755/* Called at mount-time, super-block is locked */
1756static int ext4_check_descriptors(struct super_block *sb)
1757{
1758        struct ext4_sb_info *sbi = EXT4_SB(sb);
1759        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1760        ext4_fsblk_t last_block;
1761        ext4_fsblk_t block_bitmap;
1762        ext4_fsblk_t inode_bitmap;
1763        ext4_fsblk_t inode_table;
1764        int flexbg_flag = 0;
1765        ext4_group_t i;
1766
1767        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1768                flexbg_flag = 1;
1769
1770        ext4_debug("Checking group descriptors");
1771
1772        for (i = 0; i < sbi->s_groups_count; i++) {
1773                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1774
1775                if (i == sbi->s_groups_count - 1 || flexbg_flag)
1776                        last_block = ext4_blocks_count(sbi->s_es) - 1;
1777                else
1778                        last_block = first_block +
1779                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1780
1781                block_bitmap = ext4_block_bitmap(sb, gdp);
1782                if (block_bitmap < first_block || block_bitmap > last_block) {
1783                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1784                               "Block bitmap for group %u not in group "
1785                               "(block %llu)!", i, block_bitmap);
1786                        return 0;
1787                }
1788                inode_bitmap = ext4_inode_bitmap(sb, gdp);
1789                if (inode_bitmap < first_block || inode_bitmap > last_block) {
1790                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1791                               "Inode bitmap for group %u not in group "
1792                               "(block %llu)!", i, inode_bitmap);
1793                        return 0;
1794                }
1795                inode_table = ext4_inode_table(sb, gdp);
1796                if (inode_table < first_block ||
1797                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
1798                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1799                               "Inode table for group %u not in group "
1800                               "(block %llu)!", i, inode_table);
1801                        return 0;
1802                }
1803                ext4_lock_group(sb, i);
1804                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1805                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1806                                 "Checksum for group %u failed (%u!=%u)",
1807                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1808                                     gdp)), le16_to_cpu(gdp->bg_checksum));
1809                        if (!(sb->s_flags & MS_RDONLY)) {
1810                                ext4_unlock_group(sb, i);
1811                                return 0;
1812                        }
1813                }
1814                ext4_unlock_group(sb, i);
1815                if (!flexbg_flag)
1816                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
1817        }
1818
1819        ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1820        sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
1821        return 1;
1822}
1823
1824/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1825 * the superblock) which were deleted from all directories, but held open by
1826 * a process at the time of a crash.  We walk the list and try to delete these
1827 * inodes at recovery time (only with a read-write filesystem).
1828 *
1829 * In order to keep the orphan inode chain consistent during traversal (in
1830 * case of crash during recovery), we link each inode into the superblock
1831 * orphan list_head and handle it the same way as an inode deletion during
1832 * normal operation (which journals the operations for us).
1833 *
1834 * We only do an iget() and an iput() on each inode, which is very safe if we
1835 * accidentally point at an in-use or already deleted inode.  The worst that
1836 * can happen in this case is that we get a "bit already cleared" message from
1837 * ext4_free_inode().  The only reason we would point at a wrong inode is if
1838 * e2fsck was run on this filesystem, and it must have already done the orphan
1839 * inode cleanup for us, so we can safely abort without any further action.
1840 */
1841static void ext4_orphan_cleanup(struct super_block *sb,
1842                                struct ext4_super_block *es)
1843{
1844        unsigned int s_flags = sb->s_flags;
1845        int nr_orphans = 0, nr_truncates = 0;
1846#ifdef CONFIG_QUOTA
1847        int i;
1848#endif
1849        if (!es->s_last_orphan) {
1850                jbd_debug(4, "no orphan inodes to clean up\n");
1851                return;
1852        }
1853
1854        if (bdev_read_only(sb->s_bdev)) {
1855                ext4_msg(sb, KERN_ERR, "write access "
1856                        "unavailable, skipping orphan cleanup");
1857                return;
1858        }
1859
1860        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1861                if (es->s_last_orphan)
1862                        jbd_debug(1, "Errors on filesystem, "
1863                                  "clearing orphan list.\n");
1864                es->s_last_orphan = 0;
1865                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1866                return;
1867        }
1868
1869        if (s_flags & MS_RDONLY) {
1870                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1871                sb->s_flags &= ~MS_RDONLY;
1872        }
1873#ifdef CONFIG_QUOTA
1874        /* Needed for iput() to work correctly and not trash data */
1875        sb->s_flags |= MS_ACTIVE;
1876        /* Turn on quotas so that they are updated correctly */
1877        for (i = 0; i < MAXQUOTAS; i++) {
1878                if (EXT4_SB(sb)->s_qf_names[i]) {
1879                        int ret = ext4_quota_on_mount(sb, i);
1880                        if (ret < 0)
1881                                ext4_msg(sb, KERN_ERR,
1882                                        "Cannot turn on journaled "
1883                                        "quota: error %d", ret);
1884                }
1885        }
1886#endif
1887
1888        while (es->s_last_orphan) {
1889                struct inode *inode;
1890
1891                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1892                if (IS_ERR(inode)) {
1893                        es->s_last_orphan = 0;
1894                        break;
1895                }
1896
1897                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1898                vfs_dq_init(inode);
1899                if (inode->i_nlink) {
1900                        ext4_msg(sb, KERN_DEBUG,
1901                                "%s: truncating inode %lu to %lld bytes",
1902                                __func__, inode->i_ino, inode->i_size);
1903                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1904                                  inode->i_ino, inode->i_size);
1905                        ext4_truncate(inode);
1906                        nr_truncates++;
1907                } else {
1908                        ext4_msg(sb, KERN_DEBUG,
1909                                "%s: deleting unreferenced inode %lu",
1910                                __func__, inode->i_ino);
1911                        jbd_debug(2, "deleting unreferenced inode %lu\n",
1912                                  inode->i_ino);
1913                        nr_orphans++;
1914                }
1915                iput(inode);  /* The delete magic happens here! */
1916        }
1917
1918#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1919
1920        if (nr_orphans)
1921                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1922                       PLURAL(nr_orphans));
1923        if (nr_truncates)
1924                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1925                       PLURAL(nr_truncates));
1926#ifdef CONFIG_QUOTA
1927        /* Turn quotas off */
1928        for (i = 0; i < MAXQUOTAS; i++) {
1929                if (sb_dqopt(sb)->files[i])
1930                        vfs_quota_off(sb, i, 0);
1931        }
1932#endif
1933        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1934}
1935
1936/*
1937 * Maximal extent format file size.
1938 * Resulting logical blkno at s_maxbytes must fit in our on-disk
1939 * extent format containers, within a sector_t, and within i_blocks
1940 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1941 * so that won't be a limiting factor.
1942 *
1943 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1944 */
1945static loff_t ext4_max_size(int blkbits, int has_huge_files)
1946{
1947        loff_t res;
1948        loff_t upper_limit = MAX_LFS_FILESIZE;
1949
1950        /* small i_blocks in vfs inode? */
1951        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1952                /*
1953                 * CONFIG_LBDAF is not enabled implies the inode
1954                 * i_block represent total blocks in 512 bytes
1955                 * 32 == size of vfs inode i_blocks * 8
1956                 */
1957                upper_limit = (1LL << 32) - 1;
1958
1959                /* total blocks in file system block size */
1960                upper_limit >>= (blkbits - 9);
1961                upper_limit <<= blkbits;
1962        }
1963
1964        /* 32-bit extent-start container, ee_block */
1965        res = 1LL << 32;
1966        res <<= blkbits;
1967        res -= 1;
1968
1969        /* Sanity check against vm- & vfs- imposed limits */
1970        if (res > upper_limit)
1971                res = upper_limit;
1972
1973        return res;
1974}
1975
1976/*
1977 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1978 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1979 * We need to be 1 filesystem block less than the 2^48 sector limit.
1980 */
1981static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1982{
1983        loff_t res = EXT4_NDIR_BLOCKS;
1984        int meta_blocks;
1985        loff_t upper_limit;
1986        /* This is calculated to be the largest file size for a dense, block
1987         * mapped file such that the file's total number of 512-byte sectors,
1988         * including data and all indirect blocks, does not exceed (2^48 - 1).
1989         *
1990         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
1991         * number of 512-byte sectors of the file.
1992         */
1993
1994        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1995                /*
1996                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
1997                 * the inode i_block field represents total file blocks in
1998                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
1999                 */
2000                upper_limit = (1LL << 32) - 1;

2001
2002                /* total blocks in file system block size */
2003                upper_limit >>= (bits - 9);
2004
2005        } else {
2006                /*
2007                 * We use 48 bit ext4_inode i_blocks
2008                 * With EXT4_HUGE_FILE_FL set the i_blocks
2009                 * represent total number of blocks in
2010                 * file system block size
2011                 */
2012                upper_limit = (1LL << 48) - 1;
2013
2014        }
2015
2016        /* indirect blocks */
2017        meta_blocks = 1;
2018        /* double indirect blocks */
2019        meta_blocks += 1 + (1LL << (bits-2));
2020        /* tripple indirect blocks */
2021        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2022
2023        upper_limit -= meta_blocks;
2024        upper_limit <<= bits;
2025
2026        res += 1LL << (bits-2);
2027        res += 1LL << (2*(bits-2));
2028        res += 1LL << (3*(bits-2));
2029        res <<= bits;
2030        if (res > upper_limit)
2031                res = upper_limit;
2032
2033        if (res > MAX_LFS_FILESIZE)
2034                res = MAX_LFS_FILESIZE;
2035
2036        return res;
2037}
2038
2039static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2040                                   ext4_fsblk_t logical_sb_block, int nr)
2041{
2042        struct ext4_sb_info *sbi = EXT4_SB(sb);
2043        ext4_group_t bg, first_meta_bg;
2044        int has_super = 0;
2045
2046        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2047
2048        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2049            nr < first_meta_bg)
2050                return logical_sb_block + nr + 1;
2051        bg = sbi->s_desc_per_block * nr;
2052        if (ext4_bg_has_super(sb, bg))
2053                has_super = 1;
2054
2055        return (has_super + ext4_group_first_block_no(sb, bg));
2056}
2057
2058/**
2059 * ext4_get_stripe_size: Get the stripe size.
2060 * @sbi: In memory super block info
2061 *
2062 * If we have specified it via mount option, then
2063 * use the mount option value. If the value specified at mount time is
2064 * greater than the blocks per group use the super block value.
2065 * If the super block value is greater than blocks per group return 0.
2066 * Allocator needs it be less than blocks per group.
2067 *
2068 */
2069static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2070{
2071        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2072        unsigned long stripe_width =
2073                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2074
2075        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2076                return sbi->s_stripe;
2077
2078        if (stripe_width <= sbi->s_blocks_per_group)
2079                return stripe_width;
2080
2081        if (stride <= sbi->s_blocks_per_group)
2082                return stride;
2083
2084        return 0;
2085}
2086
2087/* sysfs supprt */
2088
2089struct ext4_attr {
2090        struct attribute attr;
2091        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2092        ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 
2093                         const char *, size_t);
2094        int offset;
2095};
2096
2097static int parse_strtoul(const char *buf,
2098                unsigned long max, unsigned long *value)
2099{
2100        char *endp;
2101
2102        while (*buf && isspace(*buf))
2103                buf++;
2104        *value = simple_strtoul(buf, &endp, 0);
2105        while (*endp && isspace(*endp))
2106                endp++;
2107        if (*endp || *value > max)
2108                return -EINVAL;
2109
2110        return 0;
2111}
2112
2113static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2114                                              struct ext4_sb_info *sbi,
2115                                              char *buf)
2116{
2117        return snprintf(buf, PAGE_SIZE, "%llu\n",
2118                        (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2119}
2120
2121static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2122                                         struct ext4_sb_info *sbi, char *buf)
2123{
2124        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2125
2126        return snprintf(buf, PAGE_SIZE, "%lu\n",
2127                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2128                         sbi->s_sectors_written_start) >> 1);
2129}
2130
2131static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2132                                          struct ext4_sb_info *sbi, char *buf)
2133{
2134        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2135
2136        return snprintf(buf, PAGE_SIZE, "%llu\n",
2137                        sbi->s_kbytes_written + 
2138                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2139                          EXT4_SB(sb)->s_sectors_written_start) >> 1));
2140}
2141
2142static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2143                                          struct ext4_sb_info *sbi,
2144                                          const char *buf, size_t count)
2145{
2146        unsigned long t;
2147
2148        if (parse_strtoul(buf, 0x40000000, &t))
2149                return -EINVAL;
2150
2151        if (!is_power_of_2(t))
2152                return -EINVAL;
2153
2154        sbi->s_inode_readahead_blks = t;
2155        return count;
2156}
2157
2158static ssize_t sbi_ui_show(struct ext4_attr *a,
2159                           struct ext4_sb_info *sbi, char *buf)
2160{
2161        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2162
2163        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2164}
2165
2166static ssize_t sbi_ui_store(struct ext4_attr *a,
2167                            struct ext4_sb_info *sbi,
2168                            const char *buf, size_t count)
2169{
2170        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2171        unsigned long t;
2172
2173        if (parse_strtoul(buf, 0xffffffff, &t))
2174                return -EINVAL;
2175        *ui = t;
2176        return count;
2177}
2178
2179#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
2180static struct ext4_attr ext4_attr_##_name = {                   \
2181        .attr = {.name = __stringify(_name), .mode = _mode },   \
2182        .show   = _show,                                        \
2183        .store  = _store,                                       \
2184        .offset = offsetof(struct ext4_sb_info, _elname),       \
2185}
2186#define EXT4_ATTR(name, mode, show, store) \
2187static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2188
2189#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2190#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2191#define EXT4_RW_ATTR_SBI_UI(name, elname)       \
2192        EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2193#define ATTR_LIST(name) &ext4_attr_##name.attr
2194
2195EXT4_RO_ATTR(delayed_allocation_blocks);
2196EXT4_RO_ATTR(session_write_kbytes);
2197EXT4_RO_ATTR(lifetime_write_kbytes);
2198EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2199                 inode_readahead_blks_store, s_inode_readahead_blks);
2200EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2201EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2202EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2203EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2204EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2205EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2206EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2207EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2208
2209static struct attribute *ext4_attrs[] = {
2210        ATTR_LIST(delayed_allocation_blocks),
2211        ATTR_LIST(session_write_kbytes),
2212        ATTR_LIST(lifetime_write_kbytes),
2213        ATTR_LIST(inode_readahead_blks),
2214        ATTR_LIST(inode_goal),
2215        ATTR_LIST(mb_stats),
2216        ATTR_LIST(mb_max_to_scan),
2217        ATTR_LIST(mb_min_to_scan),
2218        ATTR_LIST(mb_order2_req),
2219        ATTR_LIST(mb_stream_req),
2220        ATTR_LIST(mb_group_prealloc),
2221        ATTR_LIST(max_writeback_mb_bump),
2222        NULL,
2223};
2224
2225static ssize_t ext4_attr_show(struct kobject *kobj,
2226                              struct attribute *attr, char *buf)
2227{
2228        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2229                                                s_kobj);
2230        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2231
2232        return a->show ? a->show(a, sbi, buf) : 0;
2233}
2234
2235static ssize_t ext4_attr_store(struct kobject *kobj,
2236                               struct attribute *attr,
2237                               const char *buf, size_t len)
2238{
2239        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2240                                                s_kobj);
2241        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2242
2243        return a->store ? a->store(a, sbi, buf, len) : 0;
2244}
2245
2246static void ext4_sb_release(struct kobject *kobj)
2247{
2248        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2249                                                s_kobj);
2250        complete(&sbi->s_kobj_unregister);
2251}
2252
2253
2254static struct sysfs_ops ext4_attr_ops = {
2255        .show   = ext4_attr_show,
2256        .store  = ext4_attr_store,
2257};
2258
2259static struct kobj_type ext4_ktype = {
2260        .default_attrs  = ext4_attrs,
2261        .sysfs_ops      = &ext4_attr_ops,
2262        .release        = ext4_sb_release,
2263};
2264
2265/*
2266 * Check whether this filesystem can be mounted based on
2267 * the features present and the RDONLY/RDWR mount requested.
2268 * Returns 1 if this filesystem can be mounted as requested,
2269 * 0 if it cannot be.
2270 */
2271static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2272{
2273        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2274                ext4_msg(sb, KERN_ERR,
2275                        "Couldn't mount because of "
2276                        "unsupported optional features (%x)",
2277                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2278                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2279                return 0;
2280        }
2281
2282        if (readonly)
2283                return 1;
2284
2285        /* Check that feature set is OK for a read-write mount */
2286        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2287                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2288                         "unsupported optional features (%x)",
2289                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2290                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2291                return 0;
2292        }
2293        /*
2294         * Large file size enabled file system can only be mounted
2295         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2296         */
2297        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2298                if (sizeof(blkcnt_t) < sizeof(u64)) {
2299                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2300                                 "cannot be mounted RDWR without "
2301                                 "CONFIG_LBDAF");
2302                        return 0;
2303                }
2304        }
2305        return 1;
2306}
2307
2308static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2309                                __releases(kernel_lock)
2310                                __acquires(kernel_lock)
2311{
2312        struct buffer_head *bh;
2313        struct ext4_super_block *es = NULL;
2314        struct ext4_sb_info *sbi;
2315        ext4_fsblk_t block;
2316        ext4_fsblk_t sb_block = get_sb_block(&data);
2317        ext4_fsblk_t logical_sb_block;
2318        unsigned long offset = 0;
2319        unsigned long journal_devnum = 0;
2320        unsigned long def_mount_opts;
2321        struct inode *root;
2322        char *cp;
2323        const char *descr;
2324        int ret = -EINVAL;
2325        int blocksize;
2326        unsigned int db_count;
2327        unsigned int i;
2328        int needs_recovery, has_huge_files;
2329        __u64 blocks_count;
2330        int err;
2331        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2332
2333        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2334        if (!sbi)
2335                return -ENOMEM;
2336
2337        sbi->s_blockgroup_lock =
2338                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2339        if (!sbi->s_blockgroup_lock) {
2340                kfree(sbi);
2341                return -ENOMEM;
2342        }
2343        sb->s_fs_info = sbi;
2344        sbi->s_mount_opt = 0;
2345        sbi->s_resuid = EXT4_DEF_RESUID;
2346        sbi->s_resgid = EXT4_DEF_RESGID;
2347        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2348        sbi->s_sb_block = sb_block;
2349        sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2350                                                      sectors[1]);
2351
2352        unlock_kernel();
2353
2354        /* Cleanup superblock name */
2355        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2356                *cp = '!';
2357
2358        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2359        if (!blocksize) {
2360                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
2361                goto out_fail;
2362        }
2363
2364        /*
2365         * The ext4 superblock will not be buffer aligned for other than 1kB
2366         * block sizes.  We need to calculate the offset from buffer start.
2367         */
2368        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2369                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2370                offset = do_div(logical_sb_block, blocksize);
2371        } else {
2372                logical_sb_block = sb_block;
2373        }
2374
2375        if (!(bh = sb_bread(sb, logical_sb_block))) {
2376                ext4_msg(sb, KERN_ERR, "unable to read superblock");
2377                goto out_fail;
2378        }
2379        /*
2380         * Note: s_es must be initialized as soon as possible because
2381         *       some ext4 macro-instructions depend on its value
2382         */
2383        es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2384        sbi->s_es = es;
2385        sb->s_magic = le16_to_cpu(es->s_magic);
2386        if (sb->s_magic != EXT4_SUPER_MAGIC)
2387                goto cantfind_ext4;
2388        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2389
2390        /* Set defaults before we parse the mount options */
2391        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2392        if (def_mount_opts & EXT4_DEFM_DEBUG)
2393                set_opt(sbi->s_mount_opt, DEBUG);
2394        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2395                set_opt(sbi->s_mount_opt, GRPID);
2396        if (def_mount_opts & EXT4_DEFM_UID16)
2397                set_opt(sbi->s_mount_opt, NO_UID32);
2398#ifdef CONFIG_EXT4_FS_XATTR
2399        if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2400                set_opt(sbi->s_mount_opt, XATTR_USER);
2401#endif
2402#ifdef CONFIG_EXT4_FS_POSIX_ACL
2403        if (def_mount_opts & EXT4_DEFM_ACL)
2404                set_opt(sbi->s_mount_opt, POSIX_ACL);
2405#endif
2406        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2407                sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2408        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2409                sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2410        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2411                sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2412
2413        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2414                set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2415        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2416                set_opt(sbi->s_mount_opt, ERRORS_CONT);
2417        else
2418                set_opt(sbi->s_mount_opt, ERRORS_RO);
2419
2420        sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2421        sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2422        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2423        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2424        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2425
2426        set_opt(sbi->s_mount_opt, BARRIER);
2427
2428        /*
2429         * enable delayed allocation by default
2430         * Use -o nodelalloc to turn it off
2431         */
2432        set_opt(sbi->s_mount_opt, DELALLOC);
2433
2434        if (!parse_options((char *) data, sb, &journal_devnum,
2435                           &journal_ioprio, NULL, 0))
2436                goto failed_mount;
2437
2438        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2439                ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2440
2441        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2442            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2443             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2444             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2445                ext4_msg(sb, KERN_WARNING,
2446                       "feature flags set on rev 0 fs, "
2447                       "running e2fsck is recommended");
2448
2449        /*
2450         * Check feature flags regardless of the revision level, since we
2451         * previously didn't change the revision level when setting the flags,
2452         * so there is a chance incompat flags are set on a rev 0 filesystem.
2453         */
2454        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
2455                goto failed_mount;
2456
2457        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2458
2459        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2460            blocksize > EXT4_MAX_BLOCK_SIZE) {
2461                ext4_msg(sb, KERN_ERR,
2462                       "Unsupported filesystem blocksize %d", blocksize);
2463                goto failed_mount;
2464        }
2465
2466        if (sb->s_blocksize != blocksize) {
2467                /* Validate the filesystem blocksize */
2468                if (!sb_set_blocksize(sb, blocksize)) {
2469                        ext4_msg(sb, KERN_ERR, "bad block size %d",
2470                                        blocksize);
2471                        goto failed_mount;
2472                }
2473
2474                brelse(bh);
2475                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2476                offset = do_div(logical_sb_block, blocksize);
2477                bh = sb_bread(sb, logical_sb_block);
2478                if (!bh) {
2479                        ext4_msg(sb, KERN_ERR,
2480                               "Can't read superblock on 2nd try");
2481                        goto failed_mount;
2482                }
2483                es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2484                sbi->s_es = es;
2485                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2486                        ext4_msg(sb, KERN_ERR,
2487                               "Magic mismatch, very weird!");
2488                        goto failed_mount;
2489                }
2490        }
2491
2492        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2493                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2494        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2495                                                      has_huge_files);
2496        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2497
2498        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2499                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2500                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2501        } else {
2502                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2503                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2504                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2505                    (!is_power_of_2(sbi->s_inode_size)) ||
2506                    (sbi->s_inode_size > blocksize)) {
2507                        ext4_msg(sb, KERN_ERR,
2508                               "unsupported inode size: %d",
2509                               sbi->s_inode_size);
2510                        goto failed_mount;
2511                }
2512                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2513                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2514        }
2515
2516        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2517        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2518                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2519                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2520                    !is_power_of_2(sbi->s_desc_size)) {
2521                        ext4_msg(sb, KERN_ERR,
2522                               "unsupported descriptor size %lu",
2523                               sbi->s_desc_size);
2524                        goto failed_mount;
2525                }
2526        } else
2527                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2528
2529        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2530        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2531        if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2532                goto cantfind_ext4;
2533
2534        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2535        if (sbi->s_inodes_per_block == 0)
2536                goto cantfind_ext4;
2537        sbi->s_itb_per_group = sbi->s_inodes_per_group /
2538                                        sbi->s_inodes_per_block;
2539        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2540        sbi->s_sbh = bh;
2541        sbi->s_mount_state = le16_to_cpu(es->s_state);
2542        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2543        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2544
2545        for (i = 0; i < 4; i++)
2546                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2547        sbi->s_def_hash_version = es->s_def_hash_version;
2548        i = le32_to_cpu(es->s_flags);
2549        if (i & EXT2_FLAGS_UNSIGNED_HASH)
2550                sbi->s_hash_unsigned = 3;
2551        else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2552#ifdef __CHAR_UNSIGNED__
2553                es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2554                sbi->s_hash_unsigned = 3;
2555#else
2556                es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2557#endif
2558                sb->s_dirt = 1;
2559        }
2560
2561        if (sbi->s_blocks_per_group > blocksize * 8) {
2562                ext4_msg(sb, KERN_ERR,
2563                       "#blocks per group too big: %lu",
2564                       sbi->s_blocks_per_group);
2565                goto failed_mount;
2566        }
2567        if (sbi->s_inodes_per_group > blocksize * 8) {
2568                ext4_msg(sb, KERN_ERR,
2569                       "#inodes per group too big: %lu",
2570                       sbi->s_inodes_per_group);
2571                goto failed_mount;
2572        }
2573
2574        /*
2575         * Test whether we have more sectors than will fit in sector_t,
2576         * and whether the max offset is addressable by the page cache.
2577         */
2578        if ((ext4_blocks_count(es) >
2579             (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
2580            (ext4_blocks_count(es) >
2581             (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
2582                ext4_msg(sb, KERN_ERR, "filesystem"
2583                         " too large to mount safely on this system");
2584                if (sizeof(sector_t) < 8)
2585                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
2586                ret = -EFBIG;
2587                goto failed_mount;
2588        }
2589
2590        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2591                goto cantfind_ext4;
2592
2593        /* check blocks count against device size */
2594        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2595        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2596                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
2597                       "exceeds size of device (%llu blocks)",
2598                       ext4_blocks_count(es), blocks_count);
2599                goto failed_mount;
2600        }
2601
2602        /*
2603         * It makes no sense for the first data block to be beyond the end
2604         * of the filesystem.
2605         */
2606        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2607                ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
2608                         "block %u is beyond end of filesystem (%llu)",
2609                         le32_to_cpu(es->s_first_data_block),
2610                         ext4_blocks_count(es));
2611                goto failed_mount;
2612        }
2613        blocks_count = (ext4_blocks_count(es) -
2614                        le32_to_cpu(es->s_first_data_block) +
2615                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
2616        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2617        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2618                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
2619                       "(block count %llu, first data block %u, "
2620                       "blocks per group %lu)", sbi->s_groups_count,
2621                       ext4_blocks_count(es),
2622                       le32_to_cpu(es->s_first_data_block),
2623                       EXT4_BLOCKS_PER_GROUP(sb));
2624                goto failed_mount;
2625        }
2626        sbi->s_groups_count = blocks_count;
2627        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
2628                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
2629        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2630                   EXT4_DESC_PER_BLOCK(sb);
2631        sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2632                                    GFP_KERNEL);
2633        if (sbi->s_group_desc == NULL) {
2634                ext4_msg(sb, KERN_ERR, "not enough memory");
2635                goto failed_mount;
2636        }
2637
2638#ifdef CONFIG_PROC_FS
2639        if (ext4_proc_root)
2640                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2641#endif
2642
2643        bgl_lock_init(sbi->s_blockgroup_lock);
2644
2645        for (i = 0; i < db_count; i++) {
2646                block = descriptor_loc(sb, logical_sb_block, i);
2647                sbi->s_group_desc[i] = sb_bread(sb, block);
2648                if (!sbi->s_group_desc[i]) {
2649                        ext4_msg(sb, KERN_ERR,
2650                               "can't read group descriptor %d", i);
2651                        db_count = i;
2652                        goto failed_mount2;
2653                }
2654        }
2655        if (!ext4_check_descriptors(sb)) {
2656                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2657                goto failed_mount2;
2658        }
2659        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2660                if (!ext4_fill_flex_info(sb)) {
2661                        ext4_msg(sb, KERN_ERR,
2662                               "unable to initialize "
2663                               "flex_bg meta info!");
2664                        goto failed_mount2;
2665                }
2666
2667        sbi->s_gdb_count = db_count;
2668        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2669        spin_lock_init(&sbi->s_next_gen_lock);
2670
2671        err = percpu_counter_init(&sbi->s_freeblocks_counter,
2672                        ext4_count_free_blocks(sb));
2673        if (!err) {
2674                err = percpu_counter_init(&sbi->s_freeinodes_counter,
2675                                ext4_count_free_inodes(sb));
2676        }
2677        if (!err) {
2678                err = percpu_counter_init(&sbi->s_dirs_counter,
2679                                ext4_count_dirs(sb));
2680        }
2681        if (!err) {
2682                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2683        }
2684        if (err) {
2685                ext4_msg(sb, KERN_ERR, "insufficient memory");
2686                goto failed_mount3;
2687        }
2688
2689        sbi->s_stripe = ext4_get_stripe_size(sbi);
2690        sbi->s_max_writeback_mb_bump = 128;
2691
2692        /*
2693         * set up enough so that it can read an inode
2694         */
2695        if (!test_opt(sb, NOLOAD) &&
2696            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2697                sb->s_op = &ext4_sops;
2698        else
2699                sb->s_op = &ext4_nojournal_sops;
2700        sb->s_export_op = &ext4_export_ops;
2701        sb->s_xattr = ext4_xattr_handlers;
2702#ifdef CONFIG_QUOTA
2703        sb->s_qcop = &ext4_qctl_operations;
2704        sb->dq_op = &ext4_quota_operations;
2705#endif
2706        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2707        mutex_init(&sbi->s_orphan_lock);
2708        mutex_init(&sbi->s_resize_lock);
2709
2710        sb->s_root = NULL;
2711
2712        needs_recovery = (es->s_last_orphan != 0 ||
2713                          EXT4_HAS_INCOMPAT_FEATURE(sb,
2714                                    EXT4_FEATURE_INCOMPAT_RECOVER));
2715
2716        /*
2717         * The first inode we look at is the journal inode.  Don't try
2718         * root first: it may be modified in the journal!
2719         */
2720        if (!test_opt(sb, NOLOAD) &&
2721            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2722                if (ext4_load_journal(sb, es, journal_devnum))
2723                        goto failed_mount3;
2724                if (!(sb->s_flags & MS_RDONLY) &&
2725                    EXT4_SB(sb)->s_journal->j_failed_commit) {
2726                        ext4_msg(sb, KERN_CRIT, "error: "
2727                               "ext4_fill_super: Journal transaction "
2728                               "%u is corrupt",
2729                               EXT4_SB(sb)->s_journal->j_failed_commit);
2730                        if (test_opt(sb, ERRORS_RO)) {
2731                                ext4_msg(sb, KERN_CRIT,
2732                                       "Mounting filesystem read-only");
2733                                sb->s_flags |= MS_RDONLY;
2734                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2735                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2736                        }
2737                        if (test_opt(sb, ERRORS_PANIC)) {
2738                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2739                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2740                                ext4_commit_super(sb, 1);
2741                                goto failed_mount4;
2742                        }
2743                }
2744        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2745              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2746                ext4_msg(sb, KERN_ERR, "required journal recovery "
2747                       "suppressed and not mounted read-only");
2748                goto failed_mount4;
2749        } else {
2750                clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2751                set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2752                sbi->s_journal = NULL;
2753                needs_recovery = 0;
2754                goto no_journal;
2755        }
2756
2757        if (ext4_blocks_count(es) > 0xffffffffULL &&
2758            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2759                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
2760                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2761                goto failed_mount4;
2762        }
2763
2764        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2765                jbd2_journal_set_features(sbi->s_journal,
2766                                JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2767                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2768        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2769                jbd2_journal_set_features(sbi->s_journal,
2770                                JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2771                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2772                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2773        } else {
2774                jbd2_journal_clear_features(sbi->s_journal,
2775                                JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2776                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2777        }
2778
2779        /* We have now updated the journal if required, so we can
2780         * validate the data journaling mode. */
2781        switch (test_opt(sb, DATA_FLAGS)) {
2782        case 0:
2783                /* No mode set, assume a default based on the journal
2784                 * capabilities: ORDERED_DATA if the journal can
2785                 * cope, else JOURNAL_DATA
2786                 */
2787                if (jbd2_journal_check_available_features
2788                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2789                        set_opt(sbi->s_mount_opt, ORDERED_DATA);
2790                else
2791                        set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2792                break;
2793
2794        case EXT4_MOUNT_ORDERED_DATA:
2795        case EXT4_MOUNT_WRITEBACK_DATA:
2796                if (!jbd2_journal_check_available_features
2797                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2798                        ext4_msg(sb, KERN_ERR, "Journal does not support "
2799                               "requested data journaling mode");
2800                        goto failed_mount4;
2801                }
2802        default:
2803                break;
2804        }
2805        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2806
2807no_journal:
2808
2809        if (test_opt(sb, NOBH)) {
2810                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2811                        ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2812                                "its supported only with writeback mode");
2813                        clear_opt(sbi->s_mount_opt, NOBH);
2814                }
2815        }
2816        EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
2817        if (!EXT4_SB(sb)->dio_unwritten_wq) {
2818                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
2819                goto failed_mount_wq;
2820        }
2821
2822        /*
2823         * The jbd2_journal_load will have done any necessary log recovery,
2824         * so we can safely mount the rest of the filesystem now.
2825         */
2826
2827        root = ext4_iget(sb, EXT4_ROOT_INO);
2828        if (IS_ERR(root)) {
2829                ext4_msg(sb, KERN_ERR, "get root inode failed");
2830                ret = PTR_ERR(root);
2831                goto failed_mount4;
2832        }
2833        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2834                iput(root);
2835                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
2836                goto failed_mount4;
2837        }
2838        sb->s_root = d_alloc_root(root);
2839        if (!sb->s_root) {
2840                ext4_msg(sb, KERN_ERR, "get root dentry failed");
2841                iput(root);
2842                ret = -ENOMEM;
2843                goto failed_mount4;
2844        }
2845
2846        ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2847
2848        /* determine the minimum size of new large inodes, if present */
2849        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2850                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2851                                                     EXT4_GOOD_OLD_INODE_SIZE;
2852                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2853                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2854                        if (sbi->s_want_extra_isize <
2855                            le16_to_cpu(es->s_want_extra_isize))
2856                                sbi->s_want_extra_isize =
2857                                        le16_to_cpu(es->s_want_extra_isize);
2858                        if (sbi->s_want_extra_isize <
2859                            le16_to_cpu(es->s_min_extra_isize))
2860                                sbi->s_want_extra_isize =
2861                                        le16_to_cpu(es->s_min_extra_isize);
2862                }
2863        }
2864        /* Check if enough inode space is available */
2865        if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2866                                                        sbi->s_inode_size) {
2867                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2868                                                       EXT4_GOOD_OLD_INODE_SIZE;
2869                ext4_msg(sb, KERN_INFO, "required extra inode space not"
2870                         "available");
2871        }
2872
2873        if (test_opt(sb, DELALLOC) &&
2874            (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2875                ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
2876                         "requested data journaling mode");
2877                clear_opt(sbi->s_mount_opt, DELALLOC);
2878        }
2879
2880        err = ext4_setup_system_zone(sb);
2881        if (err) {
2882                ext4_msg(sb, KERN_ERR, "failed to initialize system "
2883                         "zone (%d)\n", err);
2884                goto failed_mount4;
2885        }
2886
2887        ext4_ext_init(sb);
2888        err = ext4_mb_init(sb, needs_recovery);
2889        if (err) {
2890                ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
2891                         err);
2892                goto failed_mount4;
2893        }
2894
2895        sbi->s_kobj.kset = ext4_kset;
2896        init_completion(&sbi->s_kobj_unregister);
2897        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2898                                   "%s", sb->s_id);
2899        if (err) {
2900                ext4_mb_release(sb);
2901                ext4_ext_release(sb);
2902                goto failed_mount4;
2903        };
2904
2905        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2906        ext4_orphan_cleanup(sb, es);
2907        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2908        if (needs_recovery) {
2909                ext4_msg(sb, KERN_INFO, "recovery complete");
2910                ext4_mark_recovery_complete(sb, es);
2911        }
2912        if (EXT4_SB(sb)->s_journal) {
2913                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2914                        descr = " journalled data mode";
2915                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2916                        descr = " ordered data mode";
2917                else
2918                        descr = " writeback data mode";
2919        } else
2920                descr = "out journal";
2921
2922        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
2923
2924        lock_kernel();
2925        return 0;
2926
2927cantfind_ext4:
2928        if (!silent)
2929                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
2930        goto failed_mount;
2931
2932failed_mount4:
2933        ext4_msg(sb, KERN_ERR, "mount failed");
2934        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
2935failed_mount_wq:
2936        ext4_release_system_zone(sb);
2937        if (sbi->s_journal) {
2938                jbd2_journal_destroy(sbi->s_journal);
2939                sbi->s_journal = NULL;
2940        }
2941failed_mount3:
2942        if (sbi->s_flex_groups) {
2943                if (is_vmalloc_addr(sbi->s_flex_groups))
2944                        vfree(sbi->s_flex_groups);
2945                else
2946                        kfree(sbi->s_flex_groups);
2947        }
2948        percpu_counter_destroy(&sbi->s_freeblocks_counter);
2949        percpu_counter_destroy(&sbi->s_freeinodes_counter);
2950        percpu_counter_destroy(&sbi->s_dirs_counter);
2951        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2952failed_mount2:
2953        for (i = 0; i < db_count; i++)
2954                brelse(sbi->s_group_desc[i]);
2955        kfree(sbi->s_group_desc);
2956failed_mount:
2957        if (sbi->s_proc) {
2958                remove_proc_entry(sb->s_id, ext4_proc_root);
2959        }
2960#ifdef CONFIG_QUOTA
2961        for (i = 0; i < MAXQUOTAS; i++)
2962                kfree(sbi->s_qf_names[i]);
2963#endif
2964        ext4_blkdev_remove(sbi);
2965        brelse(bh);
2966out_fail:
2967        sb->s_fs_info = NULL;
2968        kfree(sbi->s_blockgroup_lock);
2969        kfree(sbi);
2970        lock_kernel();
2971        return ret;
2972}
2973
2974/*
2975 * Setup any per-fs journal parameters now.  We'll do this both on
2976 * initial mount, once the journal has been initialised but before we've
2977 * done any recovery; and again on any subsequent remount.
2978 */
2979static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2980{
2981        struct ext4_sb_info *sbi = EXT4_SB(sb);
2982
2983        journal->j_commit_interval = sbi->s_commit_interval;
2984        journal->j_min_batch_time = sbi->s_min_batch_time;
2985        journal->j_max_batch_time = sbi->s_max_batch_time;
2986
2987        spin_lock(&journal->j_state_lock);
2988        if (test_opt(sb, BARRIER))
2989                journal->j_flags |= JBD2_BARRIER;
2990        else
2991                journal->j_flags &= ~JBD2_BARRIER;
2992        if (test_opt(sb, DATA_ERR_ABORT))
2993                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2994        else
2995                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2996        spin_unlock(&journal->j_state_lock);
2997}
2998
2999static journal_t *ext4_get_journal(struct super_block *sb,
3000                                   unsigned int journal_inum)

3001{
3002        struct inode *journal_inode;
3003        journal_t *journal;
3004
3005        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3006
3007        /* First, test for the existence of a valid inode on disk.  Bad
3008         * things happen if we iget() an unused inode, as the subsequent
3009         * iput() will try to delete it. */
3010
3011        journal_inode = ext4_iget(sb, journal_inum);
3012        if (IS_ERR(journal_inode)) {
3013                ext4_msg(sb, KERN_ERR, "no journal found");
3014                return NULL;
3015        }
3016        if (!journal_inode->i_nlink) {
3017                make_bad_inode(journal_inode);
3018                iput(journal_inode);
3019                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
3020                return NULL;
3021        }
3022
3023        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
3024                  journal_inode, journal_inode->i_size);
3025        if (!S_ISREG(journal_inode->i_mode)) {
3026                ext4_msg(sb, KERN_ERR, "invalid journal inode");
3027                iput(journal_inode);
3028                return NULL;
3029        }
3030
3031        journal = jbd2_journal_init_inode(journal_inode);
3032        if (!journal) {
3033                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
3034                iput(journal_inode);
3035                return NULL;
3036        }
3037        journal->j_private = sb;
3038        ext4_init_journal_params(sb, journal);
3039        return journal;
3040}
3041
3042static journal_t *ext4_get_dev_journal(struct super_block *sb,
3043                                       dev_t j_dev)
3044{
3045        struct buffer_head *bh;
3046        journal_t *journal;
3047        ext4_fsblk_t start;
3048        ext4_fsblk_t len;
3049        int hblock, blocksize;
3050        ext4_fsblk_t sb_block;
3051        unsigned long offset;
3052        struct ext4_super_block *es;
3053        struct block_device *bdev;
3054
3055        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3056
3057        bdev = ext4_blkdev_get(j_dev, sb);
3058        if (bdev == NULL)
3059                return NULL;
3060
3061        if (bd_claim(bdev, sb)) {
3062                ext4_msg(sb, KERN_ERR,
3063                        "failed to claim external journal device");
3064                blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
3065                return NULL;
3066        }
3067
3068        blocksize = sb->s_blocksize;
3069        hblock = bdev_logical_block_size(bdev);
3070        if (blocksize < hblock) {
3071                ext4_msg(sb, KERN_ERR,
3072                        "blocksize too small for journal device");
3073                goto out_bdev;
3074        }
3075
3076        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3077        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3078        set_blocksize(bdev, blocksize);
3079        if (!(bh = __bread(bdev, sb_block, blocksize))) {
3080                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
3081                       "external journal");
3082                goto out_bdev;
3083        }
3084
3085        es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3086        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3087            !(le32_to_cpu(es->s_feature_incompat) &
3088              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3089                ext4_msg(sb, KERN_ERR, "external journal has "
3090                                        "bad superblock");
3091                brelse(bh);
3092                goto out_bdev;
3093        }
3094
3095        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3096                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
3097                brelse(bh);
3098                goto out_bdev;
3099        }
3100
3101        len = ext4_blocks_count(es);
3102        start = sb_block + 1;
3103        brelse(bh);     /* we're done with the superblock */
3104
3105        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3106                                        start, len, blocksize);
3107        if (!journal) {
3108                ext4_msg(sb, KERN_ERR, "failed to create device journal");
3109                goto out_bdev;
3110        }
3111        journal->j_private = sb;
3112        ll_rw_block(READ, 1, &journal->j_sb_buffer);
3113        wait_on_buffer(journal->j_sb_buffer);
3114        if (!buffer_uptodate(journal->j_sb_buffer)) {
3115                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
3116                goto out_journal;
3117        }
3118        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3119                ext4_msg(sb, KERN_ERR, "External journal has more than one "
3120                                        "user (unsupported) - %d",
3121                        be32_to_cpu(journal->j_superblock->s_nr_users));
3122                goto out_journal;
3123        }
3124        EXT4_SB(sb)->journal_bdev = bdev;
3125        ext4_init_journal_params(sb, journal);
3126        return journal;
3127
3128out_journal:
3129        jbd2_journal_destroy(journal);
3130out_bdev:
3131        ext4_blkdev_put(bdev);
3132        return NULL;
3133}
3134
3135static int ext4_load_journal(struct super_block *sb,
3136                             struct ext4_super_block *es,
3137                             unsigned long journal_devnum)
3138{
3139        journal_t *journal;
3140        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3141        dev_t journal_dev;
3142        int err = 0;
3143        int really_read_only;
3144
3145        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3146
3147        if (journal_devnum &&
3148            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3149                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
3150                        "numbers have changed");
3151                journal_dev = new_decode_dev(journal_devnum);
3152        } else
3153                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3154
3155        really_read_only = bdev_read_only(sb->s_bdev);
3156
3157        /*
3158         * Are we loading a blank journal or performing recovery after a
3159         * crash?  For recovery, we need to check in advance whether we
3160         * can get read-write access to the device.
3161         */
3162        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3163                if (sb->s_flags & MS_RDONLY) {
3164                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
3165                                        "required on readonly filesystem");
3166                        if (really_read_only) {
3167                                ext4_msg(sb, KERN_ERR, "write access "
3168                                        "unavailable, cannot proceed");
3169                                return -EROFS;
3170                        }
3171                        ext4_msg(sb, KERN_INFO, "write access will "
3172                               "be enabled during recovery");
3173                }
3174        }
3175
3176        if (journal_inum && journal_dev) {
3177                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
3178                       "and inode journals!");
3179                return -EINVAL;
3180        }
3181
3182        if (journal_inum) {
3183                if (!(journal = ext4_get_journal(sb, journal_inum)))
3184                        return -EINVAL;
3185        } else {
3186                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3187                        return -EINVAL;
3188        }
3189
3190        if (!(journal->j_flags & JBD2_BARRIER))
3191                ext4_msg(sb, KERN_INFO, "barriers disabled");
3192
3193        if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3194                err = jbd2_journal_update_format(journal);
3195                if (err)  {
3196                        ext4_msg(sb, KERN_ERR, "error updating journal");
3197                        jbd2_journal_destroy(journal);
3198                        return err;
3199                }
3200        }
3201
3202        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3203                err = jbd2_journal_wipe(journal, !really_read_only);
3204        if (!err)
3205                err = jbd2_journal_load(journal);
3206
3207        if (err) {
3208                ext4_msg(sb, KERN_ERR, "error loading journal");
3209                jbd2_journal_destroy(journal);
3210                return err;
3211        }
3212
3213        EXT4_SB(sb)->s_journal = journal;
3214        ext4_clear_journal_err(sb, es);
3215
3216        if (journal_devnum &&
3217            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3218                es->s_journal_dev = cpu_to_le32(journal_devnum);
3219
3220                /* Make sure we flush the recovery flag to disk. */
3221                ext4_commit_super(sb, 1);
3222        }
3223
3224        return 0;
3225}
3226
3227static int ext4_commit_super(struct super_block *sb, int sync)
3228{
3229        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3230        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3231        int error = 0;
3232
3233        if (!sbh)
3234                return error;
3235        if (buffer_write_io_error(sbh)) {
3236                /*
3237                 * Oh, dear.  A previous attempt to write the
3238                 * superblock failed.  This could happen because the
3239                 * USB device was yanked out.  Or it could happen to
3240                 * be a transient write error and maybe the block will
3241                 * be remapped.  Nothing we can do but to retry the
3242                 * write and hope for the best.
3243                 */
3244                ext4_msg(sb, KERN_ERR, "previous I/O error to "
3245                       "superblock detected");
3246                clear_buffer_write_io_error(sbh);
3247                set_buffer_uptodate(sbh);
3248        }
3249        /*
3250         * If the file system is mounted read-only, don't update the
3251         * superblock write time.  This avoids updating the superblock
3252         * write time when we are mounting the root file system
3253         * read/only but we need to replay the journal; at that point,
3254         * for people who are east of GMT and who make their clock
3255         * tick in localtime for Windows bug-for-bug compatibility,
3256         * the clock is set in the future, and this will cause e2fsck
3257         * to complain and force a full file system check.
3258         */
3259        if (!(sb->s_flags & MS_RDONLY))
3260                es->s_wtime = cpu_to_le32(get_seconds());
3261        es->s_kbytes_written =
3262                cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 
3263                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3264                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
3265        ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3266                                        &EXT4_SB(sb)->s_freeblocks_counter));
3267        es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3268                                        &EXT4_SB(sb)->s_freeinodes_counter));
3269        sb->s_dirt = 0;
3270        BUFFER_TRACE(sbh, "marking dirty");
3271        mark_buffer_dirty(sbh);
3272        if (sync) {
3273                error = sync_dirty_buffer(sbh);
3274                if (error)
3275                        return error;
3276
3277                error = buffer_write_io_error(sbh);
3278                if (error) {
3279                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
3280                               "superblock");
3281                        clear_buffer_write_io_error(sbh);
3282                        set_buffer_uptodate(sbh);
3283                }
3284        }
3285        return error;
3286}
3287
3288/*
3289 * Have we just finished recovery?  If so, and if we are mounting (or
3290 * remounting) the filesystem readonly, then we will end up with a
3291 * consistent fs on disk.  Record that fact.
3292 */
3293static void ext4_mark_recovery_complete(struct super_block *sb,
3294                                        struct ext4_super_block *es)
3295{
3296        journal_t *journal = EXT4_SB(sb)->s_journal;
3297
3298        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3299                BUG_ON(journal != NULL);
3300                return;
3301        }
3302        jbd2_journal_lock_updates(journal);
3303        if (jbd2_journal_flush(journal) < 0)
3304                goto out;
3305
3306        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3307            sb->s_flags & MS_RDONLY) {
3308                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3309                ext4_commit_super(sb, 1);
3310        }
3311
3312out:
3313        jbd2_journal_unlock_updates(journal);
3314}
3315
3316/*
3317 * If we are mounting (or read-write remounting) a filesystem whose journal
3318 * has recorded an error from a previous lifetime, move that error to the
3319 * main filesystem now.
3320 */
3321static void ext4_clear_journal_err(struct super_block *sb,
3322                                   struct ext4_super_block *es)
3323{
3324        journal_t *journal;
3325        int j_errno;
3326        const char *errstr;
3327
3328        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3329
3330        journal = EXT4_SB(sb)->s_journal;
3331
3332        /*
3333         * Now check for any error status which may have been recorded in the
3334         * journal by a prior ext4_error() or ext4_abort()
3335         */
3336
3337        j_errno = jbd2_journal_errno(journal);
3338        if (j_errno) {
3339                char nbuf[16];
3340
3341                errstr = ext4_decode_error(sb, j_errno, nbuf);
3342                ext4_warning(sb, __func__, "Filesystem error recorded "
3343                             "from previous mount: %s", errstr);
3344                ext4_warning(sb, __func__, "Marking fs in need of "
3345                             "filesystem check.");
3346
3347                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3348                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3349                ext4_commit_super(sb, 1);
3350
3351                jbd2_journal_clear_err(journal);
3352        }
3353}
3354
3355/*
3356 * Force the running and committing transactions to commit,
3357 * and wait on the commit.
3358 */
3359int ext4_force_commit(struct super_block *sb)
3360{
3361        journal_t *journal;
3362        int ret = 0;
3363
3364        if (sb->s_flags & MS_RDONLY)
3365                return 0;
3366
3367        journal = EXT4_SB(sb)->s_journal;
3368        if (journal)
3369                ret = ext4_journal_force_commit(journal);
3370
3371        return ret;
3372}
3373
/*
 * Commit the superblock synchronously while holding the superblock lock.
 * The result of ext4_commit_super() is intentionally not reported; this
 * function has no way to return it.
 */
static void ext4_write_super(struct super_block *sb)
{
	lock_super(sb);
	(void) ext4_commit_super(sb, 1);
	unlock_super(sb);
}
3380
3381static int ext4_sync_fs(struct super_block *sb, int wait)
3382{
3383        int ret = 0;
3384        tid_t target;
3385        struct ext4_sb_info *sbi = EXT4_SB(sb);
3386
3387        trace_ext4_sync_fs(sb, wait);
3388        flush_workqueue(sbi->dio_unwritten_wq);
3389        if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
3390                if (wait)
3391                        jbd2_log_wait_commit(sbi->s_journal, target);
3392        }
3393        return ret;
3394}
3395
3396/*
3397 * LVM calls this function before a (read-only) snapshot is created.  This
3398 * gives us a chance to flush the journal completely and mark the fs clean.
3399 */
3400static int ext4_freeze(struct super_block *sb)
3401{
3402        int error = 0;
3403        journal_t *journal;
3404
3405        if (sb->s_flags & MS_RDONLY)
3406                return 0;
3407
3408        journal = EXT4_SB(sb)->s_journal;
3409
3410        /* Now we set up the journal barrier. */
3411        jbd2_journal_lock_updates(journal);
3412
3413        /*
3414         * Don't clear the needs_recovery flag if we failed to flush
3415         * the journal.
3416         */
3417        error = jbd2_journal_flush(journal);
3418        if (error < 0) {
3419        out:
3420                jbd2_journal_unlock_updates(journal);
3421                return error;
3422        }
3423
3424        /* Journal blocked and flushed, clear needs_recovery flag. */
3425        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3426        error = ext4_commit_super(sb, 1);
3427        if (error)
3428                goto out;
3429        return 0;
3430}
3431
3432/*
3433 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3434 * flag here, even though the filesystem is not technically dirty yet.
3435 */
3436static int ext4_unfreeze(struct super_block *sb)
3437{
3438        if (sb->s_flags & MS_RDONLY)
3439                return 0;
3440
3441        lock_super(sb);
3442        /* Reset the needs_recovery flag before the fs is unlocked. */
3443        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3444        ext4_commit_super(sb, 1);
3445        unlock_super(sb);
3446        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3447        return 0;
3448}
3449
3450static int ext4_remount(struct super_block *sb, int *flags, char *data)
3451{
3452        struct ext4_super_block *es;
3453        struct ext4_sb_info *sbi = EXT4_SB(sb);
3454        ext4_fsblk_t n_blocks_count = 0;
3455        unsigned long old_sb_flags;
3456        struct ext4_mount_options old_opts;
3457        ext4_group_t g;
3458        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3459        int err;
3460#ifdef CONFIG_QUOTA
3461        int i;
3462#endif
3463
3464        lock_kernel();
3465
3466        /* Store the original options */
3467        lock_super(sb);
3468        old_sb_flags = sb->s_flags;
3469        old_opts.s_mount_opt = sbi->s_mount_opt;
3470        old_opts.s_resuid = sbi->s_resuid;
3471        old_opts.s_resgid = sbi->s_resgid;
3472        old_opts.s_commit_interval = sbi->s_commit_interval;
3473        old_opts.s_min_batch_time = sbi->s_min_batch_time;
3474        old_opts.s_max_batch_time = sbi->s_max_batch_time;
3475#ifdef CONFIG_QUOTA
3476        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3477        for (i = 0; i < MAXQUOTAS; i++)
3478                old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3479#endif
3480        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3481                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3482
3483        /*
3484         * Allow the "check" option to be passed as a remount option.
3485         */
3486        if (!parse_options(data, sb, NULL, &journal_ioprio,
3487                           &n_blocks_count, 1)) {
3488                err = -EINVAL;
3489                goto restore_opts;
3490        }
3491
3492        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3493                ext4_abort(sb, __func__, "Abort forced by user");
3494
3495        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3496                ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3497
3498        es = sbi->s_es;
3499
3500        if (sbi->s_journal) {
3501                ext4_init_journal_params(sb, sbi->s_journal);
3502                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3503        }
3504
3505        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3506                n_blocks_count > ext4_blocks_count(es)) {
3507                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
3508                        err = -EROFS;
3509                        goto restore_opts;
3510                }
3511
3512                if (*flags & MS_RDONLY) {
3513                        /*
3514                         * First of all, the unconditional stuff we have to do
3515                         * to disable replay of the journal when we next remount
3516                         */
3517                        sb->s_flags |= MS_RDONLY;
3518
3519                        /*
3520                         * OK, test if we are remounting a valid rw partition
3521                         * readonly, and if so set the rdonly flag and then
3522                         * mark the partition as valid again.
3523                         */
3524                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3525                            (sbi->s_mount_state & EXT4_VALID_FS))
3526                                es->s_state = cpu_to_le16(sbi->s_mount_state);
3527
3528                        if (sbi->s_journal)
3529                                ext4_mark_recovery_complete(sb, es);
3530                } else {
3531                        /* Make sure we can mount this feature set readwrite */
3532                        if (!ext4_feature_set_ok(sb, 0)) {
3533                                err = -EROFS;
3534                                goto restore_opts;
3535                        }
3536                        /*
3537                         * Make sure the group descriptor checksums
3538                         * are sane.  If they aren't, refuse to remount r/w.
3539                         */
3540                        for (g = 0; g < sbi->s_groups_count; g++) {
3541                                struct ext4_group_desc *gdp =
3542                                        ext4_get_group_desc(sb, g, NULL);
3543
3544                                if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3545                                        ext4_msg(sb, KERN_ERR,
3546               "ext4_remount: Checksum for group %u failed (%u!=%u)",
3547                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3548                                               le16_to_cpu(gdp->bg_checksum));
3549                                        err = -EINVAL;
3550                                        goto restore_opts;
3551                                }
3552                        }
3553
3554                        /*
3555                         * If we have an unprocessed orphan list hanging
3556                         * around from a previously readonly bdev mount,
3557                         * require a full umount/remount for now.
3558                         */
3559                        if (es->s_last_orphan) {
3560                                ext4_msg(sb, KERN_WARNING, "Couldn't "
3561                                       "remount RDWR because of unprocessed "
3562                                       "orphan inode list.  Please "
3563                                       "umount/remount instead");
3564                                err = -EINVAL;
3565                                goto restore_opts;
3566                        }
3567
3568                        /*
3569                         * Mounting a RDONLY partition read-write, so reread
3570                         * and store the current valid flag.  (It may have
3571                         * been changed by e2fsck since we originally mounted
3572                         * the partition.)
3573                         */
3574                        if (sbi->s_journal)
3575                                ext4_clear_journal_err(sb, es);
3576                        sbi->s_mount_state = le16_to_cpu(es->s_state);
3577                        if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3578                                goto restore_opts;
3579                        if (!ext4_setup_super(sb, es, 0))
3580                                sb->s_flags &= ~MS_RDONLY;
3581                }
3582        }
3583        ext4_setup_system_zone(sb);
3584        if (sbi->s_journal == NULL)
3585                ext4_commit_super(sb, 1);
3586
3587#ifdef CONFIG_QUOTA
3588        /* Release old quota file names */
3589        for (i = 0; i < MAXQUOTAS; i++)
3590                if (old_opts.s_qf_names[i] &&
3591                    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3592                        kfree(old_opts.s_qf_names[i]);
3593#endif
3594        unlock_super(sb);
3595        unlock_kernel();
3596        return 0;
3597
3598restore_opts:
3599        sb->s_flags = old_sb_flags;
3600        sbi->s_mount_opt = old_opts.s_mount_opt;
3601        sbi->s_resuid = old_opts.s_resuid;
3602        sbi->s_resgid = old_opts.s_resgid;
3603        sbi->s_commit_interval = old_opts.s_commit_interval;
3604        sbi->s_min_batch_time = old_opts.s_min_batch_time;
3605        sbi->s_max_batch_time = old_opts.s_max_batch_time;
3606#ifdef CONFIG_QUOTA
3607        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3608        for (i = 0; i < MAXQUOTAS; i++) {
3609                if (sbi->s_qf_names[i] &&
3610                    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3611                        kfree(sbi->s_qf_names[i]);
3612                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3613        }
3614#endif
3615        unlock_super(sb);
3616        unlock_kernel();
3617        return err;
3618}
3619
3620static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3621{
3622        struct super_block *sb = dentry->d_sb;
3623        struct ext4_sb_info *sbi = EXT4_SB(sb);
3624        struct ext4_super_block *es = sbi->s_es;
3625        u64 fsid;
3626
3627        if (test_opt(sb, MINIX_DF)) {
3628                sbi->s_overhead_last = 0;
3629        } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3630                ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3631                ext4_fsblk_t overhead = 0;
3632
3633                /*
3634                 * Compute the overhead (FS structures).  This is constant
3635                 * for a given filesystem unless the number of block groups
3636                 * changes so we cache the previous value until it does.
3637                 */
3638
3639                /*
3640                 * All of the blocks before first_data_block are
3641                 * overhead
3642                 */
3643                overhead = le32_to_cpu(es->s_first_data_block);
3644
3645                /*
3646                 * Add the overhead attributed to the superblock and
3647                 * block group descriptors.  If the sparse superblocks
3648                 * feature is turned on, then not all groups have this.
3649                 */
3650                for (i = 0; i < ngroups; i++) {
3651                        overhead += ext4_bg_has_super(sb, i) +
3652                                ext4_bg_num_gdb(sb, i);
3653                        cond_resched();
3654                }
3655
3656                /*
3657                 * Every block group has an inode bitmap, a block
3658                 * bitmap, and an inode table.
3659                 */
3660                overhead += ngroups * (2 + sbi->s_itb_per_group);
3661                sbi->s_overhead_last = overhead;
3662                smp_wmb();
3663                sbi->s_blocks_last = ext4_blocks_count(es);
3664        }
3665
3666        buf->f_type = EXT4_SUPER_MAGIC;
3667        buf->f_bsize = sb->s_blocksize;
3668        buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3669        buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3670                       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3671        ext4_free_blocks_count_set(es, buf->f_bfree);
3672        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3673        if (buf->f_bfree < ext4_r_blocks_count(es))
3674                buf->f_bavail = 0;
3675        buf->f_files = le32_to_cpu(es->s_inodes_count);
3676        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3677        es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3678        buf->f_namelen = EXT4_NAME_LEN;
3679        fsid = le64_to_cpup((void *)es->s_uuid) ^
3680               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3681        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3682        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3683
3684        return 0;
3685}
3686
3687/* Helper function for writing quotas on sync - we need to start transaction
3688 * before quota file is locked for write. Otherwise there are possible deadlocks:
3689 * Process 1                         Process 2
3690 * ext4_create()                     quota_sync()
3691 *   jbd2_journal_start()                  write_dquot()
3692 *   vfs_dq_init()                         down(dqio_mutex)
3693 *     down(dqio_mutex)                    jbd2_journal_start()
3694 *
3695 */
3696
3697#ifdef CONFIG_QUOTA
3698
3699static inline struct inode *dquot_to_inode(struct dquot *dquot)
3700{
3701        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3702}
3703
3704static int ext4_write_dquot(struct dquot *dquot)
3705{
3706        int ret, err;
3707        handle_t *handle;
3708        struct inode *inode;
3709
3710        inode = dquot_to_inode(dquot);
3711        handle = ext4_journal_start(inode,
3712                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3713        if (IS_ERR(handle))
3714                return PTR_ERR(handle);
3715        ret = dquot_commit(dquot);
3716        err = ext4_journal_stop(handle);
3717        if (!ret)
3718                ret = err;
3719        return ret;
3720}
3721
3722static int ext4_acquire_dquot(struct dquot *dquot)
3723{
3724        int ret, err;
3725        handle_t *handle;
3726
3727        handle = ext4_journal_start(dquot_to_inode(dquot),
3728                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3729        if (IS_ERR(handle))
3730                return PTR_ERR(handle);
3731        ret = dquot_acquire(dquot);
3732        err = ext4_journal_stop(handle);
3733        if (!ret)
3734                ret = err;
3735        return ret;
3736}
3737
3738static int ext4_release_dquot(struct dquot *dquot)
3739{
3740        int ret, err;
3741        handle_t *handle;
3742
3743        handle = ext4_journal_start(dquot_to_inode(dquot),
3744                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3745        if (IS_ERR(handle)) {
3746                /* Release dquot anyway to avoid endless cycle in dqput() */
3747                dquot_release(dquot);
3748                return PTR_ERR(handle);
3749        }
3750        ret = dquot_release(dquot);
3751        err = ext4_journal_stop(handle);
3752        if (!ret)
3753                ret = err;
3754        return ret;
3755}
3756
3757static int ext4_mark_dquot_dirty(struct dquot *dquot)
3758{
3759        /* Are we journaling quotas? */
3760        if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3761            EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3762                dquot_mark_dquot_dirty(dquot);
3763                return ext4_write_dquot(dquot);
3764        } else {
3765                return dquot_mark_dquot_dirty(dquot);
3766        }
3767}
3768
3769static int ext4_write_info(struct super_block *sb, int type)
3770{
3771        int ret, err;
3772        handle_t *handle;
3773
3774        /* Data block + inode block */
3775        handle = ext4_journal_start(sb->s_root->d_inode, 2);
3776        if (IS_ERR(handle))
3777                return PTR_ERR(handle);
3778        ret = dquot_commit_info(sb, type);
3779        err = ext4_journal_stop(handle);
3780        if (!ret)
3781                ret = err;
3782        return ret;
3783}
3784
3785/*
3786 * Turn on quotas during mount time - we need to find
3787 * the quota file and such...
3788 */
3789static int ext4_quota_on_mount(struct super_block *sb, int type)
3790{
3791        return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3792                                  EXT4_SB(sb)->s_jquota_fmt, type);
3793}
3794
3795/*
3796 * Standard function to be called on quota_on
3797 */
3798static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3799                         char *name, int remount)
3800{
3801        int err;
3802        struct path path;
3803
3804        if (!test_opt(sb, QUOTA))
3805                return -EINVAL;
3806        /* When remounting, no checks are needed and in fact, name is NULL */
3807        if (remount)
3808                return vfs_quota_on(sb, type, format_id, name, remount);
3809
3810        err = kern_path(name, LOOKUP_FOLLOW, &path);
3811        if (err)
3812                return err;
3813
3814        /* Quotafile not on the same filesystem? */
3815        if (path.mnt->mnt_sb != sb) {
3816                path_put(&path);
3817                return -EXDEV;
3818        }
3819        /* Journaling quota? */
3820        if (EXT4_SB(sb)->s_qf_names[type]) {
3821                /* Quotafile not in fs root? */
3822                if (path.dentry->d_parent != sb->s_root)
3823                        ext4_msg(sb, KERN_WARNING,
3824                                "Quota file not on filesystem root. "
3825                                "Journaled quota will not work");
3826        }
3827
3828        /*
3829         * When we journal data on quota file, we have to flush journal to see
3830         * all updates to the file when we bypass pagecache...
3831         */
3832        if (EXT4_SB(sb)->s_journal &&
3833            ext4_should_journal_data(path.dentry->d_inode)) {
3834                /*
3835                 * We don't need to lock updates but journal_flush() could
3836                 * otherwise be livelocked...
3837                 */
3838                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3839                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3840                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3841                if (err) {
3842                        path_put(&path);
3843                        return err;
3844                }
3845        }
3846
3847        err = vfs_quota_on_path(sb, type, format_id, &path);
3848        path_put(&path);
3849        return err;
3850}
3851
3852/* Read data from quotafile - avoid pagecache and such because we cannot afford
3853 * acquiring the locks... As quota files are never truncated and quota code
3854 * itself serializes the operations (and noone else should touch the files)
3855 * we don't have to be afraid of races */
3856static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3857                               size_t len, loff_t off)
3858{
3859        struct inode *inode = sb_dqopt(sb)->files[type];
3860        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3861        int err = 0;
3862        int offset = off & (sb->s_blocksize - 1);
3863        int tocopy;
3864        size_t toread;
3865        struct buffer_head *bh;
3866        loff_t i_size = i_size_read(inode);
3867
3868        if (off > i_size)
3869                return 0;
3870        if (off+len > i_size)
3871                len = i_size-off;
3872        toread = len;
3873        while (toread > 0) {
3874                tocopy = sb->s_blocksize - offset < toread ?
3875                                sb->s_blocksize - offset : toread;
3876                bh = ext4_bread(NULL, inode, blk, 0, &err);
3877                if (err)
3878                        return err;
3879                if (!bh)        /* A hole? */
3880                        memset(data, 0, tocopy);
3881                else
3882                        memcpy(data, bh->b_data+offset, tocopy);
3883                brelse(bh);
3884                offset = 0;
3885                toread -= tocopy;
3886                data += tocopy;
3887                blk++;
3888        }
3889        return len;
3890}
3891
3892/* Write to quotafile (we know the transaction is already started and has
3893 * enough credits) */
3894static ssize_t ext4_quota_write(struct super_block *sb, int type,
3895                                const char *data, size_t len, loff_t off)
3896{
3897        struct inode *inode = sb_dqopt(sb)->files[type];
3898        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3899        int err = 0;
3900        int offset = off & (sb->s_blocksize - 1);
3901        int tocopy;
3902        int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3903        size_t towrite = len;
3904        struct buffer_head *bh;
3905        handle_t *handle = journal_current_handle();
3906
3907        if (EXT4_SB(sb)->s_journal && !handle) {
3908                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3909                        " cancelled because transaction is not started",
3910                        (unsigned long long)off, (unsigned long long)len);
3911                return -EIO;
3912        }
3913        mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3914        while (towrite > 0) {
3915                tocopy = sb->s_blocksize - offset < towrite ?
3916                                sb->s_blocksize - offset : towrite;
3917                bh = ext4_bread(handle, inode, blk, 1, &err);
3918                if (!bh)
3919                        goto out;
3920                if (journal_quota) {
3921                        err = ext4_journal_get_write_access(handle, bh);
3922                        if (err) {
3923                                brelse(bh);
3924                                goto out;
3925                        }
3926                }
3927                lock_buffer(bh);
3928                memcpy(bh->b_data+offset, data, tocopy);
3929                flush_dcache_page(bh->b_page);
3930                unlock_buffer(bh);
3931                if (journal_quota)
3932                        err = ext4_handle_dirty_metadata(handle, NULL, bh);
3933                else {
3934                        /* Always do at least ordered writes for quotas */
3935                        err = ext4_jbd2_file_inode(handle, inode);
3936                        mark_buffer_dirty(bh);
3937                }
3938                brelse(bh);
3939                if (err)
3940                        goto out;
3941                offset = 0;
3942                towrite -= tocopy;
3943                data += tocopy;
3944                blk++;
3945        }
3946out:
3947        if (len == towrite) {
3948                mutex_unlock(&inode->i_mutex);
3949                return err;
3950        }
3951        if (inode->i_size < off+len-towrite) {
3952                i_size_write(inode, off+len-towrite);
3953                EXT4_I(inode)->i_disksize = inode->i_size;
3954        }
3955        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3956        ext4_mark_inode_dirty(handle, inode);
3957        mutex_unlock(&inode->i_mutex);
3958        return len - towrite;
3959}
3960
3961#endif
3962
3963static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3964                       const char *dev_name, void *data, struct vfsmount *mnt)
3965{
3966        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3967}
3968
3969static struct file_system_type ext4_fs_type = {
3970        .owner          = THIS_MODULE,
3971        .name           = "ext4",
3972        .get_sb         = ext4_get_sb,
3973        .kill_sb        = kill_block_super,
3974        .fs_flags       = FS_REQUIRES_DEV,
3975};
3976
3977static int __init init_ext4_fs(void)
3978{
3979        int err;
3980
3981        err = init_ext4_system_zone();
3982        if (err)
3983                return err;
3984        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3985        if (!ext4_kset)
3986                goto out4;
3987        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3988        err = init_ext4_mballoc();
3989        if (err)
3990                goto out3;
3991
3992        err = init_ext4_xattr();
3993        if (err)
3994                goto out2;
3995        err = init_inodecache();
3996        if (err)
3997                goto out1;
3998        err = register_filesystem(&ext4_fs_type);
3999        if (err)
4000                goto out;

4001        return 0;
4002out:
4003        destroy_inodecache();
4004out1:
4005        exit_ext4_xattr();
4006out2:
4007        exit_ext4_mballoc();
4008out3:
4009        remove_proc_entry("fs/ext4", NULL);
4010        kset_unregister(ext4_kset);
4011out4:
4012        exit_ext4_system_zone();
4013        return err;
4014}
4015
4016static void __exit exit_ext4_fs(void)
4017{
4018        unregister_filesystem(&ext4_fs_type);
4019        destroy_inodecache();
4020        exit_ext4_xattr();
4021        exit_ext4_mballoc();
4022        remove_proc_entry("fs/ext4", NULL);
4023        kset_unregister(ext4_kset);
4024        exit_ext4_system_zone();
4025}
4026
4027MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
4028MODULE_DESCRIPTION("Fourth Extended Filesystem");
4029MODULE_LICENSE("GPL");
4030module_init(init_ext4_fs)
4031module_exit(exit_ext4_fs)
4032