LXR linux/fs/ext4/super.c

   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/jbd2.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/parser.h>
  29#include <linux/buffer_head.h>
  30#include <linux/exportfs.h>
  31#include <linux/vfs.h>
  32#include <linux/random.h>
  33#include <linux/mount.h>
  34#include <linux/namei.h>
  35#include <linux/quotaops.h>
  36#include <linux/seq_file.h>
  37#include <linux/proc_fs.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/cleancache.h>
  42#include <asm/uaccess.h>
  43
  44#include <linux/kthread.h>
  45#include <linux/freezer.h>
  46
  47#include "ext4.h"
  48#include "ext4_extents.h"       /* Needed for trace points definition */
  49#include "ext4_jbd2.h"
  50#include "xattr.h"
  51#include "acl.h"
  52#include "mballoc.h"
  53
  54#define CREATE_TRACE_POINTS
  55#include <trace/events/ext4.h>
  56
  57static struct proc_dir_entry *ext4_proc_root;
  58static struct kset *ext4_kset;
  59static struct ext4_lazy_init *ext4_li_info;
  60static struct mutex ext4_li_mtx;
  61static struct ext4_features *ext4_feat;
  62
  63static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  64                             unsigned long journal_devnum);
  65static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  66static int ext4_commit_super(struct super_block *sb, int sync);
  67static void ext4_mark_recovery_complete(struct super_block *sb,
  68                                        struct ext4_super_block *es);
  69static void ext4_clear_journal_err(struct super_block *sb,
  70                                   struct ext4_super_block *es);
  71static int ext4_sync_fs(struct super_block *sb, int wait);
  72static int ext4_remount(struct super_block *sb, int *flags, char *data);
  73static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  74static int ext4_unfreeze(struct super_block *sb);
  75static int ext4_freeze(struct super_block *sb);
  76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  77                       const char *dev_name, void *data);
  78static inline int ext2_feature_set_ok(struct super_block *sb);
  79static inline int ext3_feature_set_ok(struct super_block *sb);
  80static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  81static void ext4_destroy_lazyinit_thread(void);
  82static void ext4_unregister_li_request(struct super_block *sb);
  83static void ext4_clear_request_list(void);
  84
  85#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
  86static struct file_system_type ext2_fs_type = {
  87        .owner          = THIS_MODULE,
  88        .name           = "ext2",
  89        .mount          = ext4_mount,
  90        .kill_sb        = kill_block_super,
  91        .fs_flags       = FS_REQUIRES_DEV,
  92};
  93MODULE_ALIAS_FS("ext2");
  94MODULE_ALIAS("ext2");
  95#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
  96#else
  97#define IS_EXT2_SB(sb) (0)
  98#endif
  99
 100
 101#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 102static struct file_system_type ext3_fs_type = {
 103        .owner          = THIS_MODULE,
 104        .name           = "ext3",
 105        .mount          = ext4_mount,
 106        .kill_sb        = kill_block_super,
 107        .fs_flags       = FS_REQUIRES_DEV,
 108};
 109MODULE_ALIAS_FS("ext3");
 110MODULE_ALIAS("ext3");
 111#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 112#else
 113#define IS_EXT3_SB(sb) (0)
 114#endif
 115
 116static int ext4_verify_csum_type(struct super_block *sb,
 117                                 struct ext4_super_block *es)
 118{
 119        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 120                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 121                return 1;
 122
 123        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 124}
 125
 126static __le32 ext4_superblock_csum(struct super_block *sb,
 127                                   struct ext4_super_block *es)
 128{
 129        struct ext4_sb_info *sbi = EXT4_SB(sb);
 130        int offset = offsetof(struct ext4_super_block, s_checksum);
 131        __u32 csum;
 132
 133        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 134
 135        return cpu_to_le32(csum);
 136}
 137
 138int ext4_superblock_csum_verify(struct super_block *sb,
 139                                struct ext4_super_block *es)
 140{
 141        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 142                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 143                return 1;
 144
 145        return es->s_checksum == ext4_superblock_csum(sb, es);
 146}
 147
 148void ext4_superblock_csum_set(struct super_block *sb)
 149{
 150        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 151
 152        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 153                EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 154                return;
 155
 156        es->s_checksum = ext4_superblock_csum(sb, es);
 157}
 158
 159void *ext4_kvmalloc(size_t size, gfp_t flags)
 160{
 161        void *ret;
 162
 163        ret = kmalloc(size, flags);
 164        if (!ret)
 165                ret = __vmalloc(size, flags, PAGE_KERNEL);
 166        return ret;
 167}
 168
 169void *ext4_kvzalloc(size_t size, gfp_t flags)
 170{
 171        void *ret;
 172
 173        ret = kzalloc(size, flags);
 174        if (!ret)
 175                ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 176        return ret;
 177}
 178
 179void ext4_kvfree(void *ptr)
 180{
 181        if (is_vmalloc_addr(ptr))
 182                vfree(ptr);
 183        else
 184                kfree(ptr);
 185
 186}
 187
 188ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 189                               struct ext4_group_desc *bg)
 190{
 191        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 192                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 193                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 194}
 195
 196ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 197                               struct ext4_group_desc *bg)
 198{
 199        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 200                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 201                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 202}
 203
 204ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 205                              struct ext4_group_desc *bg)
 206{
 207        return le32_to_cpu(bg->bg_inode_table_lo) |
 208                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 209                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 210}
 211
 212__u32 ext4_free_group_clusters(struct super_block *sb,
 213                               struct ext4_group_desc *bg)
 214{
 215        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 216                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 217                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 218}
 219
 220__u32 ext4_free_inodes_count(struct super_block *sb,
 221                              struct ext4_group_desc *bg)
 222{
 223        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 224                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 225                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 226}
 227
 228__u32 ext4_used_dirs_count(struct super_block *sb,
 229                              struct ext4_group_desc *bg)
 230{
 231        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 232                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 233                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 234}
 235
 236__u32 ext4_itable_unused_count(struct super_block *sb,
 237                              struct ext4_group_desc *bg)
 238{
 239        return le16_to_cpu(bg->bg_itable_unused_lo) |
 240                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 241                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 242}
 243
 244void ext4_block_bitmap_set(struct super_block *sb,
 245                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 246{
 247        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 248        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 249                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 250}
 251
 252void ext4_inode_bitmap_set(struct super_block *sb,
 253                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 254{
 255        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 256        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 257                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 258}
 259
 260void ext4_inode_table_set(struct super_block *sb,
 261                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 262{
 263        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 264        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 265                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 266}
 267
 268void ext4_free_group_clusters_set(struct super_block *sb,
 269                                  struct ext4_group_desc *bg, __u32 count)
 270{
 271        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 272        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 273                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 274}
 275
 276void ext4_free_inodes_set(struct super_block *sb,
 277                          struct ext4_group_desc *bg, __u32 count)
 278{
 279        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 280        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 281                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 282}
 283
 284void ext4_used_dirs_set(struct super_block *sb,
 285                          struct ext4_group_desc *bg, __u32 count)
 286{
 287        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 288        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 289                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 290}
 291
 292void ext4_itable_unused_set(struct super_block *sb,
 293                          struct ext4_group_desc *bg, __u32 count)
 294{
 295        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 296        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 297                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 298}
 299
 300
 301static void __save_error_info(struct super_block *sb, const char *func,
 302                            unsigned int line)
 303{
 304        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 305
 306        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 307        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 308        es->s_last_error_time = cpu_to_le32(get_seconds());
 309        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 310        es->s_last_error_line = cpu_to_le32(line);
 311        if (!es->s_first_error_time) {
 312                es->s_first_error_time = es->s_last_error_time;
 313                strncpy(es->s_first_error_func, func,
 314                        sizeof(es->s_first_error_func));
 315                es->s_first_error_line = cpu_to_le32(line);
 316                es->s_first_error_ino = es->s_last_error_ino;
 317                es->s_first_error_block = es->s_last_error_block;
 318        }
 319        /*
 320         * Start the daily error reporting function if it hasn't been
 321         * started already
 322         */
 323        if (!es->s_error_count)
 324                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 325        le32_add_cpu(&es->s_error_count, 1);
 326}
 327
 328static void save_error_info(struct super_block *sb, const char *func,
 329                            unsigned int line)
 330{
 331        __save_error_info(sb, func, line);
 332        ext4_commit_super(sb, 1);
 333}
 334
 335/*
 336 * The del_gendisk() function uninitializes the disk-specific data
 337 * structures, including the bdi structure, without telling anyone
 338 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 339 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 340 * This is a kludge to prevent these oops until we can put in a proper
 341 * hook in del_gendisk() to inform the VFS and file system layers.
 342 */
 343static int block_device_ejected(struct super_block *sb)
 344{
 345        struct inode *bd_inode = sb->s_bdev->bd_inode;
 346        struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 347
 348        return bdi->dev == NULL;
 349}
 350
 351static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 352{
 353        struct super_block              *sb = journal->j_private;
 354        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 355        int                             error = is_journal_aborted(journal);
 356        struct ext4_journal_cb_entry    *jce, *tmp;
 357
 358        spin_lock(&sbi->s_md_lock);
 359        list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
 360                list_del_init(&jce->jce_list);
 361                spin_unlock(&sbi->s_md_lock);
 362                jce->jce_func(sb, jce, error);
 363                spin_lock(&sbi->s_md_lock);
 364        }
 365        spin_unlock(&sbi->s_md_lock);
 366}
 367
 368/* Deal with the reporting of failure conditions on a filesystem such as
 369 * inconsistencies detected or read IO failures.
 370 *
 371 * On ext2, we can store the error state of the filesystem in the
 372 * superblock.  That is not possible on ext4, because we may have other
 373 * write ordering constraints on the superblock which prevent us from
 374 * writing it out straight away; and given that the journal is about to
 375 * be aborted, we can't rely on the current, or future, transactions to
 376 * write out the superblock safely.
 377 *
 378 * We'll just use the jbd2_journal_abort() error code to record an error in
 379 * the journal instead.  On recovery, the journal will complain about
 380 * that error until we've noted it down and cleared it.
 381 */
 382
 383static void ext4_handle_error(struct super_block *sb)
 384{
 385        if (sb->s_flags & MS_RDONLY)
 386                return;
 387
 388        if (!test_opt(sb, ERRORS_CONT)) {
 389                journal_t *journal = EXT4_SB(sb)->s_journal;
 390
 391                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 392                if (journal)
 393                        jbd2_journal_abort(journal, -EIO);
 394        }
 395        if (test_opt(sb, ERRORS_RO)) {
 396                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 397                sb->s_flags |= MS_RDONLY;
 398        }
 399        if (test_opt(sb, ERRORS_PANIC))
 400                panic("EXT4-fs (device %s): panic forced after error\n",
 401                        sb->s_id);
 402}
 403
 404void __ext4_error(struct super_block *sb, const char *function,
 405                  unsigned int line, const char *fmt, ...)
 406{
 407        struct va_format vaf;
 408        va_list args;
 409
 410        va_start(args, fmt);
 411        vaf.fmt = fmt;
 412        vaf.va = &args;
 413        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 414               sb->s_id, function, line, current->comm, &vaf);
 415        va_end(args);
 416        save_error_info(sb, function, line);
 417
 418        ext4_handle_error(sb);
 419}
 420
 421void ext4_error_inode(struct inode *inode, const char *function,
 422                      unsigned int line, ext4_fsblk_t block,
 423                      const char *fmt, ...)
 424{
 425        va_list args;
 426        struct va_format vaf;
 427        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 428
 429        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 430        es->s_last_error_block = cpu_to_le64(block);
 431        save_error_info(inode->i_sb, function, line);
 432        va_start(args, fmt);
 433        vaf.fmt = fmt;
 434        vaf.va = &args;
 435        if (block)
 436                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 437                       "inode #%lu: block %llu: comm %s: %pV\n",
 438                       inode->i_sb->s_id, function, line, inode->i_ino,
 439                       block, current->comm, &vaf);
 440        else
 441                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 442                       "inode #%lu: comm %s: %pV\n",
 443                       inode->i_sb->s_id, function, line, inode->i_ino,
 444                       current->comm, &vaf);
 445        va_end(args);
 446
 447        ext4_handle_error(inode->i_sb);
 448}
 449
 450void ext4_error_file(struct file *file, const char *function,
 451                     unsigned int line, ext4_fsblk_t block,
 452                     const char *fmt, ...)
 453{
 454        va_list args;
 455        struct va_format vaf;
 456        struct ext4_super_block *es;
 457        struct inode *inode = file_inode(file);
 458        char pathname[80], *path;
 459
 460        es = EXT4_SB(inode->i_sb)->s_es;
 461        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 462        save_error_info(inode->i_sb, function, line);
 463        path = d_path(&(file->f_path), pathname, sizeof(pathname));
 464        if (IS_ERR(path))
 465                path = "(unknown)";
 466        va_start(args, fmt);
 467        vaf.fmt = fmt;
 468        vaf.va = &args;
 469        if (block)
 470                printk(KERN_CRIT
 471                       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 472                       "block %llu: comm %s: path %s: %pV\n",
 473                       inode->i_sb->s_id, function, line, inode->i_ino,
 474                       block, current->comm, path, &vaf);
 475        else
 476                printk(KERN_CRIT
 477                       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 478                       "comm %s: path %s: %pV\n",
 479                       inode->i_sb->s_id, function, line, inode->i_ino,
 480                       current->comm, path, &vaf);
 481        va_end(args);
 482
 483        ext4_handle_error(inode->i_sb);
 484}
 485
 486const char *ext4_decode_error(struct super_block *sb, int errno,
 487                              char nbuf[16])
 488{
 489        char *errstr = NULL;
 490
 491        switch (errno) {
 492        case -EIO:
 493                errstr = "IO failure";
 494                break;
 495        case -ENOMEM:
 496                errstr = "Out of memory";
 497                break;
 498        case -EROFS:
 499                if (!sb || (EXT4_SB(sb)->s_journal &&
 500                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 501                        errstr = "Journal has aborted";
 502                else
 503                        errstr = "Readonly filesystem";
 504                break;
 505        default:
 506                /* If the caller passed in an extra buffer for unknown
 507                 * errors, textualise them now.  Else we just return
 508                 * NULL. */
 509                if (nbuf) {
 510                        /* Check for truncated error codes... */
 511                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 512                                errstr = nbuf;
 513                }
 514                break;
 515        }
 516
 517        return errstr;
 518}
 519
 520/* __ext4_std_error decodes expected errors from journaling functions
 521 * automatically and invokes the appropriate error response.  */
 522
 523void __ext4_std_error(struct super_block *sb, const char *function,
 524                      unsigned int line, int errno)
 525{
 526        char nbuf[16];
 527        const char *errstr;
 528
 529        /* Special case: if the error is EROFS, and we're not already
 530         * inside a transaction, then there's really no point in logging
 531         * an error. */
 532        if (errno == -EROFS && journal_current_handle() == NULL &&
 533            (sb->s_flags & MS_RDONLY))
 534                return;
 535
 536        errstr = ext4_decode_error(sb, errno, nbuf);
 537        printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 538               sb->s_id, function, line, errstr);
 539        save_error_info(sb, function, line);
 540
 541        ext4_handle_error(sb);
 542}
 543
 544/*
 545 * ext4_abort is a much stronger failure handler than ext4_error.  The
 546 * abort function may be used to deal with unrecoverable failures such
 547 * as journal IO errors or ENOMEM at a critical moment in log management.
 548 *
 549 * We unconditionally force the filesystem into an ABORT|READONLY state,
 550 * unless the error response on the fs has been set to panic in which
 551 * case we take the easy way out and panic immediately.
 552 */
 553
 554void __ext4_abort(struct super_block *sb, const char *function,
 555                unsigned int line, const char *fmt, ...)
 556{
 557        va_list args;
 558
 559        save_error_info(sb, function, line);
 560        va_start(args, fmt);
 561        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
 562               function, line);
 563        vprintk(fmt, args);
 564        printk("\n");
 565        va_end(args);
 566
 567        if ((sb->s_flags & MS_RDONLY) == 0) {
 568                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 569                sb->s_flags |= MS_RDONLY;
 570                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 571                if (EXT4_SB(sb)->s_journal)
 572                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 573                save_error_info(sb, function, line);
 574        }
 575        if (test_opt(sb, ERRORS_PANIC))
 576                panic("EXT4-fs panic from previous error\n");
 577}
 578
 579void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
 580{
 581        struct va_format vaf;
 582        va_list args;
 583
 584        va_start(args, fmt);
 585        vaf.fmt = fmt;
 586        vaf.va = &args;
 587        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 588        va_end(args);
 589}
 590
 591void __ext4_warning(struct super_block *sb, const char *function,
 592                    unsigned int line, const char *fmt, ...)
 593{
 594        struct va_format vaf;
 595        va_list args;
 596
 597        va_start(args, fmt);
 598        vaf.fmt = fmt;
 599        vaf.va = &args;
 600        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 601               sb->s_id, function, line, &vaf);
 602        va_end(args);
 603}
 604
 605void __ext4_grp_locked_error(const char *function, unsigned int line,
 606                             struct super_block *sb, ext4_group_t grp,
 607                             unsigned long ino, ext4_fsblk_t block,
 608                             const char *fmt, ...)
 609__releases(bitlock)
 610__acquires(bitlock)
 611{
 612        struct va_format vaf;
 613        va_list args;
 614        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 615
 616        es->s_last_error_ino = cpu_to_le32(ino);
 617        es->s_last_error_block = cpu_to_le64(block);
 618        __save_error_info(sb, function, line);
 619
 620        va_start(args, fmt);
 621
 622        vaf.fmt = fmt;
 623        vaf.va = &args;
 624        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 625               sb->s_id, function, line, grp);
 626        if (ino)
 627                printk(KERN_CONT "inode %lu: ", ino);
 628        if (block)
 629                printk(KERN_CONT "block %llu:", (unsigned long long) block);
 630        printk(KERN_CONT "%pV\n", &vaf);
 631        va_end(args);
 632
 633        if (test_opt(sb, ERRORS_CONT)) {
 634                ext4_commit_super(sb, 0);
 635                return;
 636        }
 637
 638        ext4_unlock_group(sb, grp);
 639        ext4_handle_error(sb);
 640        /*
 641         * We only get here in the ERRORS_RO case; relocking the group
 642         * may be dangerous, but nothing bad will happen since the
 643         * filesystem will have already been marked read/only and the
 644         * journal has been aborted.  We return 1 as a hint to callers
 645         * who might what to use the return value from
 646         * ext4_grp_locked_error() to distinguish between the
 647         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 648         * aggressively from the ext4 function in question, with a
 649         * more appropriate error code.
 650         */
 651        ext4_lock_group(sb, grp);
 652        return;
 653}
 654
 655void ext4_update_dynamic_rev(struct super_block *sb)
 656{
 657        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 658
 659        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 660                return;
 661
 662        ext4_warning(sb,
 663                     "updating to rev %d because of new feature flag, "
 664                     "running e2fsck is recommended",
 665                     EXT4_DYNAMIC_REV);
 666
 667        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 668        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 669        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 670        /* leave es->s_feature_*compat flags alone */
 671        /* es->s_uuid will be set by e2fsck if empty */
 672
 673        /*
 674         * The rest of the superblock fields should be zero, and if not it
 675         * means they are likely already in use, so leave them alone.  We
 676         * can leave it up to e2fsck to clean up any inconsistencies there.
 677         */
 678}
 679
 680/*
 681 * Open the external journal device
 682 */
 683static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 684{
 685        struct block_device *bdev;
 686        char b[BDEVNAME_SIZE];
 687
 688        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 689        if (IS_ERR(bdev))
 690                goto fail;
 691        return bdev;
 692
 693fail:
 694        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 695                        __bdevname(dev, b), PTR_ERR(bdev));
 696        return NULL;
 697}
 698
 699/*
 700 * Release the journal device
 701 */
 702static int ext4_blkdev_put(struct block_device *bdev)
 703{
 704        return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 705}
 706
 707static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 708{
 709        struct block_device *bdev;
 710        int ret = -ENODEV;
 711
 712        bdev = sbi->journal_bdev;
 713        if (bdev) {
 714                ret = ext4_blkdev_put(bdev);
 715                sbi->journal_bdev = NULL;
 716        }
 717        return ret;
 718}
 719
 720static inline struct inode *orphan_list_entry(struct list_head *l)
 721{
 722        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 723}
 724
 725static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 726{
 727        struct list_head *l;
 728
 729        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 730                 le32_to_cpu(sbi->s_es->s_last_orphan));
 731
 732        printk(KERN_ERR "sb_info orphan list:\n");
 733        list_for_each(l, &sbi->s_orphan) {
 734                struct inode *inode = orphan_list_entry(l);
 735                printk(KERN_ERR "  "
 736                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 737                       inode->i_sb->s_id, inode->i_ino, inode,
 738                       inode->i_mode, inode->i_nlink,
 739                       NEXT_ORPHAN(inode));
 740        }
 741}
 742
 743static void ext4_put_super(struct super_block *sb)
 744{
 745        struct ext4_sb_info *sbi = EXT4_SB(sb);
 746        struct ext4_super_block *es = sbi->s_es;
 747        int i, err;
 748
 749        ext4_unregister_li_request(sb);
 750        dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 751
 752        flush_workqueue(sbi->dio_unwritten_wq);
 753        destroy_workqueue(sbi->dio_unwritten_wq);
 754
 755        if (sbi->s_journal) {
 756                err = jbd2_journal_destroy(sbi->s_journal);
 757                sbi->s_journal = NULL;
 758                if (err < 0)
 759                        ext4_abort(sb, "Couldn't clean up the journal");
 760        }
 761
 762        ext4_es_unregister_shrinker(sb);
 763        del_timer(&sbi->s_err_report);
 764        ext4_release_system_zone(sb);
 765        ext4_mb_release(sb);
 766        ext4_ext_release(sb);
 767        ext4_xattr_put_super(sb);
 768
 769        if (!(sb->s_flags & MS_RDONLY)) {
 770                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 771                es->s_state = cpu_to_le16(sbi->s_mount_state);
 772        }
 773        if (!(sb->s_flags & MS_RDONLY))
 774                ext4_commit_super(sb, 1);
 775
 776        if (sbi->s_proc) {
 777                remove_proc_entry("options", sbi->s_proc);
 778                remove_proc_entry(sb->s_id, ext4_proc_root);
 779        }
 780        kobject_del(&sbi->s_kobj);
 781
 782        for (i = 0; i < sbi->s_gdb_count; i++)
 783                brelse(sbi->s_group_desc[i]);
 784        ext4_kvfree(sbi->s_group_desc);
 785        ext4_kvfree(sbi->s_flex_groups);
 786        percpu_counter_destroy(&sbi->s_freeclusters_counter);
 787        percpu_counter_destroy(&sbi->s_freeinodes_counter);
 788        percpu_counter_destroy(&sbi->s_dirs_counter);
 789        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 790        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
 791        brelse(sbi->s_sbh);
 792#ifdef CONFIG_QUOTA
 793        for (i = 0; i < MAXQUOTAS; i++)
 794                kfree(sbi->s_qf_names[i]);
 795#endif
 796
 797        /* Debugging code just in case the in-memory inode orphan list
 798         * isn't empty.  The on-disk one can be non-empty if we've
 799         * detected an error and taken the fs readonly, but the
 800         * in-memory list had better be clean by this point. */
 801        if (!list_empty(&sbi->s_orphan))
 802                dump_orphan_list(sb, sbi);
 803        J_ASSERT(list_empty(&sbi->s_orphan));
 804
 805        invalidate_bdev(sb->s_bdev);
 806        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 807                /*
 808                 * Invalidate the journal device's buffers.  We don't want them
 809                 * floating about in memory - the physical journal device may
 810                 * hotswapped, and it breaks the `ro-after' testing code.
 811                 */
 812                sync_blockdev(sbi->journal_bdev);
 813                invalidate_bdev(sbi->journal_bdev);
 814                ext4_blkdev_remove(sbi);
 815        }
 816        if (sbi->s_mmp_tsk)
 817                kthread_stop(sbi->s_mmp_tsk);
 818        sb->s_fs_info = NULL;
 819        /*
 820         * Now that we are completely done shutting down the
 821         * superblock, we need to actually destroy the kobject.
 822         */
 823        kobject_put(&sbi->s_kobj);
 824        wait_for_completion(&sbi->s_kobj_unregister);
 825        if (sbi->s_chksum_driver)
 826                crypto_free_shash(sbi->s_chksum_driver);
 827        kfree(sbi->s_blockgroup_lock);
 828        kfree(sbi);
 829}
 830
 /* Slab cache for struct ext4_inode_info objects; set up by init_inodecache(). */
 831static struct kmem_cache *ext4_inode_cachep;
 832
 833/*
 834 * Called inside transaction, so use GFP_NOFS
 835 */
 836static struct inode *ext4_alloc_inode(struct super_block *sb)
 837{
 838        struct ext4_inode_info *ei;
 839
 840        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 841        if (!ei)
 842                return NULL;
 843
 844        ei->vfs_inode.i_version = 1;
 845        INIT_LIST_HEAD(&ei->i_prealloc_list);
 846        spin_lock_init(&ei->i_prealloc_lock);
 847        ext4_es_init_tree(&ei->i_es_tree);
 848        rwlock_init(&ei->i_es_lock);
 849        INIT_LIST_HEAD(&ei->i_es_lru);
 850        ei->i_es_lru_nr = 0;
 851        ei->i_reserved_data_blocks = 0;
 852        ei->i_reserved_meta_blocks = 0;
 853        ei->i_allocated_meta_blocks = 0;
 854        ei->i_da_metadata_calc_len = 0;
 855        ei->i_da_metadata_calc_last_lblock = 0;
 856        spin_lock_init(&(ei->i_block_reservation_lock));
 857#ifdef CONFIG_QUOTA
 858        ei->i_reserved_quota = 0;
 859#endif
 860        ei->jinode = NULL;
 861        INIT_LIST_HEAD(&ei->i_completed_io_list);
 862        spin_lock_init(&ei->i_completed_io_lock);
 863        ei->i_sync_tid = 0;
 864        ei->i_datasync_tid = 0;
 865        atomic_set(&ei->i_ioend_count, 0);
 866        atomic_set(&ei->i_unwritten, 0);
 867        INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work);
 868
 869        return &ei->vfs_inode;
 870}
 871
 872static int ext4_drop_inode(struct inode *inode)
 873{
 874        int drop = generic_drop_inode(inode);
 875
 876        trace_ext4_drop_inode(inode, drop);
 877        return drop;
 878}
 879
 880static void ext4_i_callback(struct rcu_head *head)
 881{
 882        struct inode *inode = container_of(head, struct inode, i_rcu);
 883        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 884}
 885
 886static void ext4_destroy_inode(struct inode *inode)
 887{
 888        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 889                ext4_msg(inode->i_sb, KERN_ERR,
 890                         "Inode %lu (%p): orphan list check failed!",
 891                         inode->i_ino, EXT4_I(inode));
 892                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 893                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 894                                true);
 895                dump_stack();
 896        }
 897        call_rcu(&inode->i_rcu, ext4_i_callback);
 898}
 899
 900static void init_once(void *foo)
 901{
 902        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 903
 904        INIT_LIST_HEAD(&ei->i_orphan);
 905        init_rwsem(&ei->xattr_sem);
 906        init_rwsem(&ei->i_data_sem);
 907        inode_init_once(&ei->vfs_inode);
 908}
 909
 910static int init_inodecache(void)
 911{
 912        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 913                                             sizeof(struct ext4_inode_info),
 914                                             0, (SLAB_RECLAIM_ACCOUNT|
 915                                                SLAB_MEM_SPREAD),
 916                                             init_once);
 917        if (ext4_inode_cachep == NULL)
 918                return -ENOMEM;
 919        return 0;
 920}
 921
 922static void destroy_inodecache(void)
 923{
 924        /*
 925         * Make sure all delayed rcu free inodes are flushed before we
 926         * destroy cache.
 927         */
 928        rcu_barrier();
 929        kmem_cache_destroy(ext4_inode_cachep);
 930}
 931
 932void ext4_clear_inode(struct inode *inode)
 933{
 934        invalidate_inode_buffers(inode);
 935        clear_inode(inode);
 936        dquot_drop(inode);
 937        ext4_discard_preallocations(inode);
 938        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 939        ext4_es_lru_del(inode);
 940        if (EXT4_I(inode)->jinode) {
 941                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 942                                               EXT4_I(inode)->jinode);
 943                jbd2_free_inode(EXT4_I(inode)->jinode);
 944                EXT4_I(inode)->jinode = NULL;
 945        }
 946}
 947
 948static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 949                                        u64 ino, u32 generation)
 950{
 951        struct inode *inode;
 952
 953        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 954                return ERR_PTR(-ESTALE);
 955        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 956                return ERR_PTR(-ESTALE);
 957
 958        /* iget isn't really right if the inode is currently unallocated!!
 959         *
 960         * ext4_read_inode will return a bad_inode if the inode had been
 961         * deleted, so we should be safe.
 962         *
 963         * Currently we don't know the generation for parent directory, so
 964         * a generation of 0 means "accept any"
 965         */
 966        inode = ext4_iget(sb, ino);
 967        if (IS_ERR(inode))
 968                return ERR_CAST(inode);
 969        if (generation && inode->i_generation != generation) {
 970                iput(inode);
 971                return ERR_PTR(-ESTALE);
 972        }
 973
 974        return inode;
 975}
 976
 977static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
 978                                        int fh_len, int fh_type)
 979{
 980        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 981                                    ext4_nfs_get_inode);
 982}
 983
 984static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 985                                        int fh_len, int fh_type)
 986{
 987        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 988                                    ext4_nfs_get_inode);
 989}
 990
 991/*
 992 * Try to release metadata pages (indirect blocks, directories) which are
 993 * mapped via the block device.  Since these pages could have journal heads
 994 * which would prevent try_to_free_buffers() from freeing them, we must use
 995 * jbd2 layer's try_to_free_buffers() function to release them.
 996 */
 997static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 998                                 gfp_t wait)
 999{
1000        journal_t *journal = EXT4_SB(sb)->s_journal;

1001
1002        WARN_ON(PageChecked(page));
1003        if (!page_has_buffers(page))
1004                return 0;
1005        if (journal)
1006                return jbd2_journal_try_to_free_buffers(journal, page,
1007                                                        wait & ~__GFP_WAIT);
1008        return try_to_free_buffers(page);
1009}
1010
1011#ifdef CONFIG_QUOTA
/* Printable quota type: "user" for USRQUOTA, "group" for any other value. */
1012#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/* Token-paste `on` onto USRJQUOTA or GRPJQUOTA depending on quota type `t`. */
1013#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
1014
/*
 * Forward declarations of the static quota helpers defined elsewhere in
 * this file; several of them are wired into the operation tables below.
 */
1015static int ext4_write_dquot(struct dquot *dquot);
1016static int ext4_acquire_dquot(struct dquot *dquot);
1017static int ext4_release_dquot(struct dquot *dquot);
1018static int ext4_mark_dquot_dirty(struct dquot *dquot);
1019static int ext4_write_info(struct super_block *sb, int type);
1020static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1021                         struct path *path);
1022static int ext4_quota_on_sysfile(struct super_block *sb, int type,
1023                                 int format_id);
1024static int ext4_quota_off(struct super_block *sb, int type);
1025static int ext4_quota_off_sysfile(struct super_block *sb, int type);
1026static int ext4_quota_on_mount(struct super_block *sb, int type);
1027static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1028                               size_t len, loff_t off);
1029static ssize_t ext4_quota_write(struct super_block *sb, int type,
1030                                const char *data, size_t len, loff_t off);
1031static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1032                             unsigned int flags);
1033static int ext4_enable_quotas(struct super_block *sb);
1034
/*
 * dquot operations for ext4: ext4-specific callbacks for reserved-space
 * lookup and for writing, acquiring, releasing and dirtying dquots and
 * writing quota info; the generic dquot_alloc()/dquot_destroy() handle
 * allocation and destruction.
 */
1035static const struct dquot_operations ext4_quota_operations = {
1036        .get_reserved_space = ext4_get_reserved_space,
1037        .write_dquot    = ext4_write_dquot,
1038        .acquire_dquot  = ext4_acquire_dquot,
1039        .release_dquot  = ext4_release_dquot,
1040        .mark_dirty     = ext4_mark_dquot_dirty,
1041        .write_info     = ext4_write_info,
1042        .alloc_dquot    = dquot_alloc,
1043        .destroy_dquot  = dquot_destroy,
1044};
1045
/*
 * quotactl operations that turn quota on via a path (ext4_quota_on) and
 * off via ext4_quota_off; everything else uses the generic dquot helpers.
 */
1046static const struct quotactl_ops ext4_qctl_operations = {
1047        .quota_on       = ext4_quota_on,
1048        .quota_off      = ext4_quota_off,
1049        .quota_sync     = dquot_quota_sync,
1050        .get_info       = dquot_get_dqinfo,
1051        .set_info       = dquot_set_dqinfo,
1052        .get_dqblk      = dquot_get_dqblk,
1053        .set_dqblk      = dquot_set_dqblk
1054};
1055
/*
 * Variant of the quotactl operations that uses the *_sysfile on/off
 * handlers: quota is enabled through .quota_on_meta rather than .quota_on.
 * The remaining entries are identical to ext4_qctl_operations.
 */
1056static const struct quotactl_ops ext4_qctl_sysfile_operations = {
1057        .quota_on_meta  = ext4_quota_on_sysfile,
1058        .quota_off      = ext4_quota_off_sysfile,
1059        .quota_sync     = dquot_quota_sync,
1060        .get_info       = dquot_get_dqinfo,
1061        .set_info       = dquot_set_dqinfo,
1062        .get_dqblk      = dquot_get_dqblk,
1063        .set_dqblk      = dquot_set_dqblk
1064};
1065#endif
1066
/*
 * Full super_operations table, including sync_fs and the freeze/unfreeze
 * hooks.  NOTE(review): presumably installed when the filesystem has a
 * journal -- the assignment is outside this excerpt; confirm there.
 */
1067static const struct super_operations ext4_sops = {
1068        .alloc_inode    = ext4_alloc_inode,
1069        .destroy_inode  = ext4_destroy_inode,
1070        .write_inode    = ext4_write_inode,
1071        .dirty_inode    = ext4_dirty_inode,
1072        .drop_inode     = ext4_drop_inode,
1073        .evict_inode    = ext4_evict_inode,
1074        .put_super      = ext4_put_super,
1075        .sync_fs        = ext4_sync_fs,
1076        .freeze_fs      = ext4_freeze,
1077        .unfreeze_fs    = ext4_unfreeze,
1078        .statfs         = ext4_statfs,
1079        .remount_fs     = ext4_remount,
1080        .show_options   = ext4_show_options,
1081#ifdef CONFIG_QUOTA
1082        .quota_read     = ext4_quota_read,
1083        .quota_write    = ext4_quota_write,
1084#endif
1085        .bdev_try_to_free_page = bdev_try_to_free_page,
1086};
1087
/*
 * Same as ext4_sops except that .sync_fs, .freeze_fs and .unfreeze_fs are
 * left unset.  NOTE(review): the name suggests this is for journal-less
 * mounts; the code selecting it is outside this excerpt.
 */
1088static const struct super_operations ext4_nojournal_sops = {
1089        .alloc_inode    = ext4_alloc_inode,
1090        .destroy_inode  = ext4_destroy_inode,
1091        .write_inode    = ext4_write_inode,
1092        .dirty_inode    = ext4_dirty_inode,
1093        .drop_inode     = ext4_drop_inode,
1094        .evict_inode    = ext4_evict_inode,
1095        .put_super      = ext4_put_super,
1096        .statfs         = ext4_statfs,
1097        .remount_fs     = ext4_remount,
1098        .show_options   = ext4_show_options,
1099#ifdef CONFIG_QUOTA
1100        .quota_read     = ext4_quota_read,
1101        .quota_write    = ext4_quota_write,
1102#endif
1103        .bdev_try_to_free_page = bdev_try_to_free_page,
1104};
1105
/*
 * Export operations: file-handle decoding goes through the ext4_fh_to_*
 * wrappers above, parent lookup through ext4_get_parent.
 */
1106static const struct export_operations ext4_export_ops = {
1107        .fh_to_dentry = ext4_fh_to_dentry,
1108        .fh_to_parent = ext4_fh_to_parent,
1109        .get_parent = ext4_get_parent,
1110};
1111
/*
 * Mount option identifiers.  Strings are mapped to these values by the
 * tokens[] table and the values are acted on in handle_mount_opt().
 * Opt_err also serves as the end-of-table sentinel in both tokens[] and
 * ext4_mount_opts[].  Opt_mblk_io_submit and Opt_nomblk_io_submit are still
 * declared here, but tokens[] maps their option strings to Opt_removed.
 */
1112enum {
1113        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1114        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1115        Opt_nouid32, Opt_debug, Opt_removed,
1116        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1117        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1118        Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1119        Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
1120        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1121        Opt_data_err_abort, Opt_data_err_ignore,
1122        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1123        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1124        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1125        Opt_usrquota, Opt_grpquota, Opt_i_version,
1126        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1127        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1128        Opt_inode_readahead_blks, Opt_journal_ioprio,
1129        Opt_dioread_nolock, Opt_dioread_lock,
1130        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1131        Opt_max_dir_size_kb,
1132};
1133
/*
 * Mount option patterns handed to match_token() by parse_options().
 * Several Opt_* values have more than one spelling (aliases such as
 * "grpid"/"bsdgroups", or a form with and without "=%u"), and obsolete
 * options map to Opt_removed.  The table ends with {Opt_err, NULL}.
 *
 * NOTE(review): match_token() presumably returns the first entry that
 * matches, so pairs such as "usrjquota=" / "usrjquota=%s" and
 * "barrier=%u" / "barrier" depend on their relative order -- confirm
 * before reordering anything here.
 */
1134static const match_table_t tokens = {
1135        {Opt_bsd_df, "bsddf"},
1136        {Opt_minix_df, "minixdf"},
1137        {Opt_grpid, "grpid"},
1138        {Opt_grpid, "bsdgroups"},
1139        {Opt_nogrpid, "nogrpid"},
1140        {Opt_nogrpid, "sysvgroups"},
1141        {Opt_resgid, "resgid=%u"},
1142        {Opt_resuid, "resuid=%u"},
1143        {Opt_sb, "sb=%u"},
1144        {Opt_err_cont, "errors=continue"},
1145        {Opt_err_panic, "errors=panic"},
1146        {Opt_err_ro, "errors=remount-ro"},
1147        {Opt_nouid32, "nouid32"},
1148        {Opt_debug, "debug"},
1149        {Opt_removed, "oldalloc"},
1150        {Opt_removed, "orlov"},
1151        {Opt_user_xattr, "user_xattr"},
1152        {Opt_nouser_xattr, "nouser_xattr"},
1153        {Opt_acl, "acl"},
1154        {Opt_noacl, "noacl"},
1155        {Opt_noload, "norecovery"},
1156        {Opt_noload, "noload"},
1157        {Opt_removed, "nobh"},
1158        {Opt_removed, "bh"},
1159        {Opt_commit, "commit=%u"},
1160        {Opt_min_batch_time, "min_batch_time=%u"},
1161        {Opt_max_batch_time, "max_batch_time=%u"},
1162        {Opt_journal_dev, "journal_dev=%u"},
1163        {Opt_journal_checksum, "journal_checksum"},
1164        {Opt_journal_async_commit, "journal_async_commit"},
1165        {Opt_abort, "abort"},
1166        {Opt_data_journal, "data=journal"},
1167        {Opt_data_ordered, "data=ordered"},
1168        {Opt_data_writeback, "data=writeback"},
1169        {Opt_data_err_abort, "data_err=abort"},
1170        {Opt_data_err_ignore, "data_err=ignore"},
1171        {Opt_offusrjquota, "usrjquota="},
1172        {Opt_usrjquota, "usrjquota=%s"},
1173        {Opt_offgrpjquota, "grpjquota="},
1174        {Opt_grpjquota, "grpjquota=%s"},
1175        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1176        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1177        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1178        {Opt_grpquota, "grpquota"},
1179        {Opt_noquota, "noquota"},
1180        {Opt_quota, "quota"},
1181        {Opt_usrquota, "usrquota"},
1182        {Opt_barrier, "barrier=%u"},
1183        {Opt_barrier, "barrier"},
1184        {Opt_nobarrier, "nobarrier"},
1185        {Opt_i_version, "i_version"},
1186        {Opt_stripe, "stripe=%u"},
1187        {Opt_delalloc, "delalloc"},
1188        {Opt_nodelalloc, "nodelalloc"},
1189        {Opt_removed, "mblk_io_submit"},
1190        {Opt_removed, "nomblk_io_submit"},
1191        {Opt_block_validity, "block_validity"},
1192        {Opt_noblock_validity, "noblock_validity"},
1193        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1194        {Opt_journal_ioprio, "journal_ioprio=%u"},
1195        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1196        {Opt_auto_da_alloc, "auto_da_alloc"},
1197        {Opt_noauto_da_alloc, "noauto_da_alloc"},
1198        {Opt_dioread_nolock, "dioread_nolock"},
1199        {Opt_dioread_lock, "dioread_lock"},
1200        {Opt_discard, "discard"},
1201        {Opt_nodiscard, "nodiscard"},
1202        {Opt_init_itable, "init_itable=%u"},
1203        {Opt_init_itable, "init_itable"},
1204        {Opt_noinit_itable, "noinit_itable"},
1205        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1206        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
1207        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
1208        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
1209        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1210        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
1211        {Opt_err, NULL},
1212};
1213
1214static ext4_fsblk_t get_sb_block(void **data)
1215{
1216        ext4_fsblk_t    sb_block;
1217        char            *options = (char *) *data;
1218
1219        if (!options || strncmp(options, "sb=", 3) != 0)
1220                return 1;       /* Default location */
1221
1222        options += 3;
1223        /* TODO: use simple_strtoll with >32bit ext4 */
1224        sb_block = simple_strtoul(options, &options, 0);
1225        if (*options && *options != ',') {
1226                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1227                       (char *) *data);
1228                return 1;
1229        }
1230        if (*options == ',')
1231                options++;
1232        *data = (void *) options;
1233
1234        return sb_block;
1235}
1236
/* Default journal I/O priority: best-effort class, level 3. */
1237#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/*
 * Format string for deprecation warnings.  handle_mount_opt() passes it to
 * ext4_msg() with the option text and a kernel version string.
 */
1238static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
1239        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1240
1241#ifdef CONFIG_QUOTA
1242static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1243{
1244        struct ext4_sb_info *sbi = EXT4_SB(sb);
1245        char *qname;
1246        int ret = -1;
1247
1248        if (sb_any_quota_loaded(sb) &&
1249                !sbi->s_qf_names[qtype]) {
1250                ext4_msg(sb, KERN_ERR,
1251                        "Cannot change journaled "
1252                        "quota options when quota turned on");
1253                return -1;
1254        }
1255        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1256                ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
1257                         "when QUOTA feature is enabled");
1258                return -1;
1259        }
1260        qname = match_strdup(args);
1261        if (!qname) {
1262                ext4_msg(sb, KERN_ERR,
1263                        "Not enough memory for storing quotafile name");
1264                return -1;
1265        }
1266        if (sbi->s_qf_names[qtype]) {
1267                if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1268                        ret = 1;
1269                else
1270                        ext4_msg(sb, KERN_ERR,
1271                                 "%s quota file already specified",
1272                                 QTYPE2NAME(qtype));
1273                goto errout;
1274        }
1275        if (strchr(qname, '/')) {
1276                ext4_msg(sb, KERN_ERR,
1277                        "quotafile must be on filesystem root");
1278                goto errout;
1279        }
1280        sbi->s_qf_names[qtype] = qname;
1281        set_opt(sb, QUOTA);
1282        return 1;
1283errout:
1284        kfree(qname);
1285        return ret;
1286}
1287
1288static int clear_qf_name(struct super_block *sb, int qtype)
1289{
1290
1291        struct ext4_sb_info *sbi = EXT4_SB(sb);
1292
1293        if (sb_any_quota_loaded(sb) &&
1294                sbi->s_qf_names[qtype]) {
1295                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1296                        " when quota turned on");
1297                return -1;
1298        }
1299        kfree(sbi->s_qf_names[qtype]);
1300        sbi->s_qf_names[qtype] = NULL;
1301        return 1;
1302}
1303#endif
1304
/*
 * Flag bits for the `flags` field of ext4_mount_opts[] entries, as they are
 * interpreted by handle_mount_opt():
 *   MOPT_SET / MOPT_CLEAR  - set or clear the entry's mount_opt bits
 *   MOPT_NOSUPPORT         - log "option not supported"
 *   MOPT_EXPLICIT          - also set EXPLICIT_DELALLOC
 *   MOPT_CLEAR_ERR         - clear ERRORS_MASK before applying
 *   MOPT_GTE0              - reject a negative numeric argument
 *   MOPT_Q                 - quota option; no flag with CONFIG_QUOTA,
 *                            MOPT_NOSUPPORT without it
 *   MOPT_QFMT              - mount_opt is stored as the journaled quota
 *                            format; MOPT_NOSUPPORT without CONFIG_QUOTA
 *   MOPT_DATAJ             - mount_opt is a data journalling mode
 *   MOPT_NO_EXT2/NO_EXT3   - rejected when IS_EXT2_SB()/IS_EXT3_SB() is true
 */
1305#define MOPT_SET        0x0001
1306#define MOPT_CLEAR      0x0002
1307#define MOPT_NOSUPPORT  0x0004
1308#define MOPT_EXPLICIT   0x0008
1309#define MOPT_CLEAR_ERR  0x0010
1310#define MOPT_GTE0       0x0020
1311#ifdef CONFIG_QUOTA
1312#define MOPT_Q          0
1313#define MOPT_QFMT       0x0040
1314#else
1315#define MOPT_Q          MOPT_NOSUPPORT
1316#define MOPT_QFMT       MOPT_NOSUPPORT
1317#endif
1318#define MOPT_DATAJ      0x0080
1319#define MOPT_NO_EXT2    0x0100
1320#define MOPT_NO_EXT3    0x0200
1321#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1322
/*
 * Per-option handling table searched linearly by handle_mount_opt().
 *   token     - Opt_* value the entry applies to
 *   mount_opt - EXT4_MOUNT_* bits to set/clear, a QFMT_* value for
 *               MOPT_QFMT entries, or 0 when the option carries a value
 *               that is stored elsewhere
 *   flags     - MOPT_* bits describing how the option is applied
 * The {Opt_err, 0, 0} entry terminates the table.
 */
1323static const struct mount_opts {
1324        int     token;
1325        int     mount_opt;
1326        int     flags;
1327} ext4_mount_opts[] = {
1328        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1329        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1330        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1331        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1332        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1333        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1334        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1335         MOPT_EXT4_ONLY | MOPT_SET},
1336        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1337         MOPT_EXT4_ONLY | MOPT_CLEAR},
1338        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1339        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1340        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1341         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1342        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1343         MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
1344        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1345         MOPT_EXT4_ONLY | MOPT_SET},
1346        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1347                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
1348         MOPT_EXT4_ONLY | MOPT_SET},
1349        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1350        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1351        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1352        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1353        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1354         MOPT_NO_EXT2 | MOPT_SET},
1355        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1356         MOPT_NO_EXT2 | MOPT_CLEAR},
1357        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1358        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1359        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1360        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1361        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1362        {Opt_commit, 0, MOPT_GTE0},
1363        {Opt_max_batch_time, 0, MOPT_GTE0},
1364        {Opt_min_batch_time, 0, MOPT_GTE0},
1365        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1366        {Opt_init_itable, 0, MOPT_GTE0},
1367        {Opt_stripe, 0, MOPT_GTE0},
1368        {Opt_resuid, 0, MOPT_GTE0},
1369        {Opt_resgid, 0, MOPT_GTE0},
1370        {Opt_journal_dev, 0, MOPT_GTE0},
1371        {Opt_journal_ioprio, 0, MOPT_GTE0},
1372        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1373        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1374        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1375         MOPT_NO_EXT2 | MOPT_DATAJ},
1376        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1377        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1378#ifdef CONFIG_EXT4_FS_POSIX_ACL
1379        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1380        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
1381#else
1382        {Opt_acl, 0, MOPT_NOSUPPORT},
1383        {Opt_noacl, 0, MOPT_NOSUPPORT},
1384#endif
1385        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1386        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1387        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1388        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1389                                                        MOPT_SET | MOPT_Q},
1390        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1391                                                        MOPT_SET | MOPT_Q},
1392        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1393                       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
1394        {Opt_usrjquota, 0, MOPT_Q},
1395        {Opt_grpjquota, 0, MOPT_Q},
1396        {Opt_offusrjquota, 0, MOPT_Q},
1397        {Opt_offgrpjquota, 0, MOPT_Q},
1398        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1399        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1400        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1401        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1402        {Opt_err, 0, 0}
1403};
1404
1405static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1406                            substring_t *args, unsigned long *journal_devnum,
1407                            unsigned int *journal_ioprio, int is_remount)
1408{
1409        struct ext4_sb_info *sbi = EXT4_SB(sb);
1410        const struct mount_opts *m;
1411        kuid_t uid;
1412        kgid_t gid;
1413        int arg = 0;
1414
1415#ifdef CONFIG_QUOTA
1416        if (token == Opt_usrjquota)
1417                return set_qf_name(sb, USRQUOTA, &args[0]);
1418        else if (token == Opt_grpjquota)
1419                return set_qf_name(sb, GRPQUOTA, &args[0]);
1420        else if (token == Opt_offusrjquota)
1421                return clear_qf_name(sb, USRQUOTA);
1422        else if (token == Opt_offgrpjquota)
1423                return clear_qf_name(sb, GRPQUOTA);
1424#endif
1425        switch (token) {
1426        case Opt_noacl:
1427        case Opt_nouser_xattr:
1428                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1429                break;
1430        case Opt_sb:
1431                return 1;       /* handled by get_sb_block() */
1432        case Opt_removed:
1433                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1434                return 1;
1435        case Opt_abort:
1436                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1437                return 1;
1438        case Opt_i_version:
1439                sb->s_flags |= MS_I_VERSION;
1440                return 1;
1441        }
1442
1443        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1444                if (token == m->token)
1445                        break;
1446
1447        if (m->token == Opt_err) {
1448                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1449                         "or missing value", opt);
1450                return -1;
1451        }
1452
1453        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1454                ext4_msg(sb, KERN_ERR,
1455                         "Mount option \"%s\" incompatible with ext2", opt);
1456                return -1;
1457        }
1458        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1459                ext4_msg(sb, KERN_ERR,
1460                         "Mount option \"%s\" incompatible with ext3", opt);
1461                return -1;
1462        }
1463
1464        if (args->from && match_int(args, &arg))
1465                return -1;
1466        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1467                return -1;
1468        if (m->flags & MOPT_EXPLICIT)
1469                set_opt2(sb, EXPLICIT_DELALLOC);
1470        if (m->flags & MOPT_CLEAR_ERR)
1471                clear_opt(sb, ERRORS_MASK);
1472        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1473                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1474                         "options when quota turned on");
1475                return -1;
1476        }
1477
1478        if (m->flags & MOPT_NOSUPPORT) {
1479                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1480        } else if (token == Opt_commit) {
1481                if (arg == 0)
1482                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1483                sbi->s_commit_interval = HZ * arg;
1484        } else if (token == Opt_max_batch_time) {
1485                if (arg == 0)
1486                        arg = EXT4_DEF_MAX_BATCH_TIME;
1487                sbi->s_max_batch_time = arg;
1488        } else if (token == Opt_min_batch_time) {
1489                sbi->s_min_batch_time = arg;
1490        } else if (token == Opt_inode_readahead_blks) {
1491                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1492                        ext4_msg(sb, KERN_ERR,
1493                                 "EXT4-fs: inode_readahead_blks must be "
1494                                 "0 or a power of 2 smaller than 2^31");
1495                        return -1;
1496                }
1497                sbi->s_inode_readahead_blks = arg;
1498        } else if (token == Opt_init_itable) {
1499                set_opt(sb, INIT_INODE_TABLE);
1500                if (!args->from)
1501                        arg = EXT4_DEF_LI_WAIT_MULT;
1502                sbi->s_li_wait_mult = arg;
1503        } else if (token == Opt_max_dir_size_kb) {
1504                sbi->s_max_dir_size_kb = arg;
1505        } else if (token == Opt_stripe) {
1506                sbi->s_stripe = arg;
1507        } else if (token == Opt_resuid) {
1508                uid = make_kuid(current_user_ns(), arg);
1509                if (!uid_valid(uid)) {
1510                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1511                        return -1;
1512                }
1513                sbi->s_resuid = uid;
1514        } else if (token == Opt_resgid) {
1515                gid = make_kgid(current_user_ns(), arg);
1516                if (!gid_valid(gid)) {
1517                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1518                        return -1;
1519                }
1520                sbi->s_resgid = gid;
1521        } else if (token == Opt_journal_dev) {
1522                if (is_remount) {
1523                        ext4_msg(sb, KERN_ERR,
1524                                 "Cannot specify journal on remount");
1525                        return -1;
1526                }
1527                *journal_devnum = arg;
1528        } else if (token == Opt_journal_ioprio) {
1529                if (arg > 7) {
1530                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1531                                 " (must be 0-7)");
1532                        return -1;
1533                }
1534                *journal_ioprio =
1535                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1536        } else if (m->flags & MOPT_DATAJ) {
1537                if (is_remount) {
1538                        if (!sbi->s_journal)
1539                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1540                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1541                                ext4_msg(sb, KERN_ERR,
1542                                         "Cannot change data mode on remount");
1543                                return -1;
1544                        }
1545                } else {
1546                        clear_opt(sb, DATA_FLAGS);
1547                        sbi->s_mount_opt |= m->mount_opt;
1548                }
1549#ifdef CONFIG_QUOTA
1550        } else if (m->flags & MOPT_QFMT) {
1551                if (sb_any_quota_loaded(sb) &&
1552                    sbi->s_jquota_fmt != m->mount_opt) {
1553                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1554                                 "quota options when quota turned on");
1555                        return -1;
1556                }
1557                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1558                                               EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1559                        ext4_msg(sb, KERN_ERR,
1560                                 "Cannot set journaled quota options "
1561                                 "when QUOTA feature is enabled");
1562                        return -1;
1563                }
1564                sbi->s_jquota_fmt = m->mount_opt;
1565#endif
1566        } else {
1567                if (!args->from)
1568                        arg = 1;
1569                if (m->flags & MOPT_CLEAR)
1570                        arg = !arg;
1571                else if (unlikely(!(m->flags & MOPT_SET))) {
1572                        ext4_msg(sb, KERN_WARNING,
1573                                 "buggy handling of option %s", opt);
1574                        WARN_ON(1);
1575                        return -1;
1576                }
1577                if (arg != 0)
1578                        sbi->s_mount_opt |= m->mount_opt;
1579                else
1580                        sbi->s_mount_opt &= ~m->mount_opt;
1581        }
1582        return 1;
1583}
1584
1585static int parse_options(char *options, struct super_block *sb,
1586                         unsigned long *journal_devnum,
1587                         unsigned int *journal_ioprio,
1588                         int is_remount)
1589{
1590        struct ext4_sb_info *sbi = EXT4_SB(sb);
1591        char *p;
1592        substring_t args[MAX_OPT_ARGS];
1593        int token;
1594
1595        if (!options)
1596                return 1;
1597
1598        while ((p = strsep(&options, ",")) != NULL) {
1599                if (!*p)
1600                        continue;
1601                /*
1602                 * Initialize args struct so we know whether arg was
1603                 * found; some options take optional arguments.
1604                 */
1605                args[0].to = args[0].from = NULL;
1606                token = match_token(p, tokens, args);
1607                if (handle_mount_opt(sb, p, token, args, journal_devnum,
1608                                     journal_ioprio, is_remount) < 0)
1609                        return 0;
1610        }
1611#ifdef CONFIG_QUOTA
1612        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
1613            (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
1614                ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
1615                         "feature is enabled");
1616                return 0;
1617        }
1618        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1619                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1620                        clear_opt(sb, USRQUOTA);
1621
1622                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1623                        clear_opt(sb, GRPQUOTA);
1624
1625                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1626                        ext4_msg(sb, KERN_ERR, "old and new quota "
1627                                        "format mixing");
1628                        return 0;
1629                }
1630
1631                if (!sbi->s_jquota_fmt) {
1632                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1633                                        "not specified");
1634                        return 0;
1635                }
1636        } else {
1637                if (sbi->s_jquota_fmt) {
1638                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1639                                        "specified with no journaling "
1640                                        "enabled");
1641                        return 0;
1642                }
1643        }
1644#endif
1645        if (test_opt(sb, DIOREAD_NOLOCK)) {
1646                int blocksize =
1647                        BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1648
1649                if (blocksize < PAGE_CACHE_SIZE) {
1650                        ext4_msg(sb, KERN_ERR, "can't mount with "
1651                                 "dioread_nolock if block size != PAGE_SIZE");
1652                        return 0;
1653                }
1654        }
1655        return 1;
1656}
1657
/*
 * Emit the quota related mount options (",jqfmt=", ",usrjquota=",
 * ",grpjquota=", ",usrquota", ",grpquota") that are currently in effect.
 * Compiles to an empty function when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
                                           struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
        struct ext4_sb_info *sbi = EXT4_SB(sb);

        if (sbi->s_jquota_fmt) {
                const char *fmtname;

                /* Unrecognised formats are shown with an empty name. */
                if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
                        fmtname = "vfsold";
                else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
                        fmtname = "vfsv0";
                else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
                        fmtname = "vfsv1";
                else
                        fmtname = "";
                seq_printf(seq, ",jqfmt=%s", fmtname);
        }

        if (sbi->s_qf_names[USRQUOTA] != NULL)
                seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

        if (sbi->s_qf_names[GRPQUOTA] != NULL)
                seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

        if (test_opt(sb, USRQUOTA))
                seq_puts(seq, ",usrquota");

        if (test_opt(sb, GRPQUOTA))
                seq_puts(seq, ",grpquota");
#endif
}
1694
1695static const char *token2str(int token)
1696{
1697        const struct match_token *t;
1698
1699        for (t = tokens; t->token != Opt_err; t++)
1700                if (t->token == token && !strchr(t->pattern, '='))
1701                        break;
1702        return t->pattern;
1703}
1704
1705/*
1706 * Show an option if
1707 *  - it's set to a non-default value OR
1708 *  - if the per-sb default is different from the global default
1709 */
1710static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1711                              int nodefs)
1712{
1713        struct ext4_sb_info *sbi = EXT4_SB(sb);
1714        struct ext4_super_block *es = sbi->s_es;
1715        int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1716        const struct mount_opts *m;
1717        char sep = nodefs ? '\n' : ',';
1718
1719#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1720#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1721
1722        if (sbi->s_sb_block != 1)
1723                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1724
1725        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1726                int want_set = m->flags & MOPT_SET;
1727                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1728                    (m->flags & MOPT_CLEAR_ERR))
1729                        continue;
1730                if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1731                        continue; /* skip if same as the default */
1732                if ((want_set &&
1733                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1734                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1735                        continue; /* select Opt_noFoo vs Opt_Foo */
1736                SEQ_OPTS_PRINT("%s", token2str(m->token));
1737        }
1738
1739        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1740            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1741                SEQ_OPTS_PRINT("resuid=%u",
1742                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
1743        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1744            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1745                SEQ_OPTS_PRINT("resgid=%u",
1746                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
1747        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1748        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1749                SEQ_OPTS_PUTS("errors=remount-ro");
1750        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1751                SEQ_OPTS_PUTS("errors=continue");
1752        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1753                SEQ_OPTS_PUTS("errors=panic");
1754        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1755                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1756        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1757                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1758        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1759                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1760        if (sb->s_flags & MS_I_VERSION)
1761                SEQ_OPTS_PUTS("i_version");
1762        if (nodefs || sbi->s_stripe)
1763                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1764        if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1765                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1766                        SEQ_OPTS_PUTS("data=journal");
1767                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1768                        SEQ_OPTS_PUTS("data=ordered");
1769                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1770                        SEQ_OPTS_PUTS("data=writeback");
1771        }
1772        if (nodefs ||
1773            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
1774                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
1775                               sbi->s_inode_readahead_blks);
1776
1777        if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1778                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1779                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1780        if (nodefs || sbi->s_max_dir_size_kb)
1781                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1782
1783        ext4_show_quota_options(seq, sb);
1784        return 0;
1785}
1786
1787static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1788{
1789        return _ext4_show_options(seq, root->d_sb, 0);
1790}
1791
1792static int options_seq_show(struct seq_file *seq, void *offset)
1793{
1794        struct super_block *sb = seq->private;
1795        int rc;
1796
1797        seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1798        rc = _ext4_show_options(seq, sb, 1);
1799        seq_puts(seq, "\n");
1800        return rc;
1801}
1802
1803static int options_open_fs(struct inode *inode, struct file *file)
1804{
1805        return single_open(file, options_seq_show, PDE(inode)->data);
1806}
1807
1808static const struct file_operations ext4_seq_options_fops = {
1809        .owner = THIS_MODULE,
1810        .open = options_open_fs,
1811        .read = seq_read,
1812        .llseek = seq_lseek,
1813        .release = single_release,
1814};
1815
1816static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1817                            int read_only)
1818{
1819        struct ext4_sb_info *sbi = EXT4_SB(sb);
1820        int res = 0;
1821
1822        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1823                ext4_msg(sb, KERN_ERR, "revision level too high, "
1824                         "forcing read-only mode");
1825                res = MS_RDONLY;
1826        }
1827        if (read_only)
1828                goto done;
1829        if (!(sbi->s_mount_state & EXT4_VALID_FS))
1830                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1831                         "running e2fsck is recommended");
1832        else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1833                ext4_msg(sb, KERN_WARNING,
1834                         "warning: mounting fs with errors, "
1835                         "running e2fsck is recommended");
1836        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1837                 le16_to_cpu(es->s_mnt_count) >=
1838                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1839                ext4_msg(sb, KERN_WARNING,
1840                         "warning: maximal mount count reached, "
1841                         "running e2fsck is recommended");
1842        else if (le32_to_cpu(es->s_checkinterval) &&
1843                (le32_to_cpu(es->s_lastcheck) +
1844                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1845                ext4_msg(sb, KERN_WARNING,
1846                         "warning: checktime reached, "
1847                         "running e2fsck is recommended");
1848        if (!sbi->s_journal)
1849                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1850        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1851                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1852        le16_add_cpu(&es->s_mnt_count, 1);
1853        es->s_mtime = cpu_to_le32(get_seconds());
1854        ext4_update_dynamic_rev(sb);
1855        if (sbi->s_journal)
1856                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1857
1858        ext4_commit_super(sb, 1);
1859done:
1860        if (test_opt(sb, DEBUG))
1861                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1862                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1863                        sb->s_blocksize,
1864                        sbi->s_groups_count,
1865                        EXT4_BLOCKS_PER_GROUP(sb),
1866                        EXT4_INODES_PER_GROUP(sb),
1867                        sbi->s_mount_opt, sbi->s_mount_opt2);
1868
1869        cleancache_init_fs(sb);
1870        return res;
1871}
1872
1873int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1874{
1875        struct ext4_sb_info *sbi = EXT4_SB(sb);
1876        struct flex_groups *new_groups;
1877        int size;
1878
1879        if (!sbi->s_log_groups_per_flex)
1880                return 0;
1881
1882        size = ext4_flex_group(sbi, ngroup - 1) + 1;
1883        if (size <= sbi->s_flex_groups_allocated)
1884                return 0;
1885
1886        size = roundup_pow_of_two(size * sizeof(struct flex_groups));
1887        new_groups = ext4_kvzalloc(size, GFP_KERNEL);
1888        if (!new_groups) {
1889                ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
1890                         size / (int) sizeof(struct flex_groups));
1891                return -ENOMEM;
1892        }
1893
1894        if (sbi->s_flex_groups) {
1895                memcpy(new_groups, sbi->s_flex_groups,
1896                       (sbi->s_flex_groups_allocated *
1897                        sizeof(struct flex_groups)));
1898                ext4_kvfree(sbi->s_flex_groups);
1899        }
1900        sbi->s_flex_groups = new_groups;
1901        sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
1902        return 0;
1903}
1904
1905static int ext4_fill_flex_info(struct super_block *sb)
1906{
1907        struct ext4_sb_info *sbi = EXT4_SB(sb);
1908        struct ext4_group_desc *gdp = NULL;
1909        ext4_group_t flex_group;
1910        unsigned int groups_per_flex = 0;
1911        int i, err;
1912
1913        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1914        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
1915                sbi->s_log_groups_per_flex = 0;
1916                return 1;
1917        }
1918        groups_per_flex = 1U << sbi->s_log_groups_per_flex;
1919
1920        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
1921        if (err)
1922                goto failed;
1923
1924        for (i = 0; i < sbi->s_groups_count; i++) {
1925                gdp = ext4_get_group_desc(sb, i, NULL);
1926
1927                flex_group = ext4_flex_group(sbi, i);
1928                atomic_add(ext4_free_inodes_count(sb, gdp),
1929                           &sbi->s_flex_groups[flex_group].free_inodes);
1930                atomic64_add(ext4_free_group_clusters(sb, gdp),
1931                             &sbi->s_flex_groups[flex_group].free_clusters);
1932                atomic_add(ext4_used_dirs_count(sb, gdp),
1933                           &sbi->s_flex_groups[flex_group].used_dirs);
1934        }
1935
1936        return 1;
1937failed:
1938        return 0;
1939}
1940
1941static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1942                                   struct ext4_group_desc *gdp)
1943{
1944        int offset;
1945        __u16 crc = 0;
1946        __le32 le_group = cpu_to_le32(block_group);
1947
1948        if ((sbi->s_es->s_feature_ro_compat &
1949             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
1950                /* Use new metadata_csum algorithm */
1951                __u16 old_csum;
1952                __u32 csum32;
1953
1954                old_csum = gdp->bg_checksum;
1955                gdp->bg_checksum = 0;
1956                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
1957                                     sizeof(le_group));
1958                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
1959                                     sbi->s_desc_size);
1960                gdp->bg_checksum = old_csum;
1961
1962                crc = csum32 & 0xFFFF;
1963                goto out;
1964        }
1965
1966        /* old crc16 code */
1967        offset = offsetof(struct ext4_group_desc, bg_checksum);
1968
1969        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1970        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1971        crc = crc16(crc, (__u8 *)gdp, offset);
1972        offset += sizeof(gdp->bg_checksum); /* skip checksum */
1973        /* for checksum of struct ext4_group_desc do the rest...*/
1974        if ((sbi->s_es->s_feature_incompat &
1975             cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1976            offset < le16_to_cpu(sbi->s_es->s_desc_size))
1977                crc = crc16(crc, (__u8 *)gdp + offset,
1978                            le16_to_cpu(sbi->s_es->s_desc_size) -
1979                                offset);
1980
1981out:
1982        return cpu_to_le16(crc);
1983}
1984
1985int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
1986                                struct ext4_group_desc *gdp)
1987{
1988        if (ext4_has_group_desc_csum(sb) &&
1989            (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
1990                                                      block_group, gdp)))
1991                return 0;
1992
1993        return 1;
1994}
1995
1996void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
1997                              struct ext4_group_desc *gdp)
1998{
1999        if (!ext4_has_group_desc_csum(sb))
2000                return;

2001        gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2002}
2003
2004/* Called at mount-time, super-block is locked */
2005static int ext4_check_descriptors(struct super_block *sb,
2006                                  ext4_group_t *first_not_zeroed)
2007{
2008        struct ext4_sb_info *sbi = EXT4_SB(sb);
2009        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2010        ext4_fsblk_t last_block;
2011        ext4_fsblk_t block_bitmap;
2012        ext4_fsblk_t inode_bitmap;
2013        ext4_fsblk_t inode_table;
2014        int flexbg_flag = 0;
2015        ext4_group_t i, grp = sbi->s_groups_count;
2016
2017        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2018                flexbg_flag = 1;
2019
2020        ext4_debug("Checking group descriptors");
2021
2022        for (i = 0; i < sbi->s_groups_count; i++) {
2023                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2024
2025                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2026                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2027                else
2028                        last_block = first_block +
2029                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2030
2031                if ((grp == sbi->s_groups_count) &&
2032                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2033                        grp = i;
2034
2035                block_bitmap = ext4_block_bitmap(sb, gdp);
2036                if (block_bitmap < first_block || block_bitmap > last_block) {
2037                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2038                               "Block bitmap for group %u not in group "
2039                               "(block %llu)!", i, block_bitmap);
2040                        return 0;
2041                }
2042                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2043                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2044                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2045                               "Inode bitmap for group %u not in group "
2046                               "(block %llu)!", i, inode_bitmap);
2047                        return 0;
2048                }
2049                inode_table = ext4_inode_table(sb, gdp);
2050                if (inode_table < first_block ||
2051                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2052                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2053                               "Inode table for group %u not in group "
2054                               "(block %llu)!", i, inode_table);
2055                        return 0;
2056                }
2057                ext4_lock_group(sb, i);
2058                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2059                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2060                                 "Checksum for group %u failed (%u!=%u)",
2061                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2062                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2063                        if (!(sb->s_flags & MS_RDONLY)) {
2064                                ext4_unlock_group(sb, i);
2065                                return 0;
2066                        }
2067                }
2068                ext4_unlock_group(sb, i);
2069                if (!flexbg_flag)
2070                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2071        }
2072        if (NULL != first_not_zeroed)
2073                *first_not_zeroed = grp;
2074
2075        ext4_free_blocks_count_set(sbi->s_es,
2076                                   EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2077        sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2078        return 1;
2079}
2080
2081/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2082 * the superblock) which were deleted from all directories, but held open by
2083 * a process at the time of a crash.  We walk the list and try to delete these
2084 * inodes at recovery time (only with a read-write filesystem).
2085 *
2086 * In order to keep the orphan inode chain consistent during traversal (in
2087 * case of crash during recovery), we link each inode into the superblock
2088 * orphan list_head and handle it the same way as an inode deletion during
2089 * normal operation (which journals the operations for us).
2090 *
2091 * We only do an iget() and an iput() on each inode, which is very safe if we
2092 * accidentally point at an in-use or already deleted inode.  The worst that
2093 * can happen in this case is that we get a "bit already cleared" message from
2094 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2095 * e2fsck was run on this filesystem, and it must have already done the orphan
2096 * inode cleanup for us, so we can safely abort without any further action.
2097 */
2098static void ext4_orphan_cleanup(struct super_block *sb,
2099                                struct ext4_super_block *es)
2100{
2101        unsigned int s_flags = sb->s_flags;
2102        int nr_orphans = 0, nr_truncates = 0;
2103#ifdef CONFIG_QUOTA
2104        int i;
2105#endif
2106        if (!es->s_last_orphan) {
2107                jbd_debug(4, "no orphan inodes to clean up\n");
2108                return;
2109        }
2110
2111        if (bdev_read_only(sb->s_bdev)) {
2112                ext4_msg(sb, KERN_ERR, "write access "
2113                        "unavailable, skipping orphan cleanup");
2114                return;
2115        }
2116
2117        /* Check if feature set would not allow a r/w mount */
2118        if (!ext4_feature_set_ok(sb, 0)) {
2119                ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2120                         "unknown ROCOMPAT features");
2121                return;
2122        }
2123
2124        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2125                /* don't clear list on RO mount w/ errors */
2126                if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2127                        jbd_debug(1, "Errors on filesystem, "
2128                                  "clearing orphan list.\n");
2129                        es->s_last_orphan = 0;
2130                }
2131                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2132                return;
2133        }
2134
2135        if (s_flags & MS_RDONLY) {
2136                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2137                sb->s_flags &= ~MS_RDONLY;
2138        }
2139#ifdef CONFIG_QUOTA
2140        /* Needed for iput() to work correctly and not trash data */
2141        sb->s_flags |= MS_ACTIVE;
2142        /* Turn on quotas so that they are updated correctly */
2143        for (i = 0; i < MAXQUOTAS; i++) {
2144                if (EXT4_SB(sb)->s_qf_names[i]) {
2145                        int ret = ext4_quota_on_mount(sb, i);
2146                        if (ret < 0)
2147                                ext4_msg(sb, KERN_ERR,
2148                                        "Cannot turn on journaled "
2149                                        "quota: error %d", ret);
2150                }
2151        }
2152#endif
2153
2154        while (es->s_last_orphan) {
2155                struct inode *inode;
2156
2157                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2158                if (IS_ERR(inode)) {
2159                        es->s_last_orphan = 0;
2160                        break;
2161                }
2162
2163                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2164                dquot_initialize(inode);
2165                if (inode->i_nlink) {
2166                        ext4_msg(sb, KERN_DEBUG,
2167                                "%s: truncating inode %lu to %lld bytes",
2168                                __func__, inode->i_ino, inode->i_size);
2169                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2170                                  inode->i_ino, inode->i_size);
2171                        mutex_lock(&inode->i_mutex);
2172                        ext4_truncate(inode);
2173                        mutex_unlock(&inode->i_mutex);
2174                        nr_truncates++;
2175                } else {
2176                        ext4_msg(sb, KERN_DEBUG,
2177                                "%s: deleting unreferenced inode %lu",
2178                                __func__, inode->i_ino);
2179                        jbd_debug(2, "deleting unreferenced inode %lu\n",
2180                                  inode->i_ino);
2181                        nr_orphans++;
2182                }
2183                iput(inode);  /* The delete magic happens here! */
2184        }
2185
2186#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2187
2188        if (nr_orphans)
2189                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2190                       PLURAL(nr_orphans));
2191        if (nr_truncates)
2192                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2193                       PLURAL(nr_truncates));
2194#ifdef CONFIG_QUOTA
2195        /* Turn quotas off */
2196        for (i = 0; i < MAXQUOTAS; i++) {
2197                if (sb_dqopt(sb)->files[i])
2198                        dquot_quota_off(sb, i);
2199        }
2200#endif
2201        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2202}
2203
2204/*
2205 * Maximal extent format file size.
2206 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2207 * extent format containers, within a sector_t, and within i_blocks
2208 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2209 * so that won't be a limiting factor.
2210 *
2211 * However there is other limiting factor. We do store extents in the form
2212 * of starting block and length, hence the resulting length of the extent
2213 * covering maximum file size must fit into on-disk format containers as
2214 * well. Given that length is always by 1 unit bigger than max unit (because
2215 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2216 *
2217 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2218 */
2219static loff_t ext4_max_size(int blkbits, int has_huge_files)
2220{
2221        loff_t res;
2222        loff_t upper_limit = MAX_LFS_FILESIZE;
2223
2224        /* small i_blocks in vfs inode? */
2225        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2226                /*
2227                 * CONFIG_LBDAF is not enabled implies the inode
2228                 * i_block represent total blocks in 512 bytes
2229                 * 32 == size of vfs inode i_blocks * 8
2230                 */
2231                upper_limit = (1LL << 32) - 1;
2232
2233                /* total blocks in file system block size */
2234                upper_limit >>= (blkbits - 9);
2235                upper_limit <<= blkbits;
2236        }
2237
2238        /*
2239         * 32-bit extent-start container, ee_block. We lower the maxbytes
2240         * by one fs block, so ee_len can cover the extent of maximum file
2241         * size
2242         */
2243        res = (1LL << 32) - 1;
2244        res <<= blkbits;
2245
2246        /* Sanity check against vm- & vfs- imposed limits */
2247        if (res > upper_limit)
2248                res = upper_limit;
2249
2250        return res;
2251}
2252
2253/*
2254 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2255 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2256 * We need to be 1 filesystem block less than the 2^48 sector limit.
2257 */
2258static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2259{
2260        loff_t res = EXT4_NDIR_BLOCKS;
2261        int meta_blocks;
2262        loff_t upper_limit;
2263        /* This is calculated to be the largest file size for a dense, block
2264         * mapped file such that the file's total number of 512-byte sectors,
2265         * including data and all indirect blocks, does not exceed (2^48 - 1).
2266         *
2267         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2268         * number of 512-byte sectors of the file.
2269         */
2270
2271        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2272                /*
2273                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2274                 * the inode i_block field represents total file blocks in
2275                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2276                 */
2277                upper_limit = (1LL << 32) - 1;
2278
2279                /* total blocks in file system block size */
2280                upper_limit >>= (bits - 9);
2281
2282        } else {
2283                /*
2284                 * We use 48 bit ext4_inode i_blocks
2285                 * With EXT4_HUGE_FILE_FL set the i_blocks
2286                 * represent total number of blocks in
2287                 * file system block size
2288                 */
2289                upper_limit = (1LL << 48) - 1;
2290
2291        }
2292
2293        /* indirect blocks */
2294        meta_blocks = 1;
2295        /* double indirect blocks */
2296        meta_blocks += 1 + (1LL << (bits-2));
2297        /* tripple indirect blocks */
2298        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2299
2300        upper_limit -= meta_blocks;
2301        upper_limit <<= bits;
2302
2303        res += 1LL << (bits-2);
2304        res += 1LL << (2*(bits-2));
2305        res += 1LL << (3*(bits-2));
2306        res <<= bits;
2307        if (res > upper_limit)
2308                res = upper_limit;
2309
2310        if (res > MAX_LFS_FILESIZE)
2311                res = MAX_LFS_FILESIZE;
2312
2313        return res;
2314}
2315
2316static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2317                                   ext4_fsblk_t logical_sb_block, int nr)
2318{
2319        struct ext4_sb_info *sbi = EXT4_SB(sb);
2320        ext4_group_t bg, first_meta_bg;
2321        int has_super = 0;
2322
2323        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2324
2325        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2326            nr < first_meta_bg)
2327                return logical_sb_block + nr + 1;
2328        bg = sbi->s_desc_per_block * nr;
2329        if (ext4_bg_has_super(sb, bg))
2330                has_super = 1;
2331
2332        return (has_super + ext4_group_first_block_no(sb, bg));
2333}
2334
2335/**
2336 * ext4_get_stripe_size: Get the stripe size.
2337 * @sbi: In memory super block info
2338 *
2339 * If we have specified it via mount option, then
2340 * use the mount option value. If the value specified at mount time is
2341 * greater than the blocks per group use the super block value.
2342 * If the super block value is greater than blocks per group return 0.
2343 * Allocator needs it be less than blocks per group.
2344 *
2345 */
2346static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2347{
2348        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2349        unsigned long stripe_width =
2350                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2351        int ret;
2352
2353        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2354                ret = sbi->s_stripe;
2355        else if (stripe_width <= sbi->s_blocks_per_group)
2356                ret = stripe_width;
2357        else if (stride <= sbi->s_blocks_per_group)
2358                ret = stride;
2359        else
2360                ret = 0;
2361
2362        /*
2363         * If the stripe width is 1, this makes no sense and
2364         * we set it to 0 to turn off stripe handling code.
2365         */
2366        if (ret <= 1)
2367                ret = 0;
2368
2369        return ret;
2370}
2371
2372/* sysfs support */
2373
/*
 * One ext4 sysfs attribute: the generic sysfs attribute plus ext4-specific
 * show/store callbacks that receive the owning ext4_sb_info.
 */
2374struct ext4_attr {
2375        struct attribute attr;
        /* formats the value into the buffer; may be NULL */
2376        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
        /* parses the written buffer of the given length; may be NULL */
2377        ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2378                         const char *, size_t);
        /* byte offset of the backing field inside struct ext4_sb_info,
         * used by sbi_ui_show()/sbi_ui_store() */
2379        int offset;
2380};
2381
2382static int parse_strtoul(const char *buf,
2383                unsigned long max, unsigned long *value)
2384{
2385        char *endp;
2386
2387        *value = simple_strtoul(skip_spaces(buf), &endp, 0);
2388        endp = skip_spaces(endp);
2389        if (*endp || *value > max)
2390                return -EINVAL;
2391
2392        return 0;
2393}
2394
2395static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2396                                              struct ext4_sb_info *sbi,
2397                                              char *buf)
2398{
2399        return snprintf(buf, PAGE_SIZE, "%llu\n",
2400                (s64) EXT4_C2B(sbi,
2401                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2402}
2403
2404static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2405                                         struct ext4_sb_info *sbi, char *buf)
2406{
2407        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2408
2409        if (!sb->s_bdev->bd_part)
2410                return snprintf(buf, PAGE_SIZE, "0\n");
2411        return snprintf(buf, PAGE_SIZE, "%lu\n",
2412                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2413                         sbi->s_sectors_written_start) >> 1);
2414}
2415
2416static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2417                                          struct ext4_sb_info *sbi, char *buf)
2418{
2419        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2420
2421        if (!sb->s_bdev->bd_part)
2422                return snprintf(buf, PAGE_SIZE, "0\n");
2423        return snprintf(buf, PAGE_SIZE, "%llu\n",
2424                        (unsigned long long)(sbi->s_kbytes_written +
2425                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2426                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2427}
2428
2429static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2430                                          struct ext4_sb_info *sbi,
2431                                          const char *buf, size_t count)
2432{
2433        unsigned long t;
2434
2435        if (parse_strtoul(buf, 0x40000000, &t))
2436                return -EINVAL;
2437
2438        if (t && !is_power_of_2(t))
2439                return -EINVAL;
2440
2441        sbi->s_inode_readahead_blks = t;
2442        return count;
2443}
2444
2445static ssize_t sbi_ui_show(struct ext4_attr *a,
2446                           struct ext4_sb_info *sbi, char *buf)
2447{
2448        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2449
2450        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2451}
2452
2453static ssize_t sbi_ui_store(struct ext4_attr *a,
2454                            struct ext4_sb_info *sbi,
2455                            const char *buf, size_t count)
2456{
2457        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2458        unsigned long t;
2459
2460        if (parse_strtoul(buf, 0xffffffff, &t))
2461                return -EINVAL;
2462        *ui = t;
2463        return count;
2464}
2465
2466static ssize_t trigger_test_error(struct ext4_attr *a,
2467                                  struct ext4_sb_info *sbi,
2468                                  const char *buf, size_t count)
2469{
2470        int len = count;
2471
2472        if (!capable(CAP_SYS_ADMIN))
2473                return -EPERM;
2474
2475        if (len && buf[len-1] == '\n')
2476                len--;
2477
2478        if (len)
2479                ext4_error(sbi->s_sb, "%.*s", len, buf);
2480        return count;
2481}
2482
/*
 * Define a static struct ext4_attr named ext4_attr_<_name> whose .offset
 * records where _elname lives inside struct ext4_sb_info.
 */
2483#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
2484static struct ext4_attr ext4_attr_##_name = {                   \
2485        .attr = {.name = __stringify(_name), .mode = _mode },   \
2486        .show   = _show,                                        \
2487        .store  = _store,                                       \
2488        .offset = offsetof(struct ext4_sb_info, _elname),       \
2489}
/* Define a static struct ext4_attr named ext4_attr_<name> via __ATTR(). */
2490#define EXT4_ATTR(name, mode, show, store) \
2491static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2492
/* 0444 attribute with no callbacks at all */
2493#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
/* 0444 attribute served by <name>_show() */
2494#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
/* 0644 attribute served by <name>_show() and <name>_store() */
2495#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
/* 0644 attribute backed by the unsigned int field elname of ext4_sb_info */
2496#define EXT4_RW_ATTR_SBI_UI(name, elname)       \
2497        EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
/* Address of the embedded struct attribute, for attribute arrays */
2498#define ATTR_LIST(name) &ext4_attr_##name.attr
2499
/* Read-only attributes with dedicated show handlers */
2500EXT4_RO_ATTR(delayed_allocation_blocks);
2501EXT4_RO_ATTR(session_write_kbytes);
2502EXT4_RO_ATTR(lifetime_write_kbytes);
/* Generic show, but a validating store (0 or a power of two only) */
2503EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2504                 inode_readahead_blks_store, s_inode_readahead_blks);
/* Read-write attributes mapped directly onto unsigned int sbi fields */
2505EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2506EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2507EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2508EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2509EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2510EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2511EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2512EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2513EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
/* Write-only (0200): the written text is raised as an ext4 error */
2514EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2515
/* NULL-terminated attribute list used as ext4_ktype.default_attrs */
2516static struct attribute *ext4_attrs[] = {
2517        ATTR_LIST(delayed_allocation_blocks),
2518        ATTR_LIST(session_write_kbytes),
2519        ATTR_LIST(lifetime_write_kbytes),
2520        ATTR_LIST(inode_readahead_blks),
2521        ATTR_LIST(inode_goal),
2522        ATTR_LIST(mb_stats),
2523        ATTR_LIST(mb_max_to_scan),
2524        ATTR_LIST(mb_min_to_scan),
2525        ATTR_LIST(mb_order2_req),
2526        ATTR_LIST(mb_stream_req),
2527        ATTR_LIST(mb_group_prealloc),
2528        ATTR_LIST(max_writeback_mb_bump),
2529        ATTR_LIST(extent_max_zeroout_kb),
2530        ATTR_LIST(trigger_fs_error),
2531        NULL,
2532};
2533
2534/* Features this copy of ext4 supports */
/* These carry no show/store callbacks; only the attribute itself exists. */
2535EXT4_INFO_ATTR(lazy_itable_init);
2536EXT4_INFO_ATTR(batched_discard);
2537EXT4_INFO_ATTR(meta_bg_resize);
2538
/* NULL-terminated attribute list used as ext4_feat_ktype.default_attrs */
2539static struct attribute *ext4_feat_attrs[] = {
2540        ATTR_LIST(lazy_itable_init),
2541        ATTR_LIST(batched_discard),
2542        ATTR_LIST(meta_bg_resize),
2543        NULL,
2544};
2545
2546static ssize_t ext4_attr_show(struct kobject *kobj,
2547                              struct attribute *attr, char *buf)
2548{
2549        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2550                                                s_kobj);
2551        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2552
2553        return a->show ? a->show(a, sbi, buf) : 0;
2554}
2555
2556static ssize_t ext4_attr_store(struct kobject *kobj,
2557                               struct attribute *attr,
2558                               const char *buf, size_t len)
2559{
2560        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2561                                                s_kobj);
2562        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2563
2564        return a->store ? a->store(a, sbi, buf, len) : 0;
2565}
2566
/*
 * kobject release callback for the per-superblock kobject: signal the
 * s_kobj_unregister completion of the ext4_sb_info embedding @kobj.
 */
2567static void ext4_sb_release(struct kobject *kobj)
2568{
2569        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2570                                                s_kobj);
2571        complete(&sbi->s_kobj_unregister);
2572}
2573
/* show/store dispatchers shared by both ext4 kobject types */
2574static const struct sysfs_ops ext4_attr_ops = {
2575        .show   = ext4_attr_show,
2576        .store  = ext4_attr_store,
2577};
2578
/* kobject type of the per-superblock kobject (ext4_sb_info.s_kobj) */
2579static struct kobj_type ext4_ktype = {
2580        .default_attrs  = ext4_attrs,
2581        .sysfs_ops      = &ext4_attr_ops,
2582        .release        = ext4_sb_release,
2583};
2584
/*
 * kobject release callback for the features kobject: signal the
 * f_kobj_unregister completion of the global ext4_feat.  @kobj is unused.
 */
2585static void ext4_feat_release(struct kobject *kobj)
2586{
2587        complete(&ext4_feat->f_kobj_unregister);
2588}
2589
/* kobject type exposing the feature attributes (ext4_feat_attrs) */
2590static struct kobj_type ext4_feat_ktype = {
2591        .default_attrs  = ext4_feat_attrs,
2592        .sysfs_ops      = &ext4_attr_ops,
2593        .release        = ext4_feat_release,
2594};
2595
2596/*
2597 * Check whether this filesystem can be mounted based on
2598 * the features present and the RDONLY/RDWR mount requested.
2599 * Returns 1 if this filesystem can be mounted as requested,
2600 * 0 if it cannot be.
2601 */
2602static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2603{
2604        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2605                ext4_msg(sb, KERN_ERR,
2606                        "Couldn't mount because of "
2607                        "unsupported optional features (%x)",
2608                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2609                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2610                return 0;
2611        }
2612
2613        if (readonly)
2614                return 1;
2615
2616        /* Check that feature set is OK for a read-write mount */
2617        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2618                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2619                         "unsupported optional features (%x)",
2620                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2621                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2622                return 0;
2623        }
2624        /*
2625         * Large file size enabled file system can only be mounted
2626         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2627         */
2628        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2629                if (sizeof(blkcnt_t) < sizeof(u64)) {
2630                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2631                                 "cannot be mounted RDWR without "
2632                                 "CONFIG_LBDAF");
2633                        return 0;
2634                }
2635        }
2636        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2637            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2638                ext4_msg(sb, KERN_ERR,
2639                         "Can't support bigalloc feature without "
2640                         "extents feature\n");
2641                return 0;
2642        }
2643
2644#ifndef CONFIG_QUOTA
2645        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2646            !readonly) {
2647                ext4_msg(sb, KERN_ERR,
2648                         "Filesystem with quota feature cannot be mounted RDWR "
2649                         "without CONFIG_QUOTA");
2650                return 0;
2651        }
2652#endif  /* CONFIG_QUOTA */
2653        return 1;
2654}
2655
2656/*
2657 * This function is called once a day if we have errors logged
2658 * on the file system
2659 */
2660static void print_daily_error_info(unsigned long arg)
2661{
2662        struct super_block *sb = (struct super_block *) arg;
2663        struct ext4_sb_info *sbi;
2664        struct ext4_super_block *es;
2665
2666        sbi = EXT4_SB(sb);
2667        es = sbi->s_es;
2668
2669        if (es->s_error_count)
2670                ext4_msg(sb, KERN_NOTICE, "error count: %u",
2671                         le32_to_cpu(es->s_error_count));
2672        if (es->s_first_error_time) {
2673                printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
2674                       sb->s_id, le32_to_cpu(es->s_first_error_time),
2675                       (int) sizeof(es->s_first_error_func),
2676                       es->s_first_error_func,
2677                       le32_to_cpu(es->s_first_error_line));
2678                if (es->s_first_error_ino)
2679                        printk(": inode %u",
2680                               le32_to_cpu(es->s_first_error_ino));
2681                if (es->s_first_error_block)
2682                        printk(": block %llu", (unsigned long long)
2683                               le64_to_cpu(es->s_first_error_block));
2684                printk("\n");
2685        }
2686        if (es->s_last_error_time) {
2687                printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
2688                       sb->s_id, le32_to_cpu(es->s_last_error_time),
2689                       (int) sizeof(es->s_last_error_func),
2690                       es->s_last_error_func,
2691                       le32_to_cpu(es->s_last_error_line));
2692                if (es->s_last_error_ino)
2693                        printk(": inode %u",
2694                               le32_to_cpu(es->s_last_error_ino));
2695                if (es->s_last_error_block)
2696                        printk(": block %llu", (unsigned long long)
2697                               le64_to_cpu(es->s_last_error_block));
2698                printk("\n");
2699        }
2700        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
2701}
2702
2703/* Find next suitable group and run ext4_init_inode_table */
2704static int ext4_run_li_request(struct ext4_li_request *elr)
2705{
2706        struct ext4_group_desc *gdp = NULL;
2707        ext4_group_t group, ngroups;
2708        struct super_block *sb;
2709        unsigned long timeout = 0;
2710        int ret = 0;
2711
2712        sb = elr->lr_super;
2713        ngroups = EXT4_SB(sb)->s_groups_count;
2714
2715        sb_start_write(sb);
2716        for (group = elr->lr_next_group; group < ngroups; group++) {
2717                gdp = ext4_get_group_desc(sb, group, NULL);
2718                if (!gdp) {
2719                        ret = 1;
2720                        break;
2721                }
2722
2723                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2724                        break;
2725        }
2726
2727        if (group >= ngroups)
2728                ret = 1;
2729
2730        if (!ret) {
2731                timeout = jiffies;
2732                ret = ext4_init_inode_table(sb, group,
2733                                            elr->lr_timeout ? 0 : 1);
2734                if (elr->lr_timeout == 0) {
2735                        timeout = (jiffies - timeout) *
2736                                  elr->lr_sbi->s_li_wait_mult;
2737                        elr->lr_timeout = timeout;
2738                }
2739                elr->lr_next_sched = jiffies + elr->lr_timeout;
2740                elr->lr_next_group = group + 1;
2741        }
2742        sb_end_write(sb);
2743
2744        return ret;
2745}
2746
2747/*
2748 * Remove lr_request from the list_request and free the
2749 * request structure. Should be called with li_list_mtx held
2750 */
2751static void ext4_remove_li_request(struct ext4_li_request *elr)
2752{
2753        struct ext4_sb_info *sbi;
2754
2755        if (!elr)
2756                return;
2757
2758        sbi = elr->lr_sbi;
2759
2760        list_del(&elr->lr_request);
2761        sbi->s_li_request = NULL;
2762        kfree(elr);
2763}
2764
2765static void ext4_unregister_li_request(struct super_block *sb)
2766{
2767        mutex_lock(&ext4_li_mtx);
2768        if (!ext4_li_info) {
2769                mutex_unlock(&ext4_li_mtx);
2770                return;
2771        }
2772
2773        mutex_lock(&ext4_li_info->li_list_mtx);
2774        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2775        mutex_unlock(&ext4_li_info->li_list_mtx);
2776        mutex_unlock(&ext4_li_mtx);
2777}
2778
/* Task of the "ext4lazyinit" thread, as returned by kthread_run() */
2779static struct task_struct *ext4_lazyinit_task;
2780
2781/*
2782 * This is the function where ext4lazyinit thread lives. It walks
2783 * through the request list searching for next scheduled filesystem.
2784 * When such a fs is found, run the lazy initialization request
2785 * (ext4_run_li_request) and keep track of the time spent in this
2786 * function. Based on that time we compute next schedule time of
2787 * the request. When walking through the list is complete, compute
2788 * next waking time and put itself to sleep.
 *
 * @arg: the struct ext4_lazy_init holding the request list; must not
 *       be NULL.
 *
 * The thread exits once the request list is empty (re-checked under both
 * ext4_li_mtx and li_list_mtx) or after kthread_should_stop() reports
 * true; on exit it frees ext4_li_info and resets it to NULL.  Always
 * returns 0.
2789 */
2790static int ext4_lazyinit_thread(void *arg)
2791{
2792        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2793        struct list_head *pos, *n;
2794        struct ext4_li_request *elr;
2795        unsigned long next_wakeup, cur;
2796
2797        BUG_ON(NULL == eli);
2798
2799cont_thread:
2800        while (true) {
                /* MAX_JIFFY_OFFSET doubles as "no request set a wakeup" */
2801                next_wakeup = MAX_JIFFY_OFFSET;
2802
2803                mutex_lock(&eli->li_list_mtx);
2804                if (list_empty(&eli->li_request_list)) {
2805                        mutex_unlock(&eli->li_list_mtx);
2806                        goto exit_thread;
2807                }
2808
                /* _safe variant: entries may be removed while iterating */
2809                list_for_each_safe(pos, n, &eli->li_request_list) {
2810                        elr = list_entry(pos, struct ext4_li_request,
2811                                         lr_request);
2812
2813                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
                                /*
                                 * Non-zero also covers "no group left to
                                 * initialize", not only failures.
                                 */
2814                                if (ext4_run_li_request(elr) != 0) {
2815                                        /* error, remove the lazy_init job */
2816                                        ext4_remove_li_request(elr);
2817                                        continue;
2818                                }
2819                        }
2820
                        /* track the earliest schedule time of all requests */
2821                        if (time_before(elr->lr_next_sched, next_wakeup))
2822                                next_wakeup = elr->lr_next_sched;
2823                }
2824                mutex_unlock(&eli->li_list_mtx);
2825
2826                try_to_freeze();
2827
2828                cur = jiffies;
                /* wakeup already due, or none recorded: rescan, don't sleep */
2829                if ((time_after_eq(cur, next_wakeup)) ||
2830                    (MAX_JIFFY_OFFSET == next_wakeup)) {
2831                        cond_resched();
2832                        continue;
2833                }
2834
2835                schedule_timeout_interruptible(next_wakeup - cur);
2836
                /* asked to stop: drop all pending requests before exiting */
2837                if (kthread_should_stop()) {
2838                        ext4_clear_request_list();
2839                        goto exit_thread;
2840                }
2841        }
2842
2843exit_thread:
2844        /*
2845         * It looks like the request list is empty, but we need
2846         * to check it under the li_list_mtx lock, to prevent any
2847         * additions into it, and of course we should lock ext4_li_mtx
2848         * to atomically free the list and ext4_li_info, because at
2849         * this point another ext4 filesystem could be registering
2850         * new one.
2851         */
2852        mutex_lock(&ext4_li_mtx);
2853        mutex_lock(&eli->li_list_mtx);
2854        if (!list_empty(&eli->li_request_list)) {
                /* a request was added meanwhile: resume the main loop */
2855                mutex_unlock(&eli->li_list_mtx);
2856                mutex_unlock(&ext4_li_mtx);
2857                goto cont_thread;
2858        }
2859        mutex_unlock(&eli->li_list_mtx);
2860        kfree(ext4_li_info);
2861        ext4_li_info = NULL;
2862        mutex_unlock(&ext4_li_mtx);
2863
2864        return 0;
2865}
2866
2867static void ext4_clear_request_list(void)
2868{
2869        struct list_head *pos, *n;
2870        struct ext4_li_request *elr;
2871
2872        mutex_lock(&ext4_li_info->li_list_mtx);
2873        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2874                elr = list_entry(pos, struct ext4_li_request,
2875                                 lr_request);
2876                ext4_remove_li_request(elr);
2877        }
2878        mutex_unlock(&ext4_li_info->li_list_mtx);
2879}
2880
2881static int ext4_run_lazyinit_thread(void)
2882{
2883        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2884                                         ext4_li_info, "ext4lazyinit");
2885        if (IS_ERR(ext4_lazyinit_task)) {
2886                int err = PTR_ERR(ext4_lazyinit_task);
2887                ext4_clear_request_list();
2888                kfree(ext4_li_info);
2889                ext4_li_info = NULL;
2890                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
2891                                 "initialization thread\n",
2892                                 err);
2893                return err;
2894        }
2895        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2896        return 0;
2897}
2898
2899/*
2900 * Check whether it make sense to run itable init. thread or not.
2901 * If there is at least one uninitialized inode table, return
2902 * corresponding group number, else the loop goes through all
2903 * groups and return total number of groups.
2904 */
2905static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2906{
2907        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2908        struct ext4_group_desc *gdp = NULL;
2909
2910        for (group = 0; group < ngroups; group++) {
2911                gdp = ext4_get_group_desc(sb, group, NULL);
2912                if (!gdp)
2913                        continue;
2914
2915                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2916                        break;
2917        }
2918
2919        return group;
2920}
2921
2922static int ext4_li_info_new(void)
2923{
2924        struct ext4_lazy_init *eli = NULL;
2925
2926        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2927        if (!eli)
2928                return -ENOMEM;
2929
2930        INIT_LIST_HEAD(&eli->li_request_list);
2931        mutex_init(&eli->li_list_mtx);
2932
2933        eli->li_state |= EXT4_LAZYINIT_QUIT;
2934
2935        ext4_li_info = eli;
2936
2937        return 0;
2938}
2939
2940static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
2941                                            ext4_group_t start)
2942{
2943        struct ext4_sb_info *sbi = EXT4_SB(sb);
2944        struct ext4_li_request *elr;
2945        unsigned long rnd;
2946
2947        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
2948        if (!elr)
2949                return NULL;
2950
2951        elr->lr_super = sb;
2952        elr->lr_sbi = sbi;
2953        elr->lr_next_group = start;
2954
2955        /*
2956         * Randomize first schedule time of the request to
2957         * spread the inode table initialization requests
2958         * better.
2959         */
2960        get_random_bytes(&rnd, sizeof(rnd));
2961        elr->lr_next_sched = jiffies + (unsigned long)rnd %
2962                             (EXT4_DEF_LI_MAX_START_DELAY * HZ);
2963
2964        return elr;
2965}
2966
2967int ext4_register_li_request(struct super_block *sb,
2968                             ext4_group_t first_not_zeroed)
2969{
2970        struct ext4_sb_info *sbi = EXT4_SB(sb);
2971        struct ext4_li_request *elr = NULL;
2972        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2973        int ret = 0;
2974
2975        mutex_lock(&ext4_li_mtx);
2976        if (sbi->s_li_request != NULL) {
2977                /*
2978                 * Reset timeout so it can be computed again, because
2979                 * s_li_wait_mult might have changed.
2980                 */
2981                sbi->s_li_request->lr_timeout = 0;
2982                goto out;
2983        }
2984
2985        if (first_not_zeroed == ngroups ||
2986            (sb->s_flags & MS_RDONLY) ||
2987            !test_opt(sb, INIT_INODE_TABLE))
2988                goto out;
2989
2990        elr = ext4_li_request_new(sb, first_not_zeroed);
2991        if (!elr) {
2992                ret = -ENOMEM;
2993                goto out;
2994        }
2995
2996        if (NULL == ext4_li_info) {
2997                ret = ext4_li_info_new();
2998                if (ret)
2999                        goto out;
3000        }

3001
3002        mutex_lock(&ext4_li_info->li_list_mtx);
3003        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3004        mutex_unlock(&ext4_li_info->li_list_mtx);
3005
3006        sbi->s_li_request = elr;
3007        /*
3008         * set elr to NULL here since it has been inserted to
3009         * the request_list and the removal and free of it is
3010         * handled by ext4_clear_request_list from now on.
3011         */
3012        elr = NULL;
3013
3014        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3015                ret = ext4_run_lazyinit_thread();
3016                if (ret)
3017                        goto out;
3018        }
3019out:
3020        mutex_unlock(&ext4_li_mtx);
3021        if (ret)
3022                kfree(elr);
3023        return ret;
3024}
3025
3026/*
3027 * We do not need to lock anything since this is called on
3028 * module unload.
3029 */
3030static void ext4_destroy_lazyinit_thread(void)
3031{
3032        /*
3033         * If thread exited earlier
3034         * there's nothing to be done.
3035         */
3036        if (!ext4_li_info || !ext4_lazyinit_task)
3037                return;
3038
3039        kthread_stop(ext4_lazyinit_task);
3040}
3041
3042static int set_journal_csum_feature_set(struct super_block *sb)
3043{
3044        int ret = 1;
3045        int compat, incompat;
3046        struct ext4_sb_info *sbi = EXT4_SB(sb);
3047
3048        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3049                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3050                /* journal checksum v2 */
3051                compat = 0;
3052                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
3053        } else {
3054                /* journal checksum v1 */
3055                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3056                incompat = 0;
3057        }
3058
3059        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3060                ret = jbd2_journal_set_features(sbi->s_journal,
3061                                compat, 0,
3062                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3063                                incompat);
3064        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3065                ret = jbd2_journal_set_features(sbi->s_journal,
3066                                compat, 0,
3067                                incompat);
3068                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3069                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3070        } else {
3071                jbd2_journal_clear_features(sbi->s_journal,
3072                                JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3073                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3074                                JBD2_FEATURE_INCOMPAT_CSUM_V2);
3075        }
3076
3077        return ret;
3078}
3079
3080/*
3081 * Note: calculating the overhead so we can be compatible with
3082 * historical BSD practice is quite difficult in the face of
3083 * clusters/bigalloc.  This is because multiple metadata blocks from
3084 * different block group can end up in the same allocation cluster.
3085 * Calculating the exact overhead in the face of clustered allocation
3086 * requires either O(all block bitmaps) in memory or O(number of block
3087 * groups**2) in time.  We will still calculate the superblock for
3088 * older file systems --- and if we come across with a bigalloc file
3089 * system with zero in s_overhead_clusters the estimate will be close to
3090 * correct especially for very large cluster sizes --- but for newer
3091 * file systems, it's better to calculate this figure once at mkfs
3092 * time, and store it in the superblock.  If the superblock value is
3093 * present (even for non-bigalloc file systems), we will use it.
3094 */
3095static int count_overhead(struct super_block *sb, ext4_group_t grp,
3096                          char *buf)
3097{
3098        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3099        struct ext4_group_desc  *gdp;
3100        ext4_fsblk_t            first_block, last_block, b;
3101        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3102        int                     s, j, count = 0;
3103
3104        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3105                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3106                        sbi->s_itb_per_group + 2);
3107
3108        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3109                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3110        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3111        for (i = 0; i < ngroups; i++) {
3112                gdp = ext4_get_group_desc(sb, i, NULL);
3113                b = ext4_block_bitmap(sb, gdp);
3114                if (b >= first_block && b <= last_block) {
3115                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3116                        count++;
3117                }
3118                b = ext4_inode_bitmap(sb, gdp);
3119                if (b >= first_block && b <= last_block) {
3120                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3121                        count++;
3122                }
3123                b = ext4_inode_table(sb, gdp);
3124                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3125                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3126                                int c = EXT4_B2C(sbi, b - first_block);
3127                                ext4_set_bit(c, buf);
3128                                count++;
3129                        }
3130                if (i != grp)
3131                        continue;
3132                s = 0;
3133                if (ext4_bg_has_super(sb, grp)) {
3134                        ext4_set_bit(s++, buf);
3135                        count++;
3136                }
3137                for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
3138                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3139                        count++;
3140                }
3141        }
3142        if (!count)
3143                return 0;
3144        return EXT4_CLUSTERS_PER_GROUP(sb) -
3145                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3146}
3147
3148/*
3149 * Compute the overhead and stash it in sbi->s_overhead
3150 */
3151int ext4_calculate_overhead(struct super_block *sb)
3152{
3153        struct ext4_sb_info *sbi = EXT4_SB(sb);
3154        struct ext4_super_block *es = sbi->s_es;
3155        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3156        ext4_fsblk_t overhead = 0;
3157        char *buf = (char *) get_zeroed_page(GFP_KERNEL);
3158
3159        if (!buf)
3160                return -ENOMEM;
3161
3162        /*
3163         * Compute the overhead (FS structures).  This is constant
3164         * for a given filesystem unless the number of block groups
3165         * changes so we cache the previous value until it does.
3166         */
3167
3168        /*
3169         * All of the blocks before first_data_block are overhead
3170         */
3171        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3172
3173        /*
3174         * Add the overhead found in each block group
3175         */
3176        for (i = 0; i < ngroups; i++) {
3177                int blks;
3178
3179                blks = count_overhead(sb, i, buf);
3180                overhead += blks;
3181                if (blks)
3182                        memset(buf, 0, PAGE_SIZE);
3183                cond_resched();
3184        }
3185        /* Add the journal blocks as well */
3186        if (sbi->s_journal)
3187                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3188
3189        sbi->s_overhead = overhead;
3190        smp_wmb();
3191        free_page((unsigned long) buf);
3192        return 0;
3193}
3194
3195static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3196{
3197        char *orig_data = kstrdup(data, GFP_KERNEL);
3198        struct buffer_head *bh;
3199        struct ext4_super_block *es = NULL;
3200        struct ext4_sb_info *sbi;
3201        ext4_fsblk_t block;
3202        ext4_fsblk_t sb_block = get_sb_block(&data);
3203        ext4_fsblk_t logical_sb_block;
3204        unsigned long offset = 0;
3205        unsigned long journal_devnum = 0;
3206        unsigned long def_mount_opts;
3207        struct inode *root;
3208        char *cp;
3209        const char *descr;
3210        int ret = -ENOMEM;
3211        int blocksize, clustersize;
3212        unsigned int db_count;
3213        unsigned int i;
3214        int needs_recovery, has_huge_files, has_bigalloc;
3215        __u64 blocks_count;
3216        int err = 0;
3217        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3218        ext4_group_t first_not_zeroed;
3219
3220        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3221        if (!sbi)
3222                goto out_free_orig;
3223
3224        sbi->s_blockgroup_lock =
3225                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3226        if (!sbi->s_blockgroup_lock) {
3227                kfree(sbi);
3228                goto out_free_orig;
3229        }
3230        sb->s_fs_info = sbi;
3231        sbi->s_sb = sb;
3232        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3233        sbi->s_sb_block = sb_block;
3234        if (sb->s_bdev->bd_part)
3235                sbi->s_sectors_written_start =
3236                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3237
3238        /* Cleanup superblock name */
3239        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3240                *cp = '!';
3241
3242        /* -EINVAL is default */
3243        ret = -EINVAL;
3244        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3245        if (!blocksize) {
3246                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3247                goto out_fail;
3248        }
3249
3250        /*
3251         * The ext4 superblock will not be buffer aligned for other than 1kB
3252         * block sizes.  We need to calculate the offset from buffer start.
3253         */
3254        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3255                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3256                offset = do_div(logical_sb_block, blocksize);
3257        } else {
3258                logical_sb_block = sb_block;
3259        }
3260
3261        if (!(bh = sb_bread(sb, logical_sb_block))) {
3262                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3263                goto out_fail;
3264        }
3265        /*
3266         * Note: s_es must be initialized as soon as possible because
3267         *       some ext4 macro-instructions depend on its value
3268         */
3269        es = (struct ext4_super_block *) (bh->b_data + offset);
3270        sbi->s_es = es;
3271        sb->s_magic = le16_to_cpu(es->s_magic);
3272        if (sb->s_magic != EXT4_SUPER_MAGIC)
3273                goto cantfind_ext4;
3274        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3275
3276        /* Warn if metadata_csum and gdt_csum are both set. */
3277        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3278                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3279            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3280                ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
3281                             "redundant flags; please run fsck.");
3282
3283        /* Check for a known checksum algorithm */
3284        if (!ext4_verify_csum_type(sb, es)) {
3285                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3286                         "unknown checksum algorithm.");
3287                silent = 1;
3288                goto cantfind_ext4;
3289        }
3290
3291        /* Load the checksum driver */
3292        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3293                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3294                sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3295                if (IS_ERR(sbi->s_chksum_driver)) {
3296                        ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3297                        ret = PTR_ERR(sbi->s_chksum_driver);
3298                        sbi->s_chksum_driver = NULL;
3299                        goto failed_mount;
3300                }
3301        }
3302
3303        /* Check superblock checksum */
3304        if (!ext4_superblock_csum_verify(sb, es)) {
3305                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3306                         "invalid superblock checksum.  Run e2fsck?");
3307                silent = 1;
3308                goto cantfind_ext4;
3309        }
3310
3311        /* Precompute checksum seed for all metadata */
3312        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3313                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
3314                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3315                                               sizeof(es->s_uuid));
3316
3317        /* Set defaults before we parse the mount options */
3318        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3319        set_opt(sb, INIT_INODE_TABLE);
3320        if (def_mount_opts & EXT4_DEFM_DEBUG)
3321                set_opt(sb, DEBUG);
3322        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3323                set_opt(sb, GRPID);
3324        if (def_mount_opts & EXT4_DEFM_UID16)
3325                set_opt(sb, NO_UID32);
3326        /* xattr user namespace & acls are now defaulted on */
3327        set_opt(sb, XATTR_USER);
3328#ifdef CONFIG_EXT4_FS_POSIX_ACL
3329        set_opt(sb, POSIX_ACL);
3330#endif
3331        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3332                set_opt(sb, JOURNAL_DATA);
3333        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3334                set_opt(sb, ORDERED_DATA);
3335        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3336                set_opt(sb, WRITEBACK_DATA);
3337
3338        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3339                set_opt(sb, ERRORS_PANIC);
3340        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3341                set_opt(sb, ERRORS_CONT);
3342        else
3343                set_opt(sb, ERRORS_RO);
3344        if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3345                set_opt(sb, BLOCK_VALIDITY);
3346        if (def_mount_opts & EXT4_DEFM_DISCARD)
3347                set_opt(sb, DISCARD);
3348
3349        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3350        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3351        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3352        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3353        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3354
3355        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3356                set_opt(sb, BARRIER);
3357
3358        /*
3359         * enable delayed allocation by default
3360         * Use -o nodelalloc to turn it off
3361         */
3362        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3363            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3364                set_opt(sb, DELALLOC);
3365
3366        /*
3367         * set default s_li_wait_mult for lazyinit, for the case there is
3368         * no mount option specified.
3369         */
3370        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3371
3372        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3373                           &journal_devnum, &journal_ioprio, 0)) {
3374                ext4_msg(sb, KERN_WARNING,
3375                         "failed to parse options in superblock: %s",
3376                         sbi->s_es->s_mount_opts);
3377        }
3378        sbi->s_def_mount_opt = sbi->s_mount_opt;
3379        if (!parse_options((char *) data, sb, &journal_devnum,
3380                           &journal_ioprio, 0))
3381                goto failed_mount;
3382
3383        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3384                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3385                            "with data=journal disables delayed "
3386                            "allocation and O_DIRECT support!\n");
3387                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3388                        ext4_msg(sb, KERN_ERR, "can't mount with "
3389                                 "both data=journal and delalloc");
3390                        goto failed_mount;
3391                }
3392                if (test_opt(sb, DIOREAD_NOLOCK)) {
3393                        ext4_msg(sb, KERN_ERR, "can't mount with "
3394                                 "both data=journal and delalloc");
3395                        goto failed_mount;
3396                }
3397                if (test_opt(sb, DELALLOC))
3398                        clear_opt(sb, DELALLOC);
3399        }
3400
3401        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3402                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3403
3404        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3405            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3406             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3407             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3408                ext4_msg(sb, KERN_WARNING,
3409                       "feature flags set on rev 0 fs, "
3410                       "running e2fsck is recommended");
3411
3412        if (IS_EXT2_SB(sb)) {
3413                if (ext2_feature_set_ok(sb))
3414                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3415                                 "using the ext4 subsystem");
3416                else {
3417                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3418                                 "to feature incompatibilities");
3419                        goto failed_mount;
3420                }
3421        }
3422
3423        if (IS_EXT3_SB(sb)) {
3424                if (ext3_feature_set_ok(sb))
3425                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3426                                 "using the ext4 subsystem");
3427                else {
3428                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3429                                 "to feature incompatibilities");
3430                        goto failed_mount;
3431                }
3432        }
3433
3434        /*
3435         * Check feature flags regardless of the revision level, since we
3436         * previously didn't change the revision level when setting the flags,
3437         * so there is a chance incompat flags are set on a rev 0 filesystem.
3438         */
3439        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3440                goto failed_mount;
3441
3442        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3443        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3444            blocksize > EXT4_MAX_BLOCK_SIZE) {
3445                ext4_msg(sb, KERN_ERR,
3446                       "Unsupported filesystem blocksize %d", blocksize);
3447                goto failed_mount;
3448        }
3449
3450        if (sb->s_blocksize != blocksize) {
3451                /* Validate the filesystem blocksize */
3452                if (!sb_set_blocksize(sb, blocksize)) {
3453                        ext4_msg(sb, KERN_ERR, "bad block size %d",
3454                                        blocksize);
3455                        goto failed_mount;
3456                }
3457
3458                brelse(bh);
3459                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3460                offset = do_div(logical_sb_block, blocksize);
3461                bh = sb_bread(sb, logical_sb_block);
3462                if (!bh) {
3463                        ext4_msg(sb, KERN_ERR,
3464                               "Can't read superblock on 2nd try");
3465                        goto failed_mount;
3466                }
3467                es = (struct ext4_super_block *)(bh->b_data + offset);
3468                sbi->s_es = es;
3469                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3470                        ext4_msg(sb, KERN_ERR,
3471                               "Magic mismatch, very weird!");
3472                        goto failed_mount;
3473                }
3474        }
3475
3476        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3477                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3478        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3479                                                      has_huge_files);
3480        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3481
3482        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3483                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3484                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3485        } else {
3486                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3487                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3488                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3489                    (!is_power_of_2(sbi->s_inode_size)) ||
3490                    (sbi->s_inode_size > blocksize)) {
3491                        ext4_msg(sb, KERN_ERR,
3492                               "unsupported inode size: %d",
3493                               sbi->s_inode_size);
3494                        goto failed_mount;
3495                }
3496                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3497                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3498        }
3499
3500        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3501        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3502                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3503                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3504                    !is_power_of_2(sbi->s_desc_size)) {
3505                        ext4_msg(sb, KERN_ERR,
3506                               "unsupported descriptor size %lu",
3507                               sbi->s_desc_size);
3508                        goto failed_mount;
3509                }
3510        } else
3511                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3512
3513        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3514        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3515        if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3516                goto cantfind_ext4;
3517
3518        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3519        if (sbi->s_inodes_per_block == 0)
3520                goto cantfind_ext4;
3521        sbi->s_itb_per_group = sbi->s_inodes_per_group /
3522                                        sbi->s_inodes_per_block;
3523        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3524        sbi->s_sbh = bh;
3525        sbi->s_mount_state = le16_to_cpu(es->s_state);
3526        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3527        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3528
3529        for (i = 0; i < 4; i++)
3530                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3531        sbi->s_def_hash_version = es->s_def_hash_version;
3532        i = le32_to_cpu(es->s_flags);
3533        if (i & EXT2_FLAGS_UNSIGNED_HASH)
3534                sbi->s_hash_unsigned = 3;
3535        else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3536#ifdef __CHAR_UNSIGNED__
3537                es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3538                sbi->s_hash_unsigned = 3;
3539#else
3540                es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3541#endif
3542        }
3543
3544        /* Handle clustersize */
3545        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3546        has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3547                                EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3548        if (has_bigalloc) {
3549                if (clustersize < blocksize) {
3550                        ext4_msg(sb, KERN_ERR,
3551                                 "cluster size (%d) smaller than "
3552                                 "block size (%d)", clustersize, blocksize);
3553                        goto failed_mount;
3554                }
3555                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3556                        le32_to_cpu(es->s_log_block_size);
3557                sbi->s_clusters_per_group =
3558                        le32_to_cpu(es->s_clusters_per_group);
3559                if (sbi->s_clusters_per_group > blocksize * 8) {
3560                        ext4_msg(sb, KERN_ERR,
3561                                 "#clusters per group too big: %lu",
3562                                 sbi->s_clusters_per_group);
3563                        goto failed_mount;
3564                }
3565                if (sbi->s_blocks_per_group !=
3566                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3567                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3568                                 "clusters per group (%lu) inconsistent",
3569                                 sbi->s_blocks_per_group,
3570                                 sbi->s_clusters_per_group);
3571                        goto failed_mount;
3572                }
3573        } else {
3574                if (clustersize != blocksize) {
3575                        ext4_warning(sb, "fragment/cluster size (%d) != "
3576                                     "block size (%d)", clustersize,
3577                                     blocksize);
3578                        clustersize = blocksize;
3579                }
3580                if (sbi->s_blocks_per_group > blocksize * 8) {
3581                        ext4_msg(sb, KERN_ERR,
3582                                 "#blocks per group too big: %lu",
3583                                 sbi->s_blocks_per_group);
3584                        goto failed_mount;
3585                }
3586                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3587                sbi->s_cluster_bits = 0;
3588        }
3589        sbi->s_cluster_ratio = clustersize / blocksize;
3590
3591        if (sbi->s_inodes_per_group > blocksize * 8) {
3592                ext4_msg(sb, KERN_ERR,
3593                       "#inodes per group too big: %lu",
3594                       sbi->s_inodes_per_group);
3595                goto failed_mount;
3596        }
3597
3598        /*
3599         * Test whether we have more sectors than will fit in sector_t,
3600         * and whether the max offset is addressable by the page cache.
3601         */
3602        err = generic_check_addressable(sb->s_blocksize_bits,
3603                                        ext4_blocks_count(es));
3604        if (err) {
3605                ext4_msg(sb, KERN_ERR, "filesystem"
3606                         " too large to mount safely on this system");
3607                if (sizeof(sector_t) < 8)
3608                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3609                goto failed_mount;
3610        }
3611
3612        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3613                goto cantfind_ext4;
3614
3615        /* check blocks count against device size */
3616        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3617        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3618                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3619                       "exceeds size of device (%llu blocks)",
3620                       ext4_blocks_count(es), blocks_count);
3621                goto failed_mount;
3622        }
3623
3624        /*
3625         * It makes no sense for the first data block to be beyond the end
3626         * of the filesystem.
3627         */
3628        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3629                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3630                         "block %u is beyond end of filesystem (%llu)",
3631                         le32_to_cpu(es->s_first_data_block),
3632                         ext4_blocks_count(es));
3633                goto failed_mount;
3634        }
3635        blocks_count = (ext4_blocks_count(es) -
3636                        le32_to_cpu(es->s_first_data_block) +
3637                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
3638        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3639        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3640                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3641                       "(block count %llu, first data block %u, "
3642                       "blocks per group %lu)", sbi->s_groups_count,
3643                       ext4_blocks_count(es),
3644                       le32_to_cpu(es->s_first_data_block),
3645                       EXT4_BLOCKS_PER_GROUP(sb));
3646                goto failed_mount;
3647        }
3648        sbi->s_groups_count = blocks_count;
3649        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3650                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3651        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3652                   EXT4_DESC_PER_BLOCK(sb);
3653        sbi->s_group_desc = ext4_kvmalloc(db_count *
3654                                          sizeof(struct buffer_head *),
3655                                          GFP_KERNEL);
3656        if (sbi->s_group_desc == NULL) {
3657                ext4_msg(sb, KERN_ERR, "not enough memory");
3658                ret = -ENOMEM;
3659                goto failed_mount;
3660        }
3661
3662        if (ext4_proc_root)
3663                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3664
3665        if (sbi->s_proc)
3666                proc_create_data("options", S_IRUGO, sbi->s_proc,
3667                                 &ext4_seq_options_fops, sb);
3668
3669        bgl_lock_init(sbi->s_blockgroup_lock);
3670
3671        for (i = 0; i < db_count; i++) {
3672                block = descriptor_loc(sb, logical_sb_block, i);
3673                sbi->s_group_desc[i] = sb_bread(sb, block);
3674                if (!sbi->s_group_desc[i]) {
3675                        ext4_msg(sb, KERN_ERR,
3676                               "can't read group descriptor %d", i);
3677                        db_count = i;
3678                        goto failed_mount2;
3679                }
3680        }
3681        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3682                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3683                goto failed_mount2;
3684        }
3685        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3686                if (!ext4_fill_flex_info(sb)) {
3687                        ext4_msg(sb, KERN_ERR,
3688                               "unable to initialize "
3689                               "flex_bg meta info!");
3690                        goto failed_mount2;
3691                }
3692
3693        sbi->s_gdb_count = db_count;
3694        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3695        spin_lock_init(&sbi->s_next_gen_lock);
3696
3697        init_timer(&sbi->s_err_report);
3698        sbi->s_err_report.function = print_daily_error_info;
3699        sbi->s_err_report.data = (unsigned long) sb;
3700
3701        err = percpu_counter_init(&sbi->s_freeclusters_counter,
3702                        ext4_count_free_clusters(sb));
3703        if (!err) {
3704                err = percpu_counter_init(&sbi->s_freeinodes_counter,
3705                                ext4_count_free_inodes(sb));
3706        }
3707        if (!err) {
3708                err = percpu_counter_init(&sbi->s_dirs_counter,
3709                                ext4_count_dirs(sb));
3710        }
3711        if (!err) {
3712                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3713        }
3714        if (!err) {
3715                err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
3716        }
3717        if (err) {
3718                ext4_msg(sb, KERN_ERR, "insufficient memory");
3719                goto failed_mount3;
3720        }
3721
3722        sbi->s_stripe = ext4_get_stripe_size(sbi);
3723        sbi->s_max_writeback_mb_bump = 128;
3724        sbi->s_extent_max_zeroout_kb = 32;
3725
3726        /* Register extent status tree shrinker */
3727        ext4_es_register_shrinker(sb);
3728
3729        /*
3730         * set up enough so that it can read an inode
3731         */
3732        if (!test_opt(sb, NOLOAD) &&
3733            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3734                sb->s_op = &ext4_sops;
3735        else
3736                sb->s_op = &ext4_nojournal_sops;
3737        sb->s_export_op = &ext4_export_ops;
3738        sb->s_xattr = ext4_xattr_handlers;
3739#ifdef CONFIG_QUOTA
3740        sb->dq_op = &ext4_quota_operations;
3741        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
3742                sb->s_qcop = &ext4_qctl_sysfile_operations;
3743        else
3744                sb->s_qcop = &ext4_qctl_operations;
3745#endif
3746        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3747
3748        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3749        mutex_init(&sbi->s_orphan_lock);
3750
3751        sb->s_root = NULL;
3752
3753        needs_recovery = (es->s_last_orphan != 0 ||
3754                          EXT4_HAS_INCOMPAT_FEATURE(sb,
3755                                    EXT4_FEATURE_INCOMPAT_RECOVER));
3756
3757        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3758            !(sb->s_flags & MS_RDONLY))
3759                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3760                        goto failed_mount3;
3761
3762        /*
3763         * The first inode we look at is the journal inode.  Don't try
3764         * root first: it may be modified in the journal!
3765         */
3766        if (!test_opt(sb, NOLOAD) &&
3767            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3768                if (ext4_load_journal(sb, es, journal_devnum))
3769                        goto failed_mount3;
3770        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3771              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3772                ext4_msg(sb, KERN_ERR, "required journal recovery "
3773                       "suppressed and not mounted read-only");
3774                goto failed_mount_wq;
3775        } else {
3776                clear_opt(sb, DATA_FLAGS);
3777                sbi->s_journal = NULL;
3778                needs_recovery = 0;
3779                goto no_journal;
3780        }
3781
3782        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
3783            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
3784                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
3785                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
3786                goto failed_mount_wq;
3787        }
3788
3789        if (!set_journal_csum_feature_set(sb)) {
3790                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
3791                         "feature set");
3792                goto failed_mount_wq;
3793        }
3794
3795        /* We have now updated the journal if required, so we can
3796         * validate the data journaling mode. */
3797        switch (test_opt(sb, DATA_FLAGS)) {
3798        case 0:
3799                /* No mode set, assume a default based on the journal
3800                 * capabilities: ORDERED_DATA if the journal can
3801                 * cope, else JOURNAL_DATA
3802                 */
3803                if (jbd2_journal_check_available_features
3804                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
3805                        set_opt(sb, ORDERED_DATA);
3806                else
3807                        set_opt(sb, JOURNAL_DATA);
3808                break;
3809
3810        case EXT4_MOUNT_ORDERED_DATA:
3811        case EXT4_MOUNT_WRITEBACK_DATA:
3812                if (!jbd2_journal_check_available_features
3813                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
3814                        ext4_msg(sb, KERN_ERR, "Journal does not support "
3815                               "requested data journaling mode");
3816                        goto failed_mount_wq;
3817                }
3818        default:
3819                break;
3820        }
3821        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3822
3823        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
3824
3825        /*
3826         * The journal may have updated the bg summary counts, so we
3827         * need to update the global counters.
3828         */
3829        percpu_counter_set(&sbi->s_freeclusters_counter,
3830                           ext4_count_free_clusters(sb));
3831        percpu_counter_set(&sbi->s_freeinodes_counter,
3832                           ext4_count_free_inodes(sb));
3833        percpu_counter_set(&sbi->s_dirs_counter,
3834                           ext4_count_dirs(sb));
3835        percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3836
3837no_journal:
3838        /*
3839         * Get the # of file system overhead blocks from the
3840         * superblock if present.
3841         */
3842        if (es->s_overhead_clusters)
3843                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
3844        else {
3845                err = ext4_calculate_overhead(sb);
3846                if (err)
3847                        goto failed_mount_wq;
3848        }
3849
3850        /*
3851         * The maximum number of concurrent works can be high and
3852         * concurrency isn't really necessary.  Limit it to 1.
3853         */
3854        EXT4_SB(sb)->dio_unwritten_wq =
3855                alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3856        if (!EXT4_SB(sb)->dio_unwritten_wq) {
3857                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3858                ret = -ENOMEM;
3859                goto failed_mount_wq;
3860        }
3861
3862        /*
3863         * The jbd2_journal_load will have done any necessary log recovery,
3864         * so we can safely mount the rest of the filesystem now.
3865         */
3866
3867        root = ext4_iget(sb, EXT4_ROOT_INO);
3868        if (IS_ERR(root)) {
3869                ext4_msg(sb, KERN_ERR, "get root inode failed");
3870                ret = PTR_ERR(root);
3871                root = NULL;
3872                goto failed_mount4;
3873        }
3874        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3875                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
3876                iput(root);
3877                goto failed_mount4;
3878        }
3879        sb->s_root = d_make_root(root);
3880        if (!sb->s_root) {
3881                ext4_msg(sb, KERN_ERR, "get root dentry failed");
3882                ret = -ENOMEM;
3883                goto failed_mount4;
3884        }
3885
3886        if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
3887                sb->s_flags |= MS_RDONLY;
3888
3889        /* determine the minimum size of new large inodes, if present */
3890        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
3891                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3892                                                     EXT4_GOOD_OLD_INODE_SIZE;
3893                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3894                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
3895                        if (sbi->s_want_extra_isize <
3896                            le16_to_cpu(es->s_want_extra_isize))
3897                                sbi->s_want_extra_isize =
3898                                        le16_to_cpu(es->s_want_extra_isize);
3899                        if (sbi->s_want_extra_isize <
3900                            le16_to_cpu(es->s_min_extra_isize))
3901                                sbi->s_want_extra_isize =
3902                                        le16_to_cpu(es->s_min_extra_isize);
3903                }
3904        }
3905        /* Check if enough inode space is available */
3906        if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
3907                                                        sbi->s_inode_size) {
3908                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3909                                                       EXT4_GOOD_OLD_INODE_SIZE;
3910                ext4_msg(sb, KERN_INFO, "required extra inode space not"
3911                         "available");
3912        }
3913
3914        err = ext4_setup_system_zone(sb);
3915        if (err) {
3916                ext4_msg(sb, KERN_ERR, "failed to initialize system "
3917                         "zone (%d)", err);
3918                goto failed_mount4a;
3919        }
3920
3921        ext4_ext_init(sb);
3922        err = ext4_mb_init(sb);
3923        if (err) {
3924                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3925                         err);
3926                goto failed_mount5;
3927        }
3928
3929        err = ext4_register_li_request(sb, first_not_zeroed);
3930        if (err)
3931                goto failed_mount6;
3932
3933        sbi->s_kobj.kset = ext4_kset;
3934        init_completion(&sbi->s_kobj_unregister);
3935        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
3936                                   "%s", sb->s_id);
3937        if (err)
3938                goto failed_mount7;
3939
3940#ifdef CONFIG_QUOTA
3941        /* Enable quota usage during mount. */
3942        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
3943            !(sb->s_flags & MS_RDONLY)) {
3944                err = ext4_enable_quotas(sb);
3945                if (err)
3946                        goto failed_mount8;
3947        }
3948#endif  /* CONFIG_QUOTA */
3949
3950        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
3951        ext4_orphan_cleanup(sb, es);
3952        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
3953        if (needs_recovery) {
3954                ext4_msg(sb, KERN_INFO, "recovery complete");
3955                ext4_mark_recovery_complete(sb, es);
3956        }
3957        if (EXT4_SB(sb)->s_journal) {
3958                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
3959                        descr = " journalled data mode";
3960                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
3961                        descr = " ordered data mode";
3962                else
3963                        descr = " writeback data mode";
3964        } else
3965                descr = "out journal";
3966
3967        if (test_opt(sb, DISCARD)) {
3968                struct request_queue *q = bdev_get_queue(sb->s_bdev);
3969                if (!blk_queue_discard(q))
3970                        ext4_msg(sb, KERN_WARNING,
3971                                 "mounting with \"discard\" option, but "
3972                                 "the device does not support discard");
3973        }
3974
3975        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
3976                 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3977                 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3978
3979        if (es->s_error_count)
3980                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3981
3982        kfree(orig_data);
3983        return 0;
3984
3985cantfind_ext4:
3986        if (!silent)
3987                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
3988        goto failed_mount;
3989
3990#ifdef CONFIG_QUOTA
3991failed_mount8:
3992        kobject_del(&sbi->s_kobj);
3993#endif
3994failed_mount7:
3995        ext4_unregister_li_request(sb);
3996failed_mount6:
3997        ext4_mb_release(sb);
3998failed_mount5:
3999        ext4_ext_release(sb);
4000        ext4_release_system_zone(sb);

4001failed_mount4a:
4002        dput(sb->s_root);
4003        sb->s_root = NULL;
4004failed_mount4:
4005        ext4_msg(sb, KERN_ERR, "mount failed");
4006        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
4007failed_mount_wq:
4008        if (sbi->s_journal) {
4009                jbd2_journal_destroy(sbi->s_journal);
4010                sbi->s_journal = NULL;
4011        }
4012failed_mount3:
4013        del_timer(&sbi->s_err_report);
4014        if (sbi->s_flex_groups)
4015                ext4_kvfree(sbi->s_flex_groups);
4016        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4017        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4018        percpu_counter_destroy(&sbi->s_dirs_counter);
4019        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4020        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4021        if (sbi->s_mmp_tsk)
4022                kthread_stop(sbi->s_mmp_tsk);
4023failed_mount2:
4024        for (i = 0; i < db_count; i++)
4025                brelse(sbi->s_group_desc[i]);
4026        ext4_kvfree(sbi->s_group_desc);
4027failed_mount:
4028        if (sbi->s_chksum_driver)
4029                crypto_free_shash(sbi->s_chksum_driver);
4030        if (sbi->s_proc) {
4031                remove_proc_entry("options", sbi->s_proc);
4032                remove_proc_entry(sb->s_id, ext4_proc_root);
4033        }
4034#ifdef CONFIG_QUOTA
4035        for (i = 0; i < MAXQUOTAS; i++)
4036                kfree(sbi->s_qf_names[i]);
4037#endif
4038        ext4_blkdev_remove(sbi);
4039        brelse(bh);
4040out_fail:
4041        sb->s_fs_info = NULL;
4042        kfree(sbi->s_blockgroup_lock);
4043        kfree(sbi);
4044out_free_orig:
4045        kfree(orig_data);
4046        return err ? err : ret;
4047}
4048
4049/*
4050 * Setup any per-fs journal parameters now.  We'll do this both on
4051 * initial mount, once the journal has been initialised but before we've
4052 * done any recovery; and again on any subsequent remount.
4053 */
4054static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4055{
4056        struct ext4_sb_info *sbi = EXT4_SB(sb);
4057
4058        journal->j_commit_interval = sbi->s_commit_interval;
4059        journal->j_min_batch_time = sbi->s_min_batch_time;
4060        journal->j_max_batch_time = sbi->s_max_batch_time;
4061
4062        write_lock(&journal->j_state_lock);
4063        if (test_opt(sb, BARRIER))
4064                journal->j_flags |= JBD2_BARRIER;
4065        else
4066                journal->j_flags &= ~JBD2_BARRIER;
4067        if (test_opt(sb, DATA_ERR_ABORT))
4068                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4069        else
4070                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4071        write_unlock(&journal->j_state_lock);
4072}
4073
4074static journal_t *ext4_get_journal(struct super_block *sb,
4075                                   unsigned int journal_inum)
4076{
4077        struct inode *journal_inode;
4078        journal_t *journal;
4079
4080        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4081
4082        /* First, test for the existence of a valid inode on disk.  Bad
4083         * things happen if we iget() an unused inode, as the subsequent
4084         * iput() will try to delete it. */
4085
4086        journal_inode = ext4_iget(sb, journal_inum);
4087        if (IS_ERR(journal_inode)) {
4088                ext4_msg(sb, KERN_ERR, "no journal found");
4089                return NULL;
4090        }
4091        if (!journal_inode->i_nlink) {
4092                make_bad_inode(journal_inode);
4093                iput(journal_inode);
4094                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4095                return NULL;
4096        }
4097
4098        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4099                  journal_inode, journal_inode->i_size);
4100        if (!S_ISREG(journal_inode->i_mode)) {
4101                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4102                iput(journal_inode);
4103                return NULL;
4104        }
4105
4106        journal = jbd2_journal_init_inode(journal_inode);
4107        if (!journal) {
4108                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4109                iput(journal_inode);
4110                return NULL;
4111        }
4112        journal->j_private = sb;
4113        ext4_init_journal_params(sb, journal);
4114        return journal;
4115}
4116
4117static journal_t *ext4_get_dev_journal(struct super_block *sb,
4118                                       dev_t j_dev)
4119{
4120        struct buffer_head *bh;
4121        journal_t *journal;
4122        ext4_fsblk_t start;
4123        ext4_fsblk_t len;
4124        int hblock, blocksize;
4125        ext4_fsblk_t sb_block;
4126        unsigned long offset;
4127        struct ext4_super_block *es;
4128        struct block_device *bdev;
4129
4130        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4131
4132        bdev = ext4_blkdev_get(j_dev, sb);
4133        if (bdev == NULL)
4134                return NULL;
4135
4136        blocksize = sb->s_blocksize;
4137        hblock = bdev_logical_block_size(bdev);
4138        if (blocksize < hblock) {
4139                ext4_msg(sb, KERN_ERR,
4140                        "blocksize too small for journal device");
4141                goto out_bdev;
4142        }
4143
4144        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4145        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4146        set_blocksize(bdev, blocksize);
4147        if (!(bh = __bread(bdev, sb_block, blocksize))) {
4148                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4149                       "external journal");
4150                goto out_bdev;
4151        }
4152
4153        es = (struct ext4_super_block *) (bh->b_data + offset);
4154        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4155            !(le32_to_cpu(es->s_feature_incompat) &
4156              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4157                ext4_msg(sb, KERN_ERR, "external journal has "
4158                                        "bad superblock");
4159                brelse(bh);
4160                goto out_bdev;
4161        }
4162
4163        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4164                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4165                brelse(bh);
4166                goto out_bdev;
4167        }
4168
4169        len = ext4_blocks_count(es);
4170        start = sb_block + 1;
4171        brelse(bh);     /* we're done with the superblock */
4172
4173        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4174                                        start, len, blocksize);
4175        if (!journal) {
4176                ext4_msg(sb, KERN_ERR, "failed to create device journal");
4177                goto out_bdev;
4178        }
4179        journal->j_private = sb;
4180        ll_rw_block(READ, 1, &journal->j_sb_buffer);
4181        wait_on_buffer(journal->j_sb_buffer);
4182        if (!buffer_uptodate(journal->j_sb_buffer)) {
4183                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4184                goto out_journal;
4185        }
4186        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4187                ext4_msg(sb, KERN_ERR, "External journal has more than one "
4188                                        "user (unsupported) - %d",
4189                        be32_to_cpu(journal->j_superblock->s_nr_users));
4190                goto out_journal;
4191        }
4192        EXT4_SB(sb)->journal_bdev = bdev;
4193        ext4_init_journal_params(sb, journal);
4194        return journal;
4195
4196out_journal:
4197        jbd2_journal_destroy(journal);
4198out_bdev:
4199        ext4_blkdev_put(bdev);
4200        return NULL;
4201}
4202
4203static int ext4_load_journal(struct super_block *sb,
4204                             struct ext4_super_block *es,
4205                             unsigned long journal_devnum)
4206{
4207        journal_t *journal;
4208        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4209        dev_t journal_dev;
4210        int err = 0;
4211        int really_read_only;
4212
4213        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4214
4215        if (journal_devnum &&
4216            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4217                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4218                        "numbers have changed");
4219                journal_dev = new_decode_dev(journal_devnum);
4220        } else
4221                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4222
4223        really_read_only = bdev_read_only(sb->s_bdev);
4224
4225        /*
4226         * Are we loading a blank journal or performing recovery after a
4227         * crash?  For recovery, we need to check in advance whether we
4228         * can get read-write access to the device.
4229         */
4230        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4231                if (sb->s_flags & MS_RDONLY) {
4232                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
4233                                        "required on readonly filesystem");
4234                        if (really_read_only) {
4235                                ext4_msg(sb, KERN_ERR, "write access "
4236                                        "unavailable, cannot proceed");
4237                                return -EROFS;
4238                        }
4239                        ext4_msg(sb, KERN_INFO, "write access will "
4240                               "be enabled during recovery");
4241                }
4242        }
4243
4244        if (journal_inum && journal_dev) {
4245                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4246                       "and inode journals!");
4247                return -EINVAL;
4248        }
4249
4250        if (journal_inum) {
4251                if (!(journal = ext4_get_journal(sb, journal_inum)))
4252                        return -EINVAL;
4253        } else {
4254                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4255                        return -EINVAL;
4256        }
4257
4258        if (!(journal->j_flags & JBD2_BARRIER))
4259                ext4_msg(sb, KERN_INFO, "barriers disabled");
4260
4261        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
4262                err = jbd2_journal_wipe(journal, !really_read_only);
4263        if (!err) {
4264                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4265                if (save)
4266                        memcpy(save, ((char *) es) +
4267                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4268                err = jbd2_journal_load(journal);
4269                if (save)
4270                        memcpy(((char *) es) + EXT4_S_ERR_START,
4271                               save, EXT4_S_ERR_LEN);
4272                kfree(save);
4273        }
4274
4275        if (err) {
4276                ext4_msg(sb, KERN_ERR, "error loading journal");
4277                jbd2_journal_destroy(journal);
4278                return err;
4279        }
4280
4281        EXT4_SB(sb)->s_journal = journal;
4282        ext4_clear_journal_err(sb, es);
4283
4284        if (!really_read_only && journal_devnum &&
4285            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4286                es->s_journal_dev = cpu_to_le32(journal_devnum);
4287
4288                /* Make sure we flush the recovery flag to disk. */
4289                ext4_commit_super(sb, 1);
4290        }
4291
4292        return 0;
4293}
4294
4295static int ext4_commit_super(struct super_block *sb, int sync)
4296{
4297        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4298        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4299        int error = 0;
4300
4301        if (!sbh || block_device_ejected(sb))
4302                return error;
4303        if (buffer_write_io_error(sbh)) {
4304                /*
4305                 * Oh, dear.  A previous attempt to write the
4306                 * superblock failed.  This could happen because the
4307                 * USB device was yanked out.  Or it could happen to
4308                 * be a transient write error and maybe the block will
4309                 * be remapped.  Nothing we can do but to retry the
4310                 * write and hope for the best.
4311                 */
4312                ext4_msg(sb, KERN_ERR, "previous I/O error to "
4313                       "superblock detected");
4314                clear_buffer_write_io_error(sbh);
4315                set_buffer_uptodate(sbh);
4316        }
4317        /*
4318         * If the file system is mounted read-only, don't update the
4319         * superblock write time.  This avoids updating the superblock
4320         * write time when we are mounting the root file system
4321         * read/only but we need to replay the journal; at that point,
4322         * for people who are east of GMT and who make their clock
4323         * tick in localtime for Windows bug-for-bug compatibility,
4324         * the clock is set in the future, and this will cause e2fsck
4325         * to complain and force a full file system check.
4326         */
4327        if (!(sb->s_flags & MS_RDONLY))
4328                es->s_wtime = cpu_to_le32(get_seconds());
4329        if (sb->s_bdev->bd_part)
4330                es->s_kbytes_written =
4331                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4332                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4333                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
4334        else
4335                es->s_kbytes_written =
4336                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4337        ext4_free_blocks_count_set(es,
4338                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4339                                &EXT4_SB(sb)->s_freeclusters_counter)));
4340        es->s_free_inodes_count =
4341                cpu_to_le32(percpu_counter_sum_positive(
4342                                &EXT4_SB(sb)->s_freeinodes_counter));
4343        BUFFER_TRACE(sbh, "marking dirty");
4344        ext4_superblock_csum_set(sb);
4345        mark_buffer_dirty(sbh);
4346        if (sync) {
4347                error = sync_dirty_buffer(sbh);
4348                if (error)
4349                        return error;
4350
4351                error = buffer_write_io_error(sbh);
4352                if (error) {
4353                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
4354                               "superblock");
4355                        clear_buffer_write_io_error(sbh);
4356                        set_buffer_uptodate(sbh);
4357                }
4358        }
4359        return error;
4360}
4361
4362/*
4363 * Have we just finished recovery?  If so, and if we are mounting (or
4364 * remounting) the filesystem readonly, then we will end up with a
4365 * consistent fs on disk.  Record that fact.
4366 */
4367static void ext4_mark_recovery_complete(struct super_block *sb,
4368                                        struct ext4_super_block *es)
4369{
4370        journal_t *journal = EXT4_SB(sb)->s_journal;
4371
4372        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4373                BUG_ON(journal != NULL);
4374                return;
4375        }
4376        jbd2_journal_lock_updates(journal);
4377        if (jbd2_journal_flush(journal) < 0)
4378                goto out;
4379
4380        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
4381            sb->s_flags & MS_RDONLY) {
4382                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4383                ext4_commit_super(sb, 1);
4384        }
4385
4386out:
4387        jbd2_journal_unlock_updates(journal);
4388}
4389
4390/*
4391 * If we are mounting (or read-write remounting) a filesystem whose journal
4392 * has recorded an error from a previous lifetime, move that error to the
4393 * main filesystem now.
4394 */
4395static void ext4_clear_journal_err(struct super_block *sb,
4396                                   struct ext4_super_block *es)
4397{
4398        journal_t *journal;
4399        int j_errno;
4400        const char *errstr;
4401
4402        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4403
4404        journal = EXT4_SB(sb)->s_journal;
4405
4406        /*
4407         * Now check for any error status which may have been recorded in the
4408         * journal by a prior ext4_error() or ext4_abort()
4409         */
4410
4411        j_errno = jbd2_journal_errno(journal);
4412        if (j_errno) {
4413                char nbuf[16];
4414
4415                errstr = ext4_decode_error(sb, j_errno, nbuf);
4416                ext4_warning(sb, "Filesystem error recorded "
4417                             "from previous mount: %s", errstr);
4418                ext4_warning(sb, "Marking fs in need of filesystem check.");
4419
4420                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4421                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4422                ext4_commit_super(sb, 1);
4423
4424                jbd2_journal_clear_err(journal);
4425                jbd2_journal_update_sb_errno(journal);
4426        }
4427}
4428
4429/*
4430 * Force the running and committing transactions to commit,
4431 * and wait on the commit.
4432 */
4433int ext4_force_commit(struct super_block *sb)
4434{
4435        journal_t *journal;
4436
4437        if (sb->s_flags & MS_RDONLY)
4438                return 0;
4439
4440        journal = EXT4_SB(sb)->s_journal;
4441        return ext4_journal_force_commit(journal);
4442}
4443
4444static int ext4_sync_fs(struct super_block *sb, int wait)
4445{
4446        int ret = 0;
4447        tid_t target;
4448        struct ext4_sb_info *sbi = EXT4_SB(sb);
4449
4450        trace_ext4_sync_fs(sb, wait);
4451        flush_workqueue(sbi->dio_unwritten_wq);
4452        /*
4453         * Writeback quota in non-journalled quota case - journalled quota has
4454         * no dirty dquots
4455         */
4456        dquot_writeback_dquots(sb, -1);
4457        if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4458                if (wait)
4459                        jbd2_log_wait_commit(sbi->s_journal, target);
4460        }
4461        return ret;
4462}
4463
4464/*
4465 * LVM calls this function before a (read-only) snapshot is created.  This
4466 * gives us a chance to flush the journal completely and mark the fs clean.
4467 *
4468 * Note that only this function cannot bring a filesystem to be in a clean
4469 * state independently. It relies on upper layer to stop all data & metadata
4470 * modifications.
4471 */
4472static int ext4_freeze(struct super_block *sb)
4473{
4474        int error = 0;
4475        journal_t *journal;
4476
4477        if (sb->s_flags & MS_RDONLY)
4478                return 0;
4479
4480        journal = EXT4_SB(sb)->s_journal;
4481
4482        /* Now we set up the journal barrier. */
4483        jbd2_journal_lock_updates(journal);
4484
4485        /*
4486         * Don't clear the needs_recovery flag if we failed to flush
4487         * the journal.
4488         */
4489        error = jbd2_journal_flush(journal);
4490        if (error < 0)
4491                goto out;
4492
4493        /* Journal blocked and flushed, clear needs_recovery flag. */
4494        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4495        error = ext4_commit_super(sb, 1);
4496out:
4497        /* we rely on upper layer to stop further updates */
4498        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4499        return error;
4500}
4501
4502/*
4503 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
4504 * flag here, even though the filesystem is not technically dirty yet.
4505 */
4506static int ext4_unfreeze(struct super_block *sb)
4507{
4508        if (sb->s_flags & MS_RDONLY)
4509                return 0;
4510
4511        /* Reset the needs_recovery flag before the fs is unlocked. */
4512        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4513        ext4_commit_super(sb, 1);
4514        return 0;
4515}
4516
4517/*
4518 * Structure to save mount options for ext4_remount's benefit
4519 */
4520struct ext4_mount_options {
4521        unsigned long s_mount_opt;
4522        unsigned long s_mount_opt2;
4523        kuid_t s_resuid;
4524        kgid_t s_resgid;
4525        unsigned long s_commit_interval;
4526        u32 s_min_batch_time, s_max_batch_time;
4527#ifdef CONFIG_QUOTA
4528        int s_jquota_fmt;
4529        char *s_qf_names[MAXQUOTAS];
4530#endif
4531};
4532
4533static int ext4_remount(struct super_block *sb, int *flags, char *data)
4534{
4535        struct ext4_super_block *es;
4536        struct ext4_sb_info *sbi = EXT4_SB(sb);
4537        unsigned long old_sb_flags;
4538        struct ext4_mount_options old_opts;
4539        int enable_quota = 0;
4540        ext4_group_t g;
4541        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4542        int err = 0;
4543#ifdef CONFIG_QUOTA
4544        int i, j;
4545#endif
4546        char *orig_data = kstrdup(data, GFP_KERNEL);
4547
4548        /* Store the original options */
4549        old_sb_flags = sb->s_flags;
4550        old_opts.s_mount_opt = sbi->s_mount_opt;
4551        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4552        old_opts.s_resuid = sbi->s_resuid;
4553        old_opts.s_resgid = sbi->s_resgid;
4554        old_opts.s_commit_interval = sbi->s_commit_interval;
4555        old_opts.s_min_batch_time = sbi->s_min_batch_time;
4556        old_opts.s_max_batch_time = sbi->s_max_batch_time;
4557#ifdef CONFIG_QUOTA
4558        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4559        for (i = 0; i < MAXQUOTAS; i++)
4560                if (sbi->s_qf_names[i]) {
4561                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4562                                                         GFP_KERNEL);
4563                        if (!old_opts.s_qf_names[i]) {
4564                                for (j = 0; j < i; j++)
4565                                        kfree(old_opts.s_qf_names[j]);
4566                                kfree(orig_data);
4567                                return -ENOMEM;
4568                        }
4569                } else
4570                        old_opts.s_qf_names[i] = NULL;
4571#endif
4572        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4573                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4574
4575        /*
4576         * Allow the "check" option to be passed as a remount option.
4577         */
4578        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4579                err = -EINVAL;
4580                goto restore_opts;
4581        }
4582
4583        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4584                ext4_abort(sb, "Abort forced by user");
4585
4586        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4587                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4588
4589        es = sbi->s_es;
4590
4591        if (sbi->s_journal) {
4592                ext4_init_journal_params(sb, sbi->s_journal);
4593                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4594        }
4595
4596        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4597                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4598                        err = -EROFS;
4599                        goto restore_opts;
4600                }
4601
4602                if (*flags & MS_RDONLY) {
4603                        err = dquot_suspend(sb, -1);
4604                        if (err < 0)
4605                                goto restore_opts;
4606
4607                        /*
4608                         * First of all, the unconditional stuff we have to do
4609                         * to disable replay of the journal when we next remount
4610                         */
4611                        sb->s_flags |= MS_RDONLY;
4612
4613                        /*
4614                         * OK, test if we are remounting a valid rw partition
4615                         * readonly, and if so set the rdonly flag and then
4616                         * mark the partition as valid again.
4617                         */
4618                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
4619                            (sbi->s_mount_state & EXT4_VALID_FS))
4620                                es->s_state = cpu_to_le16(sbi->s_mount_state);
4621
4622                        if (sbi->s_journal)
4623                                ext4_mark_recovery_complete(sb, es);
4624                } else {
4625                        /* Make sure we can mount this feature set readwrite */
4626                        if (!ext4_feature_set_ok(sb, 0)) {
4627                                err = -EROFS;
4628                                goto restore_opts;
4629                        }
4630                        /*
4631                         * Make sure the group descriptor checksums
4632                         * are sane.  If they aren't, refuse to remount r/w.
4633                         */
4634                        for (g = 0; g < sbi->s_groups_count; g++) {
4635                                struct ext4_group_desc *gdp =
4636                                        ext4_get_group_desc(sb, g, NULL);
4637
4638                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
4639                                        ext4_msg(sb, KERN_ERR,
4640               "ext4_remount: Checksum for group %u failed (%u!=%u)",
4641                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
4642                                               le16_to_cpu(gdp->bg_checksum));
4643                                        err = -EINVAL;
4644                                        goto restore_opts;
4645                                }
4646                        }
4647
4648                        /*
4649                         * If we have an unprocessed orphan list hanging
4650                         * around from a previously readonly bdev mount,
4651                         * require a full umount/remount for now.
4652                         */
4653                        if (es->s_last_orphan) {
4654                                ext4_msg(sb, KERN_WARNING, "Couldn't "
4655                                       "remount RDWR because of unprocessed "
4656                                       "orphan inode list.  Please "
4657                                       "umount/remount instead");
4658                                err = -EINVAL;
4659                                goto restore_opts;
4660                        }
4661
4662                        /*
4663                         * Mounting a RDONLY partition read-write, so reread
4664                         * and store the current valid flag.  (It may have
4665                         * been changed by e2fsck since we originally mounted
4666                         * the partition.)
4667                         */
4668                        if (sbi->s_journal)
4669                                ext4_clear_journal_err(sb, es);
4670                        sbi->s_mount_state = le16_to_cpu(es->s_state);
4671                        if (!ext4_setup_super(sb, es, 0))
4672                                sb->s_flags &= ~MS_RDONLY;
4673                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
4674                                                     EXT4_FEATURE_INCOMPAT_MMP))
4675                                if (ext4_multi_mount_protect(sb,
4676                                                le64_to_cpu(es->s_mmp_block))) {
4677                                        err = -EROFS;
4678                                        goto restore_opts;
4679                                }
4680                        enable_quota = 1;
4681                }
4682        }
4683
4684        /*
4685         * Reinitialize lazy itable initialization thread based on
4686         * current settings
4687         */
4688        if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4689                ext4_unregister_li_request(sb);
4690        else {
4691                ext4_group_t first_not_zeroed;
4692                first_not_zeroed = ext4_has_uninit_itable(sb);
4693                ext4_register_li_request(sb, first_not_zeroed);
4694        }
4695
4696        ext4_setup_system_zone(sb);
4697        if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
4698                ext4_commit_super(sb, 1);
4699
4700#ifdef CONFIG_QUOTA
4701        /* Release old quota file names */
4702        for (i = 0; i < MAXQUOTAS; i++)
4703                kfree(old_opts.s_qf_names[i]);
4704        if (enable_quota) {
4705                if (sb_any_quota_suspended(sb))
4706                        dquot_resume(sb, -1);
4707                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4708                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
4709                        err = ext4_enable_quotas(sb);
4710                        if (err)
4711                                goto restore_opts;
4712                }
4713        }
4714#endif
4715
4716        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
4717        kfree(orig_data);
4718        return 0;
4719
4720restore_opts:
4721        sb->s_flags = old_sb_flags;
4722        sbi->s_mount_opt = old_opts.s_mount_opt;
4723        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4724        sbi->s_resuid = old_opts.s_resuid;
4725        sbi->s_resgid = old_opts.s_resgid;
4726        sbi->s_commit_interval = old_opts.s_commit_interval;
4727        sbi->s_min_batch_time = old_opts.s_min_batch_time;
4728        sbi->s_max_batch_time = old_opts.s_max_batch_time;
4729#ifdef CONFIG_QUOTA
4730        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4731        for (i = 0; i < MAXQUOTAS; i++) {
4732                kfree(sbi->s_qf_names[i]);
4733                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4734        }
4735#endif
4736        kfree(orig_data);
4737        return err;
4738}
4739
4740static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4741{
4742        struct super_block *sb = dentry->d_sb;
4743        struct ext4_sb_info *sbi = EXT4_SB(sb);
4744        struct ext4_super_block *es = sbi->s_es;
4745        ext4_fsblk_t overhead = 0;
4746        u64 fsid;
4747        s64 bfree;
4748
4749        if (!test_opt(sb, MINIX_DF))
4750                overhead = sbi->s_overhead;
4751
4752        buf->f_type = EXT4_SUPER_MAGIC;
4753        buf->f_bsize = sb->s_blocksize;
4754        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
4755        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4756                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4757        /* prevent underflow in case that few free space is available */
4758        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
4759        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4760        if (buf->f_bfree < ext4_r_blocks_count(es))
4761                buf->f_bavail = 0;
4762        buf->f_files = le32_to_cpu(es->s_inodes_count);
4763        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
4764        buf->f_namelen = EXT4_NAME_LEN;
4765        fsid = le64_to_cpup((void *)es->s_uuid) ^
4766               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
4767        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
4768        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
4769
4770        return 0;
4771}
4772
4773/* Helper function for writing quotas on sync - we need to start transaction
4774 * before quota file is locked for write. Otherwise there are possible deadlocks:
4775 * Process 1                         Process 2
4776 * ext4_create()                     quota_sync()
4777 *   jbd2_journal_start()                  write_dquot()
4778 *   dquot_initialize()                         down(dqio_mutex)
4779 *     down(dqio_mutex)                    jbd2_journal_start()
4780 *
4781 */
4782
4783#ifdef CONFIG_QUOTA
4784
4785static inline struct inode *dquot_to_inode(struct dquot *dquot)
4786{
4787        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
4788}
4789
4790static int ext4_write_dquot(struct dquot *dquot)
4791{
4792        int ret, err;
4793        handle_t *handle;
4794        struct inode *inode;
4795
4796        inode = dquot_to_inode(dquot);
4797        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4798                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
4799        if (IS_ERR(handle))
4800                return PTR_ERR(handle);
4801        ret = dquot_commit(dquot);
4802        err = ext4_journal_stop(handle);
4803        if (!ret)
4804                ret = err;
4805        return ret;
4806}
4807
4808static int ext4_acquire_dquot(struct dquot *dquot)
4809{
4810        int ret, err;
4811        handle_t *handle;
4812
4813        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4814                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
4815        if (IS_ERR(handle))
4816                return PTR_ERR(handle);
4817        ret = dquot_acquire(dquot);
4818        err = ext4_journal_stop(handle);
4819        if (!ret)
4820                ret = err;
4821        return ret;
4822}
4823
4824static int ext4_release_dquot(struct dquot *dquot)
4825{
4826        int ret, err;
4827        handle_t *handle;
4828
4829        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4830                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
4831        if (IS_ERR(handle)) {
4832                /* Release dquot anyway to avoid endless cycle in dqput() */
4833                dquot_release(dquot);
4834                return PTR_ERR(handle);
4835        }
4836        ret = dquot_release(dquot);
4837        err = ext4_journal_stop(handle);
4838        if (!ret)
4839                ret = err;
4840        return ret;
4841}
4842
4843static int ext4_mark_dquot_dirty(struct dquot *dquot)
4844{
4845        struct super_block *sb = dquot->dq_sb;
4846        struct ext4_sb_info *sbi = EXT4_SB(sb);
4847
4848        /* Are we journaling quotas? */
4849        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
4850            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
4851                dquot_mark_dquot_dirty(dquot);
4852                return ext4_write_dquot(dquot);
4853        } else {
4854                return dquot_mark_dquot_dirty(dquot);
4855        }
4856}
4857
4858static int ext4_write_info(struct super_block *sb, int type)
4859{
4860        int ret, err;
4861        handle_t *handle;
4862
4863        /* Data block + inode block */
4864        handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
4865        if (IS_ERR(handle))
4866                return PTR_ERR(handle);
4867        ret = dquot_commit_info(sb, type);
4868        err = ext4_journal_stop(handle);
4869        if (!ret)
4870                ret = err;
4871        return ret;
4872}
4873
4874/*
4875 * Turn on quotas during mount time - we need to find
4876 * the quota file and such...
4877 */
4878static int ext4_quota_on_mount(struct super_block *sb, int type)
4879{
4880        return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
4881                                        EXT4_SB(sb)->s_jquota_fmt, type);
4882}
4883
4884/*
4885 * Standard function to be called on quota_on
4886 */
4887static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4888                         struct path *path)
4889{
4890        int err;
4891
4892        if (!test_opt(sb, QUOTA))
4893                return -EINVAL;
4894
4895        /* Quotafile not on the same filesystem? */
4896        if (path->dentry->d_sb != sb)
4897                return -EXDEV;
4898        /* Journaling quota? */
4899        if (EXT4_SB(sb)->s_qf_names[type]) {
4900                /* Quotafile not in fs root? */
4901                if (path->dentry->d_parent != sb->s_root)
4902                        ext4_msg(sb, KERN_WARNING,
4903                                "Quota file not on filesystem root. "
4904                                "Journaled quota will not work");
4905        }
4906
4907        /*
4908         * When we journal data on quota file, we have to flush journal to see
4909         * all updates to the file when we bypass pagecache...
4910         */
4911        if (EXT4_SB(sb)->s_journal &&
4912            ext4_should_journal_data(path->dentry->d_inode)) {
4913                /*
4914                 * We don't need to lock updates but journal_flush() could
4915                 * otherwise be livelocked...
4916                 */
4917                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
4918                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
4919                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4920                if (err)
4921                        return err;
4922        }
4923
4924        return dquot_quota_on(sb, type, format_id, path);
4925}
4926
4927static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
4928                             unsigned int flags)
4929{
4930        int err;
4931        struct inode *qf_inode;
4932        unsigned long qf_inums[MAXQUOTAS] = {
4933                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
4934                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
4935        };
4936
4937        BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
4938
4939        if (!qf_inums[type])
4940                return -EPERM;
4941
4942        qf_inode = ext4_iget(sb, qf_inums[type]);
4943        if (IS_ERR(qf_inode)) {
4944                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
4945                return PTR_ERR(qf_inode);
4946        }
4947
4948        err = dquot_enable(qf_inode, type, format_id, flags);
4949        iput(qf_inode);
4950
4951        return err;
4952}
4953
4954/* Enable usage tracking for all quota types. */
4955static int ext4_enable_quotas(struct super_block *sb)
4956{
4957        int type, err = 0;
4958        unsigned long qf_inums[MAXQUOTAS] = {
4959                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
4960                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
4961        };
4962
4963        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
4964        for (type = 0; type < MAXQUOTAS; type++) {
4965                if (qf_inums[type]) {
4966                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
4967                                                DQUOT_USAGE_ENABLED);
4968                        if (err) {
4969                                ext4_warning(sb,
4970                                        "Failed to enable quota tracking "
4971                                        "(type=%d, err=%d). Please run "
4972                                        "e2fsck to fix.", type, err);
4973                                return err;
4974                        }
4975                }
4976        }
4977        return 0;
4978}
4979
4980/*
4981 * quota_on function that is used when QUOTA feature is set.
4982 */
4983static int ext4_quota_on_sysfile(struct super_block *sb, int type,
4984                                 int format_id)
4985{
4986        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
4987                return -EINVAL;
4988
4989        /*
4990         * USAGE was enabled at mount time. Only need to enable LIMITS now.
4991         */
4992        return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
4993}
4994
4995static int ext4_quota_off(struct super_block *sb, int type)
4996{
4997        struct inode *inode = sb_dqopt(sb)->files[type];
4998        handle_t *handle;
4999
5000        /* Force all delayed allocation blocks to be allocated.

5001         * Caller already holds s_umount sem */
5002        if (test_opt(sb, DELALLOC))
5003                sync_filesystem(sb);
5004
5005        if (!inode)
5006                goto out;
5007
5008        /* Update modification times of quota files when userspace can
5009         * start looking at them */
5010        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5011        if (IS_ERR(handle))
5012                goto out;
5013        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5014        ext4_mark_inode_dirty(handle, inode);
5015        ext4_journal_stop(handle);
5016
5017out:
5018        return dquot_quota_off(sb, type);
5019}
5020
5021/*
5022 * quota_off function that is used when QUOTA feature is set.
5023 */
5024static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5025{
5026        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5027                return -EINVAL;
5028
5029        /* Disable only the limits. */
5030        return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5031}
5032
5033/* Read data from quotafile - avoid pagecache and such because we cannot afford
5034 * acquiring the locks... As quota files are never truncated and quota code
5035 * itself serializes the operations (and no one else should touch the files)
5036 * we don't have to be afraid of races */
5037static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5038                               size_t len, loff_t off)
5039{
5040        struct inode *inode = sb_dqopt(sb)->files[type];
5041        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5042        int err = 0;
5043        int offset = off & (sb->s_blocksize - 1);
5044        int tocopy;
5045        size_t toread;
5046        struct buffer_head *bh;
5047        loff_t i_size = i_size_read(inode);
5048
5049        if (off > i_size)
5050                return 0;
5051        if (off+len > i_size)
5052                len = i_size-off;
5053        toread = len;
5054        while (toread > 0) {
5055                tocopy = sb->s_blocksize - offset < toread ?
5056                                sb->s_blocksize - offset : toread;
5057                bh = ext4_bread(NULL, inode, blk, 0, &err);
5058                if (err)
5059                        return err;
5060                if (!bh)        /* A hole? */
5061                        memset(data, 0, tocopy);
5062                else
5063                        memcpy(data, bh->b_data+offset, tocopy);
5064                brelse(bh);
5065                offset = 0;
5066                toread -= tocopy;
5067                data += tocopy;
5068                blk++;
5069        }
5070        return len;
5071}
5072
5073/* Write to quotafile (we know the transaction is already started and has
5074 * enough credits) */
5075static ssize_t ext4_quota_write(struct super_block *sb, int type,
5076                                const char *data, size_t len, loff_t off)
5077{
5078        struct inode *inode = sb_dqopt(sb)->files[type];
5079        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5080        int err = 0;
5081        int offset = off & (sb->s_blocksize - 1);
5082        struct buffer_head *bh;
5083        handle_t *handle = journal_current_handle();
5084
5085        if (EXT4_SB(sb)->s_journal && !handle) {
5086                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5087                        " cancelled because transaction is not started",
5088                        (unsigned long long)off, (unsigned long long)len);
5089                return -EIO;
5090        }
5091        /*
5092         * Since we account only one data block in transaction credits,
5093         * then it is impossible to cross a block boundary.
5094         */
5095        if (sb->s_blocksize - offset < len) {
5096                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5097                        " cancelled because not block aligned",
5098                        (unsigned long long)off, (unsigned long long)len);
5099                return -EIO;
5100        }
5101
5102        bh = ext4_bread(handle, inode, blk, 1, &err);
5103        if (!bh)
5104                goto out;
5105        err = ext4_journal_get_write_access(handle, bh);
5106        if (err) {
5107                brelse(bh);
5108                goto out;
5109        }
5110        lock_buffer(bh);
5111        memcpy(bh->b_data+offset, data, len);
5112        flush_dcache_page(bh->b_page);
5113        unlock_buffer(bh);
5114        err = ext4_handle_dirty_metadata(handle, NULL, bh);
5115        brelse(bh);
5116out:
5117        if (err)
5118                return err;
5119        if (inode->i_size < off + len) {
5120                i_size_write(inode, off + len);
5121                EXT4_I(inode)->i_disksize = inode->i_size;
5122                ext4_mark_inode_dirty(handle, inode);
5123        }
5124        return len;
5125}
5126
5127#endif
5128
5129static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5130                       const char *dev_name, void *data)
5131{
5132        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5133}
5134
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
/* Register the ext2 filesystem type; a failure is only logged. */
static inline void register_as_ext2(void)
{
	int rc = register_filesystem(&ext2_fs_type);

	if (!rc)
		return;
	printk(KERN_WARNING
	       "EXT4-fs: Unable to register as ext2 (%d)\n", rc);
}

/* Unregister the ext2 filesystem type. */
static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 if the feature set of @sb is acceptable for an ext2 mount,
 * 0 otherwise.  Any incompat feature outside EXT2_FEATURE_INCOMPAT_SUPP
 * is rejected; ro-compat features outside EXT2_FEATURE_RO_COMPAT_SUPP
 * are rejected only for read-write mounts.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	return !EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP);
}
#else
/* Stubs for builds where this condition is false: nothing is registered. */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	return 0;
}
#endif
5164
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
/* Register the ext3 filesystem type; a failure is only logged. */
static inline void register_as_ext3(void)
{
	int rc = register_filesystem(&ext3_fs_type);

	if (!rc)
		return;
	printk(KERN_WARNING
	       "EXT4-fs: Unable to register as ext3 (%d)\n", rc);
}

/* Unregister the ext3 filesystem type. */
static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

/*
 * Return 1 if the feature set of @sb is acceptable for an ext3 mount,
 * 0 otherwise.  The filesystem must have a journal and no incompat
 * feature outside EXT3_FEATURE_INCOMPAT_SUPP; ro-compat features outside
 * EXT3_FEATURE_RO_COMPAT_SUPP are rejected only for read-write mounts.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP) ||
	    !EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	return !EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
}
#else
/* Stubs for builds where this condition is false: nothing is registered. */
static inline void register_as_ext3(void)
{
}

static inline void unregister_as_ext3(void)
{
}

static inline int ext3_feature_set_ok(struct super_block *sb)
{
	return 0;
}
#endif
5196
5197static struct file_system_type ext4_fs_type = {
5198        .owner          = THIS_MODULE,
5199        .name           = "ext4",
5200        .mount          = ext4_mount,
5201        .kill_sb        = kill_block_super,
5202        .fs_flags       = FS_REQUIRES_DEV,
5203};
5204MODULE_ALIAS_FS("ext4");
5205
5206static int __init ext4_init_feat_adverts(void)
5207{
5208        struct ext4_features *ef;
5209        int ret = -ENOMEM;
5210
5211        ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5212        if (!ef)
5213                goto out;
5214
5215        ef->f_kobj.kset = ext4_kset;
5216        init_completion(&ef->f_kobj_unregister);
5217        ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5218                                   "features");
5219        if (ret) {
5220                kfree(ef);
5221                goto out;
5222        }
5223
5224        ext4_feat = ef;
5225        ret = 0;
5226out:
5227        return ret;
5228}
5229
5230static void ext4_exit_feat_adverts(void)
5231{
5232        kobject_put(&ext4_feat->f_kobj);
5233        wait_for_completion(&ext4_feat->f_kobj_unregister);
5234        kfree(ext4_feat);
5235}
5236
5237/* Shared across all ext4 file systems */
5238wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5239struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5240
5241static int __init ext4_init_fs(void)
5242{
5243        int i, err;
5244
5245        ext4_li_info = NULL;
5246        mutex_init(&ext4_li_mtx);
5247
5248        /* Build-time check for flags consistency */
5249        ext4_check_flag_values();
5250
5251        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
5252                mutex_init(&ext4__aio_mutex[i]);
5253                init_waitqueue_head(&ext4__ioend_wq[i]);
5254        }
5255
5256        err = ext4_init_es();
5257        if (err)
5258                return err;
5259
5260        err = ext4_init_pageio();
5261        if (err)
5262                goto out7;
5263
5264        err = ext4_init_system_zone();
5265        if (err)
5266                goto out6;
5267        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5268        if (!ext4_kset) {
5269                err = -ENOMEM;
5270                goto out5;
5271        }
5272        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5273
5274        err = ext4_init_feat_adverts();
5275        if (err)
5276                goto out4;
5277
5278        err = ext4_init_mballoc();
5279        if (err)
5280                goto out3;
5281
5282        err = ext4_init_xattr();
5283        if (err)
5284                goto out2;
5285        err = init_inodecache();
5286        if (err)
5287                goto out1;
5288        register_as_ext3();
5289        register_as_ext2();
5290        err = register_filesystem(&ext4_fs_type);
5291        if (err)
5292                goto out;
5293
5294        return 0;
5295out:
5296        unregister_as_ext2();
5297        unregister_as_ext3();
5298        destroy_inodecache();
5299out1:
5300        ext4_exit_xattr();
5301out2:
5302        ext4_exit_mballoc();
5303out3:
5304        ext4_exit_feat_adverts();
5305out4:
5306        if (ext4_proc_root)
5307                remove_proc_entry("fs/ext4", NULL);
5308        kset_unregister(ext4_kset);
5309out5:
5310        ext4_exit_system_zone();
5311out6:
5312        ext4_exit_pageio();
5313out7:
5314        ext4_exit_es();
5315
5316        return err;
5317}
5318
5319static void __exit ext4_exit_fs(void)
5320{
5321        ext4_destroy_lazyinit_thread();
5322        unregister_as_ext2();
5323        unregister_as_ext3();
5324        unregister_filesystem(&ext4_fs_type);
5325        destroy_inodecache();
5326        ext4_exit_xattr();
5327        ext4_exit_mballoc();
5328        ext4_exit_feat_adverts();
5329        remove_proc_entry("fs/ext4", NULL);
5330        kset_unregister(ext4_kset);
5331        ext4_exit_system_zone();
5332        ext4_exit_pageio();
5333}
5334
5335MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
5336MODULE_DESCRIPTION("Fourth Extended Filesystem");
5337MODULE_LICENSE("GPL");
5338module_init(ext4_init_fs)
5339module_exit(ext4_exit_fs)
5340