linux/fs/ext4/super.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  linux/fs/ext4/super.c
   4 *
   5 * Copyright (C) 1992, 1993, 1994, 1995
   6 * Remy Card (card@masi.ibp.fr)
   7 * Laboratoire MASI - Institut Blaise Pascal
   8 * Universite Pierre et Marie Curie (Paris VI)
   9 *
  10 *  from
  11 *
  12 *  linux/fs/minix/inode.c
  13 *
  14 *  Copyright (C) 1991, 1992  Linus Torvalds
  15 *
  16 *  Big-endian to little-endian byte-swapping/bitmaps by
  17 *        David S. Miller (davem@caip.rutgers.edu), 1995
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/string.h>
  22#include <linux/fs.h>
  23#include <linux/time.h>
  24#include <linux/vmalloc.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/backing-dev.h>
  29#include <linux/parser.h>
  30#include <linux/buffer_head.h>
  31#include <linux/exportfs.h>
  32#include <linux/vfs.h>
  33#include <linux/random.h>
  34#include <linux/mount.h>
  35#include <linux/namei.h>
  36#include <linux/quotaops.h>
  37#include <linux/seq_file.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/dax.h>
  42#include <linux/cleancache.h>
  43#include <linux/uaccess.h>
  44#include <linux/iversion.h>
  45#include <linux/unicode.h>
  46#include <linux/part_stat.h>
  47#include <linux/kthread.h>
  48#include <linux/freezer.h>
  49
  50#include "ext4.h"
  51#include "ext4_extents.h"       /* Needed for trace points definition */
  52#include "ext4_jbd2.h"
  53#include "xattr.h"
  54#include "acl.h"
  55#include "mballoc.h"
  56#include "fsmap.h"
  57
  58#define CREATE_TRACE_POINTS
  59#include <trace/events/ext4.h>
  60
  61static struct ext4_lazy_init *ext4_li_info;
  62static struct mutex ext4_li_mtx;
  63static struct ratelimit_state ext4_mount_msg_ratelimit;
  64
  65static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  66                             unsigned long journal_devnum);
  67static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  68static int ext4_commit_super(struct super_block *sb, int sync);
  69static void ext4_mark_recovery_complete(struct super_block *sb,
  70                                        struct ext4_super_block *es);
  71static void ext4_clear_journal_err(struct super_block *sb,
  72                                   struct ext4_super_block *es);
  73static int ext4_sync_fs(struct super_block *sb, int wait);
  74static int ext4_remount(struct super_block *sb, int *flags, char *data);
  75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  76static int ext4_unfreeze(struct super_block *sb);
  77static int ext4_freeze(struct super_block *sb);
  78static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  79                       const char *dev_name, void *data);
  80static inline int ext2_feature_set_ok(struct super_block *sb);
  81static inline int ext3_feature_set_ok(struct super_block *sb);
  82static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  83static void ext4_destroy_lazyinit_thread(void);
  84static void ext4_unregister_li_request(struct super_block *sb);
  85static void ext4_clear_request_list(void);
  86static struct inode *ext4_get_journal_inode(struct super_block *sb,
  87                                            unsigned int journal_inum);
  88
  89/*
  90 * Lock ordering
  91 *
  92 * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
  93 * i_mmap_rwsem (inode->i_mmap_rwsem)!
  94 *
  95 * page fault path:
  96 * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
  97 *   page lock -> i_data_sem (rw)
  98 *
  99 * buffered write path:
 100 * sb_start_write -> i_mutex -> mmap_lock
 101 * sb_start_write -> i_mutex -> transaction start -> page lock ->
 102 *   i_data_sem (rw)
 103 *
 104 * truncate:
 105 * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
 106 * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
 107 *   i_data_sem (rw)
 108 *
 109 * direct IO:
 110 * sb_start_write -> i_mutex -> mmap_lock
 111 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 112 *
 113 * writepages:
 114 * transaction start -> page lock(s) -> i_data_sem (rw)
 115 */
 116
 117#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 118static struct file_system_type ext2_fs_type = {
 119        .owner          = THIS_MODULE,
 120        .name           = "ext2",
 121        .mount          = ext4_mount,
 122        .kill_sb        = kill_block_super,
 123        .fs_flags       = FS_REQUIRES_DEV,
 124};
 125MODULE_ALIAS_FS("ext2");
 126MODULE_ALIAS("ext2");
 127#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 128#else
 129#define IS_EXT2_SB(sb) (0)
 130#endif
 131
 132
 133static struct file_system_type ext3_fs_type = {
 134        .owner          = THIS_MODULE,
 135        .name           = "ext3",
 136        .mount          = ext4_mount,
 137        .kill_sb        = kill_block_super,
 138        .fs_flags       = FS_REQUIRES_DEV,
 139};
 140MODULE_ALIAS_FS("ext3");
 141MODULE_ALIAS("ext3");
 142#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 143
 144/*
 145 * This works like sb_bread() except it uses ERR_PTR for error
 146 * returns.  Currently with sb_bread it's impossible to distinguish
 147 * between ENOMEM and EIO situations (since both result in a NULL
 148 * return.
 149 */
 150struct buffer_head *
 151ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
 152{
 153        struct buffer_head *bh = sb_getblk(sb, block);
 154
 155        if (bh == NULL)
 156                return ERR_PTR(-ENOMEM);
 157        if (ext4_buffer_uptodate(bh))
 158                return bh;
 159        ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
 160        wait_on_buffer(bh);
 161        if (buffer_uptodate(bh))
 162                return bh;
 163        put_bh(bh);
 164        return ERR_PTR(-EIO);
 165}
 166
 167static int ext4_verify_csum_type(struct super_block *sb,
 168                                 struct ext4_super_block *es)
 169{
 170        if (!ext4_has_feature_metadata_csum(sb))
 171                return 1;
 172
 173        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 174}
 175
 176static __le32 ext4_superblock_csum(struct super_block *sb,
 177                                   struct ext4_super_block *es)
 178{
 179        struct ext4_sb_info *sbi = EXT4_SB(sb);
 180        int offset = offsetof(struct ext4_super_block, s_checksum);
 181        __u32 csum;
 182
 183        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 184
 185        return cpu_to_le32(csum);
 186}
 187
 188static int ext4_superblock_csum_verify(struct super_block *sb,
 189                                       struct ext4_super_block *es)
 190{
 191        if (!ext4_has_metadata_csum(sb))
 192                return 1;
 193
 194        return es->s_checksum == ext4_superblock_csum(sb, es);
 195}
 196
 197void ext4_superblock_csum_set(struct super_block *sb)
 198{
 199        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 200
 201        if (!ext4_has_metadata_csum(sb))
 202                return;
 203
 204        es->s_checksum = ext4_superblock_csum(sb, es);
 205}
 206
 207ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 208                               struct ext4_group_desc *bg)
 209{
 210        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 211                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 212                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 213}
 214
 215ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 216                               struct ext4_group_desc *bg)
 217{
 218        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 219                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 220                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 221}
 222
 223ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 224                              struct ext4_group_desc *bg)
 225{
 226        return le32_to_cpu(bg->bg_inode_table_lo) |
 227                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 228                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 229}
 230
 231__u32 ext4_free_group_clusters(struct super_block *sb,
 232                               struct ext4_group_desc *bg)
 233{
 234        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 235                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 236                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 237}
 238
 239__u32 ext4_free_inodes_count(struct super_block *sb,
 240                              struct ext4_group_desc *bg)
 241{
 242        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 243                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 244                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 245}
 246
 247__u32 ext4_used_dirs_count(struct super_block *sb,
 248                              struct ext4_group_desc *bg)
 249{
 250        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 251                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 252                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 253}
 254
 255__u32 ext4_itable_unused_count(struct super_block *sb,
 256                              struct ext4_group_desc *bg)
 257{
 258        return le16_to_cpu(bg->bg_itable_unused_lo) |
 259                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 260                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 261}
 262
 263void ext4_block_bitmap_set(struct super_block *sb,
 264                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 265{
 266        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 267        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 268                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 269}
 270
 271void ext4_inode_bitmap_set(struct super_block *sb,
 272                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 273{
 274        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 275        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 276                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 277}
 278
 279void ext4_inode_table_set(struct super_block *sb,
 280                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 281{
 282        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 283        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 284                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 285}
 286
 287void ext4_free_group_clusters_set(struct super_block *sb,
 288                                  struct ext4_group_desc *bg, __u32 count)
 289{
 290        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 291        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 292                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 293}
 294
 295void ext4_free_inodes_set(struct super_block *sb,
 296                          struct ext4_group_desc *bg, __u32 count)
 297{
 298        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 299        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 300                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 301}
 302
 303void ext4_used_dirs_set(struct super_block *sb,
 304                          struct ext4_group_desc *bg, __u32 count)
 305{
 306        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 307        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 308                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 309}
 310
 311void ext4_itable_unused_set(struct super_block *sb,
 312                          struct ext4_group_desc *bg, __u32 count)
 313{
 314        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 315        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 316                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 317}
 318
 319static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
 320{
 321        time64_t now = ktime_get_real_seconds();
 322
 323        now = clamp_val(now, 0, (1ull << 40) - 1);
 324
 325        *lo = cpu_to_le32(lower_32_bits(now));
 326        *hi = upper_32_bits(now);
 327}
 328
 329static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
 330{
 331        return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
 332}
 333#define ext4_update_tstamp(es, tstamp) \
 334        __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 335#define ext4_get_tstamp(es, tstamp) \
 336        __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 337
 338static void __save_error_info(struct super_block *sb, int error,
 339                              __u32 ino, __u64 block,
 340                              const char *func, unsigned int line)
 341{
 342        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 343        int err;
 344
 345        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 346        if (bdev_read_only(sb->s_bdev))
 347                return;
 348        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 349        ext4_update_tstamp(es, s_last_error_time);
 350        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 351        es->s_last_error_line = cpu_to_le32(line);
 352        es->s_last_error_ino = cpu_to_le32(ino);
 353        es->s_last_error_block = cpu_to_le64(block);
 354        switch (error) {
 355        case EIO:
 356                err = EXT4_ERR_EIO;
 357                break;
 358        case ENOMEM:
 359                err = EXT4_ERR_ENOMEM;
 360                break;
 361        case EFSBADCRC:
 362                err = EXT4_ERR_EFSBADCRC;
 363                break;
 364        case 0:
 365        case EFSCORRUPTED:
 366                err = EXT4_ERR_EFSCORRUPTED;
 367                break;
 368        case ENOSPC:
 369                err = EXT4_ERR_ENOSPC;
 370                break;
 371        case ENOKEY:
 372                err = EXT4_ERR_ENOKEY;
 373                break;
 374        case EROFS:
 375                err = EXT4_ERR_EROFS;
 376                break;
 377        case EFBIG:
 378                err = EXT4_ERR_EFBIG;
 379                break;
 380        case EEXIST:
 381                err = EXT4_ERR_EEXIST;
 382                break;
 383        case ERANGE:
 384                err = EXT4_ERR_ERANGE;
 385                break;
 386        case EOVERFLOW:
 387                err = EXT4_ERR_EOVERFLOW;
 388                break;
 389        case EBUSY:
 390                err = EXT4_ERR_EBUSY;
 391                break;
 392        case ENOTDIR:
 393                err = EXT4_ERR_ENOTDIR;
 394                break;
 395        case ENOTEMPTY:
 396                err = EXT4_ERR_ENOTEMPTY;
 397                break;
 398        case ESHUTDOWN:
 399                err = EXT4_ERR_ESHUTDOWN;
 400                break;
 401        case EFAULT:
 402                err = EXT4_ERR_EFAULT;
 403                break;
 404        default:
 405                err = EXT4_ERR_UNKNOWN;
 406        }
 407        es->s_last_error_errcode = err;
 408        if (!es->s_first_error_time) {
 409                es->s_first_error_time = es->s_last_error_time;
 410                es->s_first_error_time_hi = es->s_last_error_time_hi;
 411                strncpy(es->s_first_error_func, func,
 412                        sizeof(es->s_first_error_func));
 413                es->s_first_error_line = cpu_to_le32(line);
 414                es->s_first_error_ino = es->s_last_error_ino;
 415                es->s_first_error_block = es->s_last_error_block;
 416                es->s_first_error_errcode = es->s_last_error_errcode;
 417        }
 418        /*
 419         * Start the daily error reporting function if it hasn't been
 420         * started already
 421         */
 422        if (!es->s_error_count)
 423                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 424        le32_add_cpu(&es->s_error_count, 1);
 425}
 426
 427static void save_error_info(struct super_block *sb, int error,
 428                            __u32 ino, __u64 block,
 429                            const char *func, unsigned int line)
 430{
 431        __save_error_info(sb, error, ino, block, func, line);
 432        if (!bdev_read_only(sb->s_bdev))
 433                ext4_commit_super(sb, 1);
 434}
 435
 436/*
 437 * The del_gendisk() function uninitializes the disk-specific data
 438 * structures, including the bdi structure, without telling anyone
 439 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 440 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 441 * This is a kludge to prevent these oops until we can put in a proper
 442 * hook in del_gendisk() to inform the VFS and file system layers.
 443 */
 444static int block_device_ejected(struct super_block *sb)
 445{
 446        struct inode *bd_inode = sb->s_bdev->bd_inode;
 447        struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 448
 449        return bdi->dev == NULL;
 450}
 451
 452static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 453{
 454        struct super_block              *sb = journal->j_private;
 455        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 456        int                             error = is_journal_aborted(journal);
 457        struct ext4_journal_cb_entry    *jce;
 458
 459        BUG_ON(txn->t_state == T_FINISHED);
 460
 461        ext4_process_freed_data(sb, txn->t_tid);
 462
 463        spin_lock(&sbi->s_md_lock);
 464        while (!list_empty(&txn->t_private_list)) {
 465                jce = list_entry(txn->t_private_list.next,
 466                                 struct ext4_journal_cb_entry, jce_list);
 467                list_del_init(&jce->jce_list);
 468                spin_unlock(&sbi->s_md_lock);
 469                jce->jce_func(sb, jce, error);
 470                spin_lock(&sbi->s_md_lock);
 471        }
 472        spin_unlock(&sbi->s_md_lock);
 473}
 474
 475static bool system_going_down(void)
 476{
 477        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 478                || system_state == SYSTEM_RESTART;
 479}
 480
 481/* Deal with the reporting of failure conditions on a filesystem such as
 482 * inconsistencies detected or read IO failures.
 483 *
 484 * On ext2, we can store the error state of the filesystem in the
 485 * superblock.  That is not possible on ext4, because we may have other
 486 * write ordering constraints on the superblock which prevent us from
 487 * writing it out straight away; and given that the journal is about to
 488 * be aborted, we can't rely on the current, or future, transactions to
 489 * write out the superblock safely.
 490 *
 491 * We'll just use the jbd2_journal_abort() error code to record an error in
 492 * the journal instead.  On recovery, the journal will complain about
 493 * that error until we've noted it down and cleared it.
 494 */
 495
 496static void ext4_handle_error(struct super_block *sb)
 497{
 498        if (test_opt(sb, WARN_ON_ERROR))
 499                WARN_ON_ONCE(1);
 500
 501        if (sb_rdonly(sb))
 502                return;
 503
 504        if (!test_opt(sb, ERRORS_CONT)) {
 505                journal_t *journal = EXT4_SB(sb)->s_journal;
 506
 507                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 508                if (journal)
 509                        jbd2_journal_abort(journal, -EIO);
 510        }
 511        /*
 512         * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 513         * could panic during 'reboot -f' as the underlying device got already
 514         * disabled.
 515         */
 516        if (test_opt(sb, ERRORS_RO) || system_going_down()) {
 517                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 518                /*
 519                 * Make sure updated value of ->s_mount_flags will be visible
 520                 * before ->s_flags update
 521                 */
 522                smp_wmb();
 523                sb->s_flags |= SB_RDONLY;
 524        } else if (test_opt(sb, ERRORS_PANIC)) {
 525                panic("EXT4-fs (device %s): panic forced after error\n",
 526                        sb->s_id);
 527        }
 528}
 529
 530#define ext4_error_ratelimit(sb)                                        \
 531                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 532                             "EXT4-fs error")
 533
 534void __ext4_error(struct super_block *sb, const char *function,
 535                  unsigned int line, int error, __u64 block,
 536                  const char *fmt, ...)
 537{
 538        struct va_format vaf;
 539        va_list args;
 540
 541        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 542                return;
 543
 544        trace_ext4_error(sb, function, line);
 545        if (ext4_error_ratelimit(sb)) {
 546                va_start(args, fmt);
 547                vaf.fmt = fmt;
 548                vaf.va = &args;
 549                printk(KERN_CRIT
 550                       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 551                       sb->s_id, function, line, current->comm, &vaf);
 552                va_end(args);
 553        }
 554        save_error_info(sb, error, 0, block, function, line);
 555        ext4_handle_error(sb);
 556}
 557
 558void __ext4_error_inode(struct inode *inode, const char *function,
 559                        unsigned int line, ext4_fsblk_t block, int error,
 560                        const char *fmt, ...)
 561{
 562        va_list args;
 563        struct va_format vaf;
 564
 565        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 566                return;
 567
 568        trace_ext4_error(inode->i_sb, function, line);
 569        if (ext4_error_ratelimit(inode->i_sb)) {
 570                va_start(args, fmt);
 571                vaf.fmt = fmt;
 572                vaf.va = &args;
 573                if (block)
 574                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 575                               "inode #%lu: block %llu: comm %s: %pV\n",
 576                               inode->i_sb->s_id, function, line, inode->i_ino,
 577                               block, current->comm, &vaf);
 578                else
 579                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 580                               "inode #%lu: comm %s: %pV\n",
 581                               inode->i_sb->s_id, function, line, inode->i_ino,
 582                               current->comm, &vaf);
 583                va_end(args);
 584        }
 585        save_error_info(inode->i_sb, error, inode->i_ino, block,
 586                        function, line);
 587        ext4_handle_error(inode->i_sb);
 588}
 589
 590void __ext4_error_file(struct file *file, const char *function,
 591                       unsigned int line, ext4_fsblk_t block,
 592                       const char *fmt, ...)
 593{
 594        va_list args;
 595        struct va_format vaf;
 596        struct inode *inode = file_inode(file);
 597        char pathname[80], *path;
 598
 599        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 600                return;
 601
 602        trace_ext4_error(inode->i_sb, function, line);
 603        if (ext4_error_ratelimit(inode->i_sb)) {
 604                path = file_path(file, pathname, sizeof(pathname));
 605                if (IS_ERR(path))
 606                        path = "(unknown)";
 607                va_start(args, fmt);
 608                vaf.fmt = fmt;
 609                vaf.va = &args;
 610                if (block)
 611                        printk(KERN_CRIT
 612                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 613                               "block %llu: comm %s: path %s: %pV\n",
 614                               inode->i_sb->s_id, function, line, inode->i_ino,
 615                               block, current->comm, path, &vaf);
 616                else
 617                        printk(KERN_CRIT
 618                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 619                               "comm %s: path %s: %pV\n",
 620                               inode->i_sb->s_id, function, line, inode->i_ino,
 621                               current->comm, path, &vaf);
 622                va_end(args);
 623        }
 624        save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
 625                        function, line);
 626        ext4_handle_error(inode->i_sb);
 627}
 628
 629const char *ext4_decode_error(struct super_block *sb, int errno,
 630                              char nbuf[16])
 631{
 632        char *errstr = NULL;
 633
 634        switch (errno) {
 635        case -EFSCORRUPTED:
 636                errstr = "Corrupt filesystem";
 637                break;
 638        case -EFSBADCRC:
 639                errstr = "Filesystem failed CRC";
 640                break;
 641        case -EIO:
 642                errstr = "IO failure";
 643                break;
 644        case -ENOMEM:
 645                errstr = "Out of memory";
 646                break;
 647        case -EROFS:
 648                if (!sb || (EXT4_SB(sb)->s_journal &&
 649                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 650                        errstr = "Journal has aborted";
 651                else
 652                        errstr = "Readonly filesystem";
 653                break;
 654        default:
 655                /* If the caller passed in an extra buffer for unknown
 656                 * errors, textualise them now.  Else we just return
 657                 * NULL. */
 658                if (nbuf) {
 659                        /* Check for truncated error codes... */
 660                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 661                                errstr = nbuf;
 662                }
 663                break;
 664        }
 665
 666        return errstr;
 667}
 668
 669/* __ext4_std_error decodes expected errors from journaling functions
 670 * automatically and invokes the appropriate error response.  */
 671
 672void __ext4_std_error(struct super_block *sb, const char *function,
 673                      unsigned int line, int errno)
 674{
 675        char nbuf[16];
 676        const char *errstr;
 677
 678        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 679                return;
 680
 681        /* Special case: if the error is EROFS, and we're not already
 682         * inside a transaction, then there's really no point in logging
 683         * an error. */
 684        if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
 685                return;
 686
 687        if (ext4_error_ratelimit(sb)) {
 688                errstr = ext4_decode_error(sb, errno, nbuf);
 689                printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 690                       sb->s_id, function, line, errstr);
 691        }
 692
 693        save_error_info(sb, -errno, 0, 0, function, line);
 694        ext4_handle_error(sb);
 695}
 696
 697/*
 698 * ext4_abort is a much stronger failure handler than ext4_error.  The
 699 * abort function may be used to deal with unrecoverable failures such
 700 * as journal IO errors or ENOMEM at a critical moment in log management.
 701 *
 702 * We unconditionally force the filesystem into an ABORT|READONLY state,
 703 * unless the error response on the fs has been set to panic in which
 704 * case we take the easy way out and panic immediately.
 705 */
 706
 707void __ext4_abort(struct super_block *sb, const char *function,
 708                  unsigned int line, int error, const char *fmt, ...)
 709{
 710        struct va_format vaf;
 711        va_list args;
 712
 713        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 714                return;
 715
 716        save_error_info(sb, error, 0, 0, function, line);
 717        va_start(args, fmt);
 718        vaf.fmt = fmt;
 719        vaf.va = &args;
 720        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
 721               sb->s_id, function, line, &vaf);
 722        va_end(args);
 723
 724        if (sb_rdonly(sb) == 0) {
 725                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 726                if (EXT4_SB(sb)->s_journal)
 727                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 728
 729                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 730                /*
 731                 * Make sure updated value of ->s_mount_flags will be visible
 732                 * before ->s_flags update
 733                 */
 734                smp_wmb();
 735                sb->s_flags |= SB_RDONLY;
 736        }
 737        if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
 738                panic("EXT4-fs panic from previous error\n");
 739}
 740
 741void __ext4_msg(struct super_block *sb,
 742                const char *prefix, const char *fmt, ...)
 743{
 744        struct va_format vaf;
 745        va_list args;
 746
 747        if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 748                return;
 749
 750        va_start(args, fmt);
 751        vaf.fmt = fmt;
 752        vaf.va = &args;
 753        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 754        va_end(args);
 755}
 756
 757#define ext4_warning_ratelimit(sb)                                      \
 758                ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
 759                             "EXT4-fs warning")
 760
 761void __ext4_warning(struct super_block *sb, const char *function,
 762                    unsigned int line, const char *fmt, ...)
 763{
 764        struct va_format vaf;
 765        va_list args;
 766
 767        if (!ext4_warning_ratelimit(sb))
 768                return;
 769
 770        va_start(args, fmt);
 771        vaf.fmt = fmt;
 772        vaf.va = &args;
 773        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 774               sb->s_id, function, line, &vaf);
 775        va_end(args);
 776}
 777
 778void __ext4_warning_inode(const struct inode *inode, const char *function,
 779                          unsigned int line, const char *fmt, ...)
 780{
 781        struct va_format vaf;
 782        va_list args;
 783
 784        if (!ext4_warning_ratelimit(inode->i_sb))
 785                return;
 786
 787        va_start(args, fmt);
 788        vaf.fmt = fmt;
 789        vaf.va = &args;
 790        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 791               "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 792               function, line, inode->i_ino, current->comm, &vaf);
 793        va_end(args);
 794}
 795
 796void __ext4_grp_locked_error(const char *function, unsigned int line,
 797                             struct super_block *sb, ext4_group_t grp,
 798                             unsigned long ino, ext4_fsblk_t block,
 799                             const char *fmt, ...)
 800__releases(bitlock)
 801__acquires(bitlock)
 802{
 803        struct va_format vaf;
 804        va_list args;
 805
 806        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 807                return;
 808
 809        trace_ext4_error(sb, function, line);
 810        __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
 811
 812        if (ext4_error_ratelimit(sb)) {
 813                va_start(args, fmt);
 814                vaf.fmt = fmt;
 815                vaf.va = &args;
 816                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 817                       sb->s_id, function, line, grp);
 818                if (ino)
 819                        printk(KERN_CONT "inode %lu: ", ino);
 820                if (block)
 821                        printk(KERN_CONT "block %llu:",
 822                               (unsigned long long) block);
 823                printk(KERN_CONT "%pV\n", &vaf);
 824                va_end(args);
 825        }
 826
 827        if (test_opt(sb, WARN_ON_ERROR))
 828                WARN_ON_ONCE(1);
 829
 830        if (test_opt(sb, ERRORS_CONT)) {
 831                ext4_commit_super(sb, 0);
 832                return;
 833        }
 834
 835        ext4_unlock_group(sb, grp);
 836        ext4_commit_super(sb, 1);
 837        ext4_handle_error(sb);
 838        /*
 839         * We only get here in the ERRORS_RO case; relocking the group
 840         * may be dangerous, but nothing bad will happen since the
 841         * filesystem will have already been marked read/only and the
 842         * journal has been aborted.  We return 1 as a hint to callers
 843         * who might what to use the return value from
 844         * ext4_grp_locked_error() to distinguish between the
 845         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 846         * aggressively from the ext4 function in question, with a
 847         * more appropriate error code.
 848         */
 849        ext4_lock_group(sb, grp);
 850        return;
 851}
 852
 853void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
 854                                     ext4_group_t group,
 855                                     unsigned int flags)
 856{
 857        struct ext4_sb_info *sbi = EXT4_SB(sb);
 858        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 859        struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 860        int ret;
 861
 862        if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
 863                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
 864                                            &grp->bb_state);
 865                if (!ret)
 866                        percpu_counter_sub(&sbi->s_freeclusters_counter,
 867                                           grp->bb_free);
 868        }
 869
 870        if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
 871                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
 872                                            &grp->bb_state);
 873                if (!ret && gdp) {
 874                        int count;
 875
 876                        count = ext4_free_inodes_count(sb, gdp);
 877                        percpu_counter_sub(&sbi->s_freeinodes_counter,
 878                                           count);
 879                }
 880        }
 881}
 882
 883void ext4_update_dynamic_rev(struct super_block *sb)
 884{
 885        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 886
 887        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 888                return;
 889
 890        ext4_warning(sb,
 891                     "updating to rev %d because of new feature flag, "
 892                     "running e2fsck is recommended",
 893                     EXT4_DYNAMIC_REV);
 894
 895        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 896        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 897        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 898        /* leave es->s_feature_*compat flags alone */
 899        /* es->s_uuid will be set by e2fsck if empty */
 900
 901        /*
 902         * The rest of the superblock fields should be zero, and if not it
 903         * means they are likely already in use, so leave them alone.  We
 904         * can leave it up to e2fsck to clean up any inconsistencies there.
 905         */
 906}
 907
 908/*
 909 * Open the external journal device
 910 */
 911static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 912{
 913        struct block_device *bdev;
 914
 915        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 916        if (IS_ERR(bdev))
 917                goto fail;
 918        return bdev;
 919
 920fail:
 921        ext4_msg(sb, KERN_ERR,
 922                 "failed to open journal device unknown-block(%u,%u) %ld",
 923                 MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
 924        return NULL;
 925}
 926
 927/*
 928 * Release the journal device
 929 */
 930static void ext4_blkdev_put(struct block_device *bdev)
 931{
 932        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 933}
 934
 935static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 936{
 937        struct block_device *bdev;
 938        bdev = sbi->journal_bdev;
 939        if (bdev) {
 940                ext4_blkdev_put(bdev);
 941                sbi->journal_bdev = NULL;
 942        }
 943}
 944
 945static inline struct inode *orphan_list_entry(struct list_head *l)
 946{
 947        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 948}
 949
 950static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 951{
 952        struct list_head *l;
 953
 954        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 955                 le32_to_cpu(sbi->s_es->s_last_orphan));
 956
 957        printk(KERN_ERR "sb_info orphan list:\n");
 958        list_for_each(l, &sbi->s_orphan) {
 959                struct inode *inode = orphan_list_entry(l);
 960                printk(KERN_ERR "  "
 961                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 962                       inode->i_sb->s_id, inode->i_ino, inode,
 963                       inode->i_mode, inode->i_nlink,
 964                       NEXT_ORPHAN(inode));
 965        }
 966}
 967
 968#ifdef CONFIG_QUOTA
 969static int ext4_quota_off(struct super_block *sb, int type);
 970
 971static inline void ext4_quota_off_umount(struct super_block *sb)
 972{
 973        int type;
 974
 975        /* Use our quota_off function to clear inode flags etc. */
 976        for (type = 0; type < EXT4_MAXQUOTAS; type++)
 977                ext4_quota_off(sb, type);
 978}
 979
 980/*
 981 * This is a helper function which is used in the mount/remount
 982 * codepaths (which holds s_umount) to fetch the quota file name.
 983 */
 984static inline char *get_qf_name(struct super_block *sb,
 985                                struct ext4_sb_info *sbi,
 986                                int type)
 987{
 988        return rcu_dereference_protected(sbi->s_qf_names[type],
 989                                         lockdep_is_held(&sb->s_umount));
 990}
 991#else
 992static inline void ext4_quota_off_umount(struct super_block *sb)
 993{
 994}
 995#endif
 996
 997static void ext4_put_super(struct super_block *sb)
 998{
 999        struct ext4_sb_info *sbi = EXT4_SB(sb);
1000        struct ext4_super_block *es = sbi->s_es;
1001        struct buffer_head **group_desc;
1002        struct flex_groups **flex_groups;
1003        int aborted = 0;
1004        int i, err;
1005
1006        ext4_unregister_li_request(sb);
1007        ext4_quota_off_umount(sb);
1008
1009        destroy_workqueue(sbi->rsv_conversion_wq);
1010
1011        /*
1012         * Unregister sysfs before destroying jbd2 journal.
1013         * Since we could still access attr_journal_task attribute via sysfs
1014         * path which could have sbi->s_journal->j_task as NULL
1015         */
1016        ext4_unregister_sysfs(sb);
1017
1018        if (sbi->s_journal) {
1019                aborted = is_journal_aborted(sbi->s_journal);
1020                err = jbd2_journal_destroy(sbi->s_journal);
1021                sbi->s_journal = NULL;
1022                if ((err < 0) && !aborted) {
1023                        ext4_abort(sb, -err, "Couldn't clean up the journal");
1024                }
1025        }
1026
1027        ext4_es_unregister_shrinker(sbi);
1028        del_timer_sync(&sbi->s_err_report);
1029        ext4_release_system_zone(sb);
1030        ext4_mb_release(sb);
1031        ext4_ext_release(sb);
1032
1033        if (!sb_rdonly(sb) && !aborted) {
1034                ext4_clear_feature_journal_needs_recovery(sb);
1035                es->s_state = cpu_to_le16(sbi->s_mount_state);
1036        }
1037        if (!sb_rdonly(sb))
1038                ext4_commit_super(sb, 1);
1039
1040        rcu_read_lock();
1041        group_desc = rcu_dereference(sbi->s_group_desc);
1042        for (i = 0; i < sbi->s_gdb_count; i++)
1043                brelse(group_desc[i]);
1044        kvfree(group_desc);
1045        flex_groups = rcu_dereference(sbi->s_flex_groups);
1046        if (flex_groups) {
1047                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1048                        kvfree(flex_groups[i]);
1049                kvfree(flex_groups);
1050        }
1051        rcu_read_unlock();
1052        percpu_counter_destroy(&sbi->s_freeclusters_counter);
1053        percpu_counter_destroy(&sbi->s_freeinodes_counter);
1054        percpu_counter_destroy(&sbi->s_dirs_counter);
1055        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1056        percpu_free_rwsem(&sbi->s_writepages_rwsem);
1057#ifdef CONFIG_QUOTA
1058        for (i = 0; i < EXT4_MAXQUOTAS; i++)
1059                kfree(get_qf_name(sb, sbi, i));
1060#endif
1061
1062        /* Debugging code just in case the in-memory inode orphan list
1063         * isn't empty.  The on-disk one can be non-empty if we've
1064         * detected an error and taken the fs readonly, but the
1065         * in-memory list had better be clean by this point. */
1066        if (!list_empty(&sbi->s_orphan))
1067                dump_orphan_list(sb, sbi);
1068        J_ASSERT(list_empty(&sbi->s_orphan));
1069
1070        sync_blockdev(sb->s_bdev);
1071        invalidate_bdev(sb->s_bdev);
1072        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
1073                /*
1074                 * Invalidate the journal device's buffers.  We don't want them
1075                 * floating about in memory - the physical journal device may
1076                 * hotswapped, and it breaks the `ro-after' testing code.
1077                 */
1078                sync_blockdev(sbi->journal_bdev);
1079                invalidate_bdev(sbi->journal_bdev);
1080                ext4_blkdev_remove(sbi);
1081        }
1082
1083        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1084        sbi->s_ea_inode_cache = NULL;
1085
1086        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1087        sbi->s_ea_block_cache = NULL;
1088
1089        if (sbi->s_mmp_tsk)
1090                kthread_stop(sbi->s_mmp_tsk);
1091        brelse(sbi->s_sbh);
1092        sb->s_fs_info = NULL;
1093        /*
1094         * Now that we are completely done shutting down the
1095         * superblock, we need to actually destroy the kobject.
1096         */
1097        kobject_put(&sbi->s_kobj);
1098        wait_for_completion(&sbi->s_kobj_unregister);
1099        if (sbi->s_chksum_driver)
1100                crypto_free_shash(sbi->s_chksum_driver);
1101        kfree(sbi->s_blockgroup_lock);
1102        fs_put_dax(sbi->s_daxdev);
1103        fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
1104#ifdef CONFIG_UNICODE
1105        utf8_unload(sbi->s_encoding);
1106#endif
1107        kfree(sbi);
1108}
1109
1110static struct kmem_cache *ext4_inode_cachep;
1111
1112/*
1113 * Called inside transaction, so use GFP_NOFS
1114 */
1115static struct inode *ext4_alloc_inode(struct super_block *sb)
1116{
1117        struct ext4_inode_info *ei;
1118
1119        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
1120        if (!ei)
1121                return NULL;
1122
1123        inode_set_iversion(&ei->vfs_inode, 1);
1124        spin_lock_init(&ei->i_raw_lock);
1125        INIT_LIST_HEAD(&ei->i_prealloc_list);
1126        spin_lock_init(&ei->i_prealloc_lock);
1127        ext4_es_init_tree(&ei->i_es_tree);
1128        rwlock_init(&ei->i_es_lock);
1129        INIT_LIST_HEAD(&ei->i_es_list);
1130        ei->i_es_all_nr = 0;
1131        ei->i_es_shk_nr = 0;
1132        ei->i_es_shrink_lblk = 0;
1133        ei->i_reserved_data_blocks = 0;
1134        spin_lock_init(&(ei->i_block_reservation_lock));
1135        ext4_init_pending_tree(&ei->i_pending_tree);
1136#ifdef CONFIG_QUOTA
1137        ei->i_reserved_quota = 0;
1138        memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1139#endif
1140        ei->jinode = NULL;
1141        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1142        spin_lock_init(&ei->i_completed_io_lock);
1143        ei->i_sync_tid = 0;
1144        ei->i_datasync_tid = 0;
1145        atomic_set(&ei->i_unwritten, 0);
1146        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1147        return &ei->vfs_inode;
1148}
1149
/*
 * ->drop_inode: ask the generic helper first and consult fscrypt only if
 * the generic helper returned zero.  The decision is traced before being
 * returned.
 */
static int ext4_drop_inode(struct inode *inode)
{
        int verdict;

        verdict = generic_drop_inode(inode);
        if (verdict == 0)
                verdict = fscrypt_drop_inode(inode);

        trace_ext4_drop_inode(inode, verdict);
        return verdict;
}
1160
1161static void ext4_free_in_core_inode(struct inode *inode)
1162{
1163        fscrypt_free_inode(inode);
1164        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1165}
1166
1167static void ext4_destroy_inode(struct inode *inode)
1168{
1169        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1170                ext4_msg(inode->i_sb, KERN_ERR,
1171                         "Inode %lu (%p): orphan list check failed!",
1172                         inode->i_ino, EXT4_I(inode));
1173                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1174                                EXT4_I(inode), sizeof(struct ext4_inode_info),
1175                                true);
1176                dump_stack();
1177        }
1178}
1179
1180static void init_once(void *foo)
1181{
1182        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
1183
1184        INIT_LIST_HEAD(&ei->i_orphan);
1185        init_rwsem(&ei->xattr_sem);
1186        init_rwsem(&ei->i_data_sem);
1187        init_rwsem(&ei->i_mmap_sem);
1188        inode_init_once(&ei->vfs_inode);
1189}
1190
1191static int __init init_inodecache(void)
1192{
1193        ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1194                                sizeof(struct ext4_inode_info), 0,
1195                                (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1196                                        SLAB_ACCOUNT),
1197                                offsetof(struct ext4_inode_info, i_data),
1198                                sizeof_field(struct ext4_inode_info, i_data),
1199                                init_once);
1200        if (ext4_inode_cachep == NULL)
1201                return -ENOMEM;
1202        return 0;
1203}
1204
1205static void destroy_inodecache(void)
1206{
1207        /*
1208         * Make sure all delayed rcu free inodes are flushed before we
1209         * destroy cache.
1210         */
1211        rcu_barrier();
1212        kmem_cache_destroy(ext4_inode_cachep);
1213}
1214
1215void ext4_clear_inode(struct inode *inode)
1216{
1217        invalidate_inode_buffers(inode);
1218        clear_inode(inode);
1219        ext4_discard_preallocations(inode);
1220        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1221        dquot_drop(inode);
1222        if (EXT4_I(inode)->jinode) {
1223                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1224                                               EXT4_I(inode)->jinode);
1225                jbd2_free_inode(EXT4_I(inode)->jinode);
1226                EXT4_I(inode)->jinode = NULL;
1227        }
1228        fscrypt_put_encryption_info(inode);
1229        fsverity_cleanup_inode(inode);
1230}
1231
1232static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1233                                        u64 ino, u32 generation)
1234{
1235        struct inode *inode;
1236
1237        /*
1238         * Currently we don't know the generation for parent directory, so
1239         * a generation of 0 means "accept any"
1240         */
1241        inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1242        if (IS_ERR(inode))
1243                return ERR_CAST(inode);
1244        if (generation && inode->i_generation != generation) {
1245                iput(inode);
1246                return ERR_PTR(-ESTALE);
1247        }
1248
1249        return inode;
1250}
1251
1252static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1253                                        int fh_len, int fh_type)
1254{
1255        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1256                                    ext4_nfs_get_inode);
1257}
1258
1259static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1260                                        int fh_len, int fh_type)
1261{
1262        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1263                                    ext4_nfs_get_inode);
1264}
1265
1266static int ext4_nfs_commit_metadata(struct inode *inode)
1267{
1268        struct writeback_control wbc = {
1269                .sync_mode = WB_SYNC_ALL
1270        };
1271
1272        trace_ext4_nfs_commit_metadata(inode);
1273        return ext4_write_inode(inode, &wbc);
1274}
1275
1276/*
1277 * Try to release metadata pages (indirect blocks, directories) which are
1278 * mapped via the block device.  Since these pages could have journal heads
1279 * which would prevent try_to_free_buffers() from freeing them, we must use
1280 * jbd2 layer's try_to_free_buffers() function to release them.
1281 */
1282static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1283                                 gfp_t wait)
1284{
1285        journal_t *journal = EXT4_SB(sb)->s_journal;
1286
1287        WARN_ON(PageChecked(page));
1288        if (!page_has_buffers(page))
1289                return 0;
1290        if (journal)
1291                return jbd2_journal_try_to_free_buffers(journal, page,
1292                                                wait & ~__GFP_DIRECT_RECLAIM);
1293        return try_to_free_buffers(page);
1294}
1295
1296#ifdef CONFIG_FS_ENCRYPTION
1297static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1298{
1299        return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1300                                 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1301}
1302
1303static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1304                                                        void *fs_data)
1305{
1306        handle_t *handle = fs_data;
1307        int res, res2, credits, retries = 0;
1308
1309        /*
1310         * Encrypting the root directory is not allowed because e2fsck expects
1311         * lost+found to exist and be unencrypted, and encrypting the root
1312         * directory would imply encrypting the lost+found directory as well as
1313         * the filename "lost+found" itself.
1314         */
1315        if (inode->i_ino == EXT4_ROOT_INO)
1316                return -EPERM;
1317
1318        if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
1319                return -EINVAL;
1320
1321        if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
1322                return -EOPNOTSUPP;
1323
1324        res = ext4_convert_inline_data(inode);
1325        if (res)
1326                return res;
1327
1328        /*
1329         * If a journal handle was specified, then the encryption context is
1330         * being set on a new inode via inheritance and is part of a larger
1331         * transaction to create the inode.  Otherwise the encryption context is
1332         * being set on an existing inode in its own transaction.  Only in the
1333         * latter case should the "retry on ENOSPC" logic be used.
1334         */
1335
1336        if (handle) {
1337                res = ext4_xattr_set_handle(handle, inode,
1338                                            EXT4_XATTR_INDEX_ENCRYPTION,
1339                                            EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1340                                            ctx, len, 0);
1341                if (!res) {
1342                        ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1343                        ext4_clear_inode_state(inode,
1344                                        EXT4_STATE_MAY_INLINE_DATA);
1345                        /*
1346                         * Update inode->i_flags - S_ENCRYPTED will be enabled,
1347                         * S_DAX may be disabled
1348                         */
1349                        ext4_set_inode_flags(inode, false);
1350                }
1351                return res;
1352        }
1353
1354        res = dquot_initialize(inode);
1355        if (res)
1356                return res;
1357retry:
1358        res = ext4_xattr_set_credits(inode, len, false /* is_create */,
1359                                     &credits);
1360        if (res)
1361                return res;
1362
1363        handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
1364        if (IS_ERR(handle))
1365                return PTR_ERR(handle);
1366
1367        res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
1368                                    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1369                                    ctx, len, 0);
1370        if (!res) {
1371                ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1372                /*
1373                 * Update inode->i_flags - S_ENCRYPTED will be enabled,
1374                 * S_DAX may be disabled
1375                 */
1376                ext4_set_inode_flags(inode, false);
1377                res = ext4_mark_inode_dirty(handle, inode);
1378                if (res)
1379                        EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
1380        }
1381        res2 = ext4_journal_stop(handle);
1382
1383        if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
1384                goto retry;
1385        if (!res)
1386                res = res2;
1387        return res;
1388}
1389
1390static const union fscrypt_context *
1391ext4_get_dummy_context(struct super_block *sb)
1392{
1393        return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
1394}
1395
1396static bool ext4_has_stable_inodes(struct super_block *sb)
1397{
1398        return ext4_has_feature_stable_inodes(sb);
1399}
1400
1401static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
1402                                       int *ino_bits_ret, int *lblk_bits_ret)
1403{
1404        *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
1405        *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
1406}
1407
1408static const struct fscrypt_operations ext4_cryptops = {
1409        .key_prefix             = "ext4:",
1410        .get_context            = ext4_get_context,
1411        .set_context            = ext4_set_context,
1412        .get_dummy_context      = ext4_get_dummy_context,
1413        .empty_dir              = ext4_empty_dir,
1414        .max_namelen            = EXT4_NAME_LEN,
1415        .has_stable_inodes      = ext4_has_stable_inodes,
1416        .get_ino_and_lblk_bits  = ext4_get_ino_and_lblk_bits,
1417};
1418#endif
1419
#ifdef CONFIG_QUOTA
/* Quota type names, indexed by quota type. */
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])

/* Quota helpers defined further down in this file. */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         const struct path *path);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);

/* ->get_dquots: expose the dquot pointer array kept in the ext4 inode. */
static struct dquot **ext4_get_dquots(struct inode *inode)
{
        struct ext4_inode_info *ei = EXT4_I(inode);

        return ei->i_dquot;
}

/* dquot callbacks: ext4-specific where needed, generic dquot_* otherwise. */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space = ext4_get_reserved_space,
        .write_dquot = ext4_write_dquot,
        .acquire_dquot = ext4_acquire_dquot,
        .release_dquot = ext4_release_dquot,
        .mark_dirty = ext4_mark_dquot_dirty,
        .write_info = ext4_write_info,
        .alloc_dquot = dquot_alloc,
        .destroy_dquot = dquot_destroy,
        .get_projid = ext4_get_projid,
        .get_inode_usage = ext4_get_inode_usage,
        .get_next_id = dquot_get_next_id,
};

/* quotactl entry points: only on/off are ext4-specific. */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on = ext4_quota_on,
        .quota_off = ext4_quota_off,
        .quota_sync = dquot_quota_sync,
        .get_state = dquot_get_state,
        .set_info = dquot_set_dqinfo,
        .get_dqblk = dquot_get_dqblk,
        .set_dqblk = dquot_set_dqblk,
        .get_nextdqblk = dquot_get_next_dqblk,
};
#endif
1470
1471static const struct super_operations ext4_sops = {
1472        .alloc_inode    = ext4_alloc_inode,
1473        .free_inode     = ext4_free_in_core_inode,
1474        .destroy_inode  = ext4_destroy_inode,
1475        .write_inode    = ext4_write_inode,
1476        .dirty_inode    = ext4_dirty_inode,
1477        .drop_inode     = ext4_drop_inode,
1478        .evict_inode    = ext4_evict_inode,
1479        .put_super      = ext4_put_super,
1480        .sync_fs        = ext4_sync_fs,
1481        .freeze_fs      = ext4_freeze,
1482        .unfreeze_fs    = ext4_unfreeze,
1483        .statfs         = ext4_statfs,
1484        .remount_fs     = ext4_remount,
1485        .show_options   = ext4_show_options,
1486#ifdef CONFIG_QUOTA
1487        .quota_read     = ext4_quota_read,
1488        .quota_write    = ext4_quota_write,
1489        .get_dquots     = ext4_get_dquots,
1490#endif
1491        .bdev_try_to_free_page = bdev_try_to_free_page,
1492};
1493
1494static const struct export_operations ext4_export_ops = {
1495        .fh_to_dentry = ext4_fh_to_dentry,
1496        .fh_to_parent = ext4_fh_to_parent,
1497        .get_parent = ext4_get_parent,
1498        .commit_metadata = ext4_nfs_commit_metadata,
1499};
1500
/*
 * Identifiers for the mount options ext4 understands.  The tokens[]
 * table in this file pairs each option pattern with one of these values;
 * Opt_err tags the NULL-pattern entry that closes that table.
 *
 * The numeric values are implied by position, so do not reorder.
 * Opt_mblk_io_submit and Opt_nomblk_io_submit are not referenced by
 * tokens[] (the matching option strings map to Opt_removed there).
 */
1501enum {
1502        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1503        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1504        Opt_nouid32, Opt_debug, Opt_removed,
1505        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1506        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1507        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1508        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1509        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1510        Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
1511        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1512        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1513        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1514        Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
1515        Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
1516        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
1517        Opt_nowarn_on_error, Opt_mblk_io_submit,
1518        Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
1519        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1520        Opt_inode_readahead_blks, Opt_journal_ioprio,
1521        Opt_dioread_nolock, Opt_dioread_lock,
1522        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1523        Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
1524};
1525
/*
 * Mount option table: each entry pairs an Opt_* token with the option
 * pattern that selects it.  Patterns ending in %u or %s take an argument.
 * Several spellings may share a token (aliases), and options that are
 * accepted but no longer implemented map to Opt_removed.  The table ends
 * with the {Opt_err, NULL} entry.
 *
 * NOTE(review): entry order is presumably significant to the matcher
 * (e.g. "usrjquota=" is listed before "usrjquota=%s") -- keep the order
 * intact when adding options.
 */
1526static const match_table_t tokens = {
1527        {Opt_bsd_df, "bsddf"},
1528        {Opt_minix_df, "minixdf"},
1529        {Opt_grpid, "grpid"},
1530        {Opt_grpid, "bsdgroups"},
1531        {Opt_nogrpid, "nogrpid"},
1532        {Opt_nogrpid, "sysvgroups"},
1533        {Opt_resgid, "resgid=%u"},
1534        {Opt_resuid, "resuid=%u"},
1535        {Opt_sb, "sb=%u"},
1536        {Opt_err_cont, "errors=continue"},
1537        {Opt_err_panic, "errors=panic"},
1538        {Opt_err_ro, "errors=remount-ro"},
1539        {Opt_nouid32, "nouid32"},
1540        {Opt_debug, "debug"},
1541        {Opt_removed, "oldalloc"},
1542        {Opt_removed, "orlov"},
1543        {Opt_user_xattr, "user_xattr"},
1544        {Opt_nouser_xattr, "nouser_xattr"},
1545        {Opt_acl, "acl"},
1546        {Opt_noacl, "noacl"},
1547        {Opt_noload, "norecovery"},
1548        {Opt_noload, "noload"},
1549        {Opt_removed, "nobh"},
1550        {Opt_removed, "bh"},
1551        {Opt_commit, "commit=%u"},
1552        {Opt_min_batch_time, "min_batch_time=%u"},
1553        {Opt_max_batch_time, "max_batch_time=%u"},
1554        {Opt_journal_dev, "journal_dev=%u"},
1555        {Opt_journal_path, "journal_path=%s"},
1556        {Opt_journal_checksum, "journal_checksum"},
1557        {Opt_nojournal_checksum, "nojournal_checksum"},
1558        {Opt_journal_async_commit, "journal_async_commit"},
1559        {Opt_abort, "abort"},
1560        {Opt_data_journal, "data=journal"},
1561        {Opt_data_ordered, "data=ordered"},
1562        {Opt_data_writeback, "data=writeback"},
1563        {Opt_data_err_abort, "data_err=abort"},
1564        {Opt_data_err_ignore, "data_err=ignore"},
        /* An empty "usrjquota=" / "grpjquota=" value selects the "off" token. */
1565        {Opt_offusrjquota, "usrjquota="},
1566        {Opt_usrjquota, "usrjquota=%s"},
1567        {Opt_offgrpjquota, "grpjquota="},
1568        {Opt_grpjquota, "grpjquota=%s"},
1569        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1570        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1571        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1572        {Opt_grpquota, "grpquota"},
1573        {Opt_noquota, "noquota"},
1574        {Opt_quota, "quota"},
1575        {Opt_usrquota, "usrquota"},
1576        {Opt_prjquota, "prjquota"},
1577        {Opt_barrier, "barrier=%u"},
1578        {Opt_barrier, "barrier"},
1579        {Opt_nobarrier, "nobarrier"},
1580        {Opt_i_version, "i_version"},
1581        {Opt_dax, "dax"},
1582        {Opt_dax_always, "dax=always"},
1583        {Opt_dax_inode, "dax=inode"},
1584        {Opt_dax_never, "dax=never"},
1585        {Opt_stripe, "stripe=%u"},
1586        {Opt_delalloc, "delalloc"},
1587        {Opt_warn_on_error, "warn_on_error"},
1588        {Opt_nowarn_on_error, "nowarn_on_error"},
1589        {Opt_lazytime, "lazytime"},
1590        {Opt_nolazytime, "nolazytime"},
1591        {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
1592        {Opt_nodelalloc, "nodelalloc"},
1593        {Opt_removed, "mblk_io_submit"},
1594        {Opt_removed, "nomblk_io_submit"},
1595        {Opt_block_validity, "block_validity"},
1596        {Opt_noblock_validity, "noblock_validity"},
1597        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1598        {Opt_journal_ioprio, "journal_ioprio=%u"},
1599        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1600        {Opt_auto_da_alloc, "auto_da_alloc"},
1601        {Opt_noauto_da_alloc, "noauto_da_alloc"},
1602        {Opt_dioread_nolock, "dioread_nolock"},
1603        {Opt_dioread_lock, "nodioread_nolock"},
1604        {Opt_dioread_lock, "dioread_lock"},
1605        {Opt_discard, "discard"},
1606        {Opt_nodiscard, "nodiscard"},
1607        {Opt_init_itable, "init_itable=%u"},
1608        {Opt_init_itable, "init_itable"},
1609        {Opt_noinit_itable, "noinit_itable"},
1610        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1611        {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
1612        {Opt_test_dummy_encryption, "test_dummy_encryption"},
1613        {Opt_nombcache, "nombcache"},
1614        {Opt_nombcache, "no_mbcache"},  /* for backward compatibility */
1615        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
1616        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
1617        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
1618        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1619        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
1620        {Opt_err, NULL},
1621};
1622
1623static ext4_fsblk_t get_sb_block(void **data)
1624{
1625        ext4_fsblk_t    sb_block;
1626        char            *options = (char *) *data;
1627
1628        if (!options || strncmp(options, "sb=", 3) != 0)
1629                return 1;       /* Default location */
1630
1631        options += 3;
1632        /* TODO: use simple_strtoll with >32bit ext4 */
1633        sb_block = simple_strtoul(options, &options, 0);
1634        if (*options && *options != ',') {
1635                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1636                       (char *) *data);
1637                return 1;
1638        }
1639        if (*options == ',')
1640                options++;
1641        *data = (void *) options;
1642
1643        return sb_block;
1644}
1645
/* Journal I/O priority used by default: class IOPRIO_CLASS_BE, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))

/*
 * Format string for deprecation warnings.  The two %s arguments are the
 * mount option as typed and the release by which it is to be removed.
 */
static const char deprecated_msg[] =
	"Mount option \"%s\" will be removed by %s\n"
	"Contact linux-ext4@vger.kernel.org "
	"if you think we should keep it.\n";
1650
1651#ifdef CONFIG_QUOTA
1652static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1653{
1654        struct ext4_sb_info *sbi = EXT4_SB(sb);
1655        char *qname, *old_qname = get_qf_name(sb, sbi, qtype);
1656        int ret = -1;
1657
1658        if (sb_any_quota_loaded(sb) && !old_qname) {
1659                ext4_msg(sb, KERN_ERR,
1660                        "Cannot change journaled "
1661                        "quota options when quota turned on");
1662                return -1;
1663        }
1664        if (ext4_has_feature_quota(sb)) {
1665                ext4_msg(sb, KERN_INFO, "Journaled quota options "
1666                         "ignored when QUOTA feature is enabled");
1667                return 1;
1668        }
1669        qname = match_strdup(args);
1670        if (!qname) {
1671                ext4_msg(sb, KERN_ERR,
1672                        "Not enough memory for storing quotafile name");
1673                return -1;
1674        }
1675        if (old_qname) {
1676                if (strcmp(old_qname, qname) == 0)
1677                        ret = 1;
1678                else
1679                        ext4_msg(sb, KERN_ERR,
1680                                 "%s quota file already specified",
1681                                 QTYPE2NAME(qtype));
1682                goto errout;
1683        }
1684        if (strchr(qname, '/')) {
1685                ext4_msg(sb, KERN_ERR,
1686                        "quotafile must be on filesystem root");
1687                goto errout;
1688        }
1689        rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
1690        set_opt(sb, QUOTA);
1691        return 1;
1692errout:
1693        kfree(qname);
1694        return ret;
1695}
1696
1697static int clear_qf_name(struct super_block *sb, int qtype)
1698{
1699
1700        struct ext4_sb_info *sbi = EXT4_SB(sb);
1701        char *old_qname = get_qf_name(sb, sbi, qtype);
1702
1703        if (sb_any_quota_loaded(sb) && old_qname) {
1704                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1705                        " when quota turned on");
1706                return -1;
1707        }
1708        rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
1709        synchronize_rcu();
1710        kfree(old_qname);
1711        return 1;
1712}
1713#endif
1714
/*
 * Flag bits for the 'flags' member of struct mount_opts.
 */
#define MOPT_SET        (1 << 0)	/* generic handler sets mount_opt bits */
#define MOPT_CLEAR      (1 << 1)	/* generic handler clears mount_opt bits */
#define MOPT_NOSUPPORT  (1 << 2)	/* logged as "option not supported" */
#define MOPT_EXPLICIT   (1 << 3)	/* also records an EXPLICIT_* opt2 bit */
#define MOPT_CLEAR_ERR  (1 << 4)	/* clear ERRORS_MASK before applying */
#define MOPT_GTE0       (1 << 5)	/* a supplied integer must be >= 0 */
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       (1 << 6)	/* mount_opt is a journaled quota format */
#else
/* Without quota support both quota classes are reported as unsupported. */
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      (1 << 7)	/* mount_opt is a data= journaling mode */
#define MOPT_NO_EXT2    (1 << 8)	/* rejected when IS_EXT2_SB() */
#define MOPT_NO_EXT3    (1 << 9)	/* rejected when IS_EXT3_SB() */
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     (1 << 10)	/* argument is not parsed as an integer */
#define MOPT_SKIP       (1 << 11)	/* skipped by _ext4_show_options() */
1734
1735static const struct mount_opts {
1736        int     token;
1737        int     mount_opt;
1738        int     flags;
1739} ext4_mount_opts[] = {
1740        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1741        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1742        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1743        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1744        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1745        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1746        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1747         MOPT_EXT4_ONLY | MOPT_SET},
1748        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1749         MOPT_EXT4_ONLY | MOPT_CLEAR},
1750        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1751        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1752        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1753         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1754        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1755         MOPT_EXT4_ONLY | MOPT_CLEAR},
1756        {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1757        {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1758        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1759         MOPT_EXT4_ONLY | MOPT_CLEAR},
1760        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1761         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1762        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1763                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
1764         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1765        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1766        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1767        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1768        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1769        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1770         MOPT_NO_EXT2},
1771        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1772         MOPT_NO_EXT2},
1773        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1774        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1775        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1776        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1777        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1778        {Opt_commit, 0, MOPT_GTE0},
1779        {Opt_max_batch_time, 0, MOPT_GTE0},
1780        {Opt_min_batch_time, 0, MOPT_GTE0},
1781        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1782        {Opt_init_itable, 0, MOPT_GTE0},
1783        {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
1784        {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
1785                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
1786        {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
1787                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
1788        {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
1789                MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
1790        {Opt_stripe, 0, MOPT_GTE0},
1791        {Opt_resuid, 0, MOPT_GTE0},
1792        {Opt_resgid, 0, MOPT_GTE0},
1793        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1794        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
1795        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1796        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1797        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1798        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1799         MOPT_NO_EXT2 | MOPT_DATAJ},
1800        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1801        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1802#ifdef CONFIG_EXT4_FS_POSIX_ACL
1803        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1804        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
1805#else
1806        {Opt_acl, 0, MOPT_NOSUPPORT},
1807        {Opt_noacl, 0, MOPT_NOSUPPORT},
1808#endif
1809        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1810        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1811        {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
1812        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1813        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1814                                                        MOPT_SET | MOPT_Q},
1815        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1816                                                        MOPT_SET | MOPT_Q},
1817        {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1818                                                        MOPT_SET | MOPT_Q},
1819        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1820                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1821                                                        MOPT_CLEAR | MOPT_Q},
1822        {Opt_usrjquota, 0, MOPT_Q},
1823        {Opt_grpjquota, 0, MOPT_Q},
1824        {Opt_offusrjquota, 0, MOPT_Q},
1825        {Opt_offgrpjquota, 0, MOPT_Q},
1826        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1827        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1828        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1829        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1830        {Opt_test_dummy_encryption, 0, MOPT_STRING},
1831        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1832        {Opt_err, 0, 0}
1833};
1834
1835#ifdef CONFIG_UNICODE
1836static const struct ext4_sb_encodings {
1837        __u16 magic;
1838        char *name;
1839        char *version;
1840} ext4_sb_encoding_map[] = {
1841        {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
1842};
1843
1844static int ext4_sb_read_encoding(const struct ext4_super_block *es,
1845                                 const struct ext4_sb_encodings **encoding,
1846                                 __u16 *flags)
1847{
1848        __u16 magic = le16_to_cpu(es->s_encoding);
1849        int i;
1850
1851        for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1852                if (magic == ext4_sb_encoding_map[i].magic)
1853                        break;
1854
1855        if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
1856                return -EINVAL;
1857
1858        *encoding = &ext4_sb_encoding_map[i];
1859        *flags = le16_to_cpu(es->s_encoding_flags);
1860
1861        return 0;
1862}
1863#endif
1864
1865static int ext4_set_test_dummy_encryption(struct super_block *sb,
1866                                          const char *opt,
1867                                          const substring_t *arg,
1868                                          bool is_remount)
1869{
1870#ifdef CONFIG_FS_ENCRYPTION
1871        struct ext4_sb_info *sbi = EXT4_SB(sb);
1872        int err;
1873
1874        /*
1875         * This mount option is just for testing, and it's not worthwhile to
1876         * implement the extra complexity (e.g. RCU protection) that would be
1877         * needed to allow it to be set or changed during remount.  We do allow
1878         * it to be specified during remount, but only if there is no change.
1879         */
1880        if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
1881                ext4_msg(sb, KERN_WARNING,
1882                         "Can't set test_dummy_encryption on remount");
1883                return -1;
1884        }
1885        err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
1886        if (err) {
1887                if (err == -EEXIST)
1888                        ext4_msg(sb, KERN_WARNING,
1889                                 "Can't change test_dummy_encryption on remount");
1890                else if (err == -EINVAL)
1891                        ext4_msg(sb, KERN_WARNING,
1892                                 "Value of option \"%s\" is unrecognized", opt);
1893                else
1894                        ext4_msg(sb, KERN_WARNING,
1895                                 "Error processing option \"%s\" [%d]",
1896                                 opt, err);
1897                return -1;
1898        }
1899        ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
1900#else
1901        ext4_msg(sb, KERN_WARNING,
1902                 "Test dummy encryption mount option ignored");
1903#endif
1904        return 1;
1905}
1906
1907static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1908                            substring_t *args, unsigned long *journal_devnum,
1909                            unsigned int *journal_ioprio, int is_remount)
1910{
1911        struct ext4_sb_info *sbi = EXT4_SB(sb);
1912        const struct mount_opts *m;
1913        kuid_t uid;
1914        kgid_t gid;
1915        int arg = 0;
1916
1917#ifdef CONFIG_QUOTA
1918        if (token == Opt_usrjquota)
1919                return set_qf_name(sb, USRQUOTA, &args[0]);
1920        else if (token == Opt_grpjquota)
1921                return set_qf_name(sb, GRPQUOTA, &args[0]);
1922        else if (token == Opt_offusrjquota)
1923                return clear_qf_name(sb, USRQUOTA);
1924        else if (token == Opt_offgrpjquota)
1925                return clear_qf_name(sb, GRPQUOTA);
1926#endif
1927        switch (token) {
1928        case Opt_noacl:
1929        case Opt_nouser_xattr:
1930                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1931                break;
1932        case Opt_sb:
1933                return 1;       /* handled by get_sb_block() */
1934        case Opt_removed:
1935                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1936                return 1;
1937        case Opt_abort:
1938                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1939                return 1;
1940        case Opt_i_version:
1941                sb->s_flags |= SB_I_VERSION;
1942                return 1;
1943        case Opt_lazytime:
1944                sb->s_flags |= SB_LAZYTIME;
1945                return 1;
1946        case Opt_nolazytime:
1947                sb->s_flags &= ~SB_LAZYTIME;
1948                return 1;
1949        }
1950
1951        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1952                if (token == m->token)
1953                        break;
1954
1955        if (m->token == Opt_err) {
1956                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1957                         "or missing value", opt);
1958                return -1;
1959        }
1960
1961        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1962                ext4_msg(sb, KERN_ERR,
1963                         "Mount option \"%s\" incompatible with ext2", opt);
1964                return -1;
1965        }
1966        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1967                ext4_msg(sb, KERN_ERR,
1968                         "Mount option \"%s\" incompatible with ext3", opt);
1969                return -1;
1970        }
1971
1972        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1973                return -1;
1974        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1975                return -1;
1976        if (m->flags & MOPT_EXPLICIT) {
1977                if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
1978                        set_opt2(sb, EXPLICIT_DELALLOC);
1979                } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
1980                        set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
1981                } else
1982                        return -1;
1983        }
1984        if (m->flags & MOPT_CLEAR_ERR)
1985                clear_opt(sb, ERRORS_MASK);
1986        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1987                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1988                         "options when quota turned on");
1989                return -1;
1990        }
1991
1992        if (m->flags & MOPT_NOSUPPORT) {
1993                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1994        } else if (token == Opt_commit) {
1995                if (arg == 0)
1996                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1997                else if (arg > INT_MAX / HZ) {
1998                        ext4_msg(sb, KERN_ERR,
1999                                 "Invalid commit interval %d, "
2000                                 "must be smaller than %d",
2001                                 arg, INT_MAX / HZ);
2002                        return -1;
2003                }
2004                sbi->s_commit_interval = HZ * arg;
2005        } else if (token == Opt_debug_want_extra_isize) {
2006                if ((arg & 1) ||
2007                    (arg < 4) ||
2008                    (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
2009                        ext4_msg(sb, KERN_ERR,
2010                                 "Invalid want_extra_isize %d", arg);
2011                        return -1;
2012                }
2013                sbi->s_want_extra_isize = arg;
2014        } else if (token == Opt_max_batch_time) {
2015                sbi->s_max_batch_time = arg;
2016        } else if (token == Opt_min_batch_time) {
2017                sbi->s_min_batch_time = arg;
2018        } else if (token == Opt_inode_readahead_blks) {
2019                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
2020                        ext4_msg(sb, KERN_ERR,
2021                                 "EXT4-fs: inode_readahead_blks must be "
2022                                 "0 or a power of 2 smaller than 2^31");
2023                        return -1;
2024                }
2025                sbi->s_inode_readahead_blks = arg;
2026        } else if (token == Opt_init_itable) {
2027                set_opt(sb, INIT_INODE_TABLE);
2028                if (!args->from)
2029                        arg = EXT4_DEF_LI_WAIT_MULT;
2030                sbi->s_li_wait_mult = arg;
2031        } else if (token == Opt_max_dir_size_kb) {
2032                sbi->s_max_dir_size_kb = arg;
2033        } else if (token == Opt_stripe) {
2034                sbi->s_stripe = arg;
2035        } else if (token == Opt_resuid) {
2036                uid = make_kuid(current_user_ns(), arg);
2037                if (!uid_valid(uid)) {
2038                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
2039                        return -1;
2040                }
2041                sbi->s_resuid = uid;
2042        } else if (token == Opt_resgid) {
2043                gid = make_kgid(current_user_ns(), arg);
2044                if (!gid_valid(gid)) {
2045                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
2046                        return -1;
2047                }
2048                sbi->s_resgid = gid;
2049        } else if (token == Opt_journal_dev) {
2050                if (is_remount) {
2051                        ext4_msg(sb, KERN_ERR,
2052                                 "Cannot specify journal on remount");
2053                        return -1;
2054                }
2055                *journal_devnum = arg;
2056        } else if (token == Opt_journal_path) {
2057                char *journal_path;
2058                struct inode *journal_inode;
2059                struct path path;
2060                int error;
2061
2062                if (is_remount) {
2063                        ext4_msg(sb, KERN_ERR,
2064                                 "Cannot specify journal on remount");
2065                        return -1;
2066                }
2067                journal_path = match_strdup(&args[0]);
2068                if (!journal_path) {
2069                        ext4_msg(sb, KERN_ERR, "error: could not dup "
2070                                "journal device string");
2071                        return -1;
2072                }
2073
2074                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
2075                if (error) {
2076                        ext4_msg(sb, KERN_ERR, "error: could not find "
2077                                "journal device path: error %d", error);
2078                        kfree(journal_path);
2079                        return -1;
2080                }
2081
2082                journal_inode = d_inode(path.dentry);
2083                if (!S_ISBLK(journal_inode->i_mode)) {
2084                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
2085                                "is not a block device", journal_path);
2086                        path_put(&path);
2087                        kfree(journal_path);
2088                        return -1;
2089                }
2090
2091                *journal_devnum = new_encode_dev(journal_inode->i_rdev);
2092                path_put(&path);
2093                kfree(journal_path);
2094        } else if (token == Opt_journal_ioprio) {
2095                if (arg > 7) {
2096                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
2097                                 " (must be 0-7)");
2098                        return -1;
2099                }
2100                *journal_ioprio =
2101                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
2102        } else if (token == Opt_test_dummy_encryption) {
2103                return ext4_set_test_dummy_encryption(sb, opt, &args[0],
2104                                                      is_remount);
2105        } else if (m->flags & MOPT_DATAJ) {
2106                if (is_remount) {
2107                        if (!sbi->s_journal)
2108                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
2109                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
2110                                ext4_msg(sb, KERN_ERR,
2111                                         "Cannot change data mode on remount");
2112                                return -1;
2113                        }
2114                } else {
2115                        clear_opt(sb, DATA_FLAGS);
2116                        sbi->s_mount_opt |= m->mount_opt;
2117                }
2118#ifdef CONFIG_QUOTA
2119        } else if (m->flags & MOPT_QFMT) {
2120                if (sb_any_quota_loaded(sb) &&
2121                    sbi->s_jquota_fmt != m->mount_opt) {
2122                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
2123                                 "quota options when quota turned on");
2124                        return -1;
2125                }
2126                if (ext4_has_feature_quota(sb)) {
2127                        ext4_msg(sb, KERN_INFO,
2128                                 "Quota format mount options ignored "
2129                                 "when QUOTA feature is enabled");
2130                        return 1;
2131                }
2132                sbi->s_jquota_fmt = m->mount_opt;
2133#endif
2134        } else if (token == Opt_dax || token == Opt_dax_always ||
2135                   token == Opt_dax_inode || token == Opt_dax_never) {
2136#ifdef CONFIG_FS_DAX
2137                switch (token) {
2138                case Opt_dax:
2139                case Opt_dax_always:
2140                        if (is_remount &&
2141                            (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2142                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2143                        fail_dax_change_remount:
2144                                ext4_msg(sb, KERN_ERR, "can't change "
2145                                         "dax mount option while remounting");
2146                                return -1;
2147                        }
2148                        if (is_remount &&
2149                            (test_opt(sb, DATA_FLAGS) ==
2150                             EXT4_MOUNT_JOURNAL_DATA)) {
2151                                    ext4_msg(sb, KERN_ERR, "can't mount with "
2152                                             "both data=journal and dax");
2153                                    return -1;
2154                        }
2155                        ext4_msg(sb, KERN_WARNING,
2156                                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
2157                        sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
2158                        sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
2159                        break;
2160                case Opt_dax_never:
2161                        if (is_remount &&
2162                            (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2163                             (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
2164                                goto fail_dax_change_remount;
2165                        sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
2166                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
2167                        break;
2168                case Opt_dax_inode:
2169                        if (is_remount &&
2170                            ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2171                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2172                             !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
2173                                goto fail_dax_change_remount;
2174                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
2175                        sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
2176                        /* Strictly for printing options */
2177                        sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
2178                        break;
2179                }
2180#else
2181                ext4_msg(sb, KERN_INFO, "dax option not supported");
2182                sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
2183                sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
2184                return -1;
2185#endif
2186        } else if (token == Opt_data_err_abort) {
2187                sbi->s_mount_opt |= m->mount_opt;
2188        } else if (token == Opt_data_err_ignore) {
2189                sbi->s_mount_opt &= ~m->mount_opt;
2190        } else {
2191                if (!args->from)
2192                        arg = 1;
2193                if (m->flags & MOPT_CLEAR)
2194                        arg = !arg;
2195                else if (unlikely(!(m->flags & MOPT_SET))) {
2196                        ext4_msg(sb, KERN_WARNING,
2197                                 "buggy handling of option %s", opt);
2198                        WARN_ON(1);
2199                        return -1;
2200                }
2201                if (arg != 0)
2202                        sbi->s_mount_opt |= m->mount_opt;
2203                else
2204                        sbi->s_mount_opt &= ~m->mount_opt;
2205        }
2206        return 1;
2207}
2208
2209static int parse_options(char *options, struct super_block *sb,
2210                         unsigned long *journal_devnum,
2211                         unsigned int *journal_ioprio,
2212                         int is_remount)
2213{
2214        struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
2215        char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
2216        substring_t args[MAX_OPT_ARGS];
2217        int token;
2218
2219        if (!options)
2220                return 1;
2221
2222        while ((p = strsep(&options, ",")) != NULL) {
2223                if (!*p)
2224                        continue;
2225                /*
2226                 * Initialize args struct so we know whether arg was
2227                 * found; some options take optional arguments.
2228                 */
2229                args[0].to = args[0].from = NULL;
2230                token = match_token(p, tokens, args);
2231                if (handle_mount_opt(sb, p, token, args, journal_devnum,
2232                                     journal_ioprio, is_remount) < 0)
2233                        return 0;
2234        }
2235#ifdef CONFIG_QUOTA
2236        /*
2237         * We do the test below only for project quotas. 'usrquota' and
2238         * 'grpquota' mount options are allowed even without quota feature
2239         * to support legacy quotas in quota files.
2240         */
2241        if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
2242                ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
2243                         "Cannot enable project quota enforcement.");
2244                return 0;
2245        }
2246        usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
2247        grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
2248        if (usr_qf_name || grp_qf_name) {
2249                if (test_opt(sb, USRQUOTA) && usr_qf_name)
2250                        clear_opt(sb, USRQUOTA);
2251
2252                if (test_opt(sb, GRPQUOTA) && grp_qf_name)
2253                        clear_opt(sb, GRPQUOTA);
2254
2255                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
2256                        ext4_msg(sb, KERN_ERR, "old and new quota "
2257                                        "format mixing");
2258                        return 0;
2259                }
2260
2261                if (!sbi->s_jquota_fmt) {
2262                        ext4_msg(sb, KERN_ERR, "journaled quota format "
2263                                        "not specified");
2264                        return 0;
2265                }
2266        }
2267#endif
2268        if (test_opt(sb, DIOREAD_NOLOCK)) {
2269                int blocksize =
2270                        BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2271                if (blocksize < PAGE_SIZE)
2272                        ext4_msg(sb, KERN_WARNING, "Warning: mounting with an "
2273                                 "experimental mount option 'dioread_nolock' "
2274                                 "for blocksize < PAGE_SIZE");
2275        }
2276        return 1;
2277}
2278
/*
 * Emit the journaled quota settings (",jqfmt=...", "usrjquota",
 * "grpjquota") into @seq.  Compiles to an empty function without
 * CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	const char *usr_name, *grp_name;

	if (sbi->s_jquota_fmt) {
		/* An unrecognised format is printed as an empty name. */
		const char *fmt = "";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmt = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmt = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", fmt);
	}

	/* The name strings are read under the RCU read lock. */
	rcu_read_lock();
	usr_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_name)
		seq_show_option(seq, "usrjquota", usr_name);
	if (grp_name)
		seq_show_option(seq, "grpjquota", grp_name);
	rcu_read_unlock();
#endif
}
2313
2314static const char *token2str(int token)
2315{
2316        const struct match_token *t;
2317
2318        for (t = tokens; t->token != Opt_err; t++)
2319                if (t->token == token && !strchr(t->pattern, '='))
2320                        break;
2321        return t->pattern;
2322}
2323
2324/*
2325 * Show an option if
2326 *  - it's set to a non-default value OR
2327 *  - if the per-sb default is different from the global default
2328 */
2329static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2330                              int nodefs)
2331{
2332        struct ext4_sb_info *sbi = EXT4_SB(sb);
2333        struct ext4_super_block *es = sbi->s_es;
2334        int def_errors, def_mount_opt = sbi->s_def_mount_opt;
2335        const struct mount_opts *m;
2336        char sep = nodefs ? '\n' : ',';
2337
2338#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2339#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2340
2341        if (sbi->s_sb_block != 1)
2342                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2343
2344        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2345                int want_set = m->flags & MOPT_SET;
2346                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2347                    (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
2348                        continue;
2349                if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
2350                        continue; /* skip if same as the default */
2351                if ((want_set &&
2352                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
2353                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
2354                        continue; /* select Opt_noFoo vs Opt_Foo */
2355                SEQ_OPTS_PRINT("%s", token2str(m->token));
2356        }
2357
2358        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2359            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2360                SEQ_OPTS_PRINT("resuid=%u",
2361                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
2362        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2363            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2364                SEQ_OPTS_PRINT("resgid=%u",
2365                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
2366        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2367        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2368                SEQ_OPTS_PUTS("errors=remount-ro");
2369        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2370                SEQ_OPTS_PUTS("errors=continue");
2371        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2372                SEQ_OPTS_PUTS("errors=panic");
2373        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2374                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2375        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2376                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2377        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2378                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2379        if (sb->s_flags & SB_I_VERSION)
2380                SEQ_OPTS_PUTS("i_version");
2381        if (nodefs || sbi->s_stripe)
2382                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2383        if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2384                        (sbi->s_mount_opt ^ def_mount_opt)) {
2385                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2386                        SEQ_OPTS_PUTS("data=journal");
2387                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2388                        SEQ_OPTS_PUTS("data=ordered");
2389                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2390                        SEQ_OPTS_PUTS("data=writeback");
2391        }
2392        if (nodefs ||
2393            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2394                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2395                               sbi->s_inode_readahead_blks);
2396
2397        if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
2398                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2399                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2400        if (nodefs || sbi->s_max_dir_size_kb)
2401                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2402        if (test_opt(sb, DATA_ERR_ABORT))
2403                SEQ_OPTS_PUTS("data_err=abort");
2404
2405        fscrypt_show_test_dummy_encryption(seq, sep, sb);
2406
2407        if (test_opt(sb, DAX_ALWAYS)) {
2408                if (IS_EXT2_SB(sb))
2409                        SEQ_OPTS_PUTS("dax");
2410                else
2411                        SEQ_OPTS_PUTS("dax=always");
2412        } else if (test_opt2(sb, DAX_NEVER)) {
2413                SEQ_OPTS_PUTS("dax=never");
2414        } else if (test_opt2(sb, DAX_INODE)) {
2415                SEQ_OPTS_PUTS("dax=inode");
2416        }
2417
2418        ext4_show_quota_options(seq, sb);
2419        return 0;
2420}
2421
2422static int ext4_show_options(struct seq_file *seq, struct dentry *root)
2423{
2424        return _ext4_show_options(seq, root->d_sb, 0);
2425}
2426
2427int ext4_seq_options_show(struct seq_file *seq, void *offset)
2428{
2429        struct super_block *sb = seq->private;
2430        int rc;
2431
2432        seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
2433        rc = _ext4_show_options(seq, sb, 1);
2434        seq_puts(seq, "\n");
2435        return rc;
2436}
2437
2438static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
2439                            int read_only)
2440{
2441        struct ext4_sb_info *sbi = EXT4_SB(sb);
2442        int err = 0;
2443
2444        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
2445                ext4_msg(sb, KERN_ERR, "revision level too high, "
2446                         "forcing read-only mode");
2447                err = -EROFS;
2448                goto done;
2449        }
2450        if (read_only)
2451                goto done;
2452        if (!(sbi->s_mount_state & EXT4_VALID_FS))
2453                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
2454                         "running e2fsck is recommended");
2455        else if (sbi->s_mount_state & EXT4_ERROR_FS)
2456                ext4_msg(sb, KERN_WARNING,
2457                         "warning: mounting fs with errors, "
2458                         "running e2fsck is recommended");
2459        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
2460                 le16_to_cpu(es->s_mnt_count) >=
2461                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
2462                ext4_msg(sb, KERN_WARNING,
2463                         "warning: maximal mount count reached, "
2464                         "running e2fsck is recommended");
2465        else if (le32_to_cpu(es->s_checkinterval) &&
2466                 (ext4_get_tstamp(es, s_lastcheck) +
2467                  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
2468                ext4_msg(sb, KERN_WARNING,
2469                         "warning: checktime reached, "
2470                         "running e2fsck is recommended");
2471        if (!sbi->s_journal)
2472                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
2473        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
2474                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
2475        le16_add_cpu(&es->s_mnt_count, 1);
2476        ext4_update_tstamp(es, s_mtime);
2477        if (sbi->s_journal)
2478                ext4_set_feature_journal_needs_recovery(sb);
2479
2480        err = ext4_commit_super(sb, 1);
2481done:
2482        if (test_opt(sb, DEBUG))
2483                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
2484                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
2485                        sb->s_blocksize,
2486                        sbi->s_groups_count,
2487                        EXT4_BLOCKS_PER_GROUP(sb),
2488                        EXT4_INODES_PER_GROUP(sb),
2489                        sbi->s_mount_opt, sbi->s_mount_opt2);
2490
2491        cleancache_init_fs(sb);
2492        return err;
2493}
2494
2495int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
2496{
2497        struct ext4_sb_info *sbi = EXT4_SB(sb);
2498        struct flex_groups **old_groups, **new_groups;
2499        int size, i, j;
2500
2501        if (!sbi->s_log_groups_per_flex)
2502                return 0;
2503
2504        size = ext4_flex_group(sbi, ngroup - 1) + 1;
2505        if (size <= sbi->s_flex_groups_allocated)
2506                return 0;
2507
2508        new_groups = kvzalloc(roundup_pow_of_two(size *
2509                              sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
2510        if (!new_groups) {
2511                ext4_msg(sb, KERN_ERR,
2512                         "not enough memory for %d flex group pointers", size);
2513                return -ENOMEM;
2514        }
2515        for (i = sbi->s_flex_groups_allocated; i < size; i++) {
2516                new_groups[i] = kvzalloc(roundup_pow_of_two(
2517                                         sizeof(struct flex_groups)),
2518                                         GFP_KERNEL);
2519                if (!new_groups[i]) {
2520                        for (j = sbi->s_flex_groups_allocated; j < i; j++)
2521                                kvfree(new_groups[j]);
2522                        kvfree(new_groups);
2523                        ext4_msg(sb, KERN_ERR,
2524                                 "not enough memory for %d flex groups", size);
2525                        return -ENOMEM;
2526                }
2527        }
2528        rcu_read_lock();
2529        old_groups = rcu_dereference(sbi->s_flex_groups);
2530        if (old_groups)
2531                memcpy(new_groups, old_groups,
2532                       (sbi->s_flex_groups_allocated *
2533                        sizeof(struct flex_groups *)));
2534        rcu_read_unlock();
2535        rcu_assign_pointer(sbi->s_flex_groups, new_groups);
2536        sbi->s_flex_groups_allocated = size;
2537        if (old_groups)
2538                ext4_kvfree_array_rcu(old_groups);
2539        return 0;
2540}
2541
2542static int ext4_fill_flex_info(struct super_block *sb)
2543{
2544        struct ext4_sb_info *sbi = EXT4_SB(sb);
2545        struct ext4_group_desc *gdp = NULL;
2546        struct flex_groups *fg;
2547        ext4_group_t flex_group;
2548        int i, err;
2549
2550        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2551        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2552                sbi->s_log_groups_per_flex = 0;
2553                return 1;
2554        }
2555
2556        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2557        if (err)
2558                goto failed;
2559
2560        for (i = 0; i < sbi->s_groups_count; i++) {
2561                gdp = ext4_get_group_desc(sb, i, NULL);
2562
2563                flex_group = ext4_flex_group(sbi, i);
2564                fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
2565                atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
2566                atomic64_add(ext4_free_group_clusters(sb, gdp),
2567                             &fg->free_clusters);
2568                atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
2569        }
2570
2571        return 1;
2572failed:
2573        return 0;
2574}
2575
2576static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
2577                                   struct ext4_group_desc *gdp)
2578{
2579        int offset = offsetof(struct ext4_group_desc, bg_checksum);
2580        __u16 crc = 0;
2581        __le32 le_group = cpu_to_le32(block_group);
2582        struct ext4_sb_info *sbi = EXT4_SB(sb);
2583
2584        if (ext4_has_metadata_csum(sbi->s_sb)) {
2585                /* Use new metadata_csum algorithm */
2586                __u32 csum32;
2587                __u16 dummy_csum = 0;
2588
2589                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2590                                     sizeof(le_group));
2591                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
2592                csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
2593                                     sizeof(dummy_csum));
2594                offset += sizeof(dummy_csum);
2595                if (offset < sbi->s_desc_size)
2596                        csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
2597                                             sbi->s_desc_size - offset);
2598
2599                crc = csum32 & 0xFFFF;
2600                goto out;
2601        }
2602
2603        /* old crc16 code */
2604        if (!ext4_has_feature_gdt_csum(sb))
2605                return 0;
2606
2607        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2608        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2609        crc = crc16(crc, (__u8 *)gdp, offset);
2610        offset += sizeof(gdp->bg_checksum); /* skip checksum */
2611        /* for checksum of struct ext4_group_desc do the rest...*/
2612        if (ext4_has_feature_64bit(sb) &&
2613            offset < le16_to_cpu(sbi->s_es->s_desc_size))
2614                crc = crc16(crc, (__u8 *)gdp + offset,
2615                            le16_to_cpu(sbi->s_es->s_desc_size) -
2616                                offset);
2617
2618out:
2619        return cpu_to_le16(crc);
2620}
2621
2622int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2623                                struct ext4_group_desc *gdp)
2624{
2625        if (ext4_has_group_desc_csum(sb) &&
2626            (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
2627                return 0;
2628
2629        return 1;
2630}
2631
2632void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2633                              struct ext4_group_desc *gdp)
2634{
2635        if (!ext4_has_group_desc_csum(sb))
2636                return;
2637        gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
2638}
2639
2640/* Called at mount-time, super-block is locked */
2641static int ext4_check_descriptors(struct super_block *sb,
2642                                  ext4_fsblk_t sb_block,
2643                                  ext4_group_t *first_not_zeroed)
2644{
2645        struct ext4_sb_info *sbi = EXT4_SB(sb);
2646        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2647        ext4_fsblk_t last_block;
2648        ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
2649        ext4_fsblk_t block_bitmap;
2650        ext4_fsblk_t inode_bitmap;
2651        ext4_fsblk_t inode_table;
2652        int flexbg_flag = 0;
2653        ext4_group_t i, grp = sbi->s_groups_count;
2654
2655        if (ext4_has_feature_flex_bg(sb))
2656                flexbg_flag = 1;
2657
2658        ext4_debug("Checking group descriptors");
2659
2660        for (i = 0; i < sbi->s_groups_count; i++) {
2661                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2662
2663                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2664                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2665                else
2666                        last_block = first_block +
2667                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2668
2669                if ((grp == sbi->s_groups_count) &&
2670                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2671                        grp = i;
2672
2673                block_bitmap = ext4_block_bitmap(sb, gdp);
2674                if (block_bitmap == sb_block) {
2675                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2676                                 "Block bitmap for group %u overlaps "
2677                                 "superblock", i);
2678                        if (!sb_rdonly(sb))
2679                                return 0;
2680                }
2681                if (block_bitmap >= sb_block + 1 &&
2682                    block_bitmap <= last_bg_block) {
2683                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2684                                 "Block bitmap for group %u overlaps "
2685                                 "block group descriptors", i);
2686                        if (!sb_rdonly(sb))
2687                                return 0;
2688                }
2689                if (block_bitmap < first_block || block_bitmap > last_block) {
2690                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2691                               "Block bitmap for group %u not in group "
2692                               "(block %llu)!", i, block_bitmap);
2693                        return 0;
2694                }
2695                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2696                if (inode_bitmap == sb_block) {
2697                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2698                                 "Inode bitmap for group %u overlaps "
2699                                 "superblock", i);
2700                        if (!sb_rdonly(sb))
2701                                return 0;
2702                }
2703                if (inode_bitmap >= sb_block + 1 &&
2704                    inode_bitmap <= last_bg_block) {
2705                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2706                                 "Inode bitmap for group %u overlaps "
2707                                 "block group descriptors", i);
2708                        if (!sb_rdonly(sb))
2709                                return 0;
2710                }
2711                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2712                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2713                               "Inode bitmap for group %u not in group "
2714                               "(block %llu)!", i, inode_bitmap);
2715                        return 0;
2716                }
2717                inode_table = ext4_inode_table(sb, gdp);
2718                if (inode_table == sb_block) {
2719                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2720                                 "Inode table for group %u overlaps "
2721                                 "superblock", i);
2722                        if (!sb_rdonly(sb))
2723                                return 0;
2724                }
2725                if (inode_table >= sb_block + 1 &&
2726                    inode_table <= last_bg_block) {
2727                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2728                                 "Inode table for group %u overlaps "
2729                                 "block group descriptors", i);
2730                        if (!sb_rdonly(sb))
2731                                return 0;
2732                }
2733                if (inode_table < first_block ||
2734                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2735                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2736                               "Inode table for group %u not in group "
2737                               "(block %llu)!", i, inode_table);
2738                        return 0;
2739                }
2740                ext4_lock_group(sb, i);
2741                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2742                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2743                                 "Checksum for group %u failed (%u!=%u)",
2744                                 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
2745                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2746                        if (!sb_rdonly(sb)) {
2747                                ext4_unlock_group(sb, i);
2748                                return 0;
2749                        }
2750                }
2751                ext4_unlock_group(sb, i);
2752                if (!flexbg_flag)
2753                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2754        }
2755        if (NULL != first_not_zeroed)
2756                *first_not_zeroed = grp;
2757        return 1;
2758}
2759
2760/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2761 * the superblock) which were deleted from all directories, but held open by
2762 * a process at the time of a crash.  We walk the list and try to delete these
2763 * inodes at recovery time (only with a read-write filesystem).
2764 *
2765 * In order to keep the orphan inode chain consistent during traversal (in
2766 * case of crash during recovery), we link each inode into the superblock
2767 * orphan list_head and handle it the same way as an inode deletion during
2768 * normal operation (which journals the operations for us).
2769 *
2770 * We only do an iget() and an iput() on each inode, which is very safe if we
2771 * accidentally point at an in-use or already deleted inode.  The worst that
2772 * can happen in this case is that we get a "bit already cleared" message from
2773 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2774 * e2fsck was run on this filesystem, and it must have already done the orphan
2775 * inode cleanup for us, so we can safely abort without any further action.
2776 */
2777static void ext4_orphan_cleanup(struct super_block *sb,
2778                                struct ext4_super_block *es)
2779{
2780        unsigned int s_flags = sb->s_flags;
2781        int ret, nr_orphans = 0, nr_truncates = 0;
2782#ifdef CONFIG_QUOTA
2783        int quota_update = 0;
2784        int i;
2785#endif
2786        if (!es->s_last_orphan) {
2787                jbd_debug(4, "no orphan inodes to clean up\n");
2788                return;
2789        }
2790
2791        if (bdev_read_only(sb->s_bdev)) {
2792                ext4_msg(sb, KERN_ERR, "write access "
2793                        "unavailable, skipping orphan cleanup");
2794                return;
2795        }
2796
2797        /* Check if feature set would not allow a r/w mount */
2798        if (!ext4_feature_set_ok(sb, 0)) {
2799                ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2800                         "unknown ROCOMPAT features");
2801                return;
2802        }
2803
2804        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2805                /* don't clear list on RO mount w/ errors */
2806                if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
2807                        ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2808                                  "clearing orphan list.\n");
2809                        es->s_last_orphan = 0;
2810                }
2811                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2812                return;
2813        }
2814
2815        if (s_flags & SB_RDONLY) {
2816                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2817                sb->s_flags &= ~SB_RDONLY;
2818        }
2819#ifdef CONFIG_QUOTA
2820        /* Needed for iput() to work correctly and not trash data */
2821        sb->s_flags |= SB_ACTIVE;
2822
2823        /*
2824         * Turn on quotas which were not enabled for read-only mounts if
2825         * filesystem has quota feature, so that they are updated correctly.
2826         */
2827        if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
2828                int ret = ext4_enable_quotas(sb);
2829
2830                if (!ret)
2831                        quota_update = 1;
2832                else
2833                        ext4_msg(sb, KERN_ERR,
2834                                "Cannot turn on quotas: error %d", ret);
2835        }
2836
2837        /* Turn on journaled quotas used for old sytle */
2838        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2839                if (EXT4_SB(sb)->s_qf_names[i]) {
2840                        int ret = ext4_quota_on_mount(sb, i);
2841
2842                        if (!ret)
2843                                quota_update = 1;
2844                        else
2845                                ext4_msg(sb, KERN_ERR,
2846                                        "Cannot turn on journaled "
2847                                        "quota: type %d: error %d", i, ret);
2848                }
2849        }
2850#endif
2851
2852        while (es->s_last_orphan) {
2853                struct inode *inode;
2854
2855                /*
2856                 * We may have encountered an error during cleanup; if
2857                 * so, skip the rest.
2858                 */
2859                if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2860                        jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2861                        es->s_last_orphan = 0;
2862                        break;
2863                }
2864
2865                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2866                if (IS_ERR(inode)) {
2867                        es->s_last_orphan = 0;
2868                        break;
2869                }
2870
2871                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2872                dquot_initialize(inode);
2873                if (inode->i_nlink) {
2874                        if (test_opt(sb, DEBUG))
2875                                ext4_msg(sb, KERN_DEBUG,
2876                                        "%s: truncating inode %lu to %lld bytes",
2877                                        __func__, inode->i_ino, inode->i_size);
2878                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2879                                  inode->i_ino, inode->i_size);
2880                        inode_lock(inode);
2881                        truncate_inode_pages(inode->i_mapping, inode->i_size);
2882                        ret = ext4_truncate(inode);
2883                        if (ret)
2884                                ext4_std_error(inode->i_sb, ret);
2885                        inode_unlock(inode);
2886                        nr_truncates++;
2887                } else {
2888                        if (test_opt(sb, DEBUG))
2889                                ext4_msg(sb, KERN_DEBUG,
2890                                        "%s: deleting unreferenced inode %lu",
2891                                        __func__, inode->i_ino);
2892                        jbd_debug(2, "deleting unreferenced inode %lu\n",
2893                                  inode->i_ino);
2894                        nr_orphans++;
2895                }
2896                iput(inode);  /* The delete magic happens here! */
2897        }
2898
2899#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2900
2901        if (nr_orphans)
2902                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2903                       PLURAL(nr_orphans));
2904        if (nr_truncates)
2905                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2906                       PLURAL(nr_truncates));
2907#ifdef CONFIG_QUOTA
2908        /* Turn off quotas if they were enabled for orphan cleanup */
2909        if (quota_update) {
2910                for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2911                        if (sb_dqopt(sb)->files[i])
2912                                dquot_quota_off(sb, i);
2913                }
2914        }
2915#endif
2916        sb->s_flags = s_flags; /* Restore SB_RDONLY status */
2917}
2918
2919/*
2920 * Maximal extent format file size.
2921 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2922 * extent format containers, within a sector_t, and within i_blocks
2923 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2924 * so that won't be a limiting factor.
2925 *
2926 * However there is other limiting factor. We do store extents in the form
2927 * of starting block and length, hence the resulting length of the extent
2928 * covering maximum file size must fit into on-disk format containers as
2929 * well. Given that length is always by 1 unit bigger than max unit (because
2930 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2931 *
2932 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2933 */
2934static loff_t ext4_max_size(int blkbits, int has_huge_files)
2935{
2936        loff_t res;
2937        loff_t upper_limit = MAX_LFS_FILESIZE;
2938
2939        BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
2940
2941        if (!has_huge_files) {
2942                upper_limit = (1LL << 32) - 1;
2943
2944                /* total blocks in file system block size */
2945                upper_limit >>= (blkbits - 9);
2946                upper_limit <<= blkbits;
2947        }
2948
2949        /*
2950         * 32-bit extent-start container, ee_block. We lower the maxbytes
2951         * by one fs block, so ee_len can cover the extent of maximum file
2952         * size
2953         */
2954        res = (1LL << 32) - 1;
2955        res <<= blkbits;
2956
2957        /* Sanity check against vm- & vfs- imposed limits */
2958        if (res > upper_limit)
2959                res = upper_limit;
2960
2961        return res;
2962}
2963
2964/*
2965 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2966 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2967 * We need to be 1 filesystem block less than the 2^48 sector limit.
2968 */
2969static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2970{
2971        loff_t res = EXT4_NDIR_BLOCKS;
2972        int meta_blocks;
2973        loff_t upper_limit;
2974        /* This is calculated to be the largest file size for a dense, block
2975         * mapped file such that the file's total number of 512-byte sectors,
2976         * including data and all indirect blocks, does not exceed (2^48 - 1).
2977         *
2978         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2979         * number of 512-byte sectors of the file.
2980         */
2981
2982        if (!has_huge_files) {
2983                /*
2984                 * !has_huge_files or implies that the inode i_block field
2985                 * represents total file blocks in 2^32 512-byte sectors ==
2986                 * size of vfs inode i_blocks * 8
2987                 */
2988                upper_limit = (1LL << 32) - 1;
2989
2990                /* total blocks in file system block size */
2991                upper_limit >>= (bits - 9);
2992
2993        } else {
2994                /*
2995                 * We use 48 bit ext4_inode i_blocks
2996                 * With EXT4_HUGE_FILE_FL set the i_blocks
2997                 * represent total number of blocks in
2998                 * file system block size
2999                 */
3000                upper_limit = (1LL << 48) - 1;
3001
3002        }
3003
3004        /* indirect blocks */
3005        meta_blocks = 1;
3006        /* double indirect blocks */
3007        meta_blocks += 1 + (1LL << (bits-2));
3008        /* tripple indirect blocks */
3009        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
3010
3011        upper_limit -= meta_blocks;
3012        upper_limit <<= bits;
3013
3014        res += 1LL << (bits-2);
3015        res += 1LL << (2*(bits-2));
3016        res += 1LL << (3*(bits-2));
3017        res <<= bits;
3018        if (res > upper_limit)
3019                res = upper_limit;
3020
3021        if (res > MAX_LFS_FILESIZE)
3022                res = MAX_LFS_FILESIZE;
3023
3024        return res;
3025}
3026
3027static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3028                                   ext4_fsblk_t logical_sb_block, int nr)
3029{
3030        struct ext4_sb_info *sbi = EXT4_SB(sb);
3031        ext4_group_t bg, first_meta_bg;
3032        int has_super = 0;
3033
3034        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3035
3036        if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3037                return logical_sb_block + nr + 1;
3038        bg = sbi->s_desc_per_block * nr;
3039        if (ext4_bg_has_super(sb, bg))
3040                has_super = 1;
3041
3042        /*
3043         * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3044         * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3045         * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3046         * compensate.
3047         */
3048        if (sb->s_blocksize == 1024 && nr == 0 &&
3049            le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3050                has_super++;
3051
3052        return (has_super + ext4_group_first_block_no(sb, bg));
3053}
3054
3055/**
3056 * ext4_get_stripe_size: Get the stripe size.
3057 * @sbi: In memory super block info
3058 *
3059 * If we have specified it via mount option, then
3060 * use the mount option value. If the value specified at mount time is
3061 * greater than the blocks per group use the super block value.
3062 * If the super block value is greater than blocks per group return 0.
3063 * Allocator needs it be less than blocks per group.
3064 *
3065 */
3066static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3067{
3068        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3069        unsigned long stripe_width =
3070                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3071        int ret;
3072
3073        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3074                ret = sbi->s_stripe;
3075        else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3076                ret = stripe_width;
3077        else if (stride && stride <= sbi->s_blocks_per_group)
3078                ret = stride;
3079        else
3080                ret = 0;
3081
3082        /*
3083         * If the stripe width is 1, this makes no sense and
3084         * we set it to 0 to turn off stripe handling code.
3085         */
3086        if (ret <= 1)
3087                ret = 0;
3088
3089        return ret;
3090}
3091
3092/*
3093 * Check whether this filesystem can be mounted based on
3094 * the features present and the RDONLY/RDWR mount requested.
3095 * Returns 1 if this filesystem can be mounted as requested,
3096 * 0 if it cannot be.
3097 */
3098static int ext4_feature_set_ok(struct super_block *sb, int readonly)
3099{
3100        if (ext4_has_unknown_ext4_incompat_features(sb)) {
3101                ext4_msg(sb, KERN_ERR,
3102                        "Couldn't mount because of "
3103                        "unsupported optional features (%x)",
3104                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3105                        ~EXT4_FEATURE_INCOMPAT_SUPP));
3106                return 0;
3107        }
3108
3109#ifndef CONFIG_UNICODE
3110        if (ext4_has_feature_casefold(sb)) {
3111                ext4_msg(sb, KERN_ERR,
3112                         "Filesystem with casefold feature cannot be "
3113                         "mounted without CONFIG_UNICODE");
3114                return 0;
3115        }
3116#endif
3117
3118        if (readonly)
3119                return 1;
3120
3121        if (ext4_has_feature_readonly(sb)) {
3122                ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3123                sb->s_flags |= SB_RDONLY;
3124                return 1;
3125        }
3126
3127        /* Check that feature set is OK for a read-write mount */
3128        if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3129                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3130                         "unsupported optional features (%x)",
3131                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3132                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
3133                return 0;
3134        }
3135        if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3136                ext4_msg(sb, KERN_ERR,
3137                         "Can't support bigalloc feature without "
3138                         "extents feature\n");
3139                return 0;
3140        }
3141
3142#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3143        if (!readonly && (ext4_has_feature_quota(sb) ||
3144                          ext4_has_feature_project(sb))) {
3145                ext4_msg(sb, KERN_ERR,
3146                         "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3147                return 0;
3148        }
3149#endif  /* CONFIG_QUOTA */
3150        return 1;
3151}
3152
3153/*
3154 * This function is called once a day if we have errors logged
3155 * on the file system
3156 */
3157static void print_daily_error_info(struct timer_list *t)
3158{
3159        struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3160        struct super_block *sb = sbi->s_sb;
3161        struct ext4_super_block *es = sbi->s_es;
3162
3163        if (es->s_error_count)
3164                /* fsck newer than v1.41.13 is needed to clean this condition. */
3165                ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3166                         le32_to_cpu(es->s_error_count));
3167        if (es->s_first_error_time) {
3168                printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3169                       sb->s_id,
3170                       ext4_get_tstamp(es, s_first_error_time),
3171                       (int) sizeof(es->s_first_error_func),
3172                       es->s_first_error_func,
3173                       le32_to_cpu(es->s_first_error_line));
3174                if (es->s_first_error_ino)
3175                        printk(KERN_CONT ": inode %u",
3176                               le32_to_cpu(es->s_first_error_ino));
3177                if (es->s_first_error_block)
3178                        printk(KERN_CONT ": block %llu", (unsigned long long)
3179                               le64_to_cpu(es->s_first_error_block));
3180                printk(KERN_CONT "\n");
3181        }
3182        if (es->s_last_error_time) {
3183                printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3184                       sb->s_id,
3185                       ext4_get_tstamp(es, s_last_error_time),
3186                       (int) sizeof(es->s_last_error_func),
3187                       es->s_last_error_func,
3188                       le32_to_cpu(es->s_last_error_line));
3189                if (es->s_last_error_ino)
3190                        printk(KERN_CONT ": inode %u",
3191                               le32_to_cpu(es->s_last_error_ino));
3192                if (es->s_last_error_block)
3193                        printk(KERN_CONT ": block %llu", (unsigned long long)
3194                               le64_to_cpu(es->s_last_error_block));
3195                printk(KERN_CONT "\n");
3196        }
3197        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3198}
3199
3200/* Find next suitable group and run ext4_init_inode_table */
3201static int ext4_run_li_request(struct ext4_li_request *elr)
3202{
3203        struct ext4_group_desc *gdp = NULL;
3204        ext4_group_t group, ngroups;
3205        struct super_block *sb;
3206        unsigned long timeout = 0;
3207        int ret = 0;
3208
3209        sb = elr->lr_super;
3210        ngroups = EXT4_SB(sb)->s_groups_count;
3211
3212        for (group = elr->lr_next_group; group < ngroups; group++) {
3213                gdp = ext4_get_group_desc(sb, group, NULL);
3214                if (!gdp) {
3215                        ret = 1;
3216                        break;
3217                }
3218
3219                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3220                        break;
3221        }
3222
3223        if (group >= ngroups)
3224                ret = 1;
3225
3226        if (!ret) {
3227                timeout = jiffies;
3228                ret = ext4_init_inode_table(sb, group,
3229                                            elr->lr_timeout ? 0 : 1);
3230                if (elr->lr_timeout == 0) {
3231                        timeout = (jiffies - timeout) *
3232                                  elr->lr_sbi->s_li_wait_mult;
3233                        elr->lr_timeout = timeout;
3234                }
3235                elr->lr_next_sched = jiffies + elr->lr_timeout;
3236                elr->lr_next_group = group + 1;
3237        }
3238        return ret;
3239}
3240
3241/*
3242 * Remove lr_request from the list_request and free the
3243 * request structure. Should be called with li_list_mtx held
3244 */
3245static void ext4_remove_li_request(struct ext4_li_request *elr)
3246{
3247        struct ext4_sb_info *sbi;
3248
3249        if (!elr)
3250                return;
3251
3252        sbi = elr->lr_sbi;
3253
3254        list_del(&elr->lr_request);
3255        sbi->s_li_request = NULL;
3256        kfree(elr);
3257}
3258
3259static void ext4_unregister_li_request(struct super_block *sb)
3260{
3261        mutex_lock(&ext4_li_mtx);
3262        if (!ext4_li_info) {
3263                mutex_unlock(&ext4_li_mtx);
3264                return;
3265        }
3266
3267        mutex_lock(&ext4_li_info->li_list_mtx);
3268        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3269        mutex_unlock(&ext4_li_info->li_list_mtx);
3270        mutex_unlock(&ext4_li_mtx);
3271}
3272
3273static struct task_struct *ext4_lazyinit_task;
3274
3275/*
3276 * This is the function where ext4lazyinit thread lives. It walks
3277 * through the request list searching for next scheduled filesystem.
3278 * When such a fs is found, run the lazy initialization request
3279 * (ext4_rn_li_request) and keep track of the time spend in this
3280 * function. Based on that time we compute next schedule time of
3281 * the request. When walking through the list is complete, compute
3282 * next waking time and put itself into sleep.
3283 */
3284static int ext4_lazyinit_thread(void *arg)
3285{
3286        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
3287        struct list_head *pos, *n;
3288        struct ext4_li_request *elr;
3289        unsigned long next_wakeup, cur;
3290
3291        BUG_ON(NULL == eli);
3292
3293cont_thread:
3294        while (true) {
3295                next_wakeup = MAX_JIFFY_OFFSET;
3296
3297                mutex_lock(&eli->li_list_mtx);
3298                if (list_empty(&eli->li_request_list)) {
3299                        mutex_unlock(&eli->li_list_mtx);
3300                        goto exit_thread;
3301                }
3302                list_for_each_safe(pos, n, &eli->li_request_list) {
3303                        int err = 0;
3304                        int progress = 0;
3305                        elr = list_entry(pos, struct ext4_li_request,
3306                                         lr_request);
3307
3308                        if (time_before(jiffies, elr->lr_next_sched)) {
3309                                if (time_before(elr->lr_next_sched, next_wakeup))
3310                                        next_wakeup = elr->lr_next_sched;
3311                                continue;
3312                        }
3313                        if (down_read_trylock(&elr->lr_super->s_umount)) {
3314                                if (sb_start_write_trylock(elr->lr_super)) {
3315                                        progress = 1;
3316                                        /*
3317                                         * We hold sb->s_umount, sb can not
3318                                         * be removed from the list, it is
3319                                         * now safe to drop li_list_mtx
3320                                         */
3321                                        mutex_unlock(&eli->li_list_mtx);
3322                                        err = ext4_run_li_request(elr);
3323                                        sb_end_write(elr->lr_super);
3324                                        mutex_lock(&eli->li_list_mtx);
3325                                        n = pos->next;
3326                                }
3327                                up_read((&elr->lr_super->s_umount));
3328                        }
3329                        /* error, remove the lazy_init job */
3330                        if (err) {
3331                                ext4_remove_li_request(elr);
3332                                continue;
3333                        }
3334                        if (!progress) {
3335                                elr->lr_next_sched = jiffies +
3336                                        (prandom_u32()
3337                                         % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3338                        }
3339                        if (time_before(elr->lr_next_sched, next_wakeup))
3340                                next_wakeup = elr->lr_next_sched;
3341                }
3342                mutex_unlock(&eli->li_list_mtx);
3343
3344                try_to_freeze();
3345
3346                cur = jiffies;
3347                if ((time_after_eq(cur, next_wakeup)) ||
3348                    (MAX_JIFFY_OFFSET == next_wakeup)) {
3349                        cond_resched();
3350                        continue;
3351                }
3352
3353                schedule_timeout_interruptible(next_wakeup - cur);
3354
3355                if (kthread_should_stop()) {
3356                        ext4_clear_request_list();
3357                        goto exit_thread;
3358                }
3359        }
3360
3361exit_thread:
3362        /*
3363         * It looks like the request list is empty, but we need
3364         * to check it under the li_list_mtx lock, to prevent any
3365         * additions into it, and of course we should lock ext4_li_mtx
3366         * to atomically free the list and ext4_li_info, because at
3367         * this point another ext4 filesystem could be registering
3368         * new one.
3369         */
3370        mutex_lock(&ext4_li_mtx);
3371        mutex_lock(&eli->li_list_mtx);
3372        if (!list_empty(&eli->li_request_list)) {
3373                mutex_unlock(&eli->li_list_mtx);
3374                mutex_unlock(&ext4_li_mtx);
3375                goto cont_thread;
3376        }
3377        mutex_unlock(&eli->li_list_mtx);
3378        kfree(ext4_li_info);
3379        ext4_li_info = NULL;
3380        mutex_unlock(&ext4_li_mtx);
3381
3382        return 0;
3383}
3384
3385static void ext4_clear_request_list(void)
3386{
3387        struct list_head *pos, *n;
3388        struct ext4_li_request *elr;
3389
3390        mutex_lock(&ext4_li_info->li_list_mtx);
3391        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3392                elr = list_entry(pos, struct ext4_li_request,
3393                                 lr_request);
3394                ext4_remove_li_request(elr);
3395        }
3396        mutex_unlock(&ext4_li_info->li_list_mtx);
3397}
3398
3399static int ext4_run_lazyinit_thread(void)
3400{
3401        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3402                                         ext4_li_info, "ext4lazyinit");
3403        if (IS_ERR(ext4_lazyinit_task)) {
3404                int err = PTR_ERR(ext4_lazyinit_task);
3405                ext4_clear_request_list();
3406                kfree(ext4_li_info);
3407                ext4_li_info = NULL;
3408                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3409                                 "initialization thread\n",
3410                                 err);
3411                return err;
3412        }
3413        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3414        return 0;
3415}
3416
3417/*
3418 * Check whether it make sense to run itable init. thread or not.
3419 * If there is at least one uninitialized inode table, return
3420 * corresponding group number, else the loop goes through all
3421 * groups and return total number of groups.
3422 */
3423static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3424{
3425        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3426        struct ext4_group_desc *gdp = NULL;
3427
3428        if (!ext4_has_group_desc_csum(sb))
3429                return ngroups;
3430
3431        for (group = 0; group < ngroups; group++) {
3432                gdp = ext4_get_group_desc(sb, group, NULL);
3433                if (!gdp)
3434                        continue;
3435
3436                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3437                        break;
3438        }
3439
3440        return group;
3441}
3442
3443static int ext4_li_info_new(void)
3444{
3445        struct ext4_lazy_init *eli = NULL;
3446
3447        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3448        if (!eli)
3449                return -ENOMEM;
3450
3451        INIT_LIST_HEAD(&eli->li_request_list);
3452        mutex_init(&eli->li_list_mtx);
3453
3454        eli->li_state |= EXT4_LAZYINIT_QUIT;
3455
3456        ext4_li_info = eli;
3457
3458        return 0;
3459}
3460
3461static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3462                                            ext4_group_t start)
3463{
3464        struct ext4_sb_info *sbi = EXT4_SB(sb);
3465        struct ext4_li_request *elr;
3466
3467        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3468        if (!elr)
3469                return NULL;
3470
3471        elr->lr_super = sb;
3472        elr->lr_sbi = sbi;
3473        elr->lr_next_group = start;
3474
3475        /*
3476         * Randomize first schedule time of the request to
3477         * spread the inode table initialization requests
3478         * better.
3479         */
3480        elr->lr_next_sched = jiffies + (prandom_u32() %
3481                                (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3482        return elr;
3483}
3484
3485int ext4_register_li_request(struct super_block *sb,
3486                             ext4_group_t first_not_zeroed)
3487{
3488        struct ext4_sb_info *sbi = EXT4_SB(sb);
3489        struct ext4_li_request *elr = NULL;
3490        ext4_group_t ngroups = sbi->s_groups_count;
3491        int ret = 0;
3492
3493        mutex_lock(&ext4_li_mtx);
3494        if (sbi->s_li_request != NULL) {
3495                /*
3496                 * Reset timeout so it can be computed again, because
3497                 * s_li_wait_mult might have changed.
3498                 */
3499                sbi->s_li_request->lr_timeout = 0;
3500                goto out;
3501        }
3502
3503        if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
3504            !test_opt(sb, INIT_INODE_TABLE))
3505                goto out;
3506
3507        elr = ext4_li_request_new(sb, first_not_zeroed);
3508        if (!elr) {
3509                ret = -ENOMEM;
3510                goto out;
3511        }
3512
3513        if (NULL == ext4_li_info) {
3514                ret = ext4_li_info_new();
3515                if (ret)
3516                        goto out;
3517        }
3518
3519        mutex_lock(&ext4_li_info->li_list_mtx);
3520        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3521        mutex_unlock(&ext4_li_info->li_list_mtx);
3522
3523        sbi->s_li_request = elr;
3524        /*
3525         * set elr to NULL here since it has been inserted to
3526         * the request_list and the removal and free of it is
3527         * handled by ext4_clear_request_list from now on.
3528         */
3529        elr = NULL;
3530
3531        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3532                ret = ext4_run_lazyinit_thread();
3533                if (ret)
3534                        goto out;
3535        }
3536out:
3537        mutex_unlock(&ext4_li_mtx);
3538        if (ret)
3539                kfree(elr);
3540        return ret;
3541}
3542
3543/*
3544 * We do not need to lock anything since this is called on
3545 * module unload.
3546 */
3547static void ext4_destroy_lazyinit_thread(void)
3548{
3549        /*
3550         * If thread exited earlier
3551         * there's nothing to be done.
3552         */
3553        if (!ext4_li_info || !ext4_lazyinit_task)
3554                return;
3555
3556        kthread_stop(ext4_lazyinit_task);
3557}
3558
3559static int set_journal_csum_feature_set(struct super_block *sb)
3560{
3561        int ret = 1;
3562        int compat, incompat;
3563        struct ext4_sb_info *sbi = EXT4_SB(sb);
3564
3565        if (ext4_has_metadata_csum(sb)) {
3566                /* journal checksum v3 */
3567                compat = 0;
3568                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3569        } else {
3570                /* journal checksum v1 */
3571                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3572                incompat = 0;
3573        }
3574
3575        jbd2_journal_clear_features(sbi->s_journal,
3576                        JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3577                        JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3578                        JBD2_FEATURE_INCOMPAT_CSUM_V2);
3579        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3580                ret = jbd2_journal_set_features(sbi->s_journal,
3581                                compat, 0,
3582                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3583                                incompat);
3584        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3585                ret = jbd2_journal_set_features(sbi->s_journal,
3586                                compat, 0,
3587                                incompat);
3588                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3589                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3590        } else {
3591                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3592                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3593        }
3594
3595        return ret;
3596}
3597
3598/*
3599 * Note: calculating the overhead so we can be compatible with
3600 * historical BSD practice is quite difficult in the face of
3601 * clusters/bigalloc.  This is because multiple metadata blocks from
3602 * different block group can end up in the same allocation cluster.
3603 * Calculating the exact overhead in the face of clustered allocation
3604 * requires either O(all block bitmaps) in memory or O(number of block
3605 * groups**2) in time.  We will still calculate the superblock for
3606 * older file systems --- and if we come across with a bigalloc file
3607 * system with zero in s_overhead_clusters the estimate will be close to
3608 * correct especially for very large cluster sizes --- but for newer
3609 * file systems, it's better to calculate this figure once at mkfs
3610 * time, and store it in the superblock.  If the superblock value is
3611 * present (even for non-bigalloc file systems), we will use it.
3612 */
3613static int count_overhead(struct super_block *sb, ext4_group_t grp,
3614                          char *buf)
3615{
3616        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3617        struct ext4_group_desc  *gdp;
3618        ext4_fsblk_t            first_block, last_block, b;
3619        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3620        int                     s, j, count = 0;
3621
3622        if (!ext4_has_feature_bigalloc(sb))
3623                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3624                        sbi->s_itb_per_group + 2);
3625
3626        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3627                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3628        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3629        for (i = 0; i < ngroups; i++) {
3630                gdp = ext4_get_group_desc(sb, i, NULL);
3631                b = ext4_block_bitmap(sb, gdp);
3632                if (b >= first_block && b <= last_block) {
3633                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3634                        count++;
3635                }
3636                b = ext4_inode_bitmap(sb, gdp);
3637                if (b >= first_block && b <= last_block) {
3638                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3639                        count++;
3640                }
3641                b = ext4_inode_table(sb, gdp);
3642                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3643                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3644                                int c = EXT4_B2C(sbi, b - first_block);
3645                                ext4_set_bit(c, buf);
3646                                count++;
3647                        }
3648                if (i != grp)
3649                        continue;
3650                s = 0;
3651                if (ext4_bg_has_super(sb, grp)) {
3652                        ext4_set_bit(s++, buf);
3653                        count++;
3654                }
3655                j = ext4_bg_num_gdb(sb, grp);
3656                if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
3657                        ext4_error(sb, "Invalid number of block group "
3658                                   "descriptor blocks: %d", j);
3659                        j = EXT4_BLOCKS_PER_GROUP(sb) - s;
3660                }
3661                count += j;
3662                for (; j > 0; j--)
3663                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3664        }
3665        if (!count)
3666                return 0;
3667        return EXT4_CLUSTERS_PER_GROUP(sb) -
3668                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3669}
3670
3671/*
3672 * Compute the overhead and stash it in sbi->s_overhead
3673 */
3674int ext4_calculate_overhead(struct super_block *sb)
3675{
3676        struct ext4_sb_info *sbi = EXT4_SB(sb);
3677        struct ext4_super_block *es = sbi->s_es;
3678        struct inode *j_inode;
3679        unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
3680        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3681        ext4_fsblk_t overhead = 0;
3682        char *buf = (char *) get_zeroed_page(GFP_NOFS);
3683
3684        if (!buf)
3685                return -ENOMEM;
3686
3687        /*
3688         * Compute the overhead (FS structures).  This is constant
3689         * for a given filesystem unless the number of block groups
3690         * changes so we cache the previous value until it does.
3691         */
3692
3693        /*
3694         * All of the blocks before first_data_block are overhead
3695         */
3696        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3697
3698        /*
3699         * Add the overhead found in each block group
3700         */
3701        for (i = 0; i < ngroups; i++) {
3702                int blks;
3703
3704                blks = count_overhead(sb, i, buf);
3705                overhead += blks;
3706                if (blks)
3707                        memset(buf, 0, PAGE_SIZE);
3708                cond_resched();
3709        }
3710
3711        /*
3712         * Add the internal journal blocks whether the journal has been
3713         * loaded or not
3714         */
3715        if (sbi->s_journal && !sbi->journal_bdev)
3716                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3717        else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
3718                /* j_inum for internal journal is non-zero */
3719                j_inode = ext4_get_journal_inode(sb, j_inum);
3720                if (j_inode) {
3721                        j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
3722                        overhead += EXT4_NUM_B2C(sbi, j_blocks);
3723                        iput(j_inode);
3724                } else {
3725                        ext4_msg(sb, KERN_ERR, "can't get journal size");
3726                }
3727        }
3728        sbi->s_overhead = overhead;
3729        smp_wmb();
3730        free_page((unsigned long) buf);
3731        return 0;
3732}
3733
3734static void ext4_set_resv_clusters(struct super_block *sb)
3735{
3736        ext4_fsblk_t resv_clusters;
3737        struct ext4_sb_info *sbi = EXT4_SB(sb);
3738
3739        /*
3740         * There's no need to reserve anything when we aren't using extents.
3741         * The space estimates are exact, there are no unwritten extents,
3742         * hole punching doesn't need new metadata... This is needed especially
3743         * to keep ext2/3 backward compatibility.
3744         */
3745        if (!ext4_has_feature_extents(sb))
3746                return;
3747        /*
3748         * By default we reserve 2% or 4096 clusters, whichever is smaller.
3749         * This should cover the situations where we can not afford to run
3750         * out of space like for example punch hole, or converting
3751         * unwritten extents in delalloc path. In most cases such
3752         * allocation would require 1, or 2 blocks, higher numbers are
3753         * very rare.
3754         */
3755        resv_clusters = (ext4_blocks_count(sbi->s_es) >>
3756                         sbi->s_cluster_bits);
3757
3758        do_div(resv_clusters, 50);
3759        resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3760
3761        atomic64_set(&sbi->s_resv_clusters, resv_clusters);
3762}
3763
3764static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3765{
3766        struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
3767        char *orig_data = kstrdup(data, GFP_KERNEL);
3768        struct buffer_head *bh, **group_desc;
3769        struct ext4_super_block *es = NULL;
3770        struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3771        struct flex_groups **flex_groups;
3772        ext4_fsblk_t block;
3773        ext4_fsblk_t sb_block = get_sb_block(&data);
3774        ext4_fsblk_t logical_sb_block;
3775        unsigned long offset = 0;
3776        unsigned long journal_devnum = 0;
3777        unsigned long def_mount_opts;
3778        struct inode *root;
3779        const char *descr;
3780        int ret = -ENOMEM;
3781        int blocksize, clustersize;
3782        unsigned int db_count;
3783        unsigned int i;
3784        int needs_recovery, has_huge_files;
3785        __u64 blocks_count;
3786        int err = 0;
3787        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3788        ext4_group_t first_not_zeroed;
3789
3790        if ((data && !orig_data) || !sbi)
3791                goto out_free_base;
3792
3793        sbi->s_daxdev = dax_dev;
3794        sbi->s_blockgroup_lock =
3795                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3796        if (!sbi->s_blockgroup_lock)
3797                goto out_free_base;
3798
3799        sb->s_fs_info = sbi;
3800        sbi->s_sb = sb;
3801        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3802        sbi->s_sb_block = sb_block;
3803        if (sb->s_bdev->bd_part)
3804                sbi->s_sectors_written_start =
3805                        part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
3806
3807        /* Cleanup superblock name */
3808        strreplace(sb->s_id, '/', '!');
3809
3810        /* -EINVAL is default */
3811        ret = -EINVAL;
3812        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3813        if (!blocksize) {
3814                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3815                goto out_fail;
3816        }
3817
3818        /*
3819         * The ext4 superblock will not be buffer aligned for other than 1kB
3820         * block sizes.  We need to calculate the offset from buffer start.
3821         */
3822        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3823                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3824                offset = do_div(logical_sb_block, blocksize);
3825        } else {
3826                logical_sb_block = sb_block;
3827        }
3828
3829        if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3830                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3831                goto out_fail;
3832        }
3833        /*
3834         * Note: s_es must be initialized as soon as possible because
3835         *       some ext4 macro-instructions depend on its value
3836         */
3837        es = (struct ext4_super_block *) (bh->b_data + offset);
3838        sbi->s_es = es;
3839        sb->s_magic = le16_to_cpu(es->s_magic);
3840        if (sb->s_magic != EXT4_SUPER_MAGIC)
3841                goto cantfind_ext4;
3842        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3843
3844        /* Warn if metadata_csum and gdt_csum are both set. */
3845        if (ext4_has_feature_metadata_csum(sb) &&
3846            ext4_has_feature_gdt_csum(sb))
3847                ext4_warning(sb, "metadata_csum and uninit_bg are "
3848                             "redundant flags; please run fsck.");
3849
3850        /* Check for a known checksum algorithm */
3851        if (!ext4_verify_csum_type(sb, es)) {
3852                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3853                         "unknown checksum algorithm.");
3854                silent = 1;
3855                goto cantfind_ext4;
3856        }
3857
3858        /* Load the checksum driver */
3859        sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3860        if (IS_ERR(sbi->s_chksum_driver)) {
3861                ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3862                ret = PTR_ERR(sbi->s_chksum_driver);
3863                sbi->s_chksum_driver = NULL;
3864                goto failed_mount;
3865        }
3866
3867        /* Check superblock checksum */
3868        if (!ext4_superblock_csum_verify(sb, es)) {
3869                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3870                         "invalid superblock checksum.  Run e2fsck?");
3871                silent = 1;
3872                ret = -EFSBADCRC;
3873                goto cantfind_ext4;
3874        }
3875
3876        /* Precompute checksum seed for all metadata */
3877        if (ext4_has_feature_csum_seed(sb))
3878                sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3879        else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
3880                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3881                                               sizeof(es->s_uuid));
3882
3883        /* Set defaults before we parse the mount options */
3884        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3885        set_opt(sb, INIT_INODE_TABLE);
3886        if (def_mount_opts & EXT4_DEFM_DEBUG)
3887                set_opt(sb, DEBUG);
3888        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3889                set_opt(sb, GRPID);
3890        if (def_mount_opts & EXT4_DEFM_UID16)
3891                set_opt(sb, NO_UID32);
3892        /* xattr user namespace & acls are now defaulted on */
3893        set_opt(sb, XATTR_USER);
3894#ifdef CONFIG_EXT4_FS_POSIX_ACL
3895        set_opt(sb, POSIX_ACL);
3896#endif
3897        /* don't forget to enable journal_csum when metadata_csum is enabled. */
3898        if (ext4_has_metadata_csum(sb))
3899                set_opt(sb, JOURNAL_CHECKSUM);
3900
3901        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3902                set_opt(sb, JOURNAL_DATA);
3903        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3904                set_opt(sb, ORDERED_DATA);
3905        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3906                set_opt(sb, WRITEBACK_DATA);
3907
3908        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3909                set_opt(sb, ERRORS_PANIC);
3910        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3911                set_opt(sb, ERRORS_CONT);
3912        else
3913                set_opt(sb, ERRORS_RO);
3914        /* block_validity enabled by default; disable with noblock_validity */
3915        set_opt(sb, BLOCK_VALIDITY);
3916        if (def_mount_opts & EXT4_DEFM_DISCARD)
3917                set_opt(sb, DISCARD);
3918
3919        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3920        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3921        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3922        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3923        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3924
3925        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3926                set_opt(sb, BARRIER);
3927
3928        /*
3929         * enable delayed allocation by default
3930         * Use -o nodelalloc to turn it off
3931         */
3932        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3933            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3934                set_opt(sb, DELALLOC);
3935
3936        /*
3937         * set default s_li_wait_mult for lazyinit, for the case there is
3938         * no mount option specified.
3939         */
3940        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3941
3942        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3943
3944        if (blocksize == PAGE_SIZE)
3945                set_opt(sb, DIOREAD_NOLOCK);
3946
3947        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3948            blocksize > EXT4_MAX_BLOCK_SIZE) {
3949                ext4_msg(sb, KERN_ERR,
3950                       "Unsupported filesystem blocksize %d (%d log_block_size)",
3951                         blocksize, le32_to_cpu(es->s_log_block_size));
3952                goto failed_mount;
3953        }
3954
3955        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3956                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3957                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3958        } else {
3959                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3960                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3961                if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
3962                        ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
3963                                 sbi->s_first_ino);
3964                        goto failed_mount;
3965                }
3966                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3967                    (!is_power_of_2(sbi->s_inode_size)) ||
3968                    (sbi->s_inode_size > blocksize)) {
3969                        ext4_msg(sb, KERN_ERR,
3970                               "unsupported inode size: %d",
3971                               sbi->s_inode_size);
3972                        ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
3973                        goto failed_mount;
3974                }
3975                /*
3976                 * i_atime_extra is the last extra field available for
3977                 * [acm]times in struct ext4_inode. Checking for that
3978                 * field should suffice to ensure we have extra space
3979                 * for all three.
3980                 */
3981                if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
3982                        sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
3983                        sb->s_time_gran = 1;
3984                        sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
3985                } else {
3986                        sb->s_time_gran = NSEC_PER_SEC;
3987                        sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
3988                }
3989                sb->s_time_min = EXT4_TIMESTAMP_MIN;
3990        }
3991        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
3992                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3993                        EXT4_GOOD_OLD_INODE_SIZE;
3994                if (ext4_has_feature_extra_isize(sb)) {
3995                        unsigned v, max = (sbi->s_inode_size -
3996                                           EXT4_GOOD_OLD_INODE_SIZE);
3997
3998                        v = le16_to_cpu(es->s_want_extra_isize);
3999                        if (v > max) {
4000                                ext4_msg(sb, KERN_ERR,
4001                                         "bad s_want_extra_isize: %d", v);
4002                                goto failed_mount;
4003                        }
4004                        if (sbi->s_want_extra_isize < v)
4005                                sbi->s_want_extra_isize = v;
4006
4007                        v = le16_to_cpu(es->s_min_extra_isize);
4008                        if (v > max) {
4009                                ext4_msg(sb, KERN_ERR,
4010                                         "bad s_min_extra_isize: %d", v);
4011                                goto failed_mount;
4012                        }
4013                        if (sbi->s_want_extra_isize < v)
4014                                sbi->s_want_extra_isize = v;
4015                }
4016        }
4017
4018        if (sbi->s_es->s_mount_opts[0]) {
4019                char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
4020                                              sizeof(sbi->s_es->s_mount_opts),
4021                                              GFP_KERNEL);
4022                if (!s_mount_opts)
4023                        goto failed_mount;
4024                if (!parse_options(s_mount_opts, sb, &journal_devnum,
4025                                   &journal_ioprio, 0)) {
4026                        ext4_msg(sb, KERN_WARNING,
4027                                 "failed to parse options in superblock: %s",
4028                                 s_mount_opts);
4029                }
4030                kfree(s_mount_opts);
4031        }
4032        sbi->s_def_mount_opt = sbi->s_mount_opt;
4033        if (!parse_options((char *) data, sb, &journal_devnum,
4034                           &journal_ioprio, 0))
4035                goto failed_mount;
4036
4037#ifdef CONFIG_UNICODE
4038        if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
4039                const struct ext4_sb_encodings *encoding_info;
4040                struct unicode_map *encoding;
4041                __u16 encoding_flags;
4042
4043                if (ext4_has_feature_encrypt(sb)) {
4044                        ext4_msg(sb, KERN_ERR,
4045                                 "Can't mount with encoding and encryption");
4046                        goto failed_mount;
4047                }
4048
4049                if (ext4_sb_read_encoding(es, &encoding_info,
4050                                          &encoding_flags)) {
4051                        ext4_msg(sb, KERN_ERR,
4052                                 "Encoding requested by superblock is unknown");
4053                        goto failed_mount;
4054                }
4055
4056                encoding = utf8_load(encoding_info->version);
4057                if (IS_ERR(encoding)) {
4058                        ext4_msg(sb, KERN_ERR,
4059                                 "can't mount with superblock charset: %s-%s "
4060                                 "not supported by the kernel. flags: 0x%x.",
4061                                 encoding_info->name, encoding_info->version,
4062                                 encoding_flags);
4063                        goto failed_mount;
4064                }
4065                ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4066                         "%s-%s with flags 0x%hx", encoding_info->name,
4067                         encoding_info->version?:"\b", encoding_flags);
4068
4069                sbi->s_encoding = encoding;
4070                sbi->s_encoding_flags = encoding_flags;
4071        }
4072#endif
4073
4074        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4075                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
4076                /* can't mount with both data=journal and dioread_nolock. */
4077                clear_opt(sb, DIOREAD_NOLOCK);
4078                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4079                        ext4_msg(sb, KERN_ERR, "can't mount with "
4080                                 "both data=journal and delalloc");
4081                        goto failed_mount;
4082                }
4083                if (test_opt(sb, DAX_ALWAYS)) {
4084                        ext4_msg(sb, KERN_ERR, "can't mount with "
4085                                 "both data=journal and dax");
4086                        goto failed_mount;
4087                }
4088                if (ext4_has_feature_encrypt(sb)) {
4089                        ext4_msg(sb, KERN_WARNING,
4090                                 "encrypted files will use data=ordered "
4091                                 "instead of data journaling mode");
4092                }
4093                if (test_opt(sb, DELALLOC))
4094                        clear_opt(sb, DELALLOC);
4095        } else {
4096                sb->s_iflags |= SB_I_CGROUPWB;
4097        }
4098
4099        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
4100                (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
4101
4102        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4103            (ext4_has_compat_features(sb) ||
4104             ext4_has_ro_compat_features(sb) ||
4105             ext4_has_incompat_features(sb)))
4106                ext4_msg(sb, KERN_WARNING,
4107                       "feature flags set on rev 0 fs, "
4108                       "running e2fsck is recommended");
4109
4110        if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4111                set_opt2(sb, HURD_COMPAT);
4112                if (ext4_has_feature_64bit(sb)) {
4113                        ext4_msg(sb, KERN_ERR,
4114                                 "The Hurd can't support 64-bit file systems");
4115                        goto failed_mount;
4116                }
4117
4118                /*
4119                 * ea_inode feature uses l_i_version field which is not
4120                 * available in HURD_COMPAT mode.
4121                 */
4122                if (ext4_has_feature_ea_inode(sb)) {
4123                        ext4_msg(sb, KERN_ERR,
4124                                 "ea_inode feature is not supported for Hurd");
4125                        goto failed_mount;
4126                }
4127        }
4128
4129        if (IS_EXT2_SB(sb)) {
4130                if (ext2_feature_set_ok(sb))
4131                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4132                                 "using the ext4 subsystem");
4133                else {
4134                        /*
4135                         * If we're probing be silent, if this looks like
4136                         * it's actually an ext[34] filesystem.
4137                         */
4138                        if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4139                                goto failed_mount;
4140                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4141                                 "to feature incompatibilities");
4142                        goto failed_mount;
4143                }
4144        }
4145
4146        if (IS_EXT3_SB(sb)) {
4147                if (ext3_feature_set_ok(sb))
4148                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4149                                 "using the ext4 subsystem");
4150                else {
4151                        /*
4152                         * If we're probing be silent, if this looks like
4153                         * it's actually an ext4 filesystem.
4154                         */
4155                        if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4156                                goto failed_mount;
4157                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4158                                 "to feature incompatibilities");
4159                        goto failed_mount;
4160                }
4161        }
4162
4163        /*
4164         * Check feature flags regardless of the revision level, since we
4165         * previously didn't change the revision level when setting the flags,
4166         * so there is a chance incompat flags are set on a rev 0 filesystem.
4167         */
4168        if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4169                goto failed_mount;
4170
4171        if (le32_to_cpu(es->s_log_block_size) >
4172            (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4173                ext4_msg(sb, KERN_ERR,
4174                         "Invalid log block size: %u",
4175                         le32_to_cpu(es->s_log_block_size));
4176                goto failed_mount;
4177        }
4178        if (le32_to_cpu(es->s_log_cluster_size) >
4179            (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4180                ext4_msg(sb, KERN_ERR,
4181                         "Invalid log cluster size: %u",
4182                         le32_to_cpu(es->s_log_cluster_size));
4183                goto failed_mount;
4184        }
4185
4186        if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
4187                ext4_msg(sb, KERN_ERR,
4188                         "Number of reserved GDT blocks insanely large: %d",
4189                         le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4190                goto failed_mount;
4191        }
4192
4193        if (bdev_dax_supported(sb->s_bdev, blocksize))
4194                set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4195
4196        if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4197                if (ext4_has_feature_inline_data(sb)) {
4198                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4199                                        " that may contain inline data");
4200                        goto failed_mount;
4201                }
4202                if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4203                        ext4_msg(sb, KERN_ERR,
4204                                "DAX unsupported by block device.");
4205                        goto failed_mount;
4206                }
4207        }
4208
4209        if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4210                ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4211                         es->s_encryption_level);
4212                goto failed_mount;
4213        }
4214
4215        if (sb->s_blocksize != blocksize) {
4216                /* Validate the filesystem blocksize */
4217                if (!sb_set_blocksize(sb, blocksize)) {
4218                        ext4_msg(sb, KERN_ERR, "bad block size %d",
4219                                        blocksize);
4220                        goto failed_mount;
4221                }
4222
4223                brelse(bh);
4224                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
4225                offset = do_div(logical_sb_block, blocksize);
4226                bh = sb_bread_unmovable(sb, logical_sb_block);
4227                if (!bh) {
4228                        ext4_msg(sb, KERN_ERR,
4229                               "Can't read superblock on 2nd try");
4230                        goto failed_mount;
4231                }
4232                es = (struct ext4_super_block *)(bh->b_data + offset);
4233                sbi->s_es = es;
4234                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
4235                        ext4_msg(sb, KERN_ERR,
4236                               "Magic mismatch, very weird!");
4237                        goto failed_mount;
4238                }
4239        }
4240
4241        has_huge_files = ext4_has_feature_huge_file(sb);
4242        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
4243                                                      has_huge_files);
4244        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
4245
4246        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
4247        if (ext4_has_feature_64bit(sb)) {
4248                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
4249                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
4250                    !is_power_of_2(sbi->s_desc_size)) {
4251                        ext4_msg(sb, KERN_ERR,
4252                               "unsupported descriptor size %lu",
4253                               sbi->s_desc_size);
4254                        goto failed_mount;
4255                }
4256        } else
4257                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
4258
4259        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
4260        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
4261
4262        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
4263        if (sbi->s_inodes_per_block == 0)
4264                goto cantfind_ext4;
4265        if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
4266            sbi->s_inodes_per_group > blocksize * 8) {
4267                ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
4268                         sbi->s_inodes_per_group);
4269                goto failed_mount;
4270        }
4271        sbi->s_itb_per_group = sbi->s_inodes_per_group /
4272                                        sbi->s_inodes_per_block;
4273        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
4274        sbi->s_sbh = bh;
4275        sbi->s_mount_state = le16_to_cpu(es->s_state);
4276        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4277        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
4278
4279        for (i = 0; i < 4; i++)
4280                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4281        sbi->s_def_hash_version = es->s_def_hash_version;
4282        if (ext4_has_feature_dir_index(sb)) {
4283                i = le32_to_cpu(es->s_flags);
4284                if (i & EXT2_FLAGS_UNSIGNED_HASH)
4285                        sbi->s_hash_unsigned = 3;
4286                else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
4287#ifdef __CHAR_UNSIGNED__
4288                        if (!sb_rdonly(sb))
4289                                es->s_flags |=
4290                                        cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4291                        sbi->s_hash_unsigned = 3;
4292#else
4293                        if (!sb_rdonly(sb))
4294                                es->s_flags |=
4295                                        cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
4296#endif
4297                }
4298        }
4299
4300        /* Handle clustersize */
4301        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4302        if (ext4_has_feature_bigalloc(sb)) {
4303                if (clustersize < blocksize) {
4304                        ext4_msg(sb, KERN_ERR,
4305                                 "cluster size (%d) smaller than "
4306                                 "block size (%d)", clustersize, blocksize);
4307                        goto failed_mount;
4308                }
4309                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4310                        le32_to_cpu(es->s_log_block_size);
4311                sbi->s_clusters_per_group =
4312                        le32_to_cpu(es->s_clusters_per_group);
4313                if (sbi->s_clusters_per_group > blocksize * 8) {
4314                        ext4_msg(sb, KERN_ERR,
4315                                 "#clusters per group too big: %lu",
4316                                 sbi->s_clusters_per_group);
4317                        goto failed_mount;
4318                }
4319                if (sbi->s_blocks_per_group !=
4320                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4321                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4322                                 "clusters per group (%lu) inconsistent",
4323                                 sbi->s_blocks_per_group,
4324                                 sbi->s_clusters_per_group);
4325                        goto failed_mount;
4326                }
4327        } else {
4328                if (clustersize != blocksize) {
4329                        ext4_msg(sb, KERN_ERR,
4330                                 "fragment/cluster size (%d) != "
4331                                 "block size (%d)", clustersize, blocksize);
4332                        goto failed_mount;
4333                }
4334                if (sbi->s_blocks_per_group > blocksize * 8) {
4335                        ext4_msg(sb, KERN_ERR,
4336                                 "#blocks per group too big: %lu",
4337                                 sbi->s_blocks_per_group);
4338                        goto failed_mount;
4339                }
4340                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4341                sbi->s_cluster_bits = 0;
4342        }
4343        sbi->s_cluster_ratio = clustersize / blocksize;
4344
4345        /* Do we have standard group size of clustersize * 8 blocks ? */
4346        if (sbi->s_blocks_per_group == clustersize << 3)
4347                set_opt2(sb, STD_GROUP_SIZE);
4348
4349        /*
4350         * Test whether we have more sectors than will fit in sector_t,
4351         * and whether the max offset is addressable by the page cache.
4352         */
4353        err = generic_check_addressable(sb->s_blocksize_bits,
4354                                        ext4_blocks_count(es));
4355        if (err) {
4356                ext4_msg(sb, KERN_ERR, "filesystem"
4357                         " too large to mount safely on this system");
4358                goto failed_mount;
4359        }
4360
4361        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4362                goto cantfind_ext4;
4363
4364        /* check blocks count against device size */
4365        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
4366        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4367                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4368                       "exceeds size of device (%llu blocks)",
4369                       ext4_blocks_count(es), blocks_count);
4370                goto failed_mount;
4371        }
4372
4373        /*
4374         * It makes no sense for the first data block to be beyond the end
4375         * of the filesystem.
4376         */
4377        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4378                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4379                         "block %u is beyond end of filesystem (%llu)",
4380                         le32_to_cpu(es->s_first_data_block),
4381                         ext4_blocks_count(es));
4382                goto failed_mount;
4383        }
4384        if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4385            (sbi->s_cluster_ratio == 1)) {
4386                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4387                         "block is 0 with a 1k block and cluster size");
4388                goto failed_mount;
4389        }
4390
4391        blocks_count = (ext4_blocks_count(es) -
4392                        le32_to_cpu(es->s_first_data_block) +
4393                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
4394        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4395        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4396                ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4397                       "(block count %llu, first data block %u, "
4398                       "blocks per group %lu)", blocks_count,
4399                       ext4_blocks_count(es),
4400                       le32_to_cpu(es->s_first_data_block),
4401                       EXT4_BLOCKS_PER_GROUP(sb));
4402                goto failed_mount;
4403        }
4404        sbi->s_groups_count = blocks_count;
4405        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4406                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4407        if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4408            le32_to_cpu(es->s_inodes_count)) {
4409                ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4410                         le32_to_cpu(es->s_inodes_count),
4411                         ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4412                ret = -EINVAL;
4413                goto failed_mount;
4414        }
4415        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4416                   EXT4_DESC_PER_BLOCK(sb);
4417        if (ext4_has_feature_meta_bg(sb)) {
4418                if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4419                        ext4_msg(sb, KERN_WARNING,
4420                                 "first meta block group too large: %u "
4421                                 "(group descriptor block count %u)",
4422                                 le32_to_cpu(es->s_first_meta_bg), db_count);
4423                        goto failed_mount;
4424                }
4425        }
4426        rcu_assign_pointer(sbi->s_group_desc,
4427                           kvmalloc_array(db_count,
4428                                          sizeof(struct buffer_head *),
4429                                          GFP_KERNEL));
4430        if (sbi->s_group_desc == NULL) {
4431                ext4_msg(sb, KERN_ERR, "not enough memory");
4432                ret = -ENOMEM;
4433                goto failed_mount;
4434        }
4435
4436        bgl_lock_init(sbi->s_blockgroup_lock);
4437
4438        /* Pre-read the descriptors into the buffer cache */
4439        for (i = 0; i < db_count; i++) {
4440                block = descriptor_loc(sb, logical_sb_block, i);
4441                sb_breadahead_unmovable(sb, block);
4442        }
4443
4444        for (i = 0; i < db_count; i++) {
4445                struct buffer_head *bh;
4446
4447                block = descriptor_loc(sb, logical_sb_block, i);
4448                bh = sb_bread_unmovable(sb, block);
4449                if (!bh) {
4450                        ext4_msg(sb, KERN_ERR,
4451                               "can't read group descriptor %d", i);
4452                        db_count = i;
4453                        goto failed_mount2;
4454                }
4455                rcu_read_lock();
4456                rcu_dereference(sbi->s_group_desc)[i] = bh;
4457                rcu_read_unlock();
4458        }
4459        sbi->s_gdb_count = db_count;
4460        if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
4461                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4462                ret = -EFSCORRUPTED;
4463                goto failed_mount2;
4464        }
4465
4466        timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
4467
4468        /* Register extent status tree shrinker */
4469        if (ext4_es_register_shrinker(sbi))
4470                goto failed_mount3;
4471
4472        sbi->s_stripe = ext4_get_stripe_size(sbi);
4473        sbi->s_extent_max_zeroout_kb = 32;
4474
4475        /*
4476         * set up enough so that it can read an inode
4477         */
4478        sb->s_op = &ext4_sops;
4479        sb->s_export_op = &ext4_export_ops;
4480        sb->s_xattr = ext4_xattr_handlers;
4481#ifdef CONFIG_FS_ENCRYPTION
4482        sb->s_cop = &ext4_cryptops;
4483#endif
4484#ifdef CONFIG_FS_VERITY
4485        sb->s_vop = &ext4_verityops;
4486#endif
4487#ifdef CONFIG_QUOTA
4488        sb->dq_op = &ext4_quota_operations;
4489        if (ext4_has_feature_quota(sb))
4490                sb->s_qcop = &dquot_quotactl_sysfile_ops;
4491        else
4492                sb->s_qcop = &ext4_qctl_operations;
4493        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
4494#endif
4495        memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
4496
4497        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
4498        mutex_init(&sbi->s_orphan_lock);
4499
4500        sb->s_root = NULL;
4501
4502        needs_recovery = (es->s_last_orphan != 0 ||
4503                          ext4_has_feature_journal_needs_recovery(sb));
4504
4505        if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
4506                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4507                        goto failed_mount3a;
4508
4509        /*
4510         * The first inode we look at is the journal inode.  Don't try
4511         * root first: it may be modified in the journal!
4512         */
4513        if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
4514                err = ext4_load_journal(sb, es, journal_devnum);
4515                if (err)
4516                        goto failed_mount3a;
4517        } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
4518                   ext4_has_feature_journal_needs_recovery(sb)) {
4519                ext4_msg(sb, KERN_ERR, "required journal recovery "
4520                       "suppressed and not mounted read-only");
4521                goto failed_mount_wq;
4522        } else {
4523                /* Nojournal mode, all journal mount options are illegal */
4524                if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
4525                        ext4_msg(sb, KERN_ERR, "can't mount with "
4526                                 "journal_checksum, fs mounted w/o journal");
4527                        goto failed_mount_wq;
4528                }
4529                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4530                        ext4_msg(sb, KERN_ERR, "can't mount with "
4531                                 "journal_async_commit, fs mounted w/o journal");
4532                        goto failed_mount_wq;
4533                }
4534                if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
4535                        ext4_msg(sb, KERN_ERR, "can't mount with "
4536                                 "commit=%lu, fs mounted w/o journal",
4537                                 sbi->s_commit_interval / HZ);
4538                        goto failed_mount_wq;
4539                }
4540                if (EXT4_MOUNT_DATA_FLAGS &
4541                    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
4542                        ext4_msg(sb, KERN_ERR, "can't mount with "
4543                                 "data=, fs mounted w/o journal");
4544                        goto failed_mount_wq;
4545                }
4546                sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
4547                clear_opt(sb, JOURNAL_CHECKSUM);
4548                clear_opt(sb, DATA_FLAGS);
4549                sbi->s_journal = NULL;
4550                needs_recovery = 0;
4551                goto no_journal;
4552        }
4553
4554        if (ext4_has_feature_64bit(sb) &&
4555            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4556                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
4557                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4558                goto failed_mount_wq;
4559        }
4560
4561        if (!set_journal_csum_feature_set(sb)) {
4562                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4563                         "feature set");
4564                goto failed_mount_wq;
4565        }
4566
4567        /* We have now updated the journal if required, so we can
4568         * validate the data journaling mode. */
4569        switch (test_opt(sb, DATA_FLAGS)) {
4570        case 0:
4571                /* No mode set, assume a default based on the journal
4572                 * capabilities: ORDERED_DATA if the journal can
4573                 * cope, else JOURNAL_DATA
4574                 */
4575                if (jbd2_journal_check_available_features
4576                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4577                        set_opt(sb, ORDERED_DATA);
4578                        sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4579                } else {
4580                        set_opt(sb, JOURNAL_DATA);
4581                        sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4582                }
4583                break;
4584
4585        case EXT4_MOUNT_ORDERED_DATA:
4586        case EXT4_MOUNT_WRITEBACK_DATA:
4587                if (!jbd2_journal_check_available_features
4588                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4589                        ext4_msg(sb, KERN_ERR, "Journal does not support "
4590                               "requested data journaling mode");
4591                        goto failed_mount_wq;
4592                }
4593        default:
4594                break;
4595        }
4596
4597        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4598            test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4599                ext4_msg(sb, KERN_ERR, "can't mount with "
4600                        "journal_async_commit in data=ordered mode");
4601                goto failed_mount_wq;
4602        }
4603
4604        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4605
4606        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4607
4608no_journal:
4609        if (!test_opt(sb, NO_MBCACHE)) {
4610                sbi->s_ea_block_cache = ext4_xattr_create_cache();
4611                if (!sbi->s_ea_block_cache) {
4612                        ext4_msg(sb, KERN_ERR,
4613                                 "Failed to create ea_block_cache");
4614                        goto failed_mount_wq;
4615                }
4616
4617                if (ext4_has_feature_ea_inode(sb)) {
4618                        sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4619                        if (!sbi->s_ea_inode_cache) {
4620                                ext4_msg(sb, KERN_ERR,
4621                                         "Failed to create ea_inode_cache");
4622                                goto failed_mount_wq;
4623                        }
4624                }
4625        }
4626
4627        if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
4628                ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
4629                goto failed_mount_wq;
4630        }
4631
4632        if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
4633            !ext4_has_feature_encrypt(sb)) {
4634                ext4_set_feature_encrypt(sb);
4635                ext4_commit_super(sb, 1);
4636        }
4637
4638        /*
4639         * Get the # of file system overhead blocks from the
4640         * superblock if present.
4641         */
4642        if (es->s_overhead_clusters)
4643                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4644        else {
4645                err = ext4_calculate_overhead(sb);
4646                if (err)
4647                        goto failed_mount_wq;
4648        }
4649
4650        /*
4651         * The maximum number of concurrent works can be high and
4652         * concurrency isn't really necessary.  Limit it to 1.
4653         */
4654        EXT4_SB(sb)->rsv_conversion_wq =
4655                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4656        if (!EXT4_SB(sb)->rsv_conversion_wq) {
4657                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4658                ret = -ENOMEM;
4659                goto failed_mount4;
4660        }
4661
4662        /*
4663         * The jbd2_journal_load will have done any necessary log recovery,
4664         * so we can safely mount the rest of the filesystem now.
4665         */
4666
4667        root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
4668        if (IS_ERR(root)) {
4669                ext4_msg(sb, KERN_ERR, "get root inode failed");
4670                ret = PTR_ERR(root);
4671                root = NULL;
4672                goto failed_mount4;
4673        }
4674        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4675                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4676                iput(root);
4677                goto failed_mount4;
4678        }
4679
4680#ifdef CONFIG_UNICODE
4681        if (sbi->s_encoding)
4682                sb->s_d_op = &ext4_dentry_ops;
4683#endif
4684
4685        sb->s_root = d_make_root(root);
4686        if (!sb->s_root) {
4687                ext4_msg(sb, KERN_ERR, "get root dentry failed");
4688                ret = -ENOMEM;
4689                goto failed_mount4;
4690        }
4691
4692        ret = ext4_setup_super(sb, es, sb_rdonly(sb));
4693        if (ret == -EROFS) {
4694                sb->s_flags |= SB_RDONLY;
4695                ret = 0;
4696        } else if (ret)
4697                goto failed_mount4a;
4698
4699        ext4_set_resv_clusters(sb);
4700
4701        err = ext4_setup_system_zone(sb);
4702        if (err) {
4703                ext4_msg(sb, KERN_ERR, "failed to initialize system "
4704                         "zone (%d)", err);
4705                goto failed_mount4a;
4706        }
4707
4708        ext4_ext_init(sb);
4709        err = ext4_mb_init(sb);
4710        if (err) {
4711                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4712                         err);
4713                goto failed_mount5;
4714        }
4715
4716        block = ext4_count_free_clusters(sb);
4717        ext4_free_blocks_count_set(sbi->s_es, 
4718                                   EXT4_C2B(sbi, block));
4719        ext4_superblock_csum_set(sb);
4720        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4721                                  GFP_KERNEL);
4722        if (!err) {
4723                unsigned long freei = ext4_count_free_inodes(sb);
4724                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4725                ext4_superblock_csum_set(sb);
4726                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4727                                          GFP_KERNEL);
4728        }
4729        if (!err)
4730                err = percpu_counter_init(&sbi->s_dirs_counter,
4731                                          ext4_count_dirs(sb), GFP_KERNEL);
4732        if (!err)
4733                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4734                                          GFP_KERNEL);
4735        if (!err)
4736                err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
4737
4738        if (err) {
4739                ext4_msg(sb, KERN_ERR, "insufficient memory");
4740                goto failed_mount6;
4741        }
4742
4743        if (ext4_has_feature_flex_bg(sb))
4744                if (!ext4_fill_flex_info(sb)) {
4745                        ext4_msg(sb, KERN_ERR,
4746                               "unable to initialize "
4747                               "flex_bg meta info!");
4748                        goto failed_mount6;
4749                }
4750
4751        err = ext4_register_li_request(sb, first_not_zeroed);
4752        if (err)
4753                goto failed_mount6;
4754
4755        err = ext4_register_sysfs(sb);
4756        if (err)
4757                goto failed_mount7;
4758
4759#ifdef CONFIG_QUOTA
4760        /* Enable quota usage during mount. */
4761        if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
4762                err = ext4_enable_quotas(sb);
4763                if (err)
4764                        goto failed_mount8;
4765        }
4766#endif  /* CONFIG_QUOTA */
4767
4768        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4769        ext4_orphan_cleanup(sb, es);
4770        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4771        if (needs_recovery) {
4772                ext4_msg(sb, KERN_INFO, "recovery complete");
4773                ext4_mark_recovery_complete(sb, es);
4774        }
4775        if (EXT4_SB(sb)->s_journal) {
4776                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4777                        descr = " journalled data mode";
4778                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4779                        descr = " ordered data mode";
4780                else
4781                        descr = " writeback data mode";
4782        } else
4783                descr = "out journal";
4784
4785        if (test_opt(sb, DISCARD)) {
4786                struct request_queue *q = bdev_get_queue(sb->s_bdev);
4787                if (!blk_queue_discard(q))
4788                        ext4_msg(sb, KERN_WARNING,
4789                                 "mounting with \"discard\" option, but "
4790                                 "the device does not support discard");
4791        }
4792
4793        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4794                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4795                         "Opts: %.*s%s%s", descr,
4796                         (int) sizeof(sbi->s_es->s_mount_opts),
4797                         sbi->s_es->s_mount_opts,
4798                         *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4799
4800        if (es->s_error_count)
4801                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4802
4803        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4804        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4805        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4806        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4807
4808        kfree(orig_data);
4809        return 0;
4810
4811cantfind_ext4:
4812        if (!silent)
4813                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4814        goto failed_mount;
4815
4816#ifdef CONFIG_QUOTA
4817failed_mount8:
4818        ext4_unregister_sysfs(sb);
4819#endif
4820failed_mount7:
4821        ext4_unregister_li_request(sb);
4822failed_mount6:
4823        ext4_mb_release(sb);
4824        rcu_read_lock();
4825        flex_groups = rcu_dereference(sbi->s_flex_groups);
4826        if (flex_groups) {
4827                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
4828                        kvfree(flex_groups[i]);
4829                kvfree(flex_groups);
4830        }
4831        rcu_read_unlock();
4832        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4833        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4834        percpu_counter_destroy(&sbi->s_dirs_counter);
4835        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4836        percpu_free_rwsem(&sbi->s_writepages_rwsem);
4837failed_mount5:
4838        ext4_ext_release(sb);
4839        ext4_release_system_zone(sb);
4840failed_mount4a:
4841        dput(sb->s_root);
4842        sb->s_root = NULL;
4843failed_mount4:
4844        ext4_msg(sb, KERN_ERR, "mount failed");
4845        if (EXT4_SB(sb)->rsv_conversion_wq)
4846                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4847failed_mount_wq:
4848        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4849        sbi->s_ea_inode_cache = NULL;
4850
4851        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
4852        sbi->s_ea_block_cache = NULL;
4853
4854        if (sbi->s_journal) {
4855                jbd2_journal_destroy(sbi->s_journal);
4856                sbi->s_journal = NULL;
4857        }
4858failed_mount3a:
4859        ext4_es_unregister_shrinker(sbi);
4860failed_mount3:
4861        del_timer_sync(&sbi->s_err_report);
4862        if (sbi->s_mmp_tsk)
4863                kthread_stop(sbi->s_mmp_tsk);
4864failed_mount2:
4865        rcu_read_lock();
4866        group_desc = rcu_dereference(sbi->s_group_desc);
4867        for (i = 0; i < db_count; i++)
4868                brelse(group_desc[i]);
4869        kvfree(group_desc);
4870        rcu_read_unlock();
4871failed_mount:
4872        if (sbi->s_chksum_driver)
4873                crypto_free_shash(sbi->s_chksum_driver);
4874
4875#ifdef CONFIG_UNICODE
4876        utf8_unload(sbi->s_encoding);
4877#endif
4878
4879#ifdef CONFIG_QUOTA
4880        for (i = 0; i < EXT4_MAXQUOTAS; i++)
4881                kfree(get_qf_name(sb, sbi, i));
4882#endif
4883        fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
4884        ext4_blkdev_remove(sbi);
4885        brelse(bh);
4886out_fail:
4887        sb->s_fs_info = NULL;
4888        kfree(sbi->s_blockgroup_lock);
4889out_free_base:
4890        kfree(sbi);
4891        kfree(orig_data);
4892        fs_put_dax(dax_dev);
4893        return err ? err : ret;
4894}
4895
4896/*
4897 * Setup any per-fs journal parameters now.  We'll do this both on
4898 * initial mount, once the journal has been initialised but before we've
4899 * done any recovery; and again on any subsequent remount.
4900 */
4901static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4902{
4903        struct ext4_sb_info *sbi = EXT4_SB(sb);
4904
4905        journal->j_commit_interval = sbi->s_commit_interval;
4906        journal->j_min_batch_time = sbi->s_min_batch_time;
4907        journal->j_max_batch_time = sbi->s_max_batch_time;
4908
4909        write_lock(&journal->j_state_lock);
4910        if (test_opt(sb, BARRIER))
4911                journal->j_flags |= JBD2_BARRIER;
4912        else
4913                journal->j_flags &= ~JBD2_BARRIER;
4914        if (test_opt(sb, DATA_ERR_ABORT))
4915                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4916        else
4917                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4918        write_unlock(&journal->j_state_lock);
4919}
4920
4921static struct inode *ext4_get_journal_inode(struct super_block *sb,
4922                                             unsigned int journal_inum)
4923{
4924        struct inode *journal_inode;
4925
4926        /*
4927         * Test for the existence of a valid inode on disk.  Bad things
4928         * happen if we iget() an unused inode, as the subsequent iput()
4929         * will try to delete it.
4930         */
4931        journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
4932        if (IS_ERR(journal_inode)) {
4933                ext4_msg(sb, KERN_ERR, "no journal found");
4934                return NULL;
4935        }
4936        if (!journal_inode->i_nlink) {
4937                make_bad_inode(journal_inode);
4938                iput(journal_inode);
4939                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4940                return NULL;
4941        }
4942
4943        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4944                  journal_inode, journal_inode->i_size);
4945        if (!S_ISREG(journal_inode->i_mode)) {
4946                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4947                iput(journal_inode);
4948                return NULL;
4949        }
4950        return journal_inode;
4951}
4952
4953static journal_t *ext4_get_journal(struct super_block *sb,
4954                                   unsigned int journal_inum)
4955{
4956        struct inode *journal_inode;
4957        journal_t *journal;
4958
4959        BUG_ON(!ext4_has_feature_journal(sb));
4960
4961        journal_inode = ext4_get_journal_inode(sb, journal_inum);
4962        if (!journal_inode)
4963                return NULL;
4964
4965        journal = jbd2_journal_init_inode(journal_inode);
4966        if (!journal) {
4967                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4968                iput(journal_inode);
4969                return NULL;
4970        }
4971        journal->j_private = sb;
4972        ext4_init_journal_params(sb, journal);
4973        return journal;
4974}
4975
4976static journal_t *ext4_get_dev_journal(struct super_block *sb,
4977                                       dev_t j_dev)
4978{
4979        struct buffer_head *bh;
4980        journal_t *journal;
4981        ext4_fsblk_t start;
4982        ext4_fsblk_t len;
4983        int hblock, blocksize;
4984        ext4_fsblk_t sb_block;
4985        unsigned long offset;
4986        struct ext4_super_block *es;
4987        struct block_device *bdev;
4988
4989        BUG_ON(!ext4_has_feature_journal(sb));
4990
4991        bdev = ext4_blkdev_get(j_dev, sb);
4992        if (bdev == NULL)
4993                return NULL;
4994
4995        blocksize = sb->s_blocksize;
4996        hblock = bdev_logical_block_size(bdev);
4997        if (blocksize < hblock) {
4998                ext4_msg(sb, KERN_ERR,
4999                        "blocksize too small for journal device");
5000                goto out_bdev;
5001        }
5002
5003        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5004        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5005        set_blocksize(bdev, blocksize);
5006        if (!(bh = __bread(bdev, sb_block, blocksize))) {
5007                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5008                       "external journal");
5009                goto out_bdev;
5010        }
5011
5012        es = (struct ext4_super_block *) (bh->b_data + offset);
5013        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5014            !(le32_to_cpu(es->s_feature_incompat) &
5015              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5016                ext4_msg(sb, KERN_ERR, "external journal has "
5017                                        "bad superblock");
5018                brelse(bh);
5019                goto out_bdev;
5020        }
5021
5022        if ((le32_to_cpu(es->s_feature_ro_compat) &
5023             EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5024            es->s_checksum != ext4_superblock_csum(sb, es)) {
5025                ext4_msg(sb, KERN_ERR, "external journal has "
5026                                       "corrupt superblock");
5027                brelse(bh);
5028                goto out_bdev;
5029        }
5030
5031        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5032                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5033                brelse(bh);
5034                goto out_bdev;
5035        }
5036
5037        len = ext4_blocks_count(es);
5038        start = sb_block + 1;
5039        brelse(bh);     /* we're done with the superblock */
5040
5041        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
5042                                        start, len, blocksize);
5043        if (!journal) {
5044                ext4_msg(sb, KERN_ERR, "failed to create device journal");
5045                goto out_bdev;
5046        }
5047        journal->j_private = sb;
5048        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
5049        wait_on_buffer(journal->j_sb_buffer);
5050        if (!buffer_uptodate(journal->j_sb_buffer)) {
5051                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
5052                goto out_journal;
5053        }
5054        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5055                ext4_msg(sb, KERN_ERR, "External journal has more than one "
5056                                        "user (unsupported) - %d",
5057                        be32_to_cpu(journal->j_superblock->s_nr_users));
5058                goto out_journal;
5059        }
5060        EXT4_SB(sb)->journal_bdev = bdev;
5061        ext4_init_journal_params(sb, journal);
5062        return journal;
5063
5064out_journal:
5065        jbd2_journal_destroy(journal);
5066out_bdev:
5067        ext4_blkdev_put(bdev);
5068        return NULL;
5069}
5070
5071static int ext4_load_journal(struct super_block *sb,
5072                             struct ext4_super_block *es,
5073                             unsigned long journal_devnum)
5074{
5075        journal_t *journal;
5076        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5077        dev_t journal_dev;
5078        int err = 0;
5079        int really_read_only;
5080
5081        BUG_ON(!ext4_has_feature_journal(sb));
5082
5083        if (journal_devnum &&
5084            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5085                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5086                        "numbers have changed");
5087                journal_dev = new_decode_dev(journal_devnum);
5088        } else
5089                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5090
5091        really_read_only = bdev_read_only(sb->s_bdev);
5092
5093        /*
5094         * Are we loading a blank journal or performing recovery after a
5095         * crash?  For recovery, we need to check in advance whether we
5096         * can get read-write access to the device.
5097         */
5098        if (ext4_has_feature_journal_needs_recovery(sb)) {
5099                if (sb_rdonly(sb)) {
5100                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
5101                                        "required on readonly filesystem");
5102                        if (really_read_only) {
5103                                ext4_msg(sb, KERN_ERR, "write access "
5104                                        "unavailable, cannot proceed "
5105                                        "(try mounting with noload)");
5106                                return -EROFS;
5107                        }
5108                        ext4_msg(sb, KERN_INFO, "write access will "
5109                               "be enabled during recovery");
5110                }
5111        }
5112
5113        if (journal_inum && journal_dev) {
5114                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
5115                       "and inode journals!");
5116                return -EINVAL;
5117        }
5118
5119        if (journal_inum) {
5120                if (!(journal = ext4_get_journal(sb, journal_inum)))
5121                        return -EINVAL;
5122        } else {
5123                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
5124                        return -EINVAL;
5125        }
5126
5127        if (!(journal->j_flags & JBD2_BARRIER))
5128                ext4_msg(sb, KERN_INFO, "barriers disabled");
5129
5130        if (!ext4_has_feature_journal_needs_recovery(sb))
5131                err = jbd2_journal_wipe(journal, !really_read_only);
5132        if (!err) {
5133                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
5134                if (save)
5135                        memcpy(save, ((char *) es) +
5136                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
5137                err = jbd2_journal_load(journal);
5138                if (save)
5139                        memcpy(((char *) es) + EXT4_S_ERR_START,
5140                               save, EXT4_S_ERR_LEN);
5141                kfree(save);
5142        }
5143
5144        if (err) {
5145                ext4_msg(sb, KERN_ERR, "error loading journal");
5146                jbd2_journal_destroy(journal);
5147                return err;
5148        }
5149
5150        EXT4_SB(sb)->s_journal = journal;
5151        ext4_clear_journal_err(sb, es);
5152
5153        if (!really_read_only && journal_devnum &&
5154            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5155                es->s_journal_dev = cpu_to_le32(journal_devnum);
5156
5157                /* Make sure we flush the recovery flag to disk. */
5158                ext4_commit_super(sb, 1);
5159        }
5160
5161        return 0;
5162}
5163
5164static int ext4_commit_super(struct super_block *sb, int sync)
5165{
5166        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5167        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
5168        int error = 0;
5169
5170        if (!sbh || block_device_ejected(sb))
5171                return error;
5172
5173        /*
5174         * The superblock bh should be mapped, but it might not be if the
5175         * device was hot-removed. Not much we can do but fail the I/O.
5176         */
5177        if (!buffer_mapped(sbh))
5178                return error;
5179
5180        /*
5181         * If the file system is mounted read-only, don't update the
5182         * superblock write time.  This avoids updating the superblock
5183         * write time when we are mounting the root file system
5184         * read/only but we need to replay the journal; at that point,
5185         * for people who are east of GMT and who make their clock
5186         * tick in localtime for Windows bug-for-bug compatibility,
5187         * the clock is set in the future, and this will cause e2fsck
5188         * to complain and force a full file system check.
5189         */
5190        if (!(sb->s_flags & SB_RDONLY))
5191                ext4_update_tstamp(es, s_wtime);
5192        if (sb->s_bdev->bd_part)
5193                es->s_kbytes_written =
5194                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
5195                            ((part_stat_read(sb->s_bdev->bd_part,
5196                                             sectors[STAT_WRITE]) -
5197                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
5198        else
5199                es->s_kbytes_written =
5200                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
5201        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
5202                ext4_free_blocks_count_set(es,
5203                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
5204                                &EXT4_SB(sb)->s_freeclusters_counter)));
5205        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
5206                es->s_free_inodes_count =
5207                        cpu_to_le32(percpu_counter_sum_positive(
5208                                &EXT4_SB(sb)->s_freeinodes_counter));
5209        BUFFER_TRACE(sbh, "marking dirty");
5210        ext4_superblock_csum_set(sb);
5211        if (sync)
5212                lock_buffer(sbh);
5213        if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
5214                /*
5215                 * Oh, dear.  A previous attempt to write the
5216                 * superblock failed.  This could happen because the
5217                 * USB device was yanked out.  Or it could happen to
5218                 * be a transient write error and maybe the block will
5219                 * be remapped.  Nothing we can do but to retry the
5220                 * write and hope for the best.
5221                 */
5222                ext4_msg(sb, KERN_ERR, "previous I/O error to "
5223                       "superblock detected");
5224                clear_buffer_write_io_error(sbh);
5225                set_buffer_uptodate(sbh);
5226        }
5227        mark_buffer_dirty(sbh);
5228        if (sync) {
5229                unlock_buffer(sbh);
5230                error = __sync_dirty_buffer(sbh,
5231                        REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
5232                if (buffer_write_io_error(sbh)) {
5233                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
5234                               "superblock");
5235                        clear_buffer_write_io_error(sbh);
5236                        set_buffer_uptodate(sbh);
5237                }
5238        }
5239        return error;
5240}
5241
5242/*
5243 * Have we just finished recovery?  If so, and if we are mounting (or
5244 * remounting) the filesystem readonly, then we will end up with a
5245 * consistent fs on disk.  Record that fact.
5246 */
5247static void ext4_mark_recovery_complete(struct super_block *sb,
5248                                        struct ext4_super_block *es)
5249{
5250        journal_t *journal = EXT4_SB(sb)->s_journal;
5251
5252        if (!ext4_has_feature_journal(sb)) {
5253                BUG_ON(journal != NULL);
5254                return;
5255        }
5256        jbd2_journal_lock_updates(journal);
5257        if (jbd2_journal_flush(journal) < 0)
5258                goto out;
5259
5260        if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
5261                ext4_clear_feature_journal_needs_recovery(sb);
5262                ext4_commit_super(sb, 1);
5263        }
5264
5265out:
5266        jbd2_journal_unlock_updates(journal);
5267}
5268
5269/*
5270 * If we are mounting (or read-write remounting) a filesystem whose journal
5271 * has recorded an error from a previous lifetime, move that error to the
5272 * main filesystem now.
5273 */
5274static void ext4_clear_journal_err(struct super_block *sb,
5275                                   struct ext4_super_block *es)
5276{
5277        journal_t *journal;
5278        int j_errno;
5279        const char *errstr;
5280
5281        BUG_ON(!ext4_has_feature_journal(sb));
5282
5283        journal = EXT4_SB(sb)->s_journal;
5284
5285        /*
5286         * Now check for any error status which may have been recorded in the
5287         * journal by a prior ext4_error() or ext4_abort()
5288         */
5289
5290        j_errno = jbd2_journal_errno(journal);
5291        if (j_errno) {
5292                char nbuf[16];
5293
5294                errstr = ext4_decode_error(sb, j_errno, nbuf);
5295                ext4_warning(sb, "Filesystem error recorded "
5296                             "from previous mount: %s", errstr);
5297                ext4_warning(sb, "Marking fs in need of filesystem check.");
5298
5299                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
5300                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
5301                ext4_commit_super(sb, 1);
5302
5303                jbd2_journal_clear_err(journal);
5304                jbd2_journal_update_sb_errno(journal);
5305        }
5306}
5307
5308/*
5309 * Force the running and committing transactions to commit,
5310 * and wait on the commit.
5311 */
5312int ext4_force_commit(struct super_block *sb)
5313{
5314        journal_t *journal;
5315
5316        if (sb_rdonly(sb))
5317                return 0;
5318
5319        journal = EXT4_SB(sb)->s_journal;
5320        return ext4_journal_force_commit(journal);
5321}
5322
5323static int ext4_sync_fs(struct super_block *sb, int wait)
5324{
5325        int ret = 0;
5326        tid_t target;
5327        bool needs_barrier = false;
5328        struct ext4_sb_info *sbi = EXT4_SB(sb);
5329
5330        if (unlikely(ext4_forced_shutdown(sbi)))
5331                return 0;
5332
5333        trace_ext4_sync_fs(sb, wait);
5334        flush_workqueue(sbi->rsv_conversion_wq);
5335        /*
5336         * Writeback quota in non-journalled quota case - journalled quota has
5337         * no dirty dquots
5338         */
5339        dquot_writeback_dquots(sb, -1);
5340        /*
5341         * Data writeback is possible w/o journal transaction, so barrier must
5342         * being sent at the end of the function. But we can skip it if
5343         * transaction_commit will do it for us.
5344         */
5345        if (sbi->s_journal) {
5346                target = jbd2_get_latest_transaction(sbi->s_journal);
5347                if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
5348                    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
5349                        needs_barrier = true;
5350
5351                if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
5352                        if (wait)
5353                                ret = jbd2_log_wait_commit(sbi->s_journal,
5354                                                           target);
5355                }
5356        } else if (wait && test_opt(sb, BARRIER))
5357                needs_barrier = true;
5358        if (needs_barrier) {
5359                int err;
5360                err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
5361                if (!ret)
5362                        ret = err;
5363        }
5364
5365        return ret;
5366}
5367
5368/*
5369 * LVM calls this function before a (read-only) snapshot is created.  This
5370 * gives us a chance to flush the journal completely and mark the fs clean.
5371 *
5372 * Note that only this function cannot bring a filesystem to be in a clean
5373 * state independently. It relies on upper layer to stop all data & metadata
5374 * modifications.
5375 */
5376static int ext4_freeze(struct super_block *sb)
5377{
5378        int error = 0;
5379        journal_t *journal;
5380
5381        if (sb_rdonly(sb))
5382                return 0;
5383
5384        journal = EXT4_SB(sb)->s_journal;
5385
5386        if (journal) {
5387                /* Now we set up the journal barrier. */
5388                jbd2_journal_lock_updates(journal);
5389
5390                /*
5391                 * Don't clear the needs_recovery flag if we failed to
5392                 * flush the journal.
5393                 */
5394                error = jbd2_journal_flush(journal);
5395                if (error < 0)
5396                        goto out;
5397
5398                /* Journal blocked and flushed, clear needs_recovery flag. */
5399                ext4_clear_feature_journal_needs_recovery(sb);
5400        }
5401
5402        error = ext4_commit_super(sb, 1);
5403out:
5404        if (journal)
5405                /* we rely on upper layer to stop further updates */
5406                jbd2_journal_unlock_updates(journal);
5407        return error;
5408}
5409
5410/*
5411 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
5412 * flag here, even though the filesystem is not technically dirty yet.
5413 */
5414static int ext4_unfreeze(struct super_block *sb)
5415{
5416        if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
5417                return 0;
5418
5419        if (EXT4_SB(sb)->s_journal) {
5420                /* Reset the needs_recovery flag before the fs is unlocked. */
5421                ext4_set_feature_journal_needs_recovery(sb);
5422        }
5423
5424        ext4_commit_super(sb, 1);
5425        return 0;
5426}
5427
5428/*
5429 * Structure to save mount options for ext4_remount's benefit
5430 */
5431struct ext4_mount_options {
5432        unsigned long s_mount_opt;
5433        unsigned long s_mount_opt2;
5434        kuid_t s_resuid;
5435        kgid_t s_resgid;
5436        unsigned long s_commit_interval;
5437        u32 s_min_batch_time, s_max_batch_time;
5438#ifdef CONFIG_QUOTA
5439        int s_jquota_fmt;
5440        char *s_qf_names[EXT4_MAXQUOTAS];
5441#endif
5442};
5443
5444static int ext4_remount(struct super_block *sb, int *flags, char *data)
5445{
5446        struct ext4_super_block *es;
5447        struct ext4_sb_info *sbi = EXT4_SB(sb);
5448        unsigned long old_sb_flags;
5449        struct ext4_mount_options old_opts;
5450        int enable_quota = 0;
5451        ext4_group_t g;
5452        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5453        int err = 0;
5454#ifdef CONFIG_QUOTA
5455        int i, j;
5456        char *to_free[EXT4_MAXQUOTAS];
5457#endif
5458        char *orig_data = kstrdup(data, GFP_KERNEL);
5459
5460        if (data && !orig_data)
5461                return -ENOMEM;
5462
5463        /* Store the original options */
5464        old_sb_flags = sb->s_flags;
5465        old_opts.s_mount_opt = sbi->s_mount_opt;
5466        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
5467        old_opts.s_resuid = sbi->s_resuid;
5468        old_opts.s_resgid = sbi->s_resgid;
5469        old_opts.s_commit_interval = sbi->s_commit_interval;
5470        old_opts.s_min_batch_time = sbi->s_min_batch_time;
5471        old_opts.s_max_batch_time = sbi->s_max_batch_time;
5472#ifdef CONFIG_QUOTA
5473        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
5474        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5475                if (sbi->s_qf_names[i]) {
5476                        char *qf_name = get_qf_name(sb, sbi, i);
5477
5478                        old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
5479                        if (!old_opts.s_qf_names[i]) {
5480                                for (j = 0; j < i; j++)
5481                                        kfree(old_opts.s_qf_names[j]);
5482                                kfree(orig_data);
5483                                return -ENOMEM;
5484                        }
5485                } else
5486                        old_opts.s_qf_names[i] = NULL;
5487#endif
5488        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
5489                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
5490
5491        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
5492                err = -EINVAL;
5493                goto restore_opts;
5494        }
5495
5496        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
5497            test_opt(sb, JOURNAL_CHECKSUM)) {
5498                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
5499                         "during remount not supported; ignoring");
5500                sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
5501        }
5502
5503        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
5504                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
5505                        ext4_msg(sb, KERN_ERR, "can't mount with "
5506                                 "both data=journal and delalloc");
5507                        err = -EINVAL;
5508                        goto restore_opts;
5509                }
5510                if (test_opt(sb, DIOREAD_NOLOCK)) {
5511                        ext4_msg(sb, KERN_ERR, "can't mount with "
5512                                 "both data=journal and dioread_nolock");
5513                        err = -EINVAL;
5514                        goto restore_opts;
5515                }
5516        } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
5517                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5518                        ext4_msg(sb, KERN_ERR, "can't mount with "
5519                                "journal_async_commit in data=ordered mode");
5520                        err = -EINVAL;
5521                        goto restore_opts;
5522                }
5523        }
5524
5525        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
5526                ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
5527                err = -EINVAL;
5528                goto restore_opts;
5529        }
5530
5531        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
5532                ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
5533
5534        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5535                (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5536
5537        es = sbi->s_es;
5538
5539        if (sbi->s_journal) {
5540                ext4_init_journal_params(sb, sbi->s_journal);
5541                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
5542        }
5543
5544        if (*flags & SB_LAZYTIME)
5545                sb->s_flags |= SB_LAZYTIME;
5546
5547        if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
5548                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
5549                        err = -EROFS;
5550                        goto restore_opts;
5551                }
5552
5553                if (*flags & SB_RDONLY) {
5554                        err = sync_filesystem(sb);
5555                        if (err < 0)
5556                                goto restore_opts;
5557                        err = dquot_suspend(sb, -1);
5558                        if (err < 0)
5559                                goto restore_opts;
5560
5561                        /*
5562                         * First of all, the unconditional stuff we have to do
5563                         * to disable replay of the journal when we next remount
5564                         */
5565                        sb->s_flags |= SB_RDONLY;
5566
5567                        /*
5568                         * OK, test if we are remounting a valid rw partition
5569                         * readonly, and if so set the rdonly flag and then
5570                         * mark the partition as valid again.
5571                         */
5572                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5573                            (sbi->s_mount_state & EXT4_VALID_FS))
5574                                es->s_state = cpu_to_le16(sbi->s_mount_state);
5575
5576                        if (sbi->s_journal)
5577                                ext4_mark_recovery_complete(sb, es);
5578                        if (sbi->s_mmp_tsk)
5579                                kthread_stop(sbi->s_mmp_tsk);
5580                } else {
5581                        /* Make sure we can mount this feature set readwrite */
5582                        if (ext4_has_feature_readonly(sb) ||
5583                            !ext4_feature_set_ok(sb, 0)) {
5584                                err = -EROFS;
5585                                goto restore_opts;
5586                        }
5587                        /*
5588                         * Make sure the group descriptor checksums
5589                         * are sane.  If they aren't, refuse to remount r/w.
5590                         */
5591                        for (g = 0; g < sbi->s_groups_count; g++) {
5592                                struct ext4_group_desc *gdp =
5593                                        ext4_get_group_desc(sb, g, NULL);
5594
5595                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5596                                        ext4_msg(sb, KERN_ERR,
5597               "ext4_remount: Checksum for group %u failed (%u!=%u)",
5598                g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
5599                                               le16_to_cpu(gdp->bg_checksum));
5600                                        err = -EFSBADCRC;
5601                                        goto restore_opts;
5602                                }
5603                        }
5604
5605                        /*
5606                         * If we have an unprocessed orphan list hanging
5607                         * around from a previously readonly bdev mount,
5608                         * require a full umount/remount for now.
5609                         */
5610                        if (es->s_last_orphan) {
5611                                ext4_msg(sb, KERN_WARNING, "Couldn't "
5612                                       "remount RDWR because of unprocessed "
5613                                       "orphan inode list.  Please "
5614                                       "umount/remount instead");
5615                                err = -EINVAL;
5616                                goto restore_opts;
5617                        }
5618
5619                        /*
5620                         * Mounting a RDONLY partition read-write, so reread
5621                         * and store the current valid flag.  (It may have
5622                         * been changed by e2fsck since we originally mounted
5623                         * the partition.)
5624                         */
5625                        if (sbi->s_journal)
5626                                ext4_clear_journal_err(sb, es);
5627                        sbi->s_mount_state = le16_to_cpu(es->s_state);
5628
5629                        err = ext4_setup_super(sb, es, 0);
5630                        if (err)
5631                                goto restore_opts;
5632
5633                        sb->s_flags &= ~SB_RDONLY;
5634                        if (ext4_has_feature_mmp(sb))
5635                                if (ext4_multi_mount_protect(sb,
5636                                                le64_to_cpu(es->s_mmp_block))) {
5637                                        err = -EROFS;
5638                                        goto restore_opts;
5639                                }
5640                        enable_quota = 1;
5641                }
5642        }
5643
5644        /*
5645         * Reinitialize lazy itable initialization thread based on
5646         * current settings
5647         */
5648        if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
5649                ext4_unregister_li_request(sb);
5650        else {
5651                ext4_group_t first_not_zeroed;
5652                first_not_zeroed = ext4_has_uninit_itable(sb);
5653                ext4_register_li_request(sb, first_not_zeroed);
5654        }
5655
5656        ext4_setup_system_zone(sb);
5657        if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
5658                err = ext4_commit_super(sb, 1);
5659                if (err)
5660                        goto restore_opts;
5661        }
5662
5663#ifdef CONFIG_QUOTA
5664        /* Release old quota file names */
5665        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5666                kfree(old_opts.s_qf_names[i]);
5667        if (enable_quota) {
5668                if (sb_any_quota_suspended(sb))
5669                        dquot_resume(sb, -1);
5670                else if (ext4_has_feature_quota(sb)) {
5671                        err = ext4_enable_quotas(sb);
5672                        if (err)
5673                                goto restore_opts;
5674                }
5675        }
5676#endif
5677
5678        *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
5679        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5680        kfree(orig_data);
5681        return 0;
5682
5683restore_opts:
5684        sb->s_flags = old_sb_flags;
5685        sbi->s_mount_opt = old_opts.s_mount_opt;
5686        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5687        sbi->s_resuid = old_opts.s_resuid;
5688        sbi->s_resgid = old_opts.s_resgid;
5689        sbi->s_commit_interval = old_opts.s_commit_interval;
5690        sbi->s_min_batch_time = old_opts.s_min_batch_time;
5691        sbi->s_max_batch_time = old_opts.s_max_batch_time;
5692#ifdef CONFIG_QUOTA
5693        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5694        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
5695                to_free[i] = get_qf_name(sb, sbi, i);
5696                rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
5697        }
5698        synchronize_rcu();
5699        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5700                kfree(to_free[i]);
5701#endif
5702        kfree(orig_data);
5703        return err;
5704}
5705
5706#ifdef CONFIG_QUOTA
5707static int ext4_statfs_project(struct super_block *sb,
5708                               kprojid_t projid, struct kstatfs *buf)
5709{
5710        struct kqid qid;
5711        struct dquot *dquot;
5712        u64 limit;
5713        u64 curblock;
5714
5715        qid = make_kqid_projid(projid);
5716        dquot = dqget(sb, qid);
5717        if (IS_ERR(dquot))
5718                return PTR_ERR(dquot);
5719        spin_lock(&dquot->dq_dqb_lock);
5720
5721        limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
5722                             dquot->dq_dqb.dqb_bhardlimit);
5723        limit >>= sb->s_blocksize_bits;
5724
5725        if (limit && buf->f_blocks > limit) {
5726                curblock = (dquot->dq_dqb.dqb_curspace +
5727                            dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
5728                buf->f_blocks = limit;
5729                buf->f_bfree = buf->f_bavail =
5730                        (buf->f_blocks > curblock) ?
5731                         (buf->f_blocks - curblock) : 0;
5732        }
5733
5734        limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
5735                             dquot->dq_dqb.dqb_ihardlimit);
5736        if (limit && buf->f_files > limit) {
5737                buf->f_files = limit;
5738                buf->f_ffree =
5739                        (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
5740                         (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
5741        }
5742
5743        spin_unlock(&dquot->dq_dqb_lock);
5744        dqput(dquot);
5745        return 0;
5746}
5747#endif
5748
5749static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5750{
5751        struct super_block *sb = dentry->d_sb;
5752        struct ext4_sb_info *sbi = EXT4_SB(sb);
5753        struct ext4_super_block *es = sbi->s_es;
5754        ext4_fsblk_t overhead = 0, resv_blocks;
5755        u64 fsid;
5756        s64 bfree;
5757        resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5758
5759        if (!test_opt(sb, MINIX_DF))
5760                overhead = sbi->s_overhead;
5761
5762        buf->f_type = EXT4_SUPER_MAGIC;
5763        buf->f_bsize = sb->s_blocksize;
5764        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5765        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5766                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5767        /* prevent underflow in case that few free space is available */
5768        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5769        buf->f_bavail = buf->f_bfree -
5770                        (ext4_r_blocks_count(es) + resv_blocks);
5771        if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5772                buf->f_bavail = 0;
5773        buf->f_files = le32_to_cpu(es->s_inodes_count);
5774        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5775        buf->f_namelen = EXT4_NAME_LEN;
5776        fsid = le64_to_cpup((void *)es->s_uuid) ^
5777               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5778        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5779        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5780
5781#ifdef CONFIG_QUOTA
5782        if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
5783            sb_has_quota_limits_enabled(sb, PRJQUOTA))
5784                ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
5785#endif
5786        return 0;
5787}
5788
5789
5790#ifdef CONFIG_QUOTA
5791
5792/*
5793 * Helper functions so that transaction is started before we acquire dqio_sem
5794 * to keep correct lock ordering of transaction > dqio_sem
5795 */
5796static inline struct inode *dquot_to_inode(struct dquot *dquot)
5797{
5798        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5799}
5800
5801static int ext4_write_dquot(struct dquot *dquot)
5802{
5803        int ret, err;
5804        handle_t *handle;
5805        struct inode *inode;
5806
5807        inode = dquot_to_inode(dquot);
5808        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5809                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5810        if (IS_ERR(handle))
5811                return PTR_ERR(handle);
5812        ret = dquot_commit(dquot);
5813        err = ext4_journal_stop(handle);
5814        if (!ret)
5815                ret = err;
5816        return ret;
5817}
5818
5819static int ext4_acquire_dquot(struct dquot *dquot)
5820{
5821        int ret, err;
5822        handle_t *handle;
5823
5824        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5825                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5826        if (IS_ERR(handle))
5827                return PTR_ERR(handle);
5828        ret = dquot_acquire(dquot);
5829        err = ext4_journal_stop(handle);
5830        if (!ret)
5831                ret = err;
5832        return ret;
5833}
5834
5835static int ext4_release_dquot(struct dquot *dquot)
5836{
5837        int ret, err;
5838        handle_t *handle;
5839
5840        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5841                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5842        if (IS_ERR(handle)) {
5843                /* Release dquot anyway to avoid endless cycle in dqput() */
5844                dquot_release(dquot);
5845                return PTR_ERR(handle);
5846        }
5847        ret = dquot_release(dquot);
5848        err = ext4_journal_stop(handle);
5849        if (!ret)
5850                ret = err;
5851        return ret;
5852}
5853
5854static int ext4_mark_dquot_dirty(struct dquot *dquot)
5855{
5856        struct super_block *sb = dquot->dq_sb;
5857        struct ext4_sb_info *sbi = EXT4_SB(sb);
5858
5859        /* Are we journaling quotas? */
5860        if (ext4_has_feature_quota(sb) ||
5861            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5862                dquot_mark_dquot_dirty(dquot);
5863                return ext4_write_dquot(dquot);
5864        } else {
5865                return dquot_mark_dquot_dirty(dquot);
5866        }
5867}
5868
5869static int ext4_write_info(struct super_block *sb, int type)
5870{
5871        int ret, err;
5872        handle_t *handle;
5873
5874        /* Data block + inode block */
5875        handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
5876        if (IS_ERR(handle))
5877                return PTR_ERR(handle);
5878        ret = dquot_commit_info(sb, type);
5879        err = ext4_journal_stop(handle);
5880        if (!ret)
5881                ret = err;
5882        return ret;
5883}
5884
5885/*
5886 * Turn on quotas during mount time - we need to find
5887 * the quota file and such...
5888 */
5889static int ext4_quota_on_mount(struct super_block *sb, int type)
5890{
5891        return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
5892                                        EXT4_SB(sb)->s_jquota_fmt, type);
5893}
5894
5895static void lockdep_set_quota_inode(struct inode *inode, int subclass)
5896{
5897        struct ext4_inode_info *ei = EXT4_I(inode);
5898
5899        /* The first argument of lockdep_set_subclass has to be
5900         * *exactly* the same as the argument to init_rwsem() --- in
5901         * this case, in init_once() --- or lockdep gets unhappy
5902         * because the name of the lock is set using the
5903         * stringification of the argument to init_rwsem().
5904         */
5905        (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
5906        lockdep_set_subclass(&ei->i_data_sem, subclass);
5907}
5908
5909/*
5910 * Standard function to be called on quota_on
5911 */
5912static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5913                         const struct path *path)
5914{
5915        int err;
5916
5917        if (!test_opt(sb, QUOTA))
5918                return -EINVAL;
5919
5920        /* Quotafile not on the same filesystem? */
5921        if (path->dentry->d_sb != sb)
5922                return -EXDEV;
5923        /* Journaling quota? */
5924        if (EXT4_SB(sb)->s_qf_names[type]) {
5925                /* Quotafile not in fs root? */
5926                if (path->dentry->d_parent != sb->s_root)
5927                        ext4_msg(sb, KERN_WARNING,
5928                                "Quota file not on filesystem root. "
5929                                "Journaled quota will not work");
5930                sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
5931        } else {
5932                /*
5933                 * Clear the flag just in case mount options changed since
5934                 * last time.
5935                 */
5936                sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
5937        }
5938
5939        /*
5940         * When we journal data on quota file, we have to flush journal to see
5941         * all updates to the file when we bypass pagecache...
5942         */
5943        if (EXT4_SB(sb)->s_journal &&
5944            ext4_should_journal_data(d_inode(path->dentry))) {
5945                /*
5946                 * We don't need to lock updates but journal_flush() could
5947                 * otherwise be livelocked...
5948                 */
5949                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5950                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5951                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5952                if (err)
5953                        return err;
5954        }
5955
5956        lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
5957        err = dquot_quota_on(sb, type, format_id, path);
5958        if (err) {
5959                lockdep_set_quota_inode(path->dentry->d_inode,
5960                                             I_DATA_SEM_NORMAL);
5961        } else {
5962                struct inode *inode = d_inode(path->dentry);
5963                handle_t *handle;
5964
5965                /*
5966                 * Set inode flags to prevent userspace from messing with quota
5967                 * files. If this fails, we return success anyway since quotas
5968                 * are already enabled and this is not a hard failure.
5969                 */
5970                inode_lock(inode);
5971                handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5972                if (IS_ERR(handle))
5973                        goto unlock_inode;
5974                EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
5975                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
5976                                S_NOATIME | S_IMMUTABLE);
5977                err = ext4_mark_inode_dirty(handle, inode);
5978                ext4_journal_stop(handle);
5979        unlock_inode:
5980                inode_unlock(inode);
5981        }
5982        return err;
5983}
5984
5985static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5986                             unsigned int flags)
5987{
5988        int err;
5989        struct inode *qf_inode;
5990        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5991                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5992                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5993                le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5994        };
5995
5996        BUG_ON(!ext4_has_feature_quota(sb));
5997
5998        if (!qf_inums[type])
5999                return -EPERM;
6000
6001        qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
6002        if (IS_ERR(qf_inode)) {
6003                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
6004                return PTR_ERR(qf_inode);
6005        }
6006
6007        /* Don't account quota for quota files to avoid recursion */
6008        qf_inode->i_flags |= S_NOQUOTA;
6009        lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
6010        err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
6011        if (err)
6012                lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
6013        iput(qf_inode);
6014
6015        return err;
6016}
6017
6018/* Enable usage tracking for all quota types. */
6019static int ext4_enable_quotas(struct super_block *sb)
6020{
6021        int type, err = 0;
6022        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
6023                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
6024                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6025                le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
6026        };
6027        bool quota_mopt[EXT4_MAXQUOTAS] = {
6028                test_opt(sb, USRQUOTA),
6029                test_opt(sb, GRPQUOTA),
6030                test_opt(sb, PRJQUOTA),
6031        };
6032
6033        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
6034        for (type = 0; type < EXT4_MAXQUOTAS; type++) {
6035                if (qf_inums[type]) {
6036                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
6037                                DQUOT_USAGE_ENABLED |
6038                                (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
6039                        if (err) {
6040                                ext4_warning(sb,
6041                                        "Failed to enable quota tracking "
6042                                        "(type=%d, err=%d). Please run "
6043                                        "e2fsck to fix.", type, err);
6044                                for (type--; type >= 0; type--)
6045                                        dquot_quota_off(sb, type);
6046
6047                                return err;
6048                        }
6049                }
6050        }
6051        return 0;
6052}
6053
6054static int ext4_quota_off(struct super_block *sb, int type)
6055{
6056        struct inode *inode = sb_dqopt(sb)->files[type];
6057        handle_t *handle;
6058        int err;
6059
6060        /* Force all delayed allocation blocks to be allocated.
6061         * Caller already holds s_umount sem */
6062        if (test_opt(sb, DELALLOC))
6063                sync_filesystem(sb);
6064
6065        if (!inode || !igrab(inode))
6066                goto out;
6067
6068        err = dquot_quota_off(sb, type);
6069        if (err || ext4_has_feature_quota(sb))
6070                goto out_put;
6071
6072        inode_lock(inode);
6073        /*
6074         * Update modification times of quota files when userspace can
6075         * start looking at them. If we fail, we return success anyway since
6076         * this is not a hard failure and quotas are already disabled.
6077         */
6078        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6079        if (IS_ERR(handle)) {
6080                err = PTR_ERR(handle);
6081                goto out_unlock;
6082        }
6083        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
6084        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
6085        inode->i_mtime = inode->i_ctime = current_time(inode);
6086        err = ext4_mark_inode_dirty(handle, inode);
6087        ext4_journal_stop(handle);
6088out_unlock:
6089        inode_unlock(inode);
6090out_put:
6091        lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
6092        iput(inode);
6093        return err;
6094out:
6095        return dquot_quota_off(sb, type);
6096}
6097
6098/* Read data from quotafile - avoid pagecache and such because we cannot afford
6099 * acquiring the locks... As quota files are never truncated and quota code
6100 * itself serializes the operations (and no one else should touch the files)
6101 * we don't have to be afraid of races */
6102static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
6103                               size_t len, loff_t off)
6104{
6105        struct inode *inode = sb_dqopt(sb)->files[type];
6106        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6107        int offset = off & (sb->s_blocksize - 1);
6108        int tocopy;
6109        size_t toread;
6110        struct buffer_head *bh;
6111        loff_t i_size = i_size_read(inode);
6112
6113        if (off > i_size)
6114                return 0;
6115        if (off+len > i_size)
6116                len = i_size-off;
6117        toread = len;
6118        while (toread > 0) {
6119                tocopy = sb->s_blocksize - offset < toread ?
6120                                sb->s_blocksize - offset : toread;
6121                bh = ext4_bread(NULL, inode, blk, 0);
6122                if (IS_ERR(bh))
6123                        return PTR_ERR(bh);
6124                if (!bh)        /* A hole? */
6125                        memset(data, 0, tocopy);
6126                else
6127                        memcpy(data, bh->b_data+offset, tocopy);
6128                brelse(bh);
6129                offset = 0;
6130                toread -= tocopy;
6131                data += tocopy;
6132                blk++;
6133        }
6134        return len;
6135}
6136
6137/* Write to quotafile (we know the transaction is already started and has
6138 * enough credits) */
6139static ssize_t ext4_quota_write(struct super_block *sb, int type,
6140                                const char *data, size_t len, loff_t off)
6141{
6142        struct inode *inode = sb_dqopt(sb)->files[type];
6143        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6144        int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
6145        int retries = 0;
6146        struct buffer_head *bh;
6147        handle_t *handle = journal_current_handle();
6148
6149        if (EXT4_SB(sb)->s_journal && !handle) {
6150                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6151                        " cancelled because transaction is not started",
6152                        (unsigned long long)off, (unsigned long long)len);
6153                return -EIO;
6154        }
6155        /*
6156         * Since we account only one data block in transaction credits,
6157         * then it is impossible to cross a block boundary.
6158         */
6159        if (sb->s_blocksize - offset < len) {
6160                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6161                        " cancelled because not block aligned",
6162                        (unsigned long long)off, (unsigned long long)len);
6163                return -EIO;
6164        }
6165
6166        do {
6167                bh = ext4_bread(handle, inode, blk,
6168                                EXT4_GET_BLOCKS_CREATE |
6169                                EXT4_GET_BLOCKS_METADATA_NOFAIL);
6170        } while (PTR_ERR(bh) == -ENOSPC &&
6171                 ext4_should_retry_alloc(inode->i_sb, &retries));
6172        if (IS_ERR(bh))
6173                return PTR_ERR(bh);
6174        if (!bh)
6175                goto out;
6176        BUFFER_TRACE(bh, "get write access");
6177        err = ext4_journal_get_write_access(handle, bh);
6178        if (err) {
6179                brelse(bh);
6180                return err;
6181        }
6182        lock_buffer(bh);
6183        memcpy(bh->b_data+offset, data, len);
6184        flush_dcache_page(bh->b_page);
6185        unlock_buffer(bh);
6186        err = ext4_handle_dirty_metadata(handle, NULL, bh);
6187        brelse(bh);
6188out:
6189        if (inode->i_size < off + len) {
6190                i_size_write(inode, off + len);
6191                EXT4_I(inode)->i_disksize = inode->i_size;
6192                err2 = ext4_mark_inode_dirty(handle, inode);
6193                if (unlikely(err2 && !err))
6194                        err = err2;
6195        }
6196        return err ? err : len;
6197}
6198#endif
6199
6200static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
6201                       const char *dev_name, void *data)
6202{
6203        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
6204}
6205
6206#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
6207static inline void register_as_ext2(void)
6208{
6209        int err = register_filesystem(&ext2_fs_type);
6210        if (err)
6211                printk(KERN_WARNING
6212                       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
6213}
6214
6215static inline void unregister_as_ext2(void)
6216{
6217        unregister_filesystem(&ext2_fs_type);
6218}
6219
/*
 * Check whether the feature set of @sb allows it to be handled as ext2.
 *
 * Unknown incompat features always rule it out; unknown ro_compat features
 * rule it out only when the superblock is not read-only.
 *
 * Returns 1 if the feature set is acceptable, 0 otherwise.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;
	/* The ro_compat check is skipped for a read-only superblock. */
	if (!sb_rdonly(sb) && ext4_has_unknown_ext2_ro_compat_features(sb))
		return 0;
	return 1;
}
6230#else
/* Stubs for the configuration in which ext4 does not stand in for ext2. */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

/* No filesystem is ever acceptable as ext2 in this configuration. */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	return 0;
}
6234#endif
6235
6236static inline void register_as_ext3(void)
6237{
6238        int err = register_filesystem(&ext3_fs_type);
6239        if (err)
6240                printk(KERN_WARNING
6241                       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
6242}
6243
6244static inline void unregister_as_ext3(void)
6245{
6246        unregister_filesystem(&ext3_fs_type);
6247}
6248
/*
 * Check whether the feature set of @sb allows it to be handled as ext3.
 *
 * The filesystem must have a journal and no unknown incompat features;
 * unknown ro_compat features rule it out only when the superblock is not
 * read-only.
 *
 * Returns 1 if the feature set is acceptable, 0 otherwise.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb) ||
	    !ext4_has_feature_journal(sb))
		return 0;
	/* The ro_compat check is skipped for a read-only superblock. */
	if (!sb_rdonly(sb) && ext4_has_unknown_ext3_ro_compat_features(sb))
		return 0;
	return 1;
}
6261
6262static struct file_system_type ext4_fs_type = {
6263        .owner          = THIS_MODULE,
6264        .name           = "ext4",
6265        .mount          = ext4_mount,
6266        .kill_sb        = kill_block_super,
6267        .fs_flags       = FS_REQUIRES_DEV,
6268};
6269MODULE_ALIAS_FS("ext4");
6270
6271/* Shared across all ext4 file systems */
6272wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
6273
6274static int __init ext4_init_fs(void)
6275{
6276        int i, err;
6277
6278        ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
6279        ext4_li_info = NULL;
6280        mutex_init(&ext4_li_mtx);
6281
6282        /* Build-time check for flags consistency */
6283        ext4_check_flag_values();
6284
6285        for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
6286                init_waitqueue_head(&ext4__ioend_wq[i]);
6287
6288        err = ext4_init_es();
6289        if (err)
6290                return err;
6291
6292        err = ext4_init_pending();
6293        if (err)
6294                goto out7;
6295
6296        err = ext4_init_post_read_processing();
6297        if (err)
6298                goto out6;
6299
6300        err = ext4_init_pageio();
6301        if (err)
6302                goto out5;
6303
6304        err = ext4_init_system_zone();
6305        if (err)
6306                goto out4;
6307
6308        err = ext4_init_sysfs();
6309        if (err)
6310                goto out3;
6311
6312        err = ext4_init_mballoc();
6313        if (err)
6314                goto out2;
6315        err = init_inodecache();
6316        if (err)
6317                goto out1;
6318        register_as_ext3();
6319        register_as_ext2();
6320        err = register_filesystem(&ext4_fs_type);
6321        if (err)
6322                goto out;
6323
6324        return 0;
6325out:
6326        unregister_as_ext2();
6327        unregister_as_ext3();
6328        destroy_inodecache();
6329out1:
6330        ext4_exit_mballoc();
6331out2:
6332        ext4_exit_sysfs();
6333out3:
6334        ext4_exit_system_zone();
6335out4:
6336        ext4_exit_pageio();
6337out5:
6338        ext4_exit_post_read_processing();
6339out6:
6340        ext4_exit_pending();
6341out7:
6342        ext4_exit_es();
6343
6344        return err;
6345}
6346
6347static void __exit ext4_exit_fs(void)
6348{
6349        ext4_destroy_lazyinit_thread();
6350        unregister_as_ext2();
6351        unregister_as_ext3();
6352        unregister_filesystem(&ext4_fs_type);
6353        destroy_inodecache();
6354        ext4_exit_mballoc();
6355        ext4_exit_sysfs();
6356        ext4_exit_system_zone();
6357        ext4_exit_pageio();
6358        ext4_exit_post_read_processing();
6359        ext4_exit_es();
6360        ext4_exit_pending();
6361}
6362
6363MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
6364MODULE_DESCRIPTION("Fourth Extended Filesystem");
6365MODULE_LICENSE("GPL");
6366MODULE_SOFTDEP("pre: crc32c");
6367module_init(ext4_init_fs)
6368module_exit(ext4_exit_fs)
6369