linux/fs/ext4/super.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  linux/fs/ext4/super.c
   4 *
   5 * Copyright (C) 1992, 1993, 1994, 1995
   6 * Remy Card (card@masi.ibp.fr)
   7 * Laboratoire MASI - Institut Blaise Pascal
   8 * Universite Pierre et Marie Curie (Paris VI)
   9 *
  10 *  from
  11 *
  12 *  linux/fs/minix/inode.c
  13 *
  14 *  Copyright (C) 1991, 1992  Linus Torvalds
  15 *
  16 *  Big-endian to little-endian byte-swapping/bitmaps by
  17 *        David S. Miller (davem@caip.rutgers.edu), 1995
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/string.h>
  22#include <linux/fs.h>
  23#include <linux/time.h>
  24#include <linux/vmalloc.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/backing-dev.h>
  29#include <linux/parser.h>
  30#include <linux/buffer_head.h>
  31#include <linux/exportfs.h>
  32#include <linux/vfs.h>
  33#include <linux/random.h>
  34#include <linux/mount.h>
  35#include <linux/namei.h>
  36#include <linux/quotaops.h>
  37#include <linux/seq_file.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/dax.h>
  42#include <linux/cleancache.h>
  43#include <linux/uaccess.h>
  44#include <linux/iversion.h>
  45#include <linux/unicode.h>
  46
  47#include <linux/kthread.h>
  48#include <linux/freezer.h>
  49
  50#include "ext4.h"
  51#include "ext4_extents.h"       /* Needed for trace points definition */
  52#include "ext4_jbd2.h"
  53#include "xattr.h"
  54#include "acl.h"
  55#include "mballoc.h"
  56#include "fsmap.h"
  57
  58#define CREATE_TRACE_POINTS
  59#include <trace/events/ext4.h>
  60
  61static struct ext4_lazy_init *ext4_li_info;
  62static struct mutex ext4_li_mtx;
  63static struct ratelimit_state ext4_mount_msg_ratelimit;
  64
  65static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  66                             unsigned long journal_devnum);
  67static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  68static int ext4_commit_super(struct super_block *sb, int sync);
  69static void ext4_mark_recovery_complete(struct super_block *sb,
  70                                        struct ext4_super_block *es);
  71static void ext4_clear_journal_err(struct super_block *sb,
  72                                   struct ext4_super_block *es);
  73static int ext4_sync_fs(struct super_block *sb, int wait);
  74static int ext4_remount(struct super_block *sb, int *flags, char *data);
  75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  76static int ext4_unfreeze(struct super_block *sb);
  77static int ext4_freeze(struct super_block *sb);
  78static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  79                       const char *dev_name, void *data);
  80static inline int ext2_feature_set_ok(struct super_block *sb);
  81static inline int ext3_feature_set_ok(struct super_block *sb);
  82static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  83static void ext4_destroy_lazyinit_thread(void);
  84static void ext4_unregister_li_request(struct super_block *sb);
  85static void ext4_clear_request_list(void);
  86static struct inode *ext4_get_journal_inode(struct super_block *sb,
  87                                            unsigned int journal_inum);
  88
  89/*
  90 * Lock ordering
  91 *
  92 * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
  93 * i_mmap_rwsem (inode->i_mmap_rwsem)!
  94 *
  95 * page fault path:
  96 * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
  97 *   page lock -> i_data_sem (rw)
  98 *
  99 * buffered write path:
 100 * sb_start_write -> i_mutex -> mmap_sem
 101 * sb_start_write -> i_mutex -> transaction start -> page lock ->
 102 *   i_data_sem (rw)
 103 *
 104 * truncate:
 105 * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
 106 * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
 107 *   i_data_sem (rw)
 108 *
 109 * direct IO:
 110 * sb_start_write -> i_mutex -> mmap_sem
 111 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 112 *
 113 * writepages:
 114 * transaction start -> page lock(s) -> i_data_sem (rw)
 115 */
 116
 117#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 118static struct file_system_type ext2_fs_type = {
 119        .owner          = THIS_MODULE,
 120        .name           = "ext2",
 121        .mount          = ext4_mount,
 122        .kill_sb        = kill_block_super,
 123        .fs_flags       = FS_REQUIRES_DEV,
 124};
 125MODULE_ALIAS_FS("ext2");
 126MODULE_ALIAS("ext2");
 127#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 128#else
 129#define IS_EXT2_SB(sb) (0)
 130#endif
 131
 132
 133static struct file_system_type ext3_fs_type = {
 134        .owner          = THIS_MODULE,
 135        .name           = "ext3",
 136        .mount          = ext4_mount,
 137        .kill_sb        = kill_block_super,
 138        .fs_flags       = FS_REQUIRES_DEV,
 139};
 140MODULE_ALIAS_FS("ext3");
 141MODULE_ALIAS("ext3");
 142#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 143
 144/*
 145 * This works like sb_bread() except it uses ERR_PTR for error
 146 * returns.  Currently with sb_bread it's impossible to distinguish
 147 * between ENOMEM and EIO situations (since both result in a NULL
 148 * return.
 149 */
 150struct buffer_head *
 151ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
 152{
 153        struct buffer_head *bh = sb_getblk(sb, block);
 154
 155        if (bh == NULL)
 156                return ERR_PTR(-ENOMEM);
 157        if (ext4_buffer_uptodate(bh))
 158                return bh;
 159        ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
 160        wait_on_buffer(bh);
 161        if (buffer_uptodate(bh))
 162                return bh;
 163        put_bh(bh);
 164        return ERR_PTR(-EIO);
 165}
 166
 167static int ext4_verify_csum_type(struct super_block *sb,
 168                                 struct ext4_super_block *es)
 169{
 170        if (!ext4_has_feature_metadata_csum(sb))
 171                return 1;
 172
 173        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 174}
 175
 176static __le32 ext4_superblock_csum(struct super_block *sb,
 177                                   struct ext4_super_block *es)
 178{
 179        struct ext4_sb_info *sbi = EXT4_SB(sb);
 180        int offset = offsetof(struct ext4_super_block, s_checksum);
 181        __u32 csum;
 182
 183        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 184
 185        return cpu_to_le32(csum);
 186}
 187
 188static int ext4_superblock_csum_verify(struct super_block *sb,
 189                                       struct ext4_super_block *es)
 190{
 191        if (!ext4_has_metadata_csum(sb))
 192                return 1;
 193
 194        return es->s_checksum == ext4_superblock_csum(sb, es);
 195}
 196
 197void ext4_superblock_csum_set(struct super_block *sb)
 198{
 199        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 200
 201        if (!ext4_has_metadata_csum(sb))
 202                return;
 203
 204        es->s_checksum = ext4_superblock_csum(sb, es);
 205}
 206
 207ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 208                               struct ext4_group_desc *bg)
 209{
 210        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 211                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 212                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 213}
 214
 215ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 216                               struct ext4_group_desc *bg)
 217{
 218        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 219                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 220                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 221}
 222
 223ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 224                              struct ext4_group_desc *bg)
 225{
 226        return le32_to_cpu(bg->bg_inode_table_lo) |
 227                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 228                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 229}
 230
 231__u32 ext4_free_group_clusters(struct super_block *sb,
 232                               struct ext4_group_desc *bg)
 233{
 234        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 235                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 236                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 237}
 238
 239__u32 ext4_free_inodes_count(struct super_block *sb,
 240                              struct ext4_group_desc *bg)
 241{
 242        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 243                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 244                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 245}
 246
 247__u32 ext4_used_dirs_count(struct super_block *sb,
 248                              struct ext4_group_desc *bg)
 249{
 250        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 251                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 252                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 253}
 254
 255__u32 ext4_itable_unused_count(struct super_block *sb,
 256                              struct ext4_group_desc *bg)
 257{
 258        return le16_to_cpu(bg->bg_itable_unused_lo) |
 259                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 260                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 261}
 262
 263void ext4_block_bitmap_set(struct super_block *sb,
 264                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 265{
 266        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 267        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 268                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 269}
 270
 271void ext4_inode_bitmap_set(struct super_block *sb,
 272                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 273{
 274        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 275        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 276                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 277}
 278
 279void ext4_inode_table_set(struct super_block *sb,
 280                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 281{
 282        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 283        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 284                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 285}
 286
 287void ext4_free_group_clusters_set(struct super_block *sb,
 288                                  struct ext4_group_desc *bg, __u32 count)
 289{
 290        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 291        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 292                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 293}
 294
 295void ext4_free_inodes_set(struct super_block *sb,
 296                          struct ext4_group_desc *bg, __u32 count)
 297{
 298        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 299        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 300                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 301}
 302
 303void ext4_used_dirs_set(struct super_block *sb,
 304                          struct ext4_group_desc *bg, __u32 count)
 305{
 306        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 307        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 308                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 309}
 310
 311void ext4_itable_unused_set(struct super_block *sb,
 312                          struct ext4_group_desc *bg, __u32 count)
 313{
 314        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 315        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 316                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 317}
 318
 319static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
 320{
 321        time64_t now = ktime_get_real_seconds();
 322
 323        now = clamp_val(now, 0, (1ull << 40) - 1);
 324
 325        *lo = cpu_to_le32(lower_32_bits(now));
 326        *hi = upper_32_bits(now);
 327}
 328
 329static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
 330{
 331        return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
 332}
 333#define ext4_update_tstamp(es, tstamp) \
 334        __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 335#define ext4_get_tstamp(es, tstamp) \
 336        __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 337
 338static void __save_error_info(struct super_block *sb, const char *func,
 339                            unsigned int line)
 340{
 341        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 342
 343        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 344        if (bdev_read_only(sb->s_bdev))
 345                return;
 346        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 347        ext4_update_tstamp(es, s_last_error_time);
 348        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 349        es->s_last_error_line = cpu_to_le32(line);
 350        if (es->s_last_error_errcode == 0)
 351                es->s_last_error_errcode = EXT4_ERR_EFSCORRUPTED;
 352        if (!es->s_first_error_time) {
 353                es->s_first_error_time = es->s_last_error_time;
 354                es->s_first_error_time_hi = es->s_last_error_time_hi;
 355                strncpy(es->s_first_error_func, func,
 356                        sizeof(es->s_first_error_func));
 357                es->s_first_error_line = cpu_to_le32(line);
 358                es->s_first_error_ino = es->s_last_error_ino;
 359                es->s_first_error_block = es->s_last_error_block;
 360                es->s_first_error_errcode = es->s_last_error_errcode;
 361        }
 362        /*
 363         * Start the daily error reporting function if it hasn't been
 364         * started already
 365         */
 366        if (!es->s_error_count)
 367                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 368        le32_add_cpu(&es->s_error_count, 1);
 369}
 370
 371static void save_error_info(struct super_block *sb, const char *func,
 372                            unsigned int line)
 373{
 374        __save_error_info(sb, func, line);
 375        ext4_commit_super(sb, 1);
 376}
 377
 378/*
 379 * The del_gendisk() function uninitializes the disk-specific data
 380 * structures, including the bdi structure, without telling anyone
 381 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 382 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 383 * This is a kludge to prevent these oops until we can put in a proper
 384 * hook in del_gendisk() to inform the VFS and file system layers.
 385 */
 386static int block_device_ejected(struct super_block *sb)
 387{
 388        struct inode *bd_inode = sb->s_bdev->bd_inode;
 389        struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 390
 391        return bdi->dev == NULL;
 392}
 393
 394static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 395{
 396        struct super_block              *sb = journal->j_private;
 397        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 398        int                             error = is_journal_aborted(journal);
 399        struct ext4_journal_cb_entry    *jce;
 400
 401        BUG_ON(txn->t_state == T_FINISHED);
 402
 403        ext4_process_freed_data(sb, txn->t_tid);
 404
 405        spin_lock(&sbi->s_md_lock);
 406        while (!list_empty(&txn->t_private_list)) {
 407                jce = list_entry(txn->t_private_list.next,
 408                                 struct ext4_journal_cb_entry, jce_list);
 409                list_del_init(&jce->jce_list);
 410                spin_unlock(&sbi->s_md_lock);
 411                jce->jce_func(sb, jce, error);
 412                spin_lock(&sbi->s_md_lock);
 413        }
 414        spin_unlock(&sbi->s_md_lock);
 415}
 416
 417static bool system_going_down(void)
 418{
 419        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 420                || system_state == SYSTEM_RESTART;
 421}
 422
 423/* Deal with the reporting of failure conditions on a filesystem such as
 424 * inconsistencies detected or read IO failures.
 425 *
 426 * On ext2, we can store the error state of the filesystem in the
 427 * superblock.  That is not possible on ext4, because we may have other
 428 * write ordering constraints on the superblock which prevent us from
 429 * writing it out straight away; and given that the journal is about to
 430 * be aborted, we can't rely on the current, or future, transactions to
 431 * write out the superblock safely.
 432 *
 433 * We'll just use the jbd2_journal_abort() error code to record an error in
 434 * the journal instead.  On recovery, the journal will complain about
 435 * that error until we've noted it down and cleared it.
 436 */
 437
 438static void ext4_handle_error(struct super_block *sb)
 439{
 440        if (test_opt(sb, WARN_ON_ERROR))
 441                WARN_ON_ONCE(1);
 442
 443        if (sb_rdonly(sb))
 444                return;
 445
 446        if (!test_opt(sb, ERRORS_CONT)) {
 447                journal_t *journal = EXT4_SB(sb)->s_journal;
 448
 449                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 450                if (journal)
 451                        jbd2_journal_abort(journal, -EIO);
 452        }
 453        /*
 454         * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 455         * could panic during 'reboot -f' as the underlying device got already
 456         * disabled.
 457         */
 458        if (test_opt(sb, ERRORS_RO) || system_going_down()) {
 459                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 460                /*
 461                 * Make sure updated value of ->s_mount_flags will be visible
 462                 * before ->s_flags update
 463                 */
 464                smp_wmb();
 465                sb->s_flags |= SB_RDONLY;
 466        } else if (test_opt(sb, ERRORS_PANIC)) {
 467                if (EXT4_SB(sb)->s_journal &&
 468                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 469                        return;
 470                panic("EXT4-fs (device %s): panic forced after error\n",
 471                        sb->s_id);
 472        }
 473}
 474
 475#define ext4_error_ratelimit(sb)                                        \
 476                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 477                             "EXT4-fs error")
 478
 479void __ext4_error(struct super_block *sb, const char *function,
 480                  unsigned int line, const char *fmt, ...)
 481{
 482        struct va_format vaf;
 483        va_list args;
 484
 485        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 486                return;
 487
 488        trace_ext4_error(sb, function, line);
 489        if (ext4_error_ratelimit(sb)) {
 490                va_start(args, fmt);
 491                vaf.fmt = fmt;
 492                vaf.va = &args;
 493                printk(KERN_CRIT
 494                       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 495                       sb->s_id, function, line, current->comm, &vaf);
 496                va_end(args);
 497        }
 498        save_error_info(sb, function, line);
 499        ext4_handle_error(sb);
 500}
 501
 502void __ext4_error_inode(struct inode *inode, const char *function,
 503                        unsigned int line, ext4_fsblk_t block,
 504                        const char *fmt, ...)
 505{
 506        va_list args;
 507        struct va_format vaf;
 508        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 509
 510        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 511                return;
 512
 513        trace_ext4_error(inode->i_sb, function, line);
 514        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 515        es->s_last_error_block = cpu_to_le64(block);
 516        if (ext4_error_ratelimit(inode->i_sb)) {
 517                va_start(args, fmt);
 518                vaf.fmt = fmt;
 519                vaf.va = &args;
 520                if (block)
 521                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 522                               "inode #%lu: block %llu: comm %s: %pV\n",
 523                               inode->i_sb->s_id, function, line, inode->i_ino,
 524                               block, current->comm, &vaf);
 525                else
 526                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 527                               "inode #%lu: comm %s: %pV\n",
 528                               inode->i_sb->s_id, function, line, inode->i_ino,
 529                               current->comm, &vaf);
 530                va_end(args);
 531        }
 532        save_error_info(inode->i_sb, function, line);
 533        ext4_handle_error(inode->i_sb);
 534}
 535
 536void __ext4_error_file(struct file *file, const char *function,
 537                       unsigned int line, ext4_fsblk_t block,
 538                       const char *fmt, ...)
 539{
 540        va_list args;
 541        struct va_format vaf;
 542        struct ext4_super_block *es;
 543        struct inode *inode = file_inode(file);
 544        char pathname[80], *path;
 545
 546        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 547                return;
 548
 549        trace_ext4_error(inode->i_sb, function, line);
 550        es = EXT4_SB(inode->i_sb)->s_es;
 551        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 552        if (ext4_error_ratelimit(inode->i_sb)) {
 553                path = file_path(file, pathname, sizeof(pathname));
 554                if (IS_ERR(path))
 555                        path = "(unknown)";
 556                va_start(args, fmt);
 557                vaf.fmt = fmt;
 558                vaf.va = &args;
 559                if (block)
 560                        printk(KERN_CRIT
 561                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 562                               "block %llu: comm %s: path %s: %pV\n",
 563                               inode->i_sb->s_id, function, line, inode->i_ino,
 564                               block, current->comm, path, &vaf);
 565                else
 566                        printk(KERN_CRIT
 567                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 568                               "comm %s: path %s: %pV\n",
 569                               inode->i_sb->s_id, function, line, inode->i_ino,
 570                               current->comm, path, &vaf);
 571                va_end(args);
 572        }
 573        save_error_info(inode->i_sb, function, line);
 574        ext4_handle_error(inode->i_sb);
 575}
 576
 577const char *ext4_decode_error(struct super_block *sb, int errno,
 578                              char nbuf[16])
 579{
 580        char *errstr = NULL;
 581
 582        switch (errno) {
 583        case -EFSCORRUPTED:
 584                errstr = "Corrupt filesystem";
 585                break;
 586        case -EFSBADCRC:
 587                errstr = "Filesystem failed CRC";
 588                break;
 589        case -EIO:
 590                errstr = "IO failure";
 591                break;
 592        case -ENOMEM:
 593                errstr = "Out of memory";
 594                break;
 595        case -EROFS:
 596                if (!sb || (EXT4_SB(sb)->s_journal &&
 597                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 598                        errstr = "Journal has aborted";
 599                else
 600                        errstr = "Readonly filesystem";
 601                break;
 602        default:
 603                /* If the caller passed in an extra buffer for unknown
 604                 * errors, textualise them now.  Else we just return
 605                 * NULL. */
 606                if (nbuf) {
 607                        /* Check for truncated error codes... */
 608                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 609                                errstr = nbuf;
 610                }
 611                break;
 612        }
 613
 614        return errstr;
 615}
 616
 617void ext4_set_errno(struct super_block *sb, int err)
 618{
 619        if (err < 0)
 620                err = -err;
 621
 622        switch (err) {
 623        case EIO:
 624                err = EXT4_ERR_EIO;
 625                break;
 626        case ENOMEM:
 627                err = EXT4_ERR_ENOMEM;
 628                break;
 629        case EFSBADCRC:
 630                err = EXT4_ERR_EFSBADCRC;
 631                break;
 632        case EFSCORRUPTED:
 633                err = EXT4_ERR_EFSCORRUPTED;
 634                break;
 635        case ENOSPC:
 636                err = EXT4_ERR_ENOSPC;
 637                break;
 638        case ENOKEY:
 639                err = EXT4_ERR_ENOKEY;
 640                break;
 641        case EROFS:
 642                err = EXT4_ERR_EROFS;
 643                break;
 644        case EFBIG:
 645                err = EXT4_ERR_EFBIG;
 646                break;
 647        case EEXIST:
 648                err = EXT4_ERR_EEXIST;
 649                break;
 650        case ERANGE:
 651                err = EXT4_ERR_ERANGE;
 652                break;
 653        case EOVERFLOW:
 654                err = EXT4_ERR_EOVERFLOW;
 655                break;
 656        case EBUSY:
 657                err = EXT4_ERR_EBUSY;
 658                break;
 659        case ENOTDIR:
 660                err = EXT4_ERR_ENOTDIR;
 661                break;
 662        case ENOTEMPTY:
 663                err = EXT4_ERR_ENOTEMPTY;
 664                break;
 665        case ESHUTDOWN:
 666                err = EXT4_ERR_ESHUTDOWN;
 667                break;
 668        case EFAULT:
 669                err = EXT4_ERR_EFAULT;
 670                break;
 671        default:
 672                err = EXT4_ERR_UNKNOWN;
 673        }
 674        EXT4_SB(sb)->s_es->s_last_error_errcode = err;
 675}
 676
 677/* __ext4_std_error decodes expected errors from journaling functions
 678 * automatically and invokes the appropriate error response.  */
 679
 680void __ext4_std_error(struct super_block *sb, const char *function,
 681                      unsigned int line, int errno)
 682{
 683        char nbuf[16];
 684        const char *errstr;
 685
 686        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 687                return;
 688
 689        /* Special case: if the error is EROFS, and we're not already
 690         * inside a transaction, then there's really no point in logging
 691         * an error. */
 692        if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
 693                return;
 694
 695        if (ext4_error_ratelimit(sb)) {
 696                errstr = ext4_decode_error(sb, errno, nbuf);
 697                printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 698                       sb->s_id, function, line, errstr);
 699        }
 700
 701        ext4_set_errno(sb, -errno);
 702        save_error_info(sb, function, line);
 703        ext4_handle_error(sb);
 704}
 705
 706/*
 707 * ext4_abort is a much stronger failure handler than ext4_error.  The
 708 * abort function may be used to deal with unrecoverable failures such
 709 * as journal IO errors or ENOMEM at a critical moment in log management.
 710 *
 711 * We unconditionally force the filesystem into an ABORT|READONLY state,
 712 * unless the error response on the fs has been set to panic in which
 713 * case we take the easy way out and panic immediately.
 714 */
 715
 716void __ext4_abort(struct super_block *sb, const char *function,
 717                unsigned int line, const char *fmt, ...)
 718{
 719        struct va_format vaf;
 720        va_list args;
 721
 722        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 723                return;
 724
 725        save_error_info(sb, function, line);
 726        va_start(args, fmt);
 727        vaf.fmt = fmt;
 728        vaf.va = &args;
 729        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
 730               sb->s_id, function, line, &vaf);
 731        va_end(args);
 732
 733        if (sb_rdonly(sb) == 0) {
 734                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 735                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 736                /*
 737                 * Make sure updated value of ->s_mount_flags will be visible
 738                 * before ->s_flags update
 739                 */
 740                smp_wmb();
 741                sb->s_flags |= SB_RDONLY;
 742                if (EXT4_SB(sb)->s_journal)
 743                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 744                save_error_info(sb, function, line);
 745        }
 746        if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
 747                if (EXT4_SB(sb)->s_journal &&
 748                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 749                        return;
 750                panic("EXT4-fs panic from previous error\n");
 751        }
 752}
 753
 754void __ext4_msg(struct super_block *sb,
 755                const char *prefix, const char *fmt, ...)
 756{
 757        struct va_format vaf;
 758        va_list args;
 759
 760        if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 761                return;
 762
 763        va_start(args, fmt);
 764        vaf.fmt = fmt;
 765        vaf.va = &args;
 766        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 767        va_end(args);
 768}
 769
 770#define ext4_warning_ratelimit(sb)                                      \
 771                ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
 772                             "EXT4-fs warning")
 773
 774void __ext4_warning(struct super_block *sb, const char *function,
 775                    unsigned int line, const char *fmt, ...)
 776{
 777        struct va_format vaf;
 778        va_list args;
 779
 780        if (!ext4_warning_ratelimit(sb))
 781                return;
 782
 783        va_start(args, fmt);
 784        vaf.fmt = fmt;
 785        vaf.va = &args;
 786        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 787               sb->s_id, function, line, &vaf);
 788        va_end(args);
 789}
 790
 791void __ext4_warning_inode(const struct inode *inode, const char *function,
 792                          unsigned int line, const char *fmt, ...)
 793{
 794        struct va_format vaf;
 795        va_list args;
 796
 797        if (!ext4_warning_ratelimit(inode->i_sb))
 798                return;
 799
 800        va_start(args, fmt);
 801        vaf.fmt = fmt;
 802        vaf.va = &args;
 803        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 804               "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 805               function, line, inode->i_ino, current->comm, &vaf);
 806        va_end(args);
 807}
 808
 809void __ext4_grp_locked_error(const char *function, unsigned int line,
 810                             struct super_block *sb, ext4_group_t grp,
 811                             unsigned long ino, ext4_fsblk_t block,
 812                             const char *fmt, ...)
 813__releases(bitlock)
 814__acquires(bitlock)
 815{
 816        struct va_format vaf;
 817        va_list args;
 818        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 819
 820        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 821                return;
 822
 823        trace_ext4_error(sb, function, line);
 824        es->s_last_error_ino = cpu_to_le32(ino);
 825        es->s_last_error_block = cpu_to_le64(block);
 826        __save_error_info(sb, function, line);
 827
 828        if (ext4_error_ratelimit(sb)) {
 829                va_start(args, fmt);
 830                vaf.fmt = fmt;
 831                vaf.va = &args;
 832                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 833                       sb->s_id, function, line, grp);
 834                if (ino)
 835                        printk(KERN_CONT "inode %lu: ", ino);
 836                if (block)
 837                        printk(KERN_CONT "block %llu:",
 838                               (unsigned long long) block);
 839                printk(KERN_CONT "%pV\n", &vaf);
 840                va_end(args);
 841        }
 842
 843        if (test_opt(sb, WARN_ON_ERROR))
 844                WARN_ON_ONCE(1);
 845
 846        if (test_opt(sb, ERRORS_CONT)) {
 847                ext4_commit_super(sb, 0);
 848                return;
 849        }
 850
 851        ext4_unlock_group(sb, grp);
 852        ext4_commit_super(sb, 1);
 853        ext4_handle_error(sb);
 854        /*
 855         * We only get here in the ERRORS_RO case; relocking the group
 856         * may be dangerous, but nothing bad will happen since the
 857         * filesystem will have already been marked read/only and the
 858         * journal has been aborted.  We return 1 as a hint to callers
 859         * who might what to use the return value from
 860         * ext4_grp_locked_error() to distinguish between the
 861         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 862         * aggressively from the ext4 function in question, with a
 863         * more appropriate error code.
 864         */
 865        ext4_lock_group(sb, grp);
 866        return;
 867}
 868
 869void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
 870                                     ext4_group_t group,
 871                                     unsigned int flags)
 872{
 873        struct ext4_sb_info *sbi = EXT4_SB(sb);
 874        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 875        struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 876        int ret;
 877
 878        if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
 879                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
 880                                            &grp->bb_state);
 881                if (!ret)
 882                        percpu_counter_sub(&sbi->s_freeclusters_counter,
 883                                           grp->bb_free);
 884        }
 885
 886        if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
 887                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
 888                                            &grp->bb_state);
 889                if (!ret && gdp) {
 890                        int count;
 891
 892                        count = ext4_free_inodes_count(sb, gdp);
 893                        percpu_counter_sub(&sbi->s_freeinodes_counter,
 894                                           count);
 895                }
 896        }
 897}
 898
 899void ext4_update_dynamic_rev(struct super_block *sb)
 900{
 901        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 902
 903        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 904                return;
 905
 906        ext4_warning(sb,
 907                     "updating to rev %d because of new feature flag, "
 908                     "running e2fsck is recommended",
 909                     EXT4_DYNAMIC_REV);
 910
 911        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 912        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 913        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 914        /* leave es->s_feature_*compat flags alone */
 915        /* es->s_uuid will be set by e2fsck if empty */
 916
 917        /*
 918         * The rest of the superblock fields should be zero, and if not it
 919         * means they are likely already in use, so leave them alone.  We
 920         * can leave it up to e2fsck to clean up any inconsistencies there.
 921         */
 922}
 923
 924/*
 925 * Open the external journal device
 926 */
 927static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 928{
 929        struct block_device *bdev;
 930        char b[BDEVNAME_SIZE];
 931
 932        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 933        if (IS_ERR(bdev))
 934                goto fail;
 935        return bdev;
 936
 937fail:
 938        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 939                        __bdevname(dev, b), PTR_ERR(bdev));
 940        return NULL;
 941}
 942
 943/*
 944 * Release the journal device
 945 */
 946static void ext4_blkdev_put(struct block_device *bdev)
 947{
 948        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 949}
 950
 951static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 952{
 953        struct block_device *bdev;
 954        bdev = sbi->journal_bdev;
 955        if (bdev) {
 956                ext4_blkdev_put(bdev);
 957                sbi->journal_bdev = NULL;
 958        }
 959}
 960
 961static inline struct inode *orphan_list_entry(struct list_head *l)
 962{
 963        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 964}
 965
 966static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 967{
 968        struct list_head *l;
 969
 970        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 971                 le32_to_cpu(sbi->s_es->s_last_orphan));
 972
 973        printk(KERN_ERR "sb_info orphan list:\n");
 974        list_for_each(l, &sbi->s_orphan) {
 975                struct inode *inode = orphan_list_entry(l);
 976                printk(KERN_ERR "  "
 977                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 978                       inode->i_sb->s_id, inode->i_ino, inode,
 979                       inode->i_mode, inode->i_nlink,
 980                       NEXT_ORPHAN(inode));
 981        }
 982}
 983
 984#ifdef CONFIG_QUOTA
 985static int ext4_quota_off(struct super_block *sb, int type);
 986
 987static inline void ext4_quota_off_umount(struct super_block *sb)
 988{
 989        int type;
 990
 991        /* Use our quota_off function to clear inode flags etc. */
 992        for (type = 0; type < EXT4_MAXQUOTAS; type++)
 993                ext4_quota_off(sb, type);
 994}
 995
 996/*
 997 * This is a helper function which is used in the mount/remount
 998 * codepaths (which holds s_umount) to fetch the quota file name.
 999 */
1000static inline char *get_qf_name(struct super_block *sb,
1001                                struct ext4_sb_info *sbi,
1002                                int type)
1003{
1004        return rcu_dereference_protected(sbi->s_qf_names[type],
1005                                         lockdep_is_held(&sb->s_umount));
1006}
1007#else
/* Without CONFIG_QUOTA there are no quotas to turn off at umount. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
        /* intentionally empty */
}
1011#endif
1012
1013static void ext4_put_super(struct super_block *sb)
1014{
1015        struct ext4_sb_info *sbi = EXT4_SB(sb);
1016        struct ext4_super_block *es = sbi->s_es;
1017        struct buffer_head **group_desc;
1018        struct flex_groups **flex_groups;
1019        int aborted = 0;
1020        int i, err;
1021
1022        ext4_unregister_li_request(sb);
1023        ext4_quota_off_umount(sb);
1024
1025        destroy_workqueue(sbi->rsv_conversion_wq);
1026
1027        if (sbi->s_journal) {
1028                aborted = is_journal_aborted(sbi->s_journal);
1029                err = jbd2_journal_destroy(sbi->s_journal);
1030                sbi->s_journal = NULL;
1031                if ((err < 0) && !aborted) {
1032                        ext4_set_errno(sb, -err);
1033                        ext4_abort(sb, "Couldn't clean up the journal");
1034                }
1035        }
1036
1037        ext4_unregister_sysfs(sb);
1038        ext4_es_unregister_shrinker(sbi);
1039        del_timer_sync(&sbi->s_err_report);
1040        ext4_release_system_zone(sb);
1041        ext4_mb_release(sb);
1042        ext4_ext_release(sb);
1043
1044        if (!sb_rdonly(sb) && !aborted) {
1045                ext4_clear_feature_journal_needs_recovery(sb);
1046                es->s_state = cpu_to_le16(sbi->s_mount_state);
1047        }
1048        if (!sb_rdonly(sb))
1049                ext4_commit_super(sb, 1);
1050
1051        rcu_read_lock();
1052        group_desc = rcu_dereference(sbi->s_group_desc);
1053        for (i = 0; i < sbi->s_gdb_count; i++)
1054                brelse(group_desc[i]);
1055        kvfree(group_desc);
1056        flex_groups = rcu_dereference(sbi->s_flex_groups);
1057        if (flex_groups) {
1058                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1059                        kvfree(flex_groups[i]);
1060                kvfree(flex_groups);
1061        }
1062        rcu_read_unlock();
1063        percpu_counter_destroy(&sbi->s_freeclusters_counter);
1064        percpu_counter_destroy(&sbi->s_freeinodes_counter);
1065        percpu_counter_destroy(&sbi->s_dirs_counter);
1066        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1067        percpu_free_rwsem(&sbi->s_writepages_rwsem);
1068#ifdef CONFIG_QUOTA
1069        for (i = 0; i < EXT4_MAXQUOTAS; i++)
1070                kfree(get_qf_name(sb, sbi, i));
1071#endif
1072
1073        /* Debugging code just in case the in-memory inode orphan list
1074         * isn't empty.  The on-disk one can be non-empty if we've
1075         * detected an error and taken the fs readonly, but the
1076         * in-memory list had better be clean by this point. */
1077        if (!list_empty(&sbi->s_orphan))
1078                dump_orphan_list(sb, sbi);
1079        J_ASSERT(list_empty(&sbi->s_orphan));
1080
1081        sync_blockdev(sb->s_bdev);
1082        invalidate_bdev(sb->s_bdev);
1083        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
1084                /*
1085                 * Invalidate the journal device's buffers.  We don't want them
1086                 * floating about in memory - the physical journal device may
1087                 * hotswapped, and it breaks the `ro-after' testing code.
1088                 */
1089                sync_blockdev(sbi->journal_bdev);
1090                invalidate_bdev(sbi->journal_bdev);
1091                ext4_blkdev_remove(sbi);
1092        }
1093
1094        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1095        sbi->s_ea_inode_cache = NULL;
1096
1097        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1098        sbi->s_ea_block_cache = NULL;
1099
1100        if (sbi->s_mmp_tsk)
1101                kthread_stop(sbi->s_mmp_tsk);
1102        brelse(sbi->s_sbh);
1103        sb->s_fs_info = NULL;
1104        /*
1105         * Now that we are completely done shutting down the
1106         * superblock, we need to actually destroy the kobject.
1107         */
1108        kobject_put(&sbi->s_kobj);
1109        wait_for_completion(&sbi->s_kobj_unregister);
1110        if (sbi->s_chksum_driver)
1111                crypto_free_shash(sbi->s_chksum_driver);
1112        kfree(sbi->s_blockgroup_lock);
1113        fs_put_dax(sbi->s_daxdev);
1114#ifdef CONFIG_UNICODE
1115        utf8_unload(sbi->s_encoding);
1116#endif
1117        kfree(sbi);
1118}
1119
1120static struct kmem_cache *ext4_inode_cachep;
1121
1122/*
1123 * Called inside transaction, so use GFP_NOFS
1124 */
1125static struct inode *ext4_alloc_inode(struct super_block *sb)
1126{
1127        struct ext4_inode_info *ei;
1128
1129        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
1130        if (!ei)
1131                return NULL;
1132
1133        inode_set_iversion(&ei->vfs_inode, 1);
1134        spin_lock_init(&ei->i_raw_lock);
1135        INIT_LIST_HEAD(&ei->i_prealloc_list);
1136        spin_lock_init(&ei->i_prealloc_lock);
1137        ext4_es_init_tree(&ei->i_es_tree);
1138        rwlock_init(&ei->i_es_lock);
1139        INIT_LIST_HEAD(&ei->i_es_list);
1140        ei->i_es_all_nr = 0;
1141        ei->i_es_shk_nr = 0;
1142        ei->i_es_shrink_lblk = 0;
1143        ei->i_reserved_data_blocks = 0;
1144        spin_lock_init(&(ei->i_block_reservation_lock));
1145        ext4_init_pending_tree(&ei->i_pending_tree);
1146#ifdef CONFIG_QUOTA
1147        ei->i_reserved_quota = 0;
1148        memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1149#endif
1150        ei->jinode = NULL;
1151        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1152        spin_lock_init(&ei->i_completed_io_lock);
1153        ei->i_sync_tid = 0;
1154        ei->i_datasync_tid = 0;
1155        atomic_set(&ei->i_unwritten, 0);
1156        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1157        return &ei->vfs_inode;
1158}
1159
/*
 * ->drop_inode: ask the generic helper first and, only if it says to
 * keep the inode, fall back to fscrypt's answer.  The decision is traced
 * and returned.
 */
static int ext4_drop_inode(struct inode *inode)
{
        int drop;

        drop = generic_drop_inode(inode);
        if (drop == 0)
                drop = fscrypt_drop_inode(inode);

        trace_ext4_drop_inode(inode, drop);
        return drop;
}
1170
1171static void ext4_free_in_core_inode(struct inode *inode)
1172{
1173        fscrypt_free_inode(inode);
1174        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1175}
1176
1177static void ext4_destroy_inode(struct inode *inode)
1178{
1179        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1180                ext4_msg(inode->i_sb, KERN_ERR,
1181                         "Inode %lu (%p): orphan list check failed!",
1182                         inode->i_ino, EXT4_I(inode));
1183                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1184                                EXT4_I(inode), sizeof(struct ext4_inode_info),
1185                                true);
1186                dump_stack();
1187        }
1188}
1189
1190static void init_once(void *foo)
1191{
1192        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
1193
1194        INIT_LIST_HEAD(&ei->i_orphan);
1195        init_rwsem(&ei->xattr_sem);
1196        init_rwsem(&ei->i_data_sem);
1197        init_rwsem(&ei->i_mmap_sem);
1198        inode_init_once(&ei->vfs_inode);
1199}
1200
1201static int __init init_inodecache(void)
1202{
1203        ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1204                                sizeof(struct ext4_inode_info), 0,
1205                                (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1206                                        SLAB_ACCOUNT),
1207                                offsetof(struct ext4_inode_info, i_data),
1208                                sizeof_field(struct ext4_inode_info, i_data),
1209                                init_once);
1210        if (ext4_inode_cachep == NULL)
1211                return -ENOMEM;
1212        return 0;
1213}
1214
1215static void destroy_inodecache(void)
1216{
1217        /*
1218         * Make sure all delayed rcu free inodes are flushed before we
1219         * destroy cache.
1220         */
1221        rcu_barrier();
1222        kmem_cache_destroy(ext4_inode_cachep);
1223}
1224
1225void ext4_clear_inode(struct inode *inode)
1226{
1227        invalidate_inode_buffers(inode);
1228        clear_inode(inode);
1229        ext4_discard_preallocations(inode);
1230        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1231        dquot_drop(inode);
1232        if (EXT4_I(inode)->jinode) {
1233                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1234                                               EXT4_I(inode)->jinode);
1235                jbd2_free_inode(EXT4_I(inode)->jinode);
1236                EXT4_I(inode)->jinode = NULL;
1237        }
1238        fscrypt_put_encryption_info(inode);
1239        fsverity_cleanup_inode(inode);
1240}
1241
1242static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1243                                        u64 ino, u32 generation)
1244{
1245        struct inode *inode;
1246
1247        /*
1248         * Currently we don't know the generation for parent directory, so
1249         * a generation of 0 means "accept any"
1250         */
1251        inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1252        if (IS_ERR(inode))
1253                return ERR_CAST(inode);
1254        if (generation && inode->i_generation != generation) {
1255                iput(inode);
1256                return ERR_PTR(-ESTALE);
1257        }
1258
1259        return inode;
1260}
1261
1262static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1263                                        int fh_len, int fh_type)
1264{
1265        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1266                                    ext4_nfs_get_inode);
1267}
1268
1269static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1270                                        int fh_len, int fh_type)
1271{
1272        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1273                                    ext4_nfs_get_inode);
1274}
1275
1276static int ext4_nfs_commit_metadata(struct inode *inode)
1277{
1278        struct writeback_control wbc = {
1279                .sync_mode = WB_SYNC_ALL
1280        };
1281
1282        trace_ext4_nfs_commit_metadata(inode);
1283        return ext4_write_inode(inode, &wbc);
1284}
1285
1286/*
1287 * Try to release metadata pages (indirect blocks, directories) which are
1288 * mapped via the block device.  Since these pages could have journal heads
1289 * which would prevent try_to_free_buffers() from freeing them, we must use
1290 * jbd2 layer's try_to_free_buffers() function to release them.
1291 */
1292static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1293                                 gfp_t wait)
1294{
1295        journal_t *journal = EXT4_SB(sb)->s_journal;
1296
1297        WARN_ON(PageChecked(page));
1298        if (!page_has_buffers(page))
1299                return 0;
1300        if (journal)
1301                return jbd2_journal_try_to_free_buffers(journal, page,
1302                                                wait & ~__GFP_DIRECT_RECLAIM);
1303        return try_to_free_buffers(page);
1304}
1305
1306#ifdef CONFIG_FS_ENCRYPTION
1307static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1308{
1309        return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1310                                 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1311}
1312
1313static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1314                                                        void *fs_data)
1315{
1316        handle_t *handle = fs_data;
1317        int res, res2, credits, retries = 0;
1318
1319        /*
1320         * Encrypting the root directory is not allowed because e2fsck expects
1321         * lost+found to exist and be unencrypted, and encrypting the root
1322         * directory would imply encrypting the lost+found directory as well as
1323         * the filename "lost+found" itself.
1324         */
1325        if (inode->i_ino == EXT4_ROOT_INO)
1326                return -EPERM;
1327
1328        if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
1329                return -EINVAL;
1330
1331        res = ext4_convert_inline_data(inode);
1332        if (res)
1333                return res;
1334
1335        /*
1336         * If a journal handle was specified, then the encryption context is
1337         * being set on a new inode via inheritance and is part of a larger
1338         * transaction to create the inode.  Otherwise the encryption context is
1339         * being set on an existing inode in its own transaction.  Only in the
1340         * latter case should the "retry on ENOSPC" logic be used.
1341         */
1342
1343        if (handle) {
1344                res = ext4_xattr_set_handle(handle, inode,
1345                                            EXT4_XATTR_INDEX_ENCRYPTION,
1346                                            EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1347                                            ctx, len, 0);
1348                if (!res) {
1349                        ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1350                        ext4_clear_inode_state(inode,
1351                                        EXT4_STATE_MAY_INLINE_DATA);
1352                        /*
1353                         * Update inode->i_flags - S_ENCRYPTED will be enabled,
1354                         * S_DAX may be disabled
1355                         */
1356                        ext4_set_inode_flags(inode);
1357                }
1358                return res;
1359        }
1360
1361        res = dquot_initialize(inode);
1362        if (res)
1363                return res;
1364retry:
1365        res = ext4_xattr_set_credits(inode, len, false /* is_create */,
1366                                     &credits);
1367        if (res)
1368                return res;
1369
1370        handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
1371        if (IS_ERR(handle))
1372                return PTR_ERR(handle);
1373
1374        res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
1375                                    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1376                                    ctx, len, 0);
1377        if (!res) {
1378                ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1379                /*
1380                 * Update inode->i_flags - S_ENCRYPTED will be enabled,
1381                 * S_DAX may be disabled
1382                 */
1383                ext4_set_inode_flags(inode);
1384                res = ext4_mark_inode_dirty(handle, inode);
1385                if (res)
1386                        EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
1387        }
1388        res2 = ext4_journal_stop(handle);
1389
1390        if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
1391                goto retry;
1392        if (!res)
1393                res = res2;
1394        return res;
1395}
1396
1397static bool ext4_dummy_context(struct inode *inode)
1398{
1399        return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
1400}
1401
1402static bool ext4_has_stable_inodes(struct super_block *sb)
1403{
1404        return ext4_has_feature_stable_inodes(sb);
1405}
1406
1407static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
1408                                       int *ino_bits_ret, int *lblk_bits_ret)
1409{
1410        *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
1411        *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
1412}
1413
1414static const struct fscrypt_operations ext4_cryptops = {
1415        .key_prefix             = "ext4:",
1416        .get_context            = ext4_get_context,
1417        .set_context            = ext4_set_context,
1418        .dummy_context          = ext4_dummy_context,
1419        .empty_dir              = ext4_empty_dir,
1420        .max_namelen            = EXT4_NAME_LEN,
1421        .has_stable_inodes      = ext4_has_stable_inodes,
1422        .get_ino_and_lblk_bits  = ext4_get_ino_and_lblk_bits,
1423};
1424#endif
1425
1426#ifdef CONFIG_QUOTA
1427static const char * const quotatypes[] = INITQFNAMES;
1428#define QTYPE2NAME(t) (quotatypes[t])
1429
1430static int ext4_write_dquot(struct dquot *dquot);
1431static int ext4_acquire_dquot(struct dquot *dquot);
1432static int ext4_release_dquot(struct dquot *dquot);
1433static int ext4_mark_dquot_dirty(struct dquot *dquot);
1434static int ext4_write_info(struct super_block *sb, int type);
1435static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1436                         const struct path *path);
1437static int ext4_quota_on_mount(struct super_block *sb, int type);
1438static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1439                               size_t len, loff_t off);
1440static ssize_t ext4_quota_write(struct super_block *sb, int type,
1441                                const char *data, size_t len, loff_t off);
1442static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1443                             unsigned int flags);
1444static int ext4_enable_quotas(struct super_block *sb);
1445
1446static struct dquot **ext4_get_dquots(struct inode *inode)
1447{
1448        return EXT4_I(inode)->i_dquot;
1449}
1450
1451static const struct dquot_operations ext4_quota_operations = {
1452        .get_reserved_space     = ext4_get_reserved_space,
1453        .write_dquot            = ext4_write_dquot,
1454        .acquire_dquot          = ext4_acquire_dquot,
1455        .release_dquot          = ext4_release_dquot,
1456        .mark_dirty             = ext4_mark_dquot_dirty,
1457        .write_info             = ext4_write_info,
1458        .alloc_dquot            = dquot_alloc,
1459        .destroy_dquot          = dquot_destroy,
1460        .get_projid             = ext4_get_projid,
1461        .get_inode_usage        = ext4_get_inode_usage,
1462        .get_next_id            = dquot_get_next_id,
1463};
1464
1465static const struct quotactl_ops ext4_qctl_operations = {
1466        .quota_on       = ext4_quota_on,
1467        .quota_off      = ext4_quota_off,
1468        .quota_sync     = dquot_quota_sync,
1469        .get_state      = dquot_get_state,
1470        .set_info       = dquot_set_dqinfo,
1471        .get_dqblk      = dquot_get_dqblk,
1472        .set_dqblk      = dquot_set_dqblk,
1473        .get_nextdqblk  = dquot_get_next_dqblk,
1474};
1475#endif
1476
1477static const struct super_operations ext4_sops = {
1478        .alloc_inode    = ext4_alloc_inode,
1479        .free_inode     = ext4_free_in_core_inode,
1480        .destroy_inode  = ext4_destroy_inode,
1481        .write_inode    = ext4_write_inode,
1482        .dirty_inode    = ext4_dirty_inode,
1483        .drop_inode     = ext4_drop_inode,
1484        .evict_inode    = ext4_evict_inode,
1485        .put_super      = ext4_put_super,
1486        .sync_fs        = ext4_sync_fs,
1487        .freeze_fs      = ext4_freeze,
1488        .unfreeze_fs    = ext4_unfreeze,
1489        .statfs         = ext4_statfs,
1490        .remount_fs     = ext4_remount,
1491        .show_options   = ext4_show_options,
1492#ifdef CONFIG_QUOTA
1493        .quota_read     = ext4_quota_read,
1494        .quota_write    = ext4_quota_write,
1495        .get_dquots     = ext4_get_dquots,
1496#endif
1497        .bdev_try_to_free_page = bdev_try_to_free_page,
1498};
1499
1500static const struct export_operations ext4_export_ops = {
1501        .fh_to_dentry = ext4_fh_to_dentry,
1502        .fh_to_parent = ext4_fh_to_parent,
1503        .get_parent = ext4_get_parent,
1504        .commit_metadata = ext4_nfs_commit_metadata,
1505};
1506
/*
 * Mount option identifiers, used as the token values of the mount
 * option match table.  One enumerator per line; the order (and thus
 * the numeric values) is unchanged.
 */
enum {
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,
        Opt_nouid32,
        Opt_debug,
        Opt_removed,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_dev,
        Opt_journal_path,
        Opt_journal_checksum,
        Opt_journal_async_commit,
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,
        Opt_test_dummy_encryption,
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_jqfmt_vfsv1,
        Opt_quota,
        Opt_noquota,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,
        Opt_usrquota,
        Opt_grpquota,
        Opt_prjquota,
        Opt_i_version,
        Opt_dax,
        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,
        Opt_warn_on_error,
        Opt_nowarn_on_error,
        Opt_mblk_io_submit,
        Opt_lazytime,
        Opt_nolazytime,
        Opt_debug_want_extra_isize,
        Opt_nomblk_io_submit,
        Opt_block_validity,
        Opt_noblock_validity,
        Opt_inode_readahead_blks,
        Opt_journal_ioprio,
        Opt_dioread_nolock,
        Opt_dioread_lock,
        Opt_discard,
        Opt_nodiscard,
        Opt_init_itable,
        Opt_noinit_itable,
        Opt_max_dir_size_kb,
        Opt_nojournal_checksum,
        Opt_nombcache,
};
1530
1531static const match_table_t tokens = {
1532        {Opt_bsd_df, "bsddf"},
1533        {Opt_minix_df, "minixdf"},
1534        {Opt_grpid, "grpid"},
1535        {Opt_grpid, "bsdgroups"},
1536        {Opt_nogrpid, "nogrpid"},
1537        {Opt_nogrpid, "sysvgroups"},
1538        {Opt_resgid, "resgid=%u"},
1539        {Opt_resuid, "resuid=%u"},
1540        {Opt_sb, "sb=%u"},
1541        {Opt_err_cont, "errors=continue"},
1542        {Opt_err_panic, "errors=panic"},
1543        {Opt_err_ro, "errors=remount-ro"},
1544        {Opt_nouid32, "nouid32"},
1545        {Opt_debug, "debug"},
1546        {Opt_removed, "oldalloc"},
1547        {Opt_removed, "orlov"},
1548        {Opt_user_xattr, "user_xattr"},
1549        {Opt_nouser_xattr, "nouser_xattr"},
1550        {Opt_acl, "acl"},
1551        {Opt_noacl, "noacl"},
1552        {Opt_noload, "norecovery"},
1553        {Opt_noload, "noload"},
1554        {Opt_removed, "nobh"},
1555        {Opt_removed, "bh"},
1556        {Opt_commit, "commit=%u"},
1557        {Opt_min_batch_time, "min_batch_time=%u"},
1558        {Opt_max_batch_time, "max_batch_time=%u"},
1559        {Opt_journal_dev, "journal_dev=%u"},
1560        {Opt_journal_path, "journal_path=%s"},
1561        {Opt_journal_checksum, "journal_checksum"},
1562        {Opt_nojournal_checksum, "nojournal_checksum"},
1563        {Opt_journal_async_commit, "journal_async_commit"},
1564        {Opt_abort, "abort"},
1565        {Opt_data_journal, "data=journal"},
1566        {Opt_data_ordered, "data=ordered"},
1567        {Opt_data_writeback, "data=writeback"},
1568        {Opt_data_err_abort, "data_err=abort"},
1569        {Opt_data_err_ignore, "data_err=ignore"},
1570        {Opt_offusrjquota, "usrjquota="},
1571        {Opt_usrjquota, "usrjquota=%s"},
1572        {Opt_offgrpjquota, "grpjquota="},
1573        {Opt_grpjquota, "grpjquota=%s"},
1574        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1575        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1576        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1577        {Opt_grpquota, "grpquota"},
1578        {Opt_noquota, "noquota"},
1579        {Opt_quota, "quota"},
1580        {Opt_usrquota, "usrquota"},
1581        {Opt_prjquota, "prjquota"},
1582        {Opt_barrier, "barrier=%u"},
1583        {Opt_barrier, "barrier"},
1584        {Opt_nobarrier, "nobarrier"},
1585        {Opt_i_version, "i_version"},
1586        {Opt_dax, "dax"},
1587        {Opt_stripe, "stripe=%u"},
1588        {Opt_delalloc, "delalloc"},
1589        {Opt_warn_on_error, "warn_on_error"},
1590        {Opt_nowarn_on_error, "nowarn_on_error"},
1591        {Opt_lazytime, "lazytime"},
1592        {Opt_nolazytime, "nolazytime"},
1593        {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
1594        {Opt_nodelalloc, "nodelalloc"},
1595        {Opt_removed, "mblk_io_submit"},
1596        {Opt_removed, "nomblk_io_submit"},
1597        {Opt_block_validity, "block_validity"},
1598        {Opt_noblock_validity, "noblock_validity"},
1599        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1600        {Opt_journal_ioprio, "journal_ioprio=%u"},
1601        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1602        {Opt_auto_da_alloc, "auto_da_alloc"},
1603        {Opt_noauto_da_alloc, "noauto_da_alloc"},
1604        {Opt_dioread_nolock, "dioread_nolock"},
1605        {Opt_dioread_lock, "nodioread_nolock"},
1606        {Opt_dioread_lock, "dioread_lock"},
1607        {Opt_discard, "discard"},
1608        {Opt_nodiscard, "nodiscard"},
1609        {Opt_init_itable, "init_itable=%u"},
1610        {Opt_init_itable, "init_itable"},
1611        {Opt_noinit_itable, "noinit_itable"},
1612        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1613        {Opt_test_dummy_encryption, "test_dummy_encryption"},
1614        {Opt_nombcache, "nombcache"},
1615        {Opt_nombcache, "no_mbcache"},  /* for backward compatibility */
1616        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
1617        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
1618        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
1619        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1620        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
1621        {Opt_err, NULL},
1622};
1623
1624static ext4_fsblk_t get_sb_block(void **data)
1625{
1626        ext4_fsblk_t    sb_block;
1627        char            *options = (char *) *data;
1628
1629        if (!options || strncmp(options, "sb=", 3) != 0)
1630                return 1;       /* Default location */
1631
1632        options += 3;
1633        /* TODO: use simple_strtoll with >32bit ext4 */
1634        sb_block = simple_strtoul(options, &options, 0);
1635        if (*options && *options != ',') {
1636                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1637                       (char *) *data);
1638                return 1;
1639        }
1640        if (*options == ',')
1641                options++;
1642        *data = (void *) options;
1643
1644        return sb_block;
1645}
1646
/* Journal I/O priority used by default: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))

/*
 * Format string for warning about deprecated mount options; takes the
 * option text and the release by which it is to be removed.
 */
static const char deprecated_msg[] =
	"Mount option \"%s\" will be removed by %s\n"
	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1651
1652#ifdef CONFIG_QUOTA
1653static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1654{
1655        struct ext4_sb_info *sbi = EXT4_SB(sb);
1656        char *qname, *old_qname = get_qf_name(sb, sbi, qtype);
1657        int ret = -1;
1658
1659        if (sb_any_quota_loaded(sb) && !old_qname) {
1660                ext4_msg(sb, KERN_ERR,
1661                        "Cannot change journaled "
1662                        "quota options when quota turned on");
1663                return -1;
1664        }
1665        if (ext4_has_feature_quota(sb)) {
1666                ext4_msg(sb, KERN_INFO, "Journaled quota options "
1667                         "ignored when QUOTA feature is enabled");
1668                return 1;
1669        }
1670        qname = match_strdup(args);
1671        if (!qname) {
1672                ext4_msg(sb, KERN_ERR,
1673                        "Not enough memory for storing quotafile name");
1674                return -1;
1675        }
1676        if (old_qname) {
1677                if (strcmp(old_qname, qname) == 0)
1678                        ret = 1;
1679                else
1680                        ext4_msg(sb, KERN_ERR,
1681                                 "%s quota file already specified",
1682                                 QTYPE2NAME(qtype));
1683                goto errout;
1684        }
1685        if (strchr(qname, '/')) {
1686                ext4_msg(sb, KERN_ERR,
1687                        "quotafile must be on filesystem root");
1688                goto errout;
1689        }
1690        rcu_assign_pointer(sbi->s_qf_names[qtype], qname);
1691        set_opt(sb, QUOTA);
1692        return 1;
1693errout:
1694        kfree(qname);
1695        return ret;
1696}
1697
1698static int clear_qf_name(struct super_block *sb, int qtype)
1699{
1700
1701        struct ext4_sb_info *sbi = EXT4_SB(sb);
1702        char *old_qname = get_qf_name(sb, sbi, qtype);
1703
1704        if (sb_any_quota_loaded(sb) && old_qname) {
1705                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1706                        " when quota turned on");
1707                return -1;
1708        }
1709        rcu_assign_pointer(sbi->s_qf_names[qtype], NULL);
1710        synchronize_rcu();
1711        kfree(old_qname);
1712        return 1;
1713}
1714#endif
1715
/*
 * Per-option behaviour flags stored in struct mount_opts.flags and
 * interpreted by handle_mount_opt().
 */
#define MOPT_SET	(1 << 0)	/* option turns mount_opt bits on */
#define MOPT_CLEAR	(1 << 1)	/* option turns mount_opt bits off */
#define MOPT_NOSUPPORT	(1 << 2)	/* logged as "not supported" */
#define MOPT_EXPLICIT	(1 << 3)	/* remember it was given explicitly */
#define MOPT_CLEAR_ERR	(1 << 4)	/* clear ERRORS_MASK before applying */
#define MOPT_GTE0	(1 << 5)	/* numeric argument must be >= 0 */
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	(1 << 6)	/* mount_opt is a quota format id */
#else
/* Without quota support every quota option is reported as unsupported. */
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ	(1 << 7)	/* one of the data= journalling modes */
#define MOPT_NO_EXT2	(1 << 8)	/* rejected on an ext2 superblock */
#define MOPT_NO_EXT3	(1 << 9)	/* rejected on an ext3 superblock */
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING	(1 << 10)	/* argument is text, not an integer */
1734
1735static const struct mount_opts {
1736        int     token;
1737        int     mount_opt;
1738        int     flags;
1739} ext4_mount_opts[] = {
1740        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1741        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1742        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1743        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1744        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1745        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1746        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1747         MOPT_EXT4_ONLY | MOPT_SET},
1748        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1749         MOPT_EXT4_ONLY | MOPT_CLEAR},
1750        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1751        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1752        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1753         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1754        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1755         MOPT_EXT4_ONLY | MOPT_CLEAR},
1756        {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1757        {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1758        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1759         MOPT_EXT4_ONLY | MOPT_CLEAR},
1760        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1761         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1762        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1763                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
1764         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1765        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1766        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1767        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1768        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1769        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1770         MOPT_NO_EXT2},
1771        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1772         MOPT_NO_EXT2},
1773        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1774        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1775        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1776        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1777        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1778        {Opt_commit, 0, MOPT_GTE0},
1779        {Opt_max_batch_time, 0, MOPT_GTE0},
1780        {Opt_min_batch_time, 0, MOPT_GTE0},
1781        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1782        {Opt_init_itable, 0, MOPT_GTE0},
1783        {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
1784        {Opt_stripe, 0, MOPT_GTE0},
1785        {Opt_resuid, 0, MOPT_GTE0},
1786        {Opt_resgid, 0, MOPT_GTE0},
1787        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1788        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
1789        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
1790        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1791        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1792        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1793         MOPT_NO_EXT2 | MOPT_DATAJ},
1794        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1795        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1796#ifdef CONFIG_EXT4_FS_POSIX_ACL
1797        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1798        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
1799#else
1800        {Opt_acl, 0, MOPT_NOSUPPORT},
1801        {Opt_noacl, 0, MOPT_NOSUPPORT},
1802#endif
1803        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1804        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1805        {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
1806        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1807        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1808                                                        MOPT_SET | MOPT_Q},
1809        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1810                                                        MOPT_SET | MOPT_Q},
1811        {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1812                                                        MOPT_SET | MOPT_Q},
1813        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1814                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1815                                                        MOPT_CLEAR | MOPT_Q},
1816        {Opt_usrjquota, 0, MOPT_Q},
1817        {Opt_grpjquota, 0, MOPT_Q},
1818        {Opt_offusrjquota, 0, MOPT_Q},
1819        {Opt_offgrpjquota, 0, MOPT_Q},
1820        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1821        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1822        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1823        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1824        {Opt_test_dummy_encryption, 0, MOPT_GTE0},
1825        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1826        {Opt_err, 0, 0}
1827};
1828
1829#ifdef CONFIG_UNICODE
1830static const struct ext4_sb_encodings {
1831        __u16 magic;
1832        char *name;
1833        char *version;
1834} ext4_sb_encoding_map[] = {
1835        {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
1836};
1837
1838static int ext4_sb_read_encoding(const struct ext4_super_block *es,
1839                                 const struct ext4_sb_encodings **encoding,
1840                                 __u16 *flags)
1841{
1842        __u16 magic = le16_to_cpu(es->s_encoding);
1843        int i;
1844
1845        for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1846                if (magic == ext4_sb_encoding_map[i].magic)
1847                        break;
1848
1849        if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
1850                return -EINVAL;
1851
1852        *encoding = &ext4_sb_encoding_map[i];
1853        *flags = le16_to_cpu(es->s_encoding_flags);
1854
1855        return 0;
1856}
1857#endif
1858
1859static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1860                            substring_t *args, unsigned long *journal_devnum,
1861                            unsigned int *journal_ioprio, int is_remount)
1862{
1863        struct ext4_sb_info *sbi = EXT4_SB(sb);
1864        const struct mount_opts *m;
1865        kuid_t uid;
1866        kgid_t gid;
1867        int arg = 0;
1868
1869#ifdef CONFIG_QUOTA
1870        if (token == Opt_usrjquota)
1871                return set_qf_name(sb, USRQUOTA, &args[0]);
1872        else if (token == Opt_grpjquota)
1873                return set_qf_name(sb, GRPQUOTA, &args[0]);
1874        else if (token == Opt_offusrjquota)
1875                return clear_qf_name(sb, USRQUOTA);
1876        else if (token == Opt_offgrpjquota)
1877                return clear_qf_name(sb, GRPQUOTA);
1878#endif
1879        switch (token) {
1880        case Opt_noacl:
1881        case Opt_nouser_xattr:
1882                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1883                break;
1884        case Opt_sb:
1885                return 1;       /* handled by get_sb_block() */
1886        case Opt_removed:
1887                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1888                return 1;
1889        case Opt_abort:
1890                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1891                return 1;
1892        case Opt_i_version:
1893                sb->s_flags |= SB_I_VERSION;
1894                return 1;
1895        case Opt_lazytime:
1896                sb->s_flags |= SB_LAZYTIME;
1897                return 1;
1898        case Opt_nolazytime:
1899                sb->s_flags &= ~SB_LAZYTIME;
1900                return 1;
1901        }
1902
1903        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1904                if (token == m->token)
1905                        break;
1906
1907        if (m->token == Opt_err) {
1908                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1909                         "or missing value", opt);
1910                return -1;
1911        }
1912
1913        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1914                ext4_msg(sb, KERN_ERR,
1915                         "Mount option \"%s\" incompatible with ext2", opt);
1916                return -1;
1917        }
1918        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1919                ext4_msg(sb, KERN_ERR,
1920                         "Mount option \"%s\" incompatible with ext3", opt);
1921                return -1;
1922        }
1923
1924        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1925                return -1;
1926        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1927                return -1;
1928        if (m->flags & MOPT_EXPLICIT) {
1929                if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
1930                        set_opt2(sb, EXPLICIT_DELALLOC);
1931                } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
1932                        set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
1933                } else
1934                        return -1;
1935        }
1936        if (m->flags & MOPT_CLEAR_ERR)
1937                clear_opt(sb, ERRORS_MASK);
1938        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1939                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1940                         "options when quota turned on");
1941                return -1;
1942        }
1943
1944        if (m->flags & MOPT_NOSUPPORT) {
1945                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1946        } else if (token == Opt_commit) {
1947                if (arg == 0)
1948                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1949                else if (arg > INT_MAX / HZ) {
1950                        ext4_msg(sb, KERN_ERR,
1951                                 "Invalid commit interval %d, "
1952                                 "must be smaller than %d",
1953                                 arg, INT_MAX / HZ);
1954                        return -1;
1955                }
1956                sbi->s_commit_interval = HZ * arg;
1957        } else if (token == Opt_debug_want_extra_isize) {
1958                if ((arg & 1) ||
1959                    (arg < 4) ||
1960                    (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
1961                        ext4_msg(sb, KERN_ERR,
1962                                 "Invalid want_extra_isize %d", arg);
1963                        return -1;
1964                }
1965                sbi->s_want_extra_isize = arg;
1966        } else if (token == Opt_max_batch_time) {
1967                sbi->s_max_batch_time = arg;
1968        } else if (token == Opt_min_batch_time) {
1969                sbi->s_min_batch_time = arg;
1970        } else if (token == Opt_inode_readahead_blks) {
1971                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1972                        ext4_msg(sb, KERN_ERR,
1973                                 "EXT4-fs: inode_readahead_blks must be "
1974                                 "0 or a power of 2 smaller than 2^31");
1975                        return -1;
1976                }
1977                sbi->s_inode_readahead_blks = arg;
1978        } else if (token == Opt_init_itable) {
1979                set_opt(sb, INIT_INODE_TABLE);
1980                if (!args->from)
1981                        arg = EXT4_DEF_LI_WAIT_MULT;
1982                sbi->s_li_wait_mult = arg;
1983        } else if (token == Opt_max_dir_size_kb) {
1984                sbi->s_max_dir_size_kb = arg;
1985        } else if (token == Opt_stripe) {
1986                sbi->s_stripe = arg;
1987        } else if (token == Opt_resuid) {
1988                uid = make_kuid(current_user_ns(), arg);
1989                if (!uid_valid(uid)) {
1990                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1991                        return -1;
1992                }
1993                sbi->s_resuid = uid;
1994        } else if (token == Opt_resgid) {
1995                gid = make_kgid(current_user_ns(), arg);
1996                if (!gid_valid(gid)) {
1997                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1998                        return -1;
1999                }
2000                sbi->s_resgid = gid;
2001        } else if (token == Opt_journal_dev) {
2002                if (is_remount) {
2003                        ext4_msg(sb, KERN_ERR,
2004                                 "Cannot specify journal on remount");
2005                        return -1;
2006                }
2007                *journal_devnum = arg;
2008        } else if (token == Opt_journal_path) {
2009                char *journal_path;
2010                struct inode *journal_inode;
2011                struct path path;
2012                int error;
2013
2014                if (is_remount) {
2015                        ext4_msg(sb, KERN_ERR,
2016                                 "Cannot specify journal on remount");
2017                        return -1;
2018                }
2019                journal_path = match_strdup(&args[0]);
2020                if (!journal_path) {
2021                        ext4_msg(sb, KERN_ERR, "error: could not dup "
2022                                "journal device string");
2023                        return -1;
2024                }
2025
2026                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
2027                if (error) {
2028                        ext4_msg(sb, KERN_ERR, "error: could not find "
2029                                "journal device path: error %d", error);
2030                        kfree(journal_path);
2031                        return -1;
2032                }
2033
2034                journal_inode = d_inode(path.dentry);
2035                if (!S_ISBLK(journal_inode->i_mode)) {
2036                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
2037                                "is not a block device", journal_path);
2038                        path_put(&path);
2039                        kfree(journal_path);
2040                        return -1;
2041                }
2042
2043                *journal_devnum = new_encode_dev(journal_inode->i_rdev);
2044                path_put(&path);
2045                kfree(journal_path);
2046        } else if (token == Opt_journal_ioprio) {
2047                if (arg > 7) {
2048                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
2049                                 " (must be 0-7)");
2050                        return -1;
2051                }
2052                *journal_ioprio =
2053                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
2054        } else if (token == Opt_test_dummy_encryption) {
2055#ifdef CONFIG_FS_ENCRYPTION
2056                sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
2057                ext4_msg(sb, KERN_WARNING,
2058                         "Test dummy encryption mode enabled");
2059#else
2060                ext4_msg(sb, KERN_WARNING,
2061                         "Test dummy encryption mount option ignored");
2062#endif
2063        } else if (m->flags & MOPT_DATAJ) {
2064                if (is_remount) {
2065                        if (!sbi->s_journal)
2066                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
2067                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
2068                                ext4_msg(sb, KERN_ERR,
2069                                         "Cannot change data mode on remount");
2070                                return -1;
2071                        }
2072                } else {
2073                        clear_opt(sb, DATA_FLAGS);
2074                        sbi->s_mount_opt |= m->mount_opt;
2075                }
2076#ifdef CONFIG_QUOTA
2077        } else if (m->flags & MOPT_QFMT) {
2078                if (sb_any_quota_loaded(sb) &&
2079                    sbi->s_jquota_fmt != m->mount_opt) {
2080                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
2081                                 "quota options when quota turned on");
2082                        return -1;
2083                }
2084                if (ext4_has_feature_quota(sb)) {
2085                        ext4_msg(sb, KERN_INFO,
2086                                 "Quota format mount options ignored "
2087                                 "when QUOTA feature is enabled");
2088                        return 1;
2089                }
2090                sbi->s_jquota_fmt = m->mount_opt;
2091#endif
2092        } else if (token == Opt_dax) {
2093#ifdef CONFIG_FS_DAX
2094                ext4_msg(sb, KERN_WARNING,
2095                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
2096                sbi->s_mount_opt |= m->mount_opt;
2097#else
2098                ext4_msg(sb, KERN_INFO, "dax option not supported");
2099                return -1;
2100#endif
2101        } else if (token == Opt_data_err_abort) {
2102                sbi->s_mount_opt |= m->mount_opt;
2103        } else if (token == Opt_data_err_ignore) {
2104                sbi->s_mount_opt &= ~m->mount_opt;
2105        } else {
2106                if (!args->from)
2107                        arg = 1;
2108                if (m->flags & MOPT_CLEAR)
2109                        arg = !arg;
2110                else if (unlikely(!(m->flags & MOPT_SET))) {
2111                        ext4_msg(sb, KERN_WARNING,
2112                                 "buggy handling of option %s", opt);
2113                        WARN_ON(1);
2114                        return -1;
2115                }
2116                if (arg != 0)
2117                        sbi->s_mount_opt |= m->mount_opt;
2118                else
2119                        sbi->s_mount_opt &= ~m->mount_opt;
2120        }
2121        return 1;
2122}
2123
2124static int parse_options(char *options, struct super_block *sb,
2125                         unsigned long *journal_devnum,
2126                         unsigned int *journal_ioprio,
2127                         int is_remount)
2128{
2129        struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
2130        char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
2131        substring_t args[MAX_OPT_ARGS];
2132        int token;
2133
2134        if (!options)
2135                return 1;
2136
2137        while ((p = strsep(&options, ",")) != NULL) {
2138                if (!*p)
2139                        continue;
2140                /*
2141                 * Initialize args struct so we know whether arg was
2142                 * found; some options take optional arguments.
2143                 */
2144                args[0].to = args[0].from = NULL;
2145                token = match_token(p, tokens, args);
2146                if (handle_mount_opt(sb, p, token, args, journal_devnum,
2147                                     journal_ioprio, is_remount) < 0)
2148                        return 0;
2149        }
2150#ifdef CONFIG_QUOTA
2151        /*
2152         * We do the test below only for project quotas. 'usrquota' and
2153         * 'grpquota' mount options are allowed even without quota feature
2154         * to support legacy quotas in quota files.
2155         */
2156        if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
2157                ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
2158                         "Cannot enable project quota enforcement.");
2159                return 0;
2160        }
2161        usr_qf_name = get_qf_name(sb, sbi, USRQUOTA);
2162        grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA);
2163        if (usr_qf_name || grp_qf_name) {
2164                if (test_opt(sb, USRQUOTA) && usr_qf_name)
2165                        clear_opt(sb, USRQUOTA);
2166
2167                if (test_opt(sb, GRPQUOTA) && grp_qf_name)
2168                        clear_opt(sb, GRPQUOTA);
2169
2170                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
2171                        ext4_msg(sb, KERN_ERR, "old and new quota "
2172                                        "format mixing");
2173                        return 0;
2174                }
2175
2176                if (!sbi->s_jquota_fmt) {
2177                        ext4_msg(sb, KERN_ERR, "journaled quota format "
2178                                        "not specified");
2179                        return 0;
2180                }
2181        }
2182#endif
2183        return 1;
2184}
2185
/*
 * Print the journaled quota settings (jqfmt=, usrjquota=, grpjquota=) of
 * @sb into @seq.  Does nothing unless CONFIG_QUOTA is defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_name;
	char *grp_name;

	if (sbi->s_jquota_fmt) {
		/* An id without a known name prints as an empty string. */
		char *label = "";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			label = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			label = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			label = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", label);
	}

	/* The name strings are RCU protected; print under the read lock. */
	rcu_read_lock();
	usr_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_name != NULL)
		seq_show_option(seq, "usrjquota", usr_name);
	if (grp_name != NULL)
		seq_show_option(seq, "grpjquota", grp_name);
	rcu_read_unlock();
#endif
}
2220
2221static const char *token2str(int token)
2222{
2223        const struct match_token *t;
2224
2225        for (t = tokens; t->token != Opt_err; t++)
2226                if (t->token == token && !strchr(t->pattern, '='))
2227                        break;
2228        return t->pattern;
2229}
2230
2231/*
2232 * Show an option if
2233 *  - it's set to a non-default value OR
2234 *  - if the per-sb default is different from the global default
2235 */
2236static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2237                              int nodefs)
2238{
2239        struct ext4_sb_info *sbi = EXT4_SB(sb);
2240        struct ext4_super_block *es = sbi->s_es;
2241        int def_errors, def_mount_opt = sbi->s_def_mount_opt;
2242        const struct mount_opts *m;
2243        char sep = nodefs ? '\n' : ',';
2244
2245#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2246#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2247
2248        if (sbi->s_sb_block != 1)
2249                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2250
2251        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2252                int want_set = m->flags & MOPT_SET;
2253                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2254                    (m->flags & MOPT_CLEAR_ERR))
2255                        continue;
2256                if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
2257                        continue; /* skip if same as the default */
2258                if ((want_set &&
2259                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
2260                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
2261                        continue; /* select Opt_noFoo vs Opt_Foo */
2262                SEQ_OPTS_PRINT("%s", token2str(m->token));
2263        }
2264
2265        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2266            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2267                SEQ_OPTS_PRINT("resuid=%u",
2268                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
2269        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2270            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2271                SEQ_OPTS_PRINT("resgid=%u",
2272                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
2273        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2274        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2275                SEQ_OPTS_PUTS("errors=remount-ro");
2276        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2277                SEQ_OPTS_PUTS("errors=continue");
2278        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2279                SEQ_OPTS_PUTS("errors=panic");
2280        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2281                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2282        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2283                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2284        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2285                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2286        if (sb->s_flags & SB_I_VERSION)
2287                SEQ_OPTS_PUTS("i_version");
2288        if (nodefs || sbi->s_stripe)
2289                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2290        if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2291                        (sbi->s_mount_opt ^ def_mount_opt)) {
2292                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2293                        SEQ_OPTS_PUTS("data=journal");
2294                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2295                        SEQ_OPTS_PUTS("data=ordered");
2296                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2297                        SEQ_OPTS_PUTS("data=writeback");
2298        }
2299        if (nodefs ||
2300            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2301                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2302                               sbi->s_inode_readahead_blks);
2303
2304        if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
2305                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2306                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2307        if (nodefs || sbi->s_max_dir_size_kb)
2308                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2309        if (test_opt(sb, DATA_ERR_ABORT))
2310                SEQ_OPTS_PUTS("data_err=abort");
2311        if (DUMMY_ENCRYPTION_ENABLED(sbi))
2312                SEQ_OPTS_PUTS("test_dummy_encryption");
2313
2314        ext4_show_quota_options(seq, sb);
2315        return 0;
2316}
2317
2318static int ext4_show_options(struct seq_file *seq, struct dentry *root)
2319{
2320        return _ext4_show_options(seq, root->d_sb, 0);
2321}
2322
2323int ext4_seq_options_show(struct seq_file *seq, void *offset)
2324{
2325        struct super_block *sb = seq->private;
2326        int rc;
2327
2328        seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
2329        rc = _ext4_show_options(seq, sb, 1);
2330        seq_puts(seq, "\n");
2331        return rc;
2332}
2333
2334static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
2335                            int read_only)
2336{
2337        struct ext4_sb_info *sbi = EXT4_SB(sb);
2338        int err = 0;
2339
2340        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
2341                ext4_msg(sb, KERN_ERR, "revision level too high, "
2342                         "forcing read-only mode");
2343                err = -EROFS;
2344        }
2345        if (read_only)
2346                goto done;
2347        if (!(sbi->s_mount_state & EXT4_VALID_FS))
2348                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
2349                         "running e2fsck is recommended");
2350        else if (sbi->s_mount_state & EXT4_ERROR_FS)
2351                ext4_msg(sb, KERN_WARNING,
2352                         "warning: mounting fs with errors, "
2353                         "running e2fsck is recommended");
2354        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
2355                 le16_to_cpu(es->s_mnt_count) >=
2356                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
2357                ext4_msg(sb, KERN_WARNING,
2358                         "warning: maximal mount count reached, "
2359                         "running e2fsck is recommended");
2360        else if (le32_to_cpu(es->s_checkinterval) &&
2361                 (ext4_get_tstamp(es, s_lastcheck) +
2362                  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
2363                ext4_msg(sb, KERN_WARNING,
2364                         "warning: checktime reached, "
2365                         "running e2fsck is recommended");
2366        if (!sbi->s_journal)
2367                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
2368        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
2369                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
2370        le16_add_cpu(&es->s_mnt_count, 1);
2371        ext4_update_tstamp(es, s_mtime);
2372        if (sbi->s_journal)
2373                ext4_set_feature_journal_needs_recovery(sb);
2374
2375        err = ext4_commit_super(sb, 1);
2376done:
2377        if (test_opt(sb, DEBUG))
2378                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
2379                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
2380                        sb->s_blocksize,
2381                        sbi->s_groups_count,
2382                        EXT4_BLOCKS_PER_GROUP(sb),
2383                        EXT4_INODES_PER_GROUP(sb),
2384                        sbi->s_mount_opt, sbi->s_mount_opt2);
2385
2386        cleancache_init_fs(sb);
2387        return err;
2388}
2389
2390int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
2391{
2392        struct ext4_sb_info *sbi = EXT4_SB(sb);
2393        struct flex_groups **old_groups, **new_groups;
2394        int size, i, j;
2395
2396        if (!sbi->s_log_groups_per_flex)
2397                return 0;
2398
2399        size = ext4_flex_group(sbi, ngroup - 1) + 1;
2400        if (size <= sbi->s_flex_groups_allocated)
2401                return 0;
2402
2403        new_groups = kvzalloc(roundup_pow_of_two(size *
2404                              sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
2405        if (!new_groups) {
2406                ext4_msg(sb, KERN_ERR,
2407                         "not enough memory for %d flex group pointers", size);
2408                return -ENOMEM;
2409        }
2410        for (i = sbi->s_flex_groups_allocated; i < size; i++) {
2411                new_groups[i] = kvzalloc(roundup_pow_of_two(
2412                                         sizeof(struct flex_groups)),
2413                                         GFP_KERNEL);
2414                if (!new_groups[i]) {
2415                        for (j = sbi->s_flex_groups_allocated; j < i; j++)
2416                                kvfree(new_groups[j]);
2417                        kvfree(new_groups);
2418                        ext4_msg(sb, KERN_ERR,
2419                                 "not enough memory for %d flex groups", size);
2420                        return -ENOMEM;
2421                }
2422        }
2423        rcu_read_lock();
2424        old_groups = rcu_dereference(sbi->s_flex_groups);
2425        if (old_groups)
2426                memcpy(new_groups, old_groups,
2427                       (sbi->s_flex_groups_allocated *
2428                        sizeof(struct flex_groups *)));
2429        rcu_read_unlock();
2430        rcu_assign_pointer(sbi->s_flex_groups, new_groups);
2431        sbi->s_flex_groups_allocated = size;
2432        if (old_groups)
2433                ext4_kvfree_array_rcu(old_groups);
2434        return 0;
2435}
2436
2437static int ext4_fill_flex_info(struct super_block *sb)
2438{
2439        struct ext4_sb_info *sbi = EXT4_SB(sb);
2440        struct ext4_group_desc *gdp = NULL;
2441        struct flex_groups *fg;
2442        ext4_group_t flex_group;
2443        int i, err;
2444
2445        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2446        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2447                sbi->s_log_groups_per_flex = 0;
2448                return 1;
2449        }
2450
2451        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2452        if (err)
2453                goto failed;
2454
2455        for (i = 0; i < sbi->s_groups_count; i++) {
2456                gdp = ext4_get_group_desc(sb, i, NULL);
2457
2458                flex_group = ext4_flex_group(sbi, i);
2459                fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
2460                atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
2461                atomic64_add(ext4_free_group_clusters(sb, gdp),
2462                             &fg->free_clusters);
2463                atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
2464        }
2465
2466        return 1;
2467failed:
2468        return 0;
2469}
2470
2471static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
2472                                   struct ext4_group_desc *gdp)
2473{
2474        int offset = offsetof(struct ext4_group_desc, bg_checksum);
2475        __u16 crc = 0;
2476        __le32 le_group = cpu_to_le32(block_group);
2477        struct ext4_sb_info *sbi = EXT4_SB(sb);
2478
2479        if (ext4_has_metadata_csum(sbi->s_sb)) {
2480                /* Use new metadata_csum algorithm */
2481                __u32 csum32;
2482                __u16 dummy_csum = 0;
2483
2484                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2485                                     sizeof(le_group));
2486                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
2487                csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
2488                                     sizeof(dummy_csum));
2489                offset += sizeof(dummy_csum);
2490                if (offset < sbi->s_desc_size)
2491                        csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
2492                                             sbi->s_desc_size - offset);
2493
2494                crc = csum32 & 0xFFFF;
2495                goto out;
2496        }
2497
2498        /* old crc16 code */
2499        if (!ext4_has_feature_gdt_csum(sb))
2500                return 0;
2501
2502        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2503        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2504        crc = crc16(crc, (__u8 *)gdp, offset);
2505        offset += sizeof(gdp->bg_checksum); /* skip checksum */
2506        /* for checksum of struct ext4_group_desc do the rest...*/
2507        if (ext4_has_feature_64bit(sb) &&
2508            offset < le16_to_cpu(sbi->s_es->s_desc_size))
2509                crc = crc16(crc, (__u8 *)gdp + offset,
2510                            le16_to_cpu(sbi->s_es->s_desc_size) -
2511                                offset);
2512
2513out:
2514        return cpu_to_le16(crc);
2515}
2516
2517int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2518                                struct ext4_group_desc *gdp)
2519{
2520        if (ext4_has_group_desc_csum(sb) &&
2521            (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
2522                return 0;
2523
2524        return 1;
2525}
2526
2527void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2528                              struct ext4_group_desc *gdp)
2529{
2530        if (!ext4_has_group_desc_csum(sb))
2531                return;
2532        gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
2533}
2534
2535/* Called at mount-time, super-block is locked */
2536static int ext4_check_descriptors(struct super_block *sb,
2537                                  ext4_fsblk_t sb_block,
2538                                  ext4_group_t *first_not_zeroed)
2539{
2540        struct ext4_sb_info *sbi = EXT4_SB(sb);
2541        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2542        ext4_fsblk_t last_block;
2543        ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
2544        ext4_fsblk_t block_bitmap;
2545        ext4_fsblk_t inode_bitmap;
2546        ext4_fsblk_t inode_table;
2547        int flexbg_flag = 0;
2548        ext4_group_t i, grp = sbi->s_groups_count;
2549
2550        if (ext4_has_feature_flex_bg(sb))
2551                flexbg_flag = 1;
2552
2553        ext4_debug("Checking group descriptors");
2554
2555        for (i = 0; i < sbi->s_groups_count; i++) {
2556                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2557
2558                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2559                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2560                else
2561                        last_block = first_block +
2562                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2563
2564                if ((grp == sbi->s_groups_count) &&
2565                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2566                        grp = i;
2567
2568                block_bitmap = ext4_block_bitmap(sb, gdp);
2569                if (block_bitmap == sb_block) {
2570                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2571                                 "Block bitmap for group %u overlaps "
2572                                 "superblock", i);
2573                        if (!sb_rdonly(sb))
2574                                return 0;
2575                }
2576                if (block_bitmap >= sb_block + 1 &&
2577                    block_bitmap <= last_bg_block) {
2578                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2579                                 "Block bitmap for group %u overlaps "
2580                                 "block group descriptors", i);
2581                        if (!sb_rdonly(sb))
2582                                return 0;
2583                }
2584                if (block_bitmap < first_block || block_bitmap > last_block) {
2585                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2586                               "Block bitmap for group %u not in group "
2587                               "(block %llu)!", i, block_bitmap);
2588                        return 0;
2589                }
2590                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2591                if (inode_bitmap == sb_block) {
2592                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2593                                 "Inode bitmap for group %u overlaps "
2594                                 "superblock", i);
2595                        if (!sb_rdonly(sb))
2596                                return 0;
2597                }
2598                if (inode_bitmap >= sb_block + 1 &&
2599                    inode_bitmap <= last_bg_block) {
2600                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2601                                 "Inode bitmap for group %u overlaps "
2602                                 "block group descriptors", i);
2603                        if (!sb_rdonly(sb))
2604                                return 0;
2605                }
2606                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2607                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2608                               "Inode bitmap for group %u not in group "
2609                               "(block %llu)!", i, inode_bitmap);
2610                        return 0;
2611                }
2612                inode_table = ext4_inode_table(sb, gdp);
2613                if (inode_table == sb_block) {
2614                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2615                                 "Inode table for group %u overlaps "
2616                                 "superblock", i);
2617                        if (!sb_rdonly(sb))
2618                                return 0;
2619                }
2620                if (inode_table >= sb_block + 1 &&
2621                    inode_table <= last_bg_block) {
2622                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2623                                 "Inode table for group %u overlaps "
2624                                 "block group descriptors", i);
2625                        if (!sb_rdonly(sb))
2626                                return 0;
2627                }
2628                if (inode_table < first_block ||
2629                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2630                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2631                               "Inode table for group %u not in group "
2632                               "(block %llu)!", i, inode_table);
2633                        return 0;
2634                }
2635                ext4_lock_group(sb, i);
2636                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2637                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2638                                 "Checksum for group %u failed (%u!=%u)",
2639                                 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
2640                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2641                        if (!sb_rdonly(sb)) {
2642                                ext4_unlock_group(sb, i);
2643                                return 0;
2644                        }
2645                }
2646                ext4_unlock_group(sb, i);
2647                if (!flexbg_flag)
2648                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2649        }
2650        if (NULL != first_not_zeroed)
2651                *first_not_zeroed = grp;
2652        return 1;
2653}
2654
2655/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2656 * the superblock) which were deleted from all directories, but held open by
2657 * a process at the time of a crash.  We walk the list and try to delete these
2658 * inodes at recovery time (only with a read-write filesystem).
2659 *
2660 * In order to keep the orphan inode chain consistent during traversal (in
2661 * case of crash during recovery), we link each inode into the superblock
2662 * orphan list_head and handle it the same way as an inode deletion during
2663 * normal operation (which journals the operations for us).
2664 *
2665 * We only do an iget() and an iput() on each inode, which is very safe if we
2666 * accidentally point at an in-use or already deleted inode.  The worst that
2667 * can happen in this case is that we get a "bit already cleared" message from
2668 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2669 * e2fsck was run on this filesystem, and it must have already done the orphan
2670 * inode cleanup for us, so we can safely abort without any further action.
2671 */
2672static void ext4_orphan_cleanup(struct super_block *sb,
2673                                struct ext4_super_block *es)
2674{
2675        unsigned int s_flags = sb->s_flags;
2676        int ret, nr_orphans = 0, nr_truncates = 0;
2677#ifdef CONFIG_QUOTA
2678        int quota_update = 0;
2679        int i;
2680#endif
2681        if (!es->s_last_orphan) {
2682                jbd_debug(4, "no orphan inodes to clean up\n");
2683                return;
2684        }
2685
2686        if (bdev_read_only(sb->s_bdev)) {
2687                ext4_msg(sb, KERN_ERR, "write access "
2688                        "unavailable, skipping orphan cleanup");
2689                return;
2690        }
2691
2692        /* Check if feature set would not allow a r/w mount */
2693        if (!ext4_feature_set_ok(sb, 0)) {
2694                ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2695                         "unknown ROCOMPAT features");
2696                return;
2697        }
2698
2699        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2700                /* don't clear list on RO mount w/ errors */
2701                if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
2702                        ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2703                                  "clearing orphan list.\n");
2704                        es->s_last_orphan = 0;
2705                }
2706                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2707                return;
2708        }
2709
2710        if (s_flags & SB_RDONLY) {
2711                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2712                sb->s_flags &= ~SB_RDONLY;
2713        }
2714#ifdef CONFIG_QUOTA
2715        /* Needed for iput() to work correctly and not trash data */
2716        sb->s_flags |= SB_ACTIVE;
2717
2718        /*
2719         * Turn on quotas which were not enabled for read-only mounts if
2720         * filesystem has quota feature, so that they are updated correctly.
2721         */
2722        if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
2723                int ret = ext4_enable_quotas(sb);
2724
2725                if (!ret)
2726                        quota_update = 1;
2727                else
2728                        ext4_msg(sb, KERN_ERR,
2729                                "Cannot turn on quotas: error %d", ret);
2730        }
2731
2732        /* Turn on journaled quotas used for old sytle */
2733        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2734                if (EXT4_SB(sb)->s_qf_names[i]) {
2735                        int ret = ext4_quota_on_mount(sb, i);
2736
2737                        if (!ret)
2738                                quota_update = 1;
2739                        else
2740                                ext4_msg(sb, KERN_ERR,
2741                                        "Cannot turn on journaled "
2742                                        "quota: type %d: error %d", i, ret);
2743                }
2744        }
2745#endif
2746
2747        while (es->s_last_orphan) {
2748                struct inode *inode;
2749
2750                /*
2751                 * We may have encountered an error during cleanup; if
2752                 * so, skip the rest.
2753                 */
2754                if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2755                        jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2756                        es->s_last_orphan = 0;
2757                        break;
2758                }
2759
2760                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2761                if (IS_ERR(inode)) {
2762                        es->s_last_orphan = 0;
2763                        break;
2764                }
2765
2766                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2767                dquot_initialize(inode);
2768                if (inode->i_nlink) {
2769                        if (test_opt(sb, DEBUG))
2770                                ext4_msg(sb, KERN_DEBUG,
2771                                        "%s: truncating inode %lu to %lld bytes",
2772                                        __func__, inode->i_ino, inode->i_size);
2773                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2774                                  inode->i_ino, inode->i_size);
2775                        inode_lock(inode);
2776                        truncate_inode_pages(inode->i_mapping, inode->i_size);
2777                        ret = ext4_truncate(inode);
2778                        if (ret)
2779                                ext4_std_error(inode->i_sb, ret);
2780                        inode_unlock(inode);
2781                        nr_truncates++;
2782                } else {
2783                        if (test_opt(sb, DEBUG))
2784                                ext4_msg(sb, KERN_DEBUG,
2785                                        "%s: deleting unreferenced inode %lu",
2786                                        __func__, inode->i_ino);
2787                        jbd_debug(2, "deleting unreferenced inode %lu\n",
2788                                  inode->i_ino);
2789                        nr_orphans++;
2790                }
2791                iput(inode);  /* The delete magic happens here! */
2792        }
2793
2794#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2795
2796        if (nr_orphans)
2797                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2798                       PLURAL(nr_orphans));
2799        if (nr_truncates)
2800                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2801                       PLURAL(nr_truncates));
2802#ifdef CONFIG_QUOTA
2803        /* Turn off quotas if they were enabled for orphan cleanup */
2804        if (quota_update) {
2805                for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2806                        if (sb_dqopt(sb)->files[i])
2807                                dquot_quota_off(sb, i);
2808                }
2809        }
2810#endif
2811        sb->s_flags = s_flags; /* Restore SB_RDONLY status */
2812}
2813
2814/*
2815 * Maximal extent format file size.
2816 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2817 * extent format containers, within a sector_t, and within i_blocks
2818 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2819 * so that won't be a limiting factor.
2820 *
2821 * However there is other limiting factor. We do store extents in the form
2822 * of starting block and length, hence the resulting length of the extent
2823 * covering maximum file size must fit into on-disk format containers as
2824 * well. Given that length is always by 1 unit bigger than max unit (because
2825 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2826 *
2827 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2828 */
2829static loff_t ext4_max_size(int blkbits, int has_huge_files)
2830{
2831        loff_t res;
2832        loff_t upper_limit = MAX_LFS_FILESIZE;
2833
2834        BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
2835
2836        if (!has_huge_files) {
2837                upper_limit = (1LL << 32) - 1;
2838
2839                /* total blocks in file system block size */
2840                upper_limit >>= (blkbits - 9);
2841                upper_limit <<= blkbits;
2842        }
2843
2844        /*
2845         * 32-bit extent-start container, ee_block. We lower the maxbytes
2846         * by one fs block, so ee_len can cover the extent of maximum file
2847         * size
2848         */
2849        res = (1LL << 32) - 1;
2850        res <<= blkbits;
2851
2852        /* Sanity check against vm- & vfs- imposed limits */
2853        if (res > upper_limit)
2854                res = upper_limit;
2855
2856        return res;
2857}
2858
2859/*
2860 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2861 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2862 * We need to be 1 filesystem block less than the 2^48 sector limit.
2863 */
2864static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2865{
2866        loff_t res = EXT4_NDIR_BLOCKS;
2867        int meta_blocks;
2868        loff_t upper_limit;
2869        /* This is calculated to be the largest file size for a dense, block
2870         * mapped file such that the file's total number of 512-byte sectors,
2871         * including data and all indirect blocks, does not exceed (2^48 - 1).
2872         *
2873         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2874         * number of 512-byte sectors of the file.
2875         */
2876
2877        if (!has_huge_files) {
2878                /*
2879                 * !has_huge_files or implies that the inode i_block field
2880                 * represents total file blocks in 2^32 512-byte sectors ==
2881                 * size of vfs inode i_blocks * 8
2882                 */
2883                upper_limit = (1LL << 32) - 1;
2884
2885                /* total blocks in file system block size */
2886                upper_limit >>= (bits - 9);
2887
2888        } else {
2889                /*
2890                 * We use 48 bit ext4_inode i_blocks
2891                 * With EXT4_HUGE_FILE_FL set the i_blocks
2892                 * represent total number of blocks in
2893                 * file system block size
2894                 */
2895                upper_limit = (1LL << 48) - 1;
2896
2897        }
2898
2899        /* indirect blocks */
2900        meta_blocks = 1;
2901        /* double indirect blocks */
2902        meta_blocks += 1 + (1LL << (bits-2));
2903        /* tripple indirect blocks */
2904        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2905
2906        upper_limit -= meta_blocks;
2907        upper_limit <<= bits;
2908
2909        res += 1LL << (bits-2);
2910        res += 1LL << (2*(bits-2));
2911        res += 1LL << (3*(bits-2));
2912        res <<= bits;
2913        if (res > upper_limit)
2914                res = upper_limit;
2915
2916        if (res > MAX_LFS_FILESIZE)
2917                res = MAX_LFS_FILESIZE;
2918
2919        return res;
2920}
2921
2922static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2923                                   ext4_fsblk_t logical_sb_block, int nr)
2924{
2925        struct ext4_sb_info *sbi = EXT4_SB(sb);
2926        ext4_group_t bg, first_meta_bg;
2927        int has_super = 0;
2928
2929        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2930
2931        if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
2932                return logical_sb_block + nr + 1;
2933        bg = sbi->s_desc_per_block * nr;
2934        if (ext4_bg_has_super(sb, bg))
2935                has_super = 1;
2936
2937        /*
2938         * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2939         * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2940         * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2941         * compensate.
2942         */
2943        if (sb->s_blocksize == 1024 && nr == 0 &&
2944            le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
2945                has_super++;
2946
2947        return (has_super + ext4_group_first_block_no(sb, bg));
2948}
2949
2950/**
2951 * ext4_get_stripe_size: Get the stripe size.
2952 * @sbi: In memory super block info
2953 *
2954 * If we have specified it via mount option, then
2955 * use the mount option value. If the value specified at mount time is
2956 * greater than the blocks per group use the super block value.
2957 * If the super block value is greater than blocks per group return 0.
2958 * Allocator needs it be less than blocks per group.
2959 *
2960 */
2961static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2962{
2963        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2964        unsigned long stripe_width =
2965                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2966        int ret;
2967
2968        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2969                ret = sbi->s_stripe;
2970        else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
2971                ret = stripe_width;
2972        else if (stride && stride <= sbi->s_blocks_per_group)
2973                ret = stride;
2974        else
2975                ret = 0;
2976
2977        /*
2978         * If the stripe width is 1, this makes no sense and
2979         * we set it to 0 to turn off stripe handling code.
2980         */
2981        if (ret <= 1)
2982                ret = 0;
2983
2984        return ret;
2985}
2986
2987/*
2988 * Check whether this filesystem can be mounted based on
2989 * the features present and the RDONLY/RDWR mount requested.
2990 * Returns 1 if this filesystem can be mounted as requested,
2991 * 0 if it cannot be.
2992 */
2993static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2994{
2995        if (ext4_has_unknown_ext4_incompat_features(sb)) {
2996                ext4_msg(sb, KERN_ERR,
2997                        "Couldn't mount because of "
2998                        "unsupported optional features (%x)",
2999                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3000                        ~EXT4_FEATURE_INCOMPAT_SUPP));
3001                return 0;
3002        }
3003
3004#ifndef CONFIG_UNICODE
3005        if (ext4_has_feature_casefold(sb)) {
3006                ext4_msg(sb, KERN_ERR,
3007                         "Filesystem with casefold feature cannot be "
3008                         "mounted without CONFIG_UNICODE");
3009                return 0;
3010        }
3011#endif
3012
3013        if (readonly)
3014                return 1;
3015
3016        if (ext4_has_feature_readonly(sb)) {
3017                ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3018                sb->s_flags |= SB_RDONLY;
3019                return 1;
3020        }
3021
3022        /* Check that feature set is OK for a read-write mount */
3023        if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3024                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3025                         "unsupported optional features (%x)",
3026                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3027                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
3028                return 0;
3029        }
3030        if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3031                ext4_msg(sb, KERN_ERR,
3032                         "Can't support bigalloc feature without "
3033                         "extents feature\n");
3034                return 0;
3035        }
3036
3037#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3038        if (!readonly && (ext4_has_feature_quota(sb) ||
3039                          ext4_has_feature_project(sb))) {
3040                ext4_msg(sb, KERN_ERR,
3041                         "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3042                return 0;
3043        }
3044#endif  /* CONFIG_QUOTA */
3045        return 1;
3046}
3047
3048/*
3049 * This function is called once a day if we have errors logged
3050 * on the file system
3051 */
3052static void print_daily_error_info(struct timer_list *t)
3053{
3054        struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3055        struct super_block *sb = sbi->s_sb;
3056        struct ext4_super_block *es = sbi->s_es;
3057
3058        if (es->s_error_count)
3059                /* fsck newer than v1.41.13 is needed to clean this condition. */
3060                ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3061                         le32_to_cpu(es->s_error_count));
3062        if (es->s_first_error_time) {
3063                printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3064                       sb->s_id,
3065                       ext4_get_tstamp(es, s_first_error_time),
3066                       (int) sizeof(es->s_first_error_func),
3067                       es->s_first_error_func,
3068                       le32_to_cpu(es->s_first_error_line));
3069                if (es->s_first_error_ino)
3070                        printk(KERN_CONT ": inode %u",
3071                               le32_to_cpu(es->s_first_error_ino));
3072                if (es->s_first_error_block)
3073                        printk(KERN_CONT ": block %llu", (unsigned long long)
3074                               le64_to_cpu(es->s_first_error_block));
3075                printk(KERN_CONT "\n");
3076        }
3077        if (es->s_last_error_time) {
3078                printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3079                       sb->s_id,
3080                       ext4_get_tstamp(es, s_last_error_time),
3081                       (int) sizeof(es->s_last_error_func),
3082                       es->s_last_error_func,
3083                       le32_to_cpu(es->s_last_error_line));
3084                if (es->s_last_error_ino)
3085                        printk(KERN_CONT ": inode %u",
3086                               le32_to_cpu(es->s_last_error_ino));
3087                if (es->s_last_error_block)
3088                        printk(KERN_CONT ": block %llu", (unsigned long long)
3089                               le64_to_cpu(es->s_last_error_block));
3090                printk(KERN_CONT "\n");
3091        }
3092        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3093}
3094
3095/* Find next suitable group and run ext4_init_inode_table */
3096static int ext4_run_li_request(struct ext4_li_request *elr)
3097{
3098        struct ext4_group_desc *gdp = NULL;
3099        ext4_group_t group, ngroups;
3100        struct super_block *sb;
3101        unsigned long timeout = 0;
3102        int ret = 0;
3103
3104        sb = elr->lr_super;
3105        ngroups = EXT4_SB(sb)->s_groups_count;
3106
3107        for (group = elr->lr_next_group; group < ngroups; group++) {
3108                gdp = ext4_get_group_desc(sb, group, NULL);
3109                if (!gdp) {
3110                        ret = 1;
3111                        break;
3112                }
3113
3114                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3115                        break;
3116        }
3117
3118        if (group >= ngroups)
3119                ret = 1;
3120
3121        if (!ret) {
3122                timeout = jiffies;
3123                ret = ext4_init_inode_table(sb, group,
3124                                            elr->lr_timeout ? 0 : 1);
3125                if (elr->lr_timeout == 0) {
3126                        timeout = (jiffies - timeout) *
3127                                  elr->lr_sbi->s_li_wait_mult;
3128                        elr->lr_timeout = timeout;
3129                }
3130                elr->lr_next_sched = jiffies + elr->lr_timeout;
3131                elr->lr_next_group = group + 1;
3132        }
3133        return ret;
3134}
3135
3136/*
3137 * Remove lr_request from the list_request and free the
3138 * request structure. Should be called with li_list_mtx held
3139 */
3140static void ext4_remove_li_request(struct ext4_li_request *elr)
3141{
3142        struct ext4_sb_info *sbi;
3143
3144        if (!elr)
3145                return;
3146
3147        sbi = elr->lr_sbi;
3148
3149        list_del(&elr->lr_request);
3150        sbi->s_li_request = NULL;
3151        kfree(elr);
3152}
3153
3154static void ext4_unregister_li_request(struct super_block *sb)
3155{
3156        mutex_lock(&ext4_li_mtx);
3157        if (!ext4_li_info) {
3158                mutex_unlock(&ext4_li_mtx);
3159                return;
3160        }
3161
3162        mutex_lock(&ext4_li_info->li_list_mtx);
3163        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3164        mutex_unlock(&ext4_li_info->li_list_mtx);
3165        mutex_unlock(&ext4_li_mtx);
3166}
3167
3168static struct task_struct *ext4_lazyinit_task; /* set from kthread_run() in ext4_run_lazyinit_thread() */
3169
3170/*
3171 * This is the function where ext4lazyinit thread lives. It walks
3172 * through the request list searching for next scheduled filesystem.
3173 * When such a fs is found, run the lazy initialization request
3174 * (ext4_run_li_request) and keep track of the time spent in this
3175 * function. Based on that time we compute next schedule time of
3176 * the request. When walking through the list is complete, compute
3177 * next waking time and put itself into sleep.
     *
     * @arg is the struct ext4_lazy_init handed to kthread_run(); it must
     * not be NULL.
     *
     * The thread leaves its main loop either when it finds the request
     * list empty or when kthread_should_stop() reports a stop request
     * (in which case the list is emptied first).  Before returning it
     * re-checks the list with both ext4_li_mtx and li_list_mtx held; if
     * a request showed up in the meantime it goes back to the main loop,
     * otherwise it frees ext4_li_info and sets it to NULL.
     *
     * Always returns 0.
3178 */
3179static int ext4_lazyinit_thread(void *arg)
3180{
3181        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
3182        struct list_head *pos, *n;
3183        struct ext4_li_request *elr;
3184        unsigned long next_wakeup, cur;
3185
3186        BUG_ON(NULL == eli);
3187
3188cont_thread:
3189        while (true) {
                    /* Start from "no deadline"; requests below lower it. */
3190                next_wakeup = MAX_JIFFY_OFFSET;
3191
3192                mutex_lock(&eli->li_list_mtx);
3193                if (list_empty(&eli->li_request_list)) {
3194                        mutex_unlock(&eli->li_list_mtx);
3195                        goto exit_thread;
3196                }
3197                list_for_each_safe(pos, n, &eli->li_request_list) {
3198                        int err = 0;
3199                        int progress = 0;
3200                        elr = list_entry(pos, struct ext4_li_request,
3201                                         lr_request);
3202
                            /* Not due yet: only track the earliest deadline. */
3203                        if (time_before(jiffies, elr->lr_next_sched)) {
3204                                if (time_before(elr->lr_next_sched, next_wakeup))
3205                                        next_wakeup = elr->lr_next_sched;
3206                                continue;
3207                        }
                            /*
                             * Both locks are only tried, never waited for;
                             * if either attempt fails the request is simply
                             * rescheduled below (progress stays 0).
                             */
3208                        if (down_read_trylock(&elr->lr_super->s_umount)) {
3209                                if (sb_start_write_trylock(elr->lr_super)) {
3210                                        progress = 1;
3211                                        /*
3212                                         * We hold sb->s_umount, sb can not
3213                                         * be removed from the list, it is
3214                                         * now safe to drop li_list_mtx
3215                                         */
3216                                        mutex_unlock(&eli->li_list_mtx);
3217                                        err = ext4_run_li_request(elr);
3218                                        sb_end_write(elr->lr_super);
3219                                        mutex_lock(&eli->li_list_mtx);
                                            /*
                                             * Refresh the saved "next" pointer;
                                             * presumably the list may have changed
                                             * while li_list_mtx was dropped.
                                             */
3220                                        n = pos->next;
3221                                }
3222                                up_read((&elr->lr_super->s_umount));
3223                        }
3224                        /* error, remove the lazy_init job */
                            /*
                             * ext4_run_li_request() also returns 1 when no
                             * uninitialized group is left, so finished
                             * requests are retired here as well.
                             */
3225                        if (err) {
3226                                ext4_remove_li_request(elr);
3227                                continue;
3228                        }
                            /*
                             * Could not run the request this time: retry after
                             * a random delay below EXT4_DEF_LI_MAX_START_DELAY
                             * seconds.
                             */
3229                        if (!progress) {
3230                                elr->lr_next_sched = jiffies +
3231                                        (prandom_u32()
3232                                         % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3233                        }
3234                        if (time_before(elr->lr_next_sched, next_wakeup))
3235                                next_wakeup = elr->lr_next_sched;
3236                }
3237                mutex_unlock(&eli->li_list_mtx);
3238
3239                try_to_freeze();
3240
3241                cur = jiffies;
                    /*
                     * The deadline has already passed, or no request set one:
                     * rescan right away instead of sleeping.
                     */
3242                if ((time_after_eq(cur, next_wakeup)) ||
3243                    (MAX_JIFFY_OFFSET == next_wakeup)) {
3244                        cond_resched();
3245                        continue;
3246                }
3247
3248                schedule_timeout_interruptible(next_wakeup - cur);
3249
3250                if (kthread_should_stop()) {
3251                        ext4_clear_request_list();
3252                        goto exit_thread;
3253                }
3254        }
3255
3256exit_thread:
3257        /*
3258         * It looks like the request list is empty, but we need
3259         * to check it under the li_list_mtx lock, to prevent any
3260         * additions into it, and of course we should lock ext4_li_mtx
3261         * to atomically free the list and ext4_li_info, because at
3262         * this point another ext4 filesystem could be registering
3263         * new one.
3264         */
3265        mutex_lock(&ext4_li_mtx);
3266        mutex_lock(&eli->li_list_mtx);
3267        if (!list_empty(&eli->li_request_list)) {
3268                mutex_unlock(&eli->li_list_mtx);
3269                mutex_unlock(&ext4_li_mtx);
3270                goto cont_thread;
3271        }
3272        mutex_unlock(&eli->li_list_mtx);
3273        kfree(ext4_li_info);
3274        ext4_li_info = NULL;
3275        mutex_unlock(&ext4_li_mtx);
3276
3277        return 0;
3278}
3279
3280static void ext4_clear_request_list(void)
3281{
3282        struct list_head *pos, *n;
3283        struct ext4_li_request *elr;
3284
3285        mutex_lock(&ext4_li_info->li_list_mtx);
3286        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3287                elr = list_entry(pos, struct ext4_li_request,
3288                                 lr_request);
3289                ext4_remove_li_request(elr);
3290        }
3291        mutex_unlock(&ext4_li_info->li_list_mtx);
3292}
3293
3294static int ext4_run_lazyinit_thread(void)
3295{
3296        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3297                                         ext4_li_info, "ext4lazyinit");
3298        if (IS_ERR(ext4_lazyinit_task)) {
3299                int err = PTR_ERR(ext4_lazyinit_task);
3300                ext4_clear_request_list();
3301                kfree(ext4_li_info);
3302                ext4_li_info = NULL;
3303                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3304                                 "initialization thread\n",
3305                                 err);
3306                return err;
3307        }
3308        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3309        return 0;
3310}
3311
3312/*
3313 * Check whether it make sense to run itable init. thread or not.
3314 * If there is at least one uninitialized inode table, return
3315 * corresponding group number, else the loop goes through all
3316 * groups and return total number of groups.
3317 */
3318static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3319{
3320        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3321        struct ext4_group_desc *gdp = NULL;
3322
3323        if (!ext4_has_group_desc_csum(sb))
3324                return ngroups;
3325
3326        for (group = 0; group < ngroups; group++) {
3327                gdp = ext4_get_group_desc(sb, group, NULL);
3328                if (!gdp)
3329                        continue;
3330
3331                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3332                        break;
3333        }
3334
3335        return group;
3336}
3337
3338static int ext4_li_info_new(void)
3339{
3340        struct ext4_lazy_init *eli = NULL;
3341
3342        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3343        if (!eli)
3344                return -ENOMEM;
3345
3346        INIT_LIST_HEAD(&eli->li_request_list);
3347        mutex_init(&eli->li_list_mtx);
3348
3349        eli->li_state |= EXT4_LAZYINIT_QUIT;
3350
3351        ext4_li_info = eli;
3352
3353        return 0;
3354}
3355
3356static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3357                                            ext4_group_t start)
3358{
3359        struct ext4_sb_info *sbi = EXT4_SB(sb);
3360        struct ext4_li_request *elr;
3361
3362        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3363        if (!elr)
3364                return NULL;
3365
3366        elr->lr_super = sb;
3367        elr->lr_sbi = sbi;
3368        elr->lr_next_group = start;
3369
3370        /*
3371         * Randomize first schedule time of the request to
3372         * spread the inode table initialization requests
3373         * better.
3374         */
3375        elr->lr_next_sched = jiffies + (prandom_u32() %
3376                                (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3377        return elr;
3378}
3379
3380int ext4_register_li_request(struct super_block *sb,
3381                             ext4_group_t first_not_zeroed)
3382{
3383        struct ext4_sb_info *sbi = EXT4_SB(sb);
3384        struct ext4_li_request *elr = NULL;
3385        ext4_group_t ngroups = sbi->s_groups_count;
3386        int ret = 0;
3387
3388        mutex_lock(&ext4_li_mtx);
3389        if (sbi->s_li_request != NULL) {
3390                /*
3391                 * Reset timeout so it can be computed again, because
3392                 * s_li_wait_mult might have changed.
3393                 */
3394                sbi->s_li_request->lr_timeout = 0;
3395                goto out;
3396        }
3397
3398        if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
3399            !test_opt(sb, INIT_INODE_TABLE))
3400                goto out;
3401
3402        elr = ext4_li_request_new(sb, first_not_zeroed);
3403        if (!elr) {
3404                ret = -ENOMEM;
3405                goto out;
3406        }
3407
3408        if (NULL == ext4_li_info) {
3409                ret = ext4_li_info_new();
3410                if (ret)
3411                        goto out;
3412        }
3413
3414        mutex_lock(&ext4_li_info->li_list_mtx);
3415        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3416        mutex_unlock(&ext4_li_info->li_list_mtx);
3417
3418        sbi->s_li_request = elr;
3419        /*
3420         * set elr to NULL here since it has been inserted to
3421         * the request_list and the removal and free of it is
3422         * handled by ext4_clear_request_list from now on.
3423         */
3424        elr = NULL;
3425
3426        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3427                ret = ext4_run_lazyinit_thread();
3428                if (ret)
3429                        goto out;
3430        }
3431out:
3432        mutex_unlock(&ext4_li_mtx);
3433        if (ret)
3434                kfree(elr);
3435        return ret;
3436}
3437
3438/*
3439 * We do not need to lock anything since this is called on
3440 * module unload.
3441 */
3442static void ext4_destroy_lazyinit_thread(void)
3443{
3444        /*
3445         * If thread exited earlier
3446         * there's nothing to be done.
3447         */
3448        if (!ext4_li_info || !ext4_lazyinit_task)
3449                return;
3450
3451        kthread_stop(ext4_lazyinit_task);
3452}
3453
3454static int set_journal_csum_feature_set(struct super_block *sb)
3455{
3456        int ret = 1;
3457        int compat, incompat;
3458        struct ext4_sb_info *sbi = EXT4_SB(sb);
3459
3460        if (ext4_has_metadata_csum(sb)) {
3461                /* journal checksum v3 */
3462                compat = 0;
3463                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3464        } else {
3465                /* journal checksum v1 */
3466                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3467                incompat = 0;
3468        }
3469
3470        jbd2_journal_clear_features(sbi->s_journal,
3471                        JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3472                        JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3473                        JBD2_FEATURE_INCOMPAT_CSUM_V2);
3474        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3475                ret = jbd2_journal_set_features(sbi->s_journal,
3476                                compat, 0,
3477                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3478                                incompat);
3479        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3480                ret = jbd2_journal_set_features(sbi->s_journal,
3481                                compat, 0,
3482                                incompat);
3483                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3484                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3485        } else {
3486                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3487                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3488        }
3489
3490        return ret;
3491}
3492
3493/*
3494 * Note: calculating the overhead so we can be compatible with
3495 * historical BSD practice is quite difficult in the face of
3496 * clusters/bigalloc.  This is because multiple metadata blocks from
3497 * different block group can end up in the same allocation cluster.
3498 * Calculating the exact overhead in the face of clustered allocation
3499 * requires either O(all block bitmaps) in memory or O(number of block
3500 * groups**2) in time.  We will still calculate the superblock for
3501 * older file systems --- and if we come across with a bigalloc file
3502 * system with zero in s_overhead_clusters the estimate will be close to
3503 * correct especially for very large cluster sizes --- but for newer
3504 * file systems, it's better to calculate this figure once at mkfs
3505 * time, and store it in the superblock.  If the superblock value is
3506 * present (even for non-bigalloc file systems), we will use it.
3507 */
3508static int count_overhead(struct super_block *sb, ext4_group_t grp,
3509                          char *buf)
3510{
3511        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3512        struct ext4_group_desc  *gdp;
3513        ext4_fsblk_t            first_block, last_block, b;
3514        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3515        int                     s, j, count = 0;
3516
3517        if (!ext4_has_feature_bigalloc(sb))
3518                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3519                        sbi->s_itb_per_group + 2);
3520
3521        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3522                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3523        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3524        for (i = 0; i < ngroups; i++) {
3525                gdp = ext4_get_group_desc(sb, i, NULL);
3526                b = ext4_block_bitmap(sb, gdp);
3527                if (b >= first_block && b <= last_block) {
3528                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3529                        count++;
3530                }
3531                b = ext4_inode_bitmap(sb, gdp);
3532                if (b >= first_block && b <= last_block) {
3533                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3534                        count++;
3535                }
3536                b = ext4_inode_table(sb, gdp);
3537                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3538                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3539                                int c = EXT4_B2C(sbi, b - first_block);
3540                                ext4_set_bit(c, buf);
3541                                count++;
3542                        }
3543                if (i != grp)
3544                        continue;
3545                s = 0;
3546                if (ext4_bg_has_super(sb, grp)) {
3547                        ext4_set_bit(s++, buf);
3548                        count++;
3549                }
3550                j = ext4_bg_num_gdb(sb, grp);
3551                if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
3552                        ext4_error(sb, "Invalid number of block group "
3553                                   "descriptor blocks: %d", j);
3554                        j = EXT4_BLOCKS_PER_GROUP(sb) - s;
3555                }
3556                count += j;
3557                for (; j > 0; j--)
3558                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3559        }
3560        if (!count)
3561                return 0;
3562        return EXT4_CLUSTERS_PER_GROUP(sb) -
3563                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3564}
3565
3566/*
3567 * Compute the overhead and stash it in sbi->s_overhead
3568 */
3569int ext4_calculate_overhead(struct super_block *sb)
3570{
3571        struct ext4_sb_info *sbi = EXT4_SB(sb);
3572        struct ext4_super_block *es = sbi->s_es;
3573        struct inode *j_inode;
3574        unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
3575        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3576        ext4_fsblk_t overhead = 0;
3577        char *buf = (char *) get_zeroed_page(GFP_NOFS);
3578
3579        if (!buf)
3580                return -ENOMEM;
3581
3582        /*
3583         * Compute the overhead (FS structures).  This is constant
3584         * for a given filesystem unless the number of block groups
3585         * changes so we cache the previous value until it does.
3586         */
3587
3588        /*
3589         * All of the blocks before first_data_block are overhead
3590         */
3591        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3592
3593        /*
3594         * Add the overhead found in each block group
3595         */
3596        for (i = 0; i < ngroups; i++) {
3597                int blks;
3598
3599                blks = count_overhead(sb, i, buf);
3600                overhead += blks;
3601                if (blks)
3602                        memset(buf, 0, PAGE_SIZE);
3603                cond_resched();
3604        }
3605
3606        /*
3607         * Add the internal journal blocks whether the journal has been
3608         * loaded or not
3609         */
3610        if (sbi->s_journal && !sbi->journal_bdev)
3611                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3612        else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
3613                j_inode = ext4_get_journal_inode(sb, j_inum);
3614                if (j_inode) {
3615                        j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
3616                        overhead += EXT4_NUM_B2C(sbi, j_blocks);
3617                        iput(j_inode);
3618                } else {
3619                        ext4_msg(sb, KERN_ERR, "can't get journal size");
3620                }
3621        }
3622        sbi->s_overhead = overhead;
3623        smp_wmb();
3624        free_page((unsigned long) buf);
3625        return 0;
3626}
3627
3628static void ext4_set_resv_clusters(struct super_block *sb)
3629{
3630        ext4_fsblk_t resv_clusters;
3631        struct ext4_sb_info *sbi = EXT4_SB(sb);
3632
3633        /*
3634         * There's no need to reserve anything when we aren't using extents.
3635         * The space estimates are exact, there are no unwritten extents,
3636         * hole punching doesn't need new metadata... This is needed especially
3637         * to keep ext2/3 backward compatibility.
3638         */
3639        if (!ext4_has_feature_extents(sb))
3640                return;
3641        /*
3642         * By default we reserve 2% or 4096 clusters, whichever is smaller.
3643         * This should cover the situations where we can not afford to run
3644         * out of space like for example punch hole, or converting
3645         * unwritten extents in delalloc path. In most cases such
3646         * allocation would require 1, or 2 blocks, higher numbers are
3647         * very rare.
3648         */
3649        resv_clusters = (ext4_blocks_count(sbi->s_es) >>
3650                         sbi->s_cluster_bits);
3651
3652        do_div(resv_clusters, 50);
3653        resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3654
3655        atomic64_set(&sbi->s_resv_clusters, resv_clusters);
3656}
3657
3658static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3659{
3660        struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
3661        char *orig_data = kstrdup(data, GFP_KERNEL);
3662        struct buffer_head *bh, **group_desc;
3663        struct ext4_super_block *es = NULL;
3664        struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3665        struct flex_groups **flex_groups;
3666        ext4_fsblk_t block;
3667        ext4_fsblk_t sb_block = get_sb_block(&data);
3668        ext4_fsblk_t logical_sb_block;
3669        unsigned long offset = 0;
3670        unsigned long journal_devnum = 0;
3671        unsigned long def_mount_opts;
3672        struct inode *root;
3673        const char *descr;
3674        int ret = -ENOMEM;
3675        int blocksize, clustersize;
3676        unsigned int db_count;
3677        unsigned int i;
3678        int needs_recovery, has_huge_files, has_bigalloc;
3679        __u64 blocks_count;
3680        int err = 0;
3681        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3682        ext4_group_t first_not_zeroed;
3683
3684        if ((data && !orig_data) || !sbi)
3685                goto out_free_base;
3686
3687        sbi->s_daxdev = dax_dev;
3688        sbi->s_blockgroup_lock =
3689                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3690        if (!sbi->s_blockgroup_lock)
3691                goto out_free_base;
3692
3693        sb->s_fs_info = sbi;
3694        sbi->s_sb = sb;
3695        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3696        sbi->s_sb_block = sb_block;
3697        if (sb->s_bdev->bd_part)
3698                sbi->s_sectors_written_start =
3699                        part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
3700
3701        /* Cleanup superblock name */
3702        strreplace(sb->s_id, '/', '!');
3703
3704        /* -EINVAL is default */
3705        ret = -EINVAL;
3706        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3707        if (!blocksize) {
3708                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3709                goto out_fail;
3710        }
3711
3712        /*
3713         * The ext4 superblock will not be buffer aligned for other than 1kB
3714         * block sizes.  We need to calculate the offset from buffer start.
3715         */
3716        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3717                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3718                offset = do_div(logical_sb_block, blocksize);
3719        } else {
3720                logical_sb_block = sb_block;
3721        }
3722
3723        if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3724                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3725                goto out_fail;
3726        }
3727        /*
3728         * Note: s_es must be initialized as soon as possible because
3729         *       some ext4 macro-instructions depend on its value
3730         */
3731        es = (struct ext4_super_block *) (bh->b_data + offset);
3732        sbi->s_es = es;
3733        sb->s_magic = le16_to_cpu(es->s_magic);
3734        if (sb->s_magic != EXT4_SUPER_MAGIC)
3735                goto cantfind_ext4;
3736        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3737
3738        /* Warn if metadata_csum and gdt_csum are both set. */
3739        if (ext4_has_feature_metadata_csum(sb) &&
3740            ext4_has_feature_gdt_csum(sb))
3741                ext4_warning(sb, "metadata_csum and uninit_bg are "
3742                             "redundant flags; please run fsck.");
3743
3744        /* Check for a known checksum algorithm */
3745        if (!ext4_verify_csum_type(sb, es)) {
3746                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3747                         "unknown checksum algorithm.");
3748                silent = 1;
3749                goto cantfind_ext4;
3750        }
3751
3752        /* Load the checksum driver */
3753        sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3754        if (IS_ERR(sbi->s_chksum_driver)) {
3755                ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3756                ret = PTR_ERR(sbi->s_chksum_driver);
3757                sbi->s_chksum_driver = NULL;
3758                goto failed_mount;
3759        }
3760
3761        /* Check superblock checksum */
3762        if (!ext4_superblock_csum_verify(sb, es)) {
3763                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3764                         "invalid superblock checksum.  Run e2fsck?");
3765                silent = 1;
3766                ret = -EFSBADCRC;
3767                goto cantfind_ext4;
3768        }
3769
3770        /* Precompute checksum seed for all metadata */
3771        if (ext4_has_feature_csum_seed(sb))
3772                sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3773        else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
3774                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3775                                               sizeof(es->s_uuid));
3776
3777        /* Set defaults before we parse the mount options */
3778        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3779        set_opt(sb, INIT_INODE_TABLE);
3780        if (def_mount_opts & EXT4_DEFM_DEBUG)
3781                set_opt(sb, DEBUG);
3782        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3783                set_opt(sb, GRPID);
3784        if (def_mount_opts & EXT4_DEFM_UID16)
3785                set_opt(sb, NO_UID32);
3786        /* xattr user namespace & acls are now defaulted on */
3787        set_opt(sb, XATTR_USER);
3788        set_opt(sb, DIOREAD_NOLOCK);
3789#ifdef CONFIG_EXT4_FS_POSIX_ACL
3790        set_opt(sb, POSIX_ACL);
3791#endif
3792        /* don't forget to enable journal_csum when metadata_csum is enabled. */
3793        if (ext4_has_metadata_csum(sb))
3794                set_opt(sb, JOURNAL_CHECKSUM);
3795
3796        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3797                set_opt(sb, JOURNAL_DATA);
3798        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3799                set_opt(sb, ORDERED_DATA);
3800        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3801                set_opt(sb, WRITEBACK_DATA);
3802
3803        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3804                set_opt(sb, ERRORS_PANIC);
3805        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3806                set_opt(sb, ERRORS_CONT);
3807        else
3808                set_opt(sb, ERRORS_RO);
3809        /* block_validity enabled by default; disable with noblock_validity */
3810        set_opt(sb, BLOCK_VALIDITY);
3811        if (def_mount_opts & EXT4_DEFM_DISCARD)
3812                set_opt(sb, DISCARD);
3813
3814        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3815        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3816        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3817        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3818        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3819
3820        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3821                set_opt(sb, BARRIER);
3822
3823        /*
3824         * enable delayed allocation by default
3825         * Use -o nodelalloc to turn it off
3826         */
3827        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3828            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3829                set_opt(sb, DELALLOC);
3830
3831        /*
3832         * set default s_li_wait_mult for lazyinit, for the case there is
3833         * no mount option specified.
3834         */
3835        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3836
3837        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3838        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3839            blocksize > EXT4_MAX_BLOCK_SIZE) {
3840                ext4_msg(sb, KERN_ERR,
3841                       "Unsupported filesystem blocksize %d (%d log_block_size)",
3842                         blocksize, le32_to_cpu(es->s_log_block_size));
3843                goto failed_mount;
3844        }
3845
3846        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3847                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3848                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3849        } else {
3850                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3851                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3852                if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
3853                        ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
3854                                 sbi->s_first_ino);
3855                        goto failed_mount;
3856                }
3857                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3858                    (!is_power_of_2(sbi->s_inode_size)) ||
3859                    (sbi->s_inode_size > blocksize)) {
3860                        ext4_msg(sb, KERN_ERR,
3861                               "unsupported inode size: %d",
3862                               sbi->s_inode_size);
3863                        ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
3864                        goto failed_mount;
3865                }
3866                /*
3867                 * i_atime_extra is the last extra field available for
3868                 * [acm]times in struct ext4_inode. Checking for that
3869                 * field should suffice to ensure we have extra space
3870                 * for all three.
3871                 */
3872                if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
3873                        sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
3874                        sb->s_time_gran = 1;
3875                        sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
3876                } else {
3877                        sb->s_time_gran = NSEC_PER_SEC;
3878                        sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
3879                }
3880                sb->s_time_min = EXT4_TIMESTAMP_MIN;
3881        }
3882        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
3883                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3884                        EXT4_GOOD_OLD_INODE_SIZE;
3885                if (ext4_has_feature_extra_isize(sb)) {
3886                        unsigned v, max = (sbi->s_inode_size -
3887                                           EXT4_GOOD_OLD_INODE_SIZE);
3888
3889                        v = le16_to_cpu(es->s_want_extra_isize);
3890                        if (v > max) {
3891                                ext4_msg(sb, KERN_ERR,
3892                                         "bad s_want_extra_isize: %d", v);
3893                                goto failed_mount;
3894                        }
3895                        if (sbi->s_want_extra_isize < v)
3896                                sbi->s_want_extra_isize = v;
3897
3898                        v = le16_to_cpu(es->s_min_extra_isize);
3899                        if (v > max) {
3900                                ext4_msg(sb, KERN_ERR,
3901                                         "bad s_min_extra_isize: %d", v);
3902                                goto failed_mount;
3903                        }
3904                        if (sbi->s_want_extra_isize < v)
3905                                sbi->s_want_extra_isize = v;
3906                }
3907        }
3908
3909        if (sbi->s_es->s_mount_opts[0]) {
3910                char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
3911                                              sizeof(sbi->s_es->s_mount_opts),
3912                                              GFP_KERNEL);
3913                if (!s_mount_opts)
3914                        goto failed_mount;
3915                if (!parse_options(s_mount_opts, sb, &journal_devnum,
3916                                   &journal_ioprio, 0)) {
3917                        ext4_msg(sb, KERN_WARNING,
3918                                 "failed to parse options in superblock: %s",
3919                                 s_mount_opts);
3920                }
3921                kfree(s_mount_opts);
3922        }
3923        sbi->s_def_mount_opt = sbi->s_mount_opt;
3924        if (!parse_options((char *) data, sb, &journal_devnum,
3925                           &journal_ioprio, 0))
3926                goto failed_mount;
3927
3928#ifdef CONFIG_UNICODE
3929        if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
3930                const struct ext4_sb_encodings *encoding_info;
3931                struct unicode_map *encoding;
3932                __u16 encoding_flags;
3933
3934                if (ext4_has_feature_encrypt(sb)) {
3935                        ext4_msg(sb, KERN_ERR,
3936                                 "Can't mount with encoding and encryption");
3937                        goto failed_mount;
3938                }
3939
3940                if (ext4_sb_read_encoding(es, &encoding_info,
3941                                          &encoding_flags)) {
3942                        ext4_msg(sb, KERN_ERR,
3943                                 "Encoding requested by superblock is unknown");
3944                        goto failed_mount;
3945                }
3946
3947                encoding = utf8_load(encoding_info->version);
3948                if (IS_ERR(encoding)) {
3949                        ext4_msg(sb, KERN_ERR,
3950                                 "can't mount with superblock charset: %s-%s "
3951                                 "not supported by the kernel. flags: 0x%x.",
3952                                 encoding_info->name, encoding_info->version,
3953                                 encoding_flags);
3954                        goto failed_mount;
3955                }
3956                ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
3957                         "%s-%s with flags 0x%hx", encoding_info->name,
3958                         encoding_info->version?:"\b", encoding_flags);
3959
3960                sbi->s_encoding = encoding;
3961                sbi->s_encoding_flags = encoding_flags;
3962        }
3963#endif
3964
3965        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3966                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
3967                clear_opt(sb, DIOREAD_NOLOCK);
3968                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3969                        ext4_msg(sb, KERN_ERR, "can't mount with "
3970                                 "both data=journal and delalloc");
3971                        goto failed_mount;
3972                }
3973                if (test_opt(sb, DIOREAD_NOLOCK)) {
3974                        ext4_msg(sb, KERN_ERR, "can't mount with "
3975                                 "both data=journal and dioread_nolock");
3976                        goto failed_mount;
3977                }
3978                if (test_opt(sb, DAX)) {
3979                        ext4_msg(sb, KERN_ERR, "can't mount with "
3980                                 "both data=journal and dax");
3981                        goto failed_mount;
3982                }
3983                if (ext4_has_feature_encrypt(sb)) {
3984                        ext4_msg(sb, KERN_WARNING,
3985                                 "encrypted files will use data=ordered "
3986                                 "instead of data journaling mode");
3987                }
3988                if (test_opt(sb, DELALLOC))
3989                        clear_opt(sb, DELALLOC);
3990        } else {
3991                sb->s_iflags |= SB_I_CGROUPWB;
3992        }
3993
3994        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
3995                (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
3996
3997        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3998            (ext4_has_compat_features(sb) ||
3999             ext4_has_ro_compat_features(sb) ||
4000             ext4_has_incompat_features(sb)))
4001                ext4_msg(sb, KERN_WARNING,
4002                       "feature flags set on rev 0 fs, "
4003                       "running e2fsck is recommended");
4004
4005        if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4006                set_opt2(sb, HURD_COMPAT);
4007                if (ext4_has_feature_64bit(sb)) {
4008                        ext4_msg(sb, KERN_ERR,
4009                                 "The Hurd can't support 64-bit file systems");
4010                        goto failed_mount;
4011                }
4012
4013                /*
4014                 * ea_inode feature uses l_i_version field which is not
4015                 * available in HURD_COMPAT mode.
4016                 */
4017                if (ext4_has_feature_ea_inode(sb)) {
4018                        ext4_msg(sb, KERN_ERR,
4019                                 "ea_inode feature is not supported for Hurd");
4020                        goto failed_mount;
4021                }
4022        }
4023
4024        if (IS_EXT2_SB(sb)) {
4025                if (ext2_feature_set_ok(sb))
4026                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4027                                 "using the ext4 subsystem");
4028                else {
4029                        /*
4030                         * If we're probing be silent, if this looks like
4031                         * it's actually an ext[34] filesystem.
4032                         */
4033                        if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4034                                goto failed_mount;
4035                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4036                                 "to feature incompatibilities");
4037                        goto failed_mount;
4038                }
4039        }
4040
4041        if (IS_EXT3_SB(sb)) {
4042                if (ext3_feature_set_ok(sb))
4043                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4044                                 "using the ext4 subsystem");
4045                else {
4046                        /*
4047                         * If we're probing be silent, if this looks like
4048                         * it's actually an ext4 filesystem.
4049                         */
4050                        if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4051                                goto failed_mount;
4052                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4053                                 "to feature incompatibilities");
4054                        goto failed_mount;
4055                }
4056        }
4057
4058        /*
4059         * Check feature flags regardless of the revision level, since we
4060         * previously didn't change the revision level when setting the flags,
4061         * so there is a chance incompat flags are set on a rev 0 filesystem.
4062         */
4063        if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4064                goto failed_mount;
4065
4066        if (le32_to_cpu(es->s_log_block_size) >
4067            (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4068                ext4_msg(sb, KERN_ERR,
4069                         "Invalid log block size: %u",
4070                         le32_to_cpu(es->s_log_block_size));
4071                goto failed_mount;
4072        }
4073        if (le32_to_cpu(es->s_log_cluster_size) >
4074            (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4075                ext4_msg(sb, KERN_ERR,
4076                         "Invalid log cluster size: %u",
4077                         le32_to_cpu(es->s_log_cluster_size));
4078                goto failed_mount;
4079        }
4080
4081        if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
4082                ext4_msg(sb, KERN_ERR,
4083                         "Number of reserved GDT blocks insanely large: %d",
4084                         le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4085                goto failed_mount;
4086        }
4087
4088        if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
4089                if (ext4_has_feature_inline_data(sb)) {
4090                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4091                                        " that may contain inline data");
4092                        goto failed_mount;
4093                }
4094                if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
4095                        ext4_msg(sb, KERN_ERR,
4096                                "DAX unsupported by block device.");
4097                        goto failed_mount;
4098                }
4099        }
4100
4101        if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4102                ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4103                         es->s_encryption_level);
4104                goto failed_mount;
4105        }
4106
4107        if (sb->s_blocksize != blocksize) {
4108                /* Validate the filesystem blocksize */
4109                if (!sb_set_blocksize(sb, blocksize)) {
4110                        ext4_msg(sb, KERN_ERR, "bad block size %d",
4111                                        blocksize);
4112                        goto failed_mount;
4113                }
4114
4115                brelse(bh);
4116                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
4117                offset = do_div(logical_sb_block, blocksize);
4118                bh = sb_bread_unmovable(sb, logical_sb_block);
4119                if (!bh) {
4120                        ext4_msg(sb, KERN_ERR,
4121                               "Can't read superblock on 2nd try");
4122                        goto failed_mount;
4123                }
4124                es = (struct ext4_super_block *)(bh->b_data + offset);
4125                sbi->s_es = es;
4126                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
4127                        ext4_msg(sb, KERN_ERR,
4128                               "Magic mismatch, very weird!");
4129                        goto failed_mount;
4130                }
4131        }
4132
4133        has_huge_files = ext4_has_feature_huge_file(sb);
4134        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
4135                                                      has_huge_files);
4136        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
4137
4138        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
4139        if (ext4_has_feature_64bit(sb)) {
4140                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
4141                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
4142                    !is_power_of_2(sbi->s_desc_size)) {
4143                        ext4_msg(sb, KERN_ERR,
4144                               "unsupported descriptor size %lu",
4145                               sbi->s_desc_size);
4146                        goto failed_mount;
4147                }
4148        } else
4149                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
4150
4151        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
4152        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
4153
4154        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
4155        if (sbi->s_inodes_per_block == 0)
4156                goto cantfind_ext4;
4157        if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
4158            sbi->s_inodes_per_group > blocksize * 8) {
4159                ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
4160                         sbi->s_blocks_per_group);
4161                goto failed_mount;
4162        }
4163        sbi->s_itb_per_group = sbi->s_inodes_per_group /
4164                                        sbi->s_inodes_per_block;
4165        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
4166        sbi->s_sbh = bh;
4167        sbi->s_mount_state = le16_to_cpu(es->s_state);
4168        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4169        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
4170
4171        for (i = 0; i < 4; i++)
4172                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4173        sbi->s_def_hash_version = es->s_def_hash_version;
4174        if (ext4_has_feature_dir_index(sb)) {
4175                i = le32_to_cpu(es->s_flags);
4176                if (i & EXT2_FLAGS_UNSIGNED_HASH)
4177                        sbi->s_hash_unsigned = 3;
4178                else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
4179#ifdef __CHAR_UNSIGNED__
4180                        if (!sb_rdonly(sb))
4181                                es->s_flags |=
4182                                        cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4183                        sbi->s_hash_unsigned = 3;
4184#else
4185                        if (!sb_rdonly(sb))
4186                                es->s_flags |=
4187                                        cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
4188#endif
4189                }
4190        }
4191
4192        /* Handle clustersize */
4193        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4194        has_bigalloc = ext4_has_feature_bigalloc(sb);
4195        if (has_bigalloc) {
4196                if (clustersize < blocksize) {
4197                        ext4_msg(sb, KERN_ERR,
4198                                 "cluster size (%d) smaller than "
4199                                 "block size (%d)", clustersize, blocksize);
4200                        goto failed_mount;
4201                }
4202                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4203                        le32_to_cpu(es->s_log_block_size);
4204                sbi->s_clusters_per_group =
4205                        le32_to_cpu(es->s_clusters_per_group);
4206                if (sbi->s_clusters_per_group > blocksize * 8) {
4207                        ext4_msg(sb, KERN_ERR,
4208                                 "#clusters per group too big: %lu",
4209                                 sbi->s_clusters_per_group);
4210                        goto failed_mount;
4211                }
4212                if (sbi->s_blocks_per_group !=
4213                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4214                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4215                                 "clusters per group (%lu) inconsistent",
4216                                 sbi->s_blocks_per_group,
4217                                 sbi->s_clusters_per_group);
4218                        goto failed_mount;
4219                }
4220        } else {
4221                if (clustersize != blocksize) {
4222                        ext4_msg(sb, KERN_ERR,
4223                                 "fragment/cluster size (%d) != "
4224                                 "block size (%d)", clustersize, blocksize);
4225                        goto failed_mount;
4226                }
4227                if (sbi->s_blocks_per_group > blocksize * 8) {
4228                        ext4_msg(sb, KERN_ERR,
4229                                 "#blocks per group too big: %lu",
4230                                 sbi->s_blocks_per_group);
4231                        goto failed_mount;
4232                }
4233                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4234                sbi->s_cluster_bits = 0;
4235        }
4236        sbi->s_cluster_ratio = clustersize / blocksize;
4237
4238        /* Do we have standard group size of clustersize * 8 blocks ? */
4239        if (sbi->s_blocks_per_group == clustersize << 3)
4240                set_opt2(sb, STD_GROUP_SIZE);
4241
4242        /*
4243         * Test whether we have more sectors than will fit in sector_t,
4244         * and whether the max offset is addressable by the page cache.
4245         */
4246        err = generic_check_addressable(sb->s_blocksize_bits,
4247                                        ext4_blocks_count(es));
4248        if (err) {
4249                ext4_msg(sb, KERN_ERR, "filesystem"
4250                         " too large to mount safely on this system");
4251                goto failed_mount;
4252        }
4253
4254        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4255                goto cantfind_ext4;
4256
4257        /* check blocks count against device size */
4258        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
4259        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4260                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4261                       "exceeds size of device (%llu blocks)",
4262                       ext4_blocks_count(es), blocks_count);
4263                goto failed_mount;
4264        }
4265
4266        /*
4267         * It makes no sense for the first data block to be beyond the end
4268         * of the filesystem.
4269         */
4270        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4271                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4272                         "block %u is beyond end of filesystem (%llu)",
4273                         le32_to_cpu(es->s_first_data_block),
4274                         ext4_blocks_count(es));
4275                goto failed_mount;
4276        }
4277        if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4278            (sbi->s_cluster_ratio == 1)) {
4279                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4280                         "block is 0 with a 1k block and cluster size");
4281                goto failed_mount;
4282        }
4283
4284        blocks_count = (ext4_blocks_count(es) -
4285                        le32_to_cpu(es->s_first_data_block) +
4286                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
4287        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4288        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4289                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
4290                       "(block count %llu, first data block %u, "
4291                       "blocks per group %lu)", sbi->s_groups_count,
4292                       ext4_blocks_count(es),
4293                       le32_to_cpu(es->s_first_data_block),
4294                       EXT4_BLOCKS_PER_GROUP(sb));
4295                goto failed_mount;
4296        }
4297        sbi->s_groups_count = blocks_count;
4298        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4299                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4300        if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4301            le32_to_cpu(es->s_inodes_count)) {
4302                ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4303                         le32_to_cpu(es->s_inodes_count),
4304                         ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4305                ret = -EINVAL;
4306                goto failed_mount;
4307        }
4308        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4309                   EXT4_DESC_PER_BLOCK(sb);
4310        if (ext4_has_feature_meta_bg(sb)) {
4311                if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4312                        ext4_msg(sb, KERN_WARNING,
4313                                 "first meta block group too large: %u "
4314                                 "(group descriptor block count %u)",
4315                                 le32_to_cpu(es->s_first_meta_bg), db_count);
4316                        goto failed_mount;
4317                }
4318        }
4319        rcu_assign_pointer(sbi->s_group_desc,
4320                           kvmalloc_array(db_count,
4321                                          sizeof(struct buffer_head *),
4322                                          GFP_KERNEL));
4323        if (sbi->s_group_desc == NULL) {
4324                ext4_msg(sb, KERN_ERR, "not enough memory");
4325                ret = -ENOMEM;
4326                goto failed_mount;
4327        }
4328
4329        bgl_lock_init(sbi->s_blockgroup_lock);
4330
4331        /* Pre-read the descriptors into the buffer cache */
4332        for (i = 0; i < db_count; i++) {
4333                block = descriptor_loc(sb, logical_sb_block, i);
4334                sb_breadahead(sb, block);
4335        }
4336
4337        for (i = 0; i < db_count; i++) {
4338                struct buffer_head *bh;
4339
4340                block = descriptor_loc(sb, logical_sb_block, i);
4341                bh = sb_bread_unmovable(sb, block);
4342                if (!bh) {
4343                        ext4_msg(sb, KERN_ERR,
4344                               "can't read group descriptor %d", i);
4345                        db_count = i;
4346                        goto failed_mount2;
4347                }
4348                rcu_read_lock();
4349                rcu_dereference(sbi->s_group_desc)[i] = bh;
4350                rcu_read_unlock();
4351        }
4352        sbi->s_gdb_count = db_count;
4353        if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
4354                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4355                ret = -EFSCORRUPTED;
4356                goto failed_mount2;
4357        }
4358
4359        timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
4360
4361        /* Register extent status tree shrinker */
4362        if (ext4_es_register_shrinker(sbi))
4363                goto failed_mount3;
4364
4365        sbi->s_stripe = ext4_get_stripe_size(sbi);
4366        sbi->s_extent_max_zeroout_kb = 32;
4367
4368        /*
4369         * set up enough so that it can read an inode
4370         */
4371        sb->s_op = &ext4_sops;
4372        sb->s_export_op = &ext4_export_ops;
4373        sb->s_xattr = ext4_xattr_handlers;
4374#ifdef CONFIG_FS_ENCRYPTION
4375        sb->s_cop = &ext4_cryptops;
4376#endif
4377#ifdef CONFIG_FS_VERITY
4378        sb->s_vop = &ext4_verityops;
4379#endif
4380#ifdef CONFIG_QUOTA
4381        sb->dq_op = &ext4_quota_operations;
4382        if (ext4_has_feature_quota(sb))
4383                sb->s_qcop = &dquot_quotactl_sysfile_ops;
4384        else
4385                sb->s_qcop = &ext4_qctl_operations;
4386        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
4387#endif
4388        memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
4389
4390        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
4391        mutex_init(&sbi->s_orphan_lock);
4392
4393        sb->s_root = NULL;
4394
4395        needs_recovery = (es->s_last_orphan != 0 ||
4396                          ext4_has_feature_journal_needs_recovery(sb));
4397
4398        if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
4399                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4400                        goto failed_mount3a;
4401
4402        /*
4403         * The first inode we look at is the journal inode.  Don't try
4404         * root first: it may be modified in the journal!
4405         */
4406        if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
4407                err = ext4_load_journal(sb, es, journal_devnum);
4408                if (err)
4409                        goto failed_mount3a;
4410        } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
4411                   ext4_has_feature_journal_needs_recovery(sb)) {
4412                ext4_msg(sb, KERN_ERR, "required journal recovery "
4413                       "suppressed and not mounted read-only");
4414                goto failed_mount_wq;
4415        } else {
4416                /* Nojournal mode, all journal mount options are illegal */
4417                if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
4418                        ext4_msg(sb, KERN_ERR, "can't mount with "
4419                                 "journal_checksum, fs mounted w/o journal");
4420                        goto failed_mount_wq;
4421                }
4422                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4423                        ext4_msg(sb, KERN_ERR, "can't mount with "
4424                                 "journal_async_commit, fs mounted w/o journal");
4425                        goto failed_mount_wq;
4426                }
4427                if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
4428                        ext4_msg(sb, KERN_ERR, "can't mount with "
4429                                 "commit=%lu, fs mounted w/o journal",
4430                                 sbi->s_commit_interval / HZ);
4431                        goto failed_mount_wq;
4432                }
4433                if (EXT4_MOUNT_DATA_FLAGS &
4434                    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
4435                        ext4_msg(sb, KERN_ERR, "can't mount with "
4436                                 "data=, fs mounted w/o journal");
4437                        goto failed_mount_wq;
4438                }
4439                sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
4440                clear_opt(sb, JOURNAL_CHECKSUM);
4441                clear_opt(sb, DATA_FLAGS);
4442                sbi->s_journal = NULL;
4443                needs_recovery = 0;
4444                goto no_journal;
4445        }
4446
4447        if (ext4_has_feature_64bit(sb) &&
4448            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4449                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
4450                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4451                goto failed_mount_wq;
4452        }
4453
4454        if (!set_journal_csum_feature_set(sb)) {
4455                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4456                         "feature set");
4457                goto failed_mount_wq;
4458        }
4459
4460        /* We have now updated the journal if required, so we can
4461         * validate the data journaling mode. */
4462        switch (test_opt(sb, DATA_FLAGS)) {
4463        case 0:
4464                /* No mode set, assume a default based on the journal
4465                 * capabilities: ORDERED_DATA if the journal can
4466                 * cope, else JOURNAL_DATA
4467                 */
4468                if (jbd2_journal_check_available_features
4469                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4470                        set_opt(sb, ORDERED_DATA);
4471                        sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4472                } else {
4473                        set_opt(sb, JOURNAL_DATA);
4474                        sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4475                }
4476                break;
4477
4478        case EXT4_MOUNT_ORDERED_DATA:
4479        case EXT4_MOUNT_WRITEBACK_DATA:
4480                if (!jbd2_journal_check_available_features
4481                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4482                        ext4_msg(sb, KERN_ERR, "Journal does not support "
4483                               "requested data journaling mode");
4484                        goto failed_mount_wq;
4485                }
4486        default:
4487                break;
4488        }
4489
4490        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4491            test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4492                ext4_msg(sb, KERN_ERR, "can't mount with "
4493                        "journal_async_commit in data=ordered mode");
4494                goto failed_mount_wq;
4495        }
4496
4497        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4498
4499        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4500
4501no_journal:
4502        if (!test_opt(sb, NO_MBCACHE)) {
4503                sbi->s_ea_block_cache = ext4_xattr_create_cache();
4504                if (!sbi->s_ea_block_cache) {
4505                        ext4_msg(sb, KERN_ERR,
4506                                 "Failed to create ea_block_cache");
4507                        goto failed_mount_wq;
4508                }
4509
4510                if (ext4_has_feature_ea_inode(sb)) {
4511                        sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4512                        if (!sbi->s_ea_inode_cache) {
4513                                ext4_msg(sb, KERN_ERR,
4514                                         "Failed to create ea_inode_cache");
4515                                goto failed_mount_wq;
4516                        }
4517                }
4518        }
4519
4520        if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
4521                ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
4522                goto failed_mount_wq;
4523        }
4524
4525        if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
4526            !ext4_has_feature_encrypt(sb)) {
4527                ext4_set_feature_encrypt(sb);
4528                ext4_commit_super(sb, 1);
4529        }
4530
4531        /*
4532         * Get the # of file system overhead blocks from the
4533         * superblock if present.
4534         */
4535        if (es->s_overhead_clusters)
4536                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4537        else {
4538                err = ext4_calculate_overhead(sb);
4539                if (err)
4540                        goto failed_mount_wq;
4541        }
4542
4543        /*
4544         * The maximum number of concurrent works can be high and
4545         * concurrency isn't really necessary.  Limit it to 1.
4546         */
4547        EXT4_SB(sb)->rsv_conversion_wq =
4548                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4549        if (!EXT4_SB(sb)->rsv_conversion_wq) {
4550                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4551                ret = -ENOMEM;
4552                goto failed_mount4;
4553        }
4554
4555        /*
4556         * The jbd2_journal_load will have done any necessary log recovery,
4557         * so we can safely mount the rest of the filesystem now.
4558         */
4559
4560        root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
4561        if (IS_ERR(root)) {
4562                ext4_msg(sb, KERN_ERR, "get root inode failed");
4563                ret = PTR_ERR(root);
4564                root = NULL;
4565                goto failed_mount4;
4566        }
4567        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4568                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4569                iput(root);
4570                goto failed_mount4;
4571        }
4572
4573#ifdef CONFIG_UNICODE
4574        if (sbi->s_encoding)
4575                sb->s_d_op = &ext4_dentry_ops;
4576#endif
4577
4578        sb->s_root = d_make_root(root);
4579        if (!sb->s_root) {
4580                ext4_msg(sb, KERN_ERR, "get root dentry failed");
4581                ret = -ENOMEM;
4582                goto failed_mount4;
4583        }
4584
4585        ret = ext4_setup_super(sb, es, sb_rdonly(sb));
4586        if (ret == -EROFS) {
4587                sb->s_flags |= SB_RDONLY;
4588                ret = 0;
4589        } else if (ret)
4590                goto failed_mount4a;
4591
4592        ext4_set_resv_clusters(sb);
4593
4594        err = ext4_setup_system_zone(sb);
4595        if (err) {
4596                ext4_msg(sb, KERN_ERR, "failed to initialize system "
4597                         "zone (%d)", err);
4598                goto failed_mount4a;
4599        }
4600
4601        ext4_ext_init(sb);
4602        err = ext4_mb_init(sb);
4603        if (err) {
4604                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4605                         err);
4606                goto failed_mount5;
4607        }
4608
4609        block = ext4_count_free_clusters(sb);
4610        ext4_free_blocks_count_set(sbi->s_es, 
4611                                   EXT4_C2B(sbi, block));
4612        ext4_superblock_csum_set(sb);
4613        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4614                                  GFP_KERNEL);
4615        if (!err) {
4616                unsigned long freei = ext4_count_free_inodes(sb);
4617                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4618                ext4_superblock_csum_set(sb);
4619                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4620                                          GFP_KERNEL);
4621        }
4622        if (!err)
4623                err = percpu_counter_init(&sbi->s_dirs_counter,
4624                                          ext4_count_dirs(sb), GFP_KERNEL);
4625        if (!err)
4626                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4627                                          GFP_KERNEL);
4628        if (!err)
4629                err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
4630
4631        if (err) {
4632                ext4_msg(sb, KERN_ERR, "insufficient memory");
4633                goto failed_mount6;
4634        }
4635
4636        if (ext4_has_feature_flex_bg(sb))
4637                if (!ext4_fill_flex_info(sb)) {
4638                        ext4_msg(sb, KERN_ERR,
4639                               "unable to initialize "
4640                               "flex_bg meta info!");
4641                        goto failed_mount6;
4642                }
4643
4644        err = ext4_register_li_request(sb, first_not_zeroed);
4645        if (err)
4646                goto failed_mount6;
4647
4648        err = ext4_register_sysfs(sb);
4649        if (err)
4650                goto failed_mount7;
4651
4652#ifdef CONFIG_QUOTA
4653        /* Enable quota usage during mount. */
4654        if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
4655                err = ext4_enable_quotas(sb);
4656                if (err)
4657                        goto failed_mount8;
4658        }
4659#endif  /* CONFIG_QUOTA */
4660
4661        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4662        ext4_orphan_cleanup(sb, es);
4663        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4664        if (needs_recovery) {
4665                ext4_msg(sb, KERN_INFO, "recovery complete");
4666                ext4_mark_recovery_complete(sb, es);
4667        }
4668        if (EXT4_SB(sb)->s_journal) {
4669                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4670                        descr = " journalled data mode";
4671                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4672                        descr = " ordered data mode";
4673                else
4674                        descr = " writeback data mode";
4675        } else
4676                descr = "out journal";
4677
4678        if (test_opt(sb, DISCARD)) {
4679                struct request_queue *q = bdev_get_queue(sb->s_bdev);
4680                if (!blk_queue_discard(q))
4681                        ext4_msg(sb, KERN_WARNING,
4682                                 "mounting with \"discard\" option, but "
4683                                 "the device does not support discard");
4684        }
4685
4686        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4687                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4688                         "Opts: %.*s%s%s", descr,
4689                         (int) sizeof(sbi->s_es->s_mount_opts),
4690                         sbi->s_es->s_mount_opts,
4691                         *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4692
4693        if (es->s_error_count)
4694                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4695
4696        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4697        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4698        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4699        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4700
4701        kfree(orig_data);
4702        return 0;
4703
4704cantfind_ext4:
4705        if (!silent)
4706                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4707        goto failed_mount;
4708
4709#ifdef CONFIG_QUOTA
4710failed_mount8:
4711        ext4_unregister_sysfs(sb);
4712#endif
4713failed_mount7:
4714        ext4_unregister_li_request(sb);
4715failed_mount6:
4716        ext4_mb_release(sb);
4717        rcu_read_lock();
4718        flex_groups = rcu_dereference(sbi->s_flex_groups);
4719        if (flex_groups) {
4720                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
4721                        kvfree(flex_groups[i]);
4722                kvfree(flex_groups);
4723        }
4724        rcu_read_unlock();
4725        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4726        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4727        percpu_counter_destroy(&sbi->s_dirs_counter);
4728        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4729        percpu_free_rwsem(&sbi->s_writepages_rwsem);
4730failed_mount5:
4731        ext4_ext_release(sb);
4732        ext4_release_system_zone(sb);
4733failed_mount4a:
4734        dput(sb->s_root);
4735        sb->s_root = NULL;
4736failed_mount4:
4737        ext4_msg(sb, KERN_ERR, "mount failed");
4738        if (EXT4_SB(sb)->rsv_conversion_wq)
4739                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4740failed_mount_wq:
4741        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4742        sbi->s_ea_inode_cache = NULL;
4743
4744        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
4745        sbi->s_ea_block_cache = NULL;
4746
4747        if (sbi->s_journal) {
4748                jbd2_journal_destroy(sbi->s_journal);
4749                sbi->s_journal = NULL;
4750        }
4751failed_mount3a:
4752        ext4_es_unregister_shrinker(sbi);
4753failed_mount3:
4754        del_timer_sync(&sbi->s_err_report);
4755        if (sbi->s_mmp_tsk)
4756                kthread_stop(sbi->s_mmp_tsk);
4757failed_mount2:
4758        rcu_read_lock();
4759        group_desc = rcu_dereference(sbi->s_group_desc);
4760        for (i = 0; i < db_count; i++)
4761                brelse(group_desc[i]);
4762        kvfree(group_desc);
4763        rcu_read_unlock();
4764failed_mount:
4765        if (sbi->s_chksum_driver)
4766                crypto_free_shash(sbi->s_chksum_driver);
4767
4768#ifdef CONFIG_UNICODE
4769        utf8_unload(sbi->s_encoding);
4770#endif
4771
4772#ifdef CONFIG_QUOTA
4773        for (i = 0; i < EXT4_MAXQUOTAS; i++)
4774                kfree(get_qf_name(sb, sbi, i));
4775#endif
4776        ext4_blkdev_remove(sbi);
4777        brelse(bh);
4778out_fail:
4779        sb->s_fs_info = NULL;
4780        kfree(sbi->s_blockgroup_lock);
4781out_free_base:
4782        kfree(sbi);
4783        kfree(orig_data);
4784        fs_put_dax(dax_dev);
4785        return err ? err : ret;
4786}
4787
4788/*
4789 * Setup any per-fs journal parameters now.  We'll do this both on
4790 * initial mount, once the journal has been initialised but before we've
4791 * done any recovery; and again on any subsequent remount.
4792 */
4793static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4794{
4795        struct ext4_sb_info *sbi = EXT4_SB(sb);
4796
4797        journal->j_commit_interval = sbi->s_commit_interval;
4798        journal->j_min_batch_time = sbi->s_min_batch_time;
4799        journal->j_max_batch_time = sbi->s_max_batch_time;
4800
4801        write_lock(&journal->j_state_lock);
4802        if (test_opt(sb, BARRIER))
4803                journal->j_flags |= JBD2_BARRIER;
4804        else
4805                journal->j_flags &= ~JBD2_BARRIER;
4806        if (test_opt(sb, DATA_ERR_ABORT))
4807                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4808        else
4809                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4810        write_unlock(&journal->j_state_lock);
4811}
4812
4813static struct inode *ext4_get_journal_inode(struct super_block *sb,
4814                                             unsigned int journal_inum)
4815{
4816        struct inode *journal_inode;
4817
4818        /*
4819         * Test for the existence of a valid inode on disk.  Bad things
4820         * happen if we iget() an unused inode, as the subsequent iput()
4821         * will try to delete it.
4822         */
4823        journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
4824        if (IS_ERR(journal_inode)) {
4825                ext4_msg(sb, KERN_ERR, "no journal found");
4826                return NULL;
4827        }
4828        if (!journal_inode->i_nlink) {
4829                make_bad_inode(journal_inode);
4830                iput(journal_inode);
4831                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4832                return NULL;
4833        }
4834
4835        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4836                  journal_inode, journal_inode->i_size);
4837        if (!S_ISREG(journal_inode->i_mode)) {
4838                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4839                iput(journal_inode);
4840                return NULL;
4841        }
4842        return journal_inode;
4843}
4844
4845static journal_t *ext4_get_journal(struct super_block *sb,
4846                                   unsigned int journal_inum)
4847{
4848        struct inode *journal_inode;
4849        journal_t *journal;
4850
4851        BUG_ON(!ext4_has_feature_journal(sb));
4852
4853        journal_inode = ext4_get_journal_inode(sb, journal_inum);
4854        if (!journal_inode)
4855                return NULL;
4856
4857        journal = jbd2_journal_init_inode(journal_inode);
4858        if (!journal) {
4859                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4860                iput(journal_inode);
4861                return NULL;
4862        }
4863        journal->j_private = sb;
4864        ext4_init_journal_params(sb, journal);
4865        return journal;
4866}
4867
4868static journal_t *ext4_get_dev_journal(struct super_block *sb,
4869                                       dev_t j_dev)
4870{
4871        struct buffer_head *bh;
4872        journal_t *journal;
4873        ext4_fsblk_t start;
4874        ext4_fsblk_t len;
4875        int hblock, blocksize;
4876        ext4_fsblk_t sb_block;
4877        unsigned long offset;
4878        struct ext4_super_block *es;
4879        struct block_device *bdev;
4880
4881        BUG_ON(!ext4_has_feature_journal(sb));
4882
4883        bdev = ext4_blkdev_get(j_dev, sb);
4884        if (bdev == NULL)
4885                return NULL;
4886
4887        blocksize = sb->s_blocksize;
4888        hblock = bdev_logical_block_size(bdev);
4889        if (blocksize < hblock) {
4890                ext4_msg(sb, KERN_ERR,
4891                        "blocksize too small for journal device");
4892                goto out_bdev;
4893        }
4894
4895        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4896        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4897        set_blocksize(bdev, blocksize);
4898        if (!(bh = __bread(bdev, sb_block, blocksize))) {
4899                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4900                       "external journal");
4901                goto out_bdev;
4902        }
4903
4904        es = (struct ext4_super_block *) (bh->b_data + offset);
4905        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4906            !(le32_to_cpu(es->s_feature_incompat) &
4907              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4908                ext4_msg(sb, KERN_ERR, "external journal has "
4909                                        "bad superblock");
4910                brelse(bh);
4911                goto out_bdev;
4912        }
4913
4914        if ((le32_to_cpu(es->s_feature_ro_compat) &
4915             EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4916            es->s_checksum != ext4_superblock_csum(sb, es)) {
4917                ext4_msg(sb, KERN_ERR, "external journal has "
4918                                       "corrupt superblock");
4919                brelse(bh);
4920                goto out_bdev;
4921        }
4922
4923        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4924                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4925                brelse(bh);
4926                goto out_bdev;
4927        }
4928
4929        len = ext4_blocks_count(es);
4930        start = sb_block + 1;
4931        brelse(bh);     /* we're done with the superblock */
4932
4933        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4934                                        start, len, blocksize);
4935        if (!journal) {
4936                ext4_msg(sb, KERN_ERR, "failed to create device journal");
4937                goto out_bdev;
4938        }
4939        journal->j_private = sb;
4940        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4941        wait_on_buffer(journal->j_sb_buffer);
4942        if (!buffer_uptodate(journal->j_sb_buffer)) {
4943                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4944                goto out_journal;
4945        }
4946        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4947                ext4_msg(sb, KERN_ERR, "External journal has more than one "
4948                                        "user (unsupported) - %d",
4949                        be32_to_cpu(journal->j_superblock->s_nr_users));
4950                goto out_journal;
4951        }
4952        EXT4_SB(sb)->journal_bdev = bdev;
4953        ext4_init_journal_params(sb, journal);
4954        return journal;
4955
4956out_journal:
4957        jbd2_journal_destroy(journal);
4958out_bdev:
4959        ext4_blkdev_put(bdev);
4960        return NULL;
4961}
4962
4963static int ext4_load_journal(struct super_block *sb,
4964                             struct ext4_super_block *es,
4965                             unsigned long journal_devnum)
4966{
4967        journal_t *journal;
4968        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4969        dev_t journal_dev;
4970        int err = 0;
4971        int really_read_only;
4972
4973        BUG_ON(!ext4_has_feature_journal(sb));
4974
4975        if (journal_devnum &&
4976            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4977                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4978                        "numbers have changed");
4979                journal_dev = new_decode_dev(journal_devnum);
4980        } else
4981                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4982
4983        really_read_only = bdev_read_only(sb->s_bdev);
4984
4985        /*
4986         * Are we loading a blank journal or performing recovery after a
4987         * crash?  For recovery, we need to check in advance whether we
4988         * can get read-write access to the device.
4989         */
4990        if (ext4_has_feature_journal_needs_recovery(sb)) {
4991                if (sb_rdonly(sb)) {
4992                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
4993                                        "required on readonly filesystem");
4994                        if (really_read_only) {
4995                                ext4_msg(sb, KERN_ERR, "write access "
4996                                        "unavailable, cannot proceed "
4997                                        "(try mounting with noload)");
4998                                return -EROFS;
4999                        }
5000                        ext4_msg(sb, KERN_INFO, "write access will "
5001                               "be enabled during recovery");
5002                }
5003        }
5004
5005        if (journal_inum && journal_dev) {
5006                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
5007                       "and inode journals!");
5008                return -EINVAL;
5009        }
5010
5011        if (journal_inum) {
5012                if (!(journal = ext4_get_journal(sb, journal_inum)))
5013                        return -EINVAL;
5014        } else {
5015                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
5016                        return -EINVAL;
5017        }
5018
5019        if (!(journal->j_flags & JBD2_BARRIER))
5020                ext4_msg(sb, KERN_INFO, "barriers disabled");
5021
5022        if (!ext4_has_feature_journal_needs_recovery(sb))
5023                err = jbd2_journal_wipe(journal, !really_read_only);
5024        if (!err) {
5025                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
5026                if (save)
5027                        memcpy(save, ((char *) es) +
5028                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
5029                err = jbd2_journal_load(journal);
5030                if (save)
5031                        memcpy(((char *) es) + EXT4_S_ERR_START,
5032                               save, EXT4_S_ERR_LEN);
5033                kfree(save);
5034        }
5035
5036        if (err) {
5037                ext4_msg(sb, KERN_ERR, "error loading journal");
5038                jbd2_journal_destroy(journal);
5039                return err;
5040        }
5041
5042        EXT4_SB(sb)->s_journal = journal;
5043        ext4_clear_journal_err(sb, es);
5044
5045        if (!really_read_only && journal_devnum &&
5046            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5047                es->s_journal_dev = cpu_to_le32(journal_devnum);
5048
5049                /* Make sure we flush the recovery flag to disk. */
5050                ext4_commit_super(sb, 1);
5051        }
5052
5053        return 0;
5054}
5055
5056static int ext4_commit_super(struct super_block *sb, int sync)
5057{
5058        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5059        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
5060        int error = 0;
5061
5062        if (!sbh || block_device_ejected(sb))
5063                return error;
5064
5065        /*
5066         * The superblock bh should be mapped, but it might not be if the
5067         * device was hot-removed. Not much we can do but fail the I/O.
5068         */
5069        if (!buffer_mapped(sbh))
5070                return error;
5071
5072        /*
5073         * If the file system is mounted read-only, don't update the
5074         * superblock write time.  This avoids updating the superblock
5075         * write time when we are mounting the root file system
5076         * read/only but we need to replay the journal; at that point,
5077         * for people who are east of GMT and who make their clock
5078         * tick in localtime for Windows bug-for-bug compatibility,
5079         * the clock is set in the future, and this will cause e2fsck
5080         * to complain and force a full file system check.
5081         */
5082        if (!(sb->s_flags & SB_RDONLY))
5083                ext4_update_tstamp(es, s_wtime);
5084        if (sb->s_bdev->bd_part)
5085                es->s_kbytes_written =
5086                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
5087                            ((part_stat_read(sb->s_bdev->bd_part,
5088                                             sectors[STAT_WRITE]) -
5089                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
5090        else
5091                es->s_kbytes_written =
5092                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
5093        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
5094                ext4_free_blocks_count_set(es,
5095                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
5096                                &EXT4_SB(sb)->s_freeclusters_counter)));
5097        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
5098                es->s_free_inodes_count =
5099                        cpu_to_le32(percpu_counter_sum_positive(
5100                                &EXT4_SB(sb)->s_freeinodes_counter));
5101        BUFFER_TRACE(sbh, "marking dirty");
5102        ext4_superblock_csum_set(sb);
5103        if (sync)
5104                lock_buffer(sbh);
5105        if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
5106                /*
5107                 * Oh, dear.  A previous attempt to write the
5108                 * superblock failed.  This could happen because the
5109                 * USB device was yanked out.  Or it could happen to
5110                 * be a transient write error and maybe the block will
5111                 * be remapped.  Nothing we can do but to retry the
5112                 * write and hope for the best.
5113                 */
5114                ext4_msg(sb, KERN_ERR, "previous I/O error to "
5115                       "superblock detected");
5116                clear_buffer_write_io_error(sbh);
5117                set_buffer_uptodate(sbh);
5118        }
5119        mark_buffer_dirty(sbh);
5120        if (sync) {
5121                unlock_buffer(sbh);
5122                error = __sync_dirty_buffer(sbh,
5123                        REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
5124                if (buffer_write_io_error(sbh)) {
5125                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
5126                               "superblock");
5127                        clear_buffer_write_io_error(sbh);
5128                        set_buffer_uptodate(sbh);
5129                }
5130        }
5131        return error;
5132}
5133
5134/*
5135 * Have we just finished recovery?  If so, and if we are mounting (or
5136 * remounting) the filesystem readonly, then we will end up with a
5137 * consistent fs on disk.  Record that fact.
5138 */
5139static void ext4_mark_recovery_complete(struct super_block *sb,
5140                                        struct ext4_super_block *es)
5141{
5142        journal_t *journal = EXT4_SB(sb)->s_journal;
5143
5144        if (!ext4_has_feature_journal(sb)) {
5145                BUG_ON(journal != NULL);
5146                return;
5147        }
5148        jbd2_journal_lock_updates(journal);
5149        if (jbd2_journal_flush(journal) < 0)
5150                goto out;
5151
5152        if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
5153                ext4_clear_feature_journal_needs_recovery(sb);
5154                ext4_commit_super(sb, 1);
5155        }
5156
5157out:
5158        jbd2_journal_unlock_updates(journal);
5159}
5160
5161/*
5162 * If we are mounting (or read-write remounting) a filesystem whose journal
5163 * has recorded an error from a previous lifetime, move that error to the
5164 * main filesystem now.
5165 */
5166static void ext4_clear_journal_err(struct super_block *sb,
5167                                   struct ext4_super_block *es)
5168{
5169        journal_t *journal;
5170        int j_errno;
5171        const char *errstr;
5172
5173        BUG_ON(!ext4_has_feature_journal(sb));
5174
5175        journal = EXT4_SB(sb)->s_journal;
5176
5177        /*
5178         * Now check for any error status which may have been recorded in the
5179         * journal by a prior ext4_error() or ext4_abort()
5180         */
5181
5182        j_errno = jbd2_journal_errno(journal);
5183        if (j_errno) {
5184                char nbuf[16];
5185
5186                errstr = ext4_decode_error(sb, j_errno, nbuf);
5187                ext4_warning(sb, "Filesystem error recorded "
5188                             "from previous mount: %s", errstr);
5189                ext4_warning(sb, "Marking fs in need of filesystem check.");
5190
5191                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
5192                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
5193                ext4_commit_super(sb, 1);
5194
5195                jbd2_journal_clear_err(journal);
5196                jbd2_journal_update_sb_errno(journal);
5197        }
5198}
5199
5200/*
5201 * Force the running and committing transactions to commit,
5202 * and wait on the commit.
5203 */
5204int ext4_force_commit(struct super_block *sb)
5205{
5206        journal_t *journal;
5207
5208        if (sb_rdonly(sb))
5209                return 0;
5210
5211        journal = EXT4_SB(sb)->s_journal;
5212        return ext4_journal_force_commit(journal);
5213}
5214
5215static int ext4_sync_fs(struct super_block *sb, int wait)
5216{
5217        int ret = 0;
5218        tid_t target;
5219        bool needs_barrier = false;
5220        struct ext4_sb_info *sbi = EXT4_SB(sb);
5221
5222        if (unlikely(ext4_forced_shutdown(sbi)))
5223                return 0;
5224
5225        trace_ext4_sync_fs(sb, wait);
5226        flush_workqueue(sbi->rsv_conversion_wq);
5227        /*
5228         * Writeback quota in non-journalled quota case - journalled quota has
5229         * no dirty dquots
5230         */
5231        dquot_writeback_dquots(sb, -1);
5232        /*
5233         * Data writeback is possible w/o journal transaction, so barrier must
5234         * being sent at the end of the function. But we can skip it if
5235         * transaction_commit will do it for us.
5236         */
5237        if (sbi->s_journal) {
5238                target = jbd2_get_latest_transaction(sbi->s_journal);
5239                if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
5240                    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
5241                        needs_barrier = true;
5242
5243                if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
5244                        if (wait)
5245                                ret = jbd2_log_wait_commit(sbi->s_journal,
5246                                                           target);
5247                }
5248        } else if (wait && test_opt(sb, BARRIER))
5249                needs_barrier = true;
5250        if (needs_barrier) {
5251                int err;
5252                err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
5253                if (!ret)
5254                        ret = err;
5255        }
5256
5257        return ret;
5258}
5259
5260/*
5261 * LVM calls this function before a (read-only) snapshot is created.  This
5262 * gives us a chance to flush the journal completely and mark the fs clean.
5263 *
5264 * Note that only this function cannot bring a filesystem to be in a clean
5265 * state independently. It relies on upper layer to stop all data & metadata
5266 * modifications.
5267 */
5268static int ext4_freeze(struct super_block *sb)
5269{
5270        int error = 0;
5271        journal_t *journal;
5272
5273        if (sb_rdonly(sb))
5274                return 0;
5275
5276        journal = EXT4_SB(sb)->s_journal;
5277
5278        if (journal) {
5279                /* Now we set up the journal barrier. */
5280                jbd2_journal_lock_updates(journal);
5281
5282                /*
5283                 * Don't clear the needs_recovery flag if we failed to
5284                 * flush the journal.
5285                 */
5286                error = jbd2_journal_flush(journal);
5287                if (error < 0)
5288                        goto out;
5289
5290                /* Journal blocked and flushed, clear needs_recovery flag. */
5291                ext4_clear_feature_journal_needs_recovery(sb);
5292        }
5293
5294        error = ext4_commit_super(sb, 1);
5295out:
5296        if (journal)
5297                /* we rely on upper layer to stop further updates */
5298                jbd2_journal_unlock_updates(journal);
5299        return error;
5300}
5301
5302/*
5303 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
5304 * flag here, even though the filesystem is not technically dirty yet.
5305 */
5306static int ext4_unfreeze(struct super_block *sb)
5307{
5308        if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
5309                return 0;
5310
5311        if (EXT4_SB(sb)->s_journal) {
5312                /* Reset the needs_recovery flag before the fs is unlocked. */
5313                ext4_set_feature_journal_needs_recovery(sb);
5314        }
5315
5316        ext4_commit_super(sb, 1);
5317        return 0;
5318}
5319
5320/*
5321 * Structure to save mount options for ext4_remount's benefit
5322 */
5323struct ext4_mount_options {
5324        unsigned long s_mount_opt;
5325        unsigned long s_mount_opt2;
5326        kuid_t s_resuid;
5327        kgid_t s_resgid;
5328        unsigned long s_commit_interval;
5329        u32 s_min_batch_time, s_max_batch_time;
5330#ifdef CONFIG_QUOTA
5331        int s_jquota_fmt;
5332        char *s_qf_names[EXT4_MAXQUOTAS];
5333#endif
5334};
5335
5336static int ext4_remount(struct super_block *sb, int *flags, char *data)
5337{
5338        struct ext4_super_block *es;
5339        struct ext4_sb_info *sbi = EXT4_SB(sb);
5340        unsigned long old_sb_flags;
5341        struct ext4_mount_options old_opts;
5342        int enable_quota = 0;
5343        ext4_group_t g;
5344        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5345        int err = 0;
5346#ifdef CONFIG_QUOTA
5347        int i, j;
5348        char *to_free[EXT4_MAXQUOTAS];
5349#endif
5350        char *orig_data = kstrdup(data, GFP_KERNEL);
5351
5352        if (data && !orig_data)
5353                return -ENOMEM;
5354
5355        /* Store the original options */
5356        old_sb_flags = sb->s_flags;
5357        old_opts.s_mount_opt = sbi->s_mount_opt;
5358        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
5359        old_opts.s_resuid = sbi->s_resuid;
5360        old_opts.s_resgid = sbi->s_resgid;
5361        old_opts.s_commit_interval = sbi->s_commit_interval;
5362        old_opts.s_min_batch_time = sbi->s_min_batch_time;
5363        old_opts.s_max_batch_time = sbi->s_max_batch_time;
5364#ifdef CONFIG_QUOTA
5365        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
5366        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5367                if (sbi->s_qf_names[i]) {
5368                        char *qf_name = get_qf_name(sb, sbi, i);
5369
5370                        old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
5371                        if (!old_opts.s_qf_names[i]) {
5372                                for (j = 0; j < i; j++)
5373                                        kfree(old_opts.s_qf_names[j]);
5374                                kfree(orig_data);
5375                                return -ENOMEM;
5376                        }
5377                } else
5378                        old_opts.s_qf_names[i] = NULL;
5379#endif
5380        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
5381                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
5382
5383        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
5384                err = -EINVAL;
5385                goto restore_opts;
5386        }
5387
5388        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
5389            test_opt(sb, JOURNAL_CHECKSUM)) {
5390                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
5391                         "during remount not supported; ignoring");
5392                sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
5393        }
5394
5395        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
5396                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
5397                        ext4_msg(sb, KERN_ERR, "can't mount with "
5398                                 "both data=journal and delalloc");
5399                        err = -EINVAL;
5400                        goto restore_opts;
5401                }
5402                if (test_opt(sb, DIOREAD_NOLOCK)) {
5403                        ext4_msg(sb, KERN_ERR, "can't mount with "
5404                                 "both data=journal and dioread_nolock");
5405                        err = -EINVAL;
5406                        goto restore_opts;
5407                }
5408                if (test_opt(sb, DAX)) {
5409                        ext4_msg(sb, KERN_ERR, "can't mount with "
5410                                 "both data=journal and dax");
5411                        err = -EINVAL;
5412                        goto restore_opts;
5413                }
5414        } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
5415                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5416                        ext4_msg(sb, KERN_ERR, "can't mount with "
5417                                "journal_async_commit in data=ordered mode");
5418                        err = -EINVAL;
5419                        goto restore_opts;
5420                }
5421        }
5422
5423        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
5424                ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
5425                err = -EINVAL;
5426                goto restore_opts;
5427        }
5428
5429        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
5430                ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
5431                        "dax flag with busy inodes while remounting");
5432                sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
5433        }
5434
5435        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
5436                ext4_abort(sb, "Abort forced by user");
5437
5438        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5439                (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5440
5441        es = sbi->s_es;
5442
5443        if (sbi->s_journal) {
5444                ext4_init_journal_params(sb, sbi->s_journal);
5445                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
5446        }
5447
5448        if (*flags & SB_LAZYTIME)
5449                sb->s_flags |= SB_LAZYTIME;
5450
5451        if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
5452                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
5453                        err = -EROFS;
5454                        goto restore_opts;
5455                }
5456
5457                if (*flags & SB_RDONLY) {
5458                        err = sync_filesystem(sb);
5459                        if (err < 0)
5460                                goto restore_opts;
5461                        err = dquot_suspend(sb, -1);
5462                        if (err < 0)
5463                                goto restore_opts;
5464
5465                        /*
5466                         * First of all, the unconditional stuff we have to do
5467                         * to disable replay of the journal when we next remount
5468                         */
5469                        sb->s_flags |= SB_RDONLY;
5470
5471                        /*
5472                         * OK, test if we are remounting a valid rw partition
5473                         * readonly, and if so set the rdonly flag and then
5474                         * mark the partition as valid again.
5475                         */
5476                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5477                            (sbi->s_mount_state & EXT4_VALID_FS))
5478                                es->s_state = cpu_to_le16(sbi->s_mount_state);
5479
5480                        if (sbi->s_journal)
5481                                ext4_mark_recovery_complete(sb, es);
5482                        if (sbi->s_mmp_tsk)
5483                                kthread_stop(sbi->s_mmp_tsk);
5484                } else {
5485                        /* Make sure we can mount this feature set readwrite */
5486                        if (ext4_has_feature_readonly(sb) ||
5487                            !ext4_feature_set_ok(sb, 0)) {
5488                                err = -EROFS;
5489                                goto restore_opts;
5490                        }
5491                        /*
5492                         * Make sure the group descriptor checksums
5493                         * are sane.  If they aren't, refuse to remount r/w.
5494                         */
5495                        for (g = 0; g < sbi->s_groups_count; g++) {
5496                                struct ext4_group_desc *gdp =
5497                                        ext4_get_group_desc(sb, g, NULL);
5498
5499                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5500                                        ext4_msg(sb, KERN_ERR,
5501               "ext4_remount: Checksum for group %u failed (%u!=%u)",
5502                g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
5503                                               le16_to_cpu(gdp->bg_checksum));
5504                                        err = -EFSBADCRC;
5505                                        goto restore_opts;
5506                                }
5507                        }
5508
5509                        /*
5510                         * If we have an unprocessed orphan list hanging
5511                         * around from a previously readonly bdev mount,
5512                         * require a full umount/remount for now.
5513                         */
5514                        if (es->s_last_orphan) {
5515                                ext4_msg(sb, KERN_WARNING, "Couldn't "
5516                                       "remount RDWR because of unprocessed "
5517                                       "orphan inode list.  Please "
5518                                       "umount/remount instead");
5519                                err = -EINVAL;
5520                                goto restore_opts;
5521                        }
5522
5523                        /*
5524                         * Mounting a RDONLY partition read-write, so reread
5525                         * and store the current valid flag.  (It may have
5526                         * been changed by e2fsck since we originally mounted
5527                         * the partition.)
5528                         */
5529                        if (sbi->s_journal)
5530                                ext4_clear_journal_err(sb, es);
5531                        sbi->s_mount_state = le16_to_cpu(es->s_state);
5532
5533                        err = ext4_setup_super(sb, es, 0);
5534                        if (err)
5535                                goto restore_opts;
5536
5537                        sb->s_flags &= ~SB_RDONLY;
5538                        if (ext4_has_feature_mmp(sb))
5539                                if (ext4_multi_mount_protect(sb,
5540                                                le64_to_cpu(es->s_mmp_block))) {
5541                                        err = -EROFS;
5542                                        goto restore_opts;
5543                                }
5544                        enable_quota = 1;
5545                }
5546        }
5547
5548        /*
5549         * Reinitialize lazy itable initialization thread based on
5550         * current settings
5551         */
5552        if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
5553                ext4_unregister_li_request(sb);
5554        else {
5555                ext4_group_t first_not_zeroed;
5556                first_not_zeroed = ext4_has_uninit_itable(sb);
5557                ext4_register_li_request(sb, first_not_zeroed);
5558        }
5559
5560        ext4_setup_system_zone(sb);
5561        if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
5562                err = ext4_commit_super(sb, 1);
5563                if (err)
5564                        goto restore_opts;
5565        }
5566
5567#ifdef CONFIG_QUOTA
5568        /* Release old quota file names */
5569        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5570                kfree(old_opts.s_qf_names[i]);
5571        if (enable_quota) {
5572                if (sb_any_quota_suspended(sb))
5573                        dquot_resume(sb, -1);
5574                else if (ext4_has_feature_quota(sb)) {
5575                        err = ext4_enable_quotas(sb);
5576                        if (err)
5577                                goto restore_opts;
5578                }
5579        }
5580#endif
5581
5582        *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
5583        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5584        kfree(orig_data);
5585        return 0;
5586
5587restore_opts:
5588        sb->s_flags = old_sb_flags;
5589        sbi->s_mount_opt = old_opts.s_mount_opt;
5590        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5591        sbi->s_resuid = old_opts.s_resuid;
5592        sbi->s_resgid = old_opts.s_resgid;
5593        sbi->s_commit_interval = old_opts.s_commit_interval;
5594        sbi->s_min_batch_time = old_opts.s_min_batch_time;
5595        sbi->s_max_batch_time = old_opts.s_max_batch_time;
5596#ifdef CONFIG_QUOTA
5597        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5598        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
5599                to_free[i] = get_qf_name(sb, sbi, i);
5600                rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
5601        }
5602        synchronize_rcu();
5603        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5604                kfree(to_free[i]);
5605#endif
5606        kfree(orig_data);
5607        return err;
5608}
5609
5610#ifdef CONFIG_QUOTA
5611static int ext4_statfs_project(struct super_block *sb,
5612                               kprojid_t projid, struct kstatfs *buf)
5613{
5614        struct kqid qid;
5615        struct dquot *dquot;
5616        u64 limit;
5617        u64 curblock;
5618
5619        qid = make_kqid_projid(projid);
5620        dquot = dqget(sb, qid);
5621        if (IS_ERR(dquot))
5622                return PTR_ERR(dquot);
5623        spin_lock(&dquot->dq_dqb_lock);
5624
5625        limit = dquot->dq_dqb.dqb_bsoftlimit;
5626        if (dquot->dq_dqb.dqb_bhardlimit &&
5627            (!limit || dquot->dq_dqb.dqb_bhardlimit < limit))
5628                limit = dquot->dq_dqb.dqb_bhardlimit;
5629        limit >>= sb->s_blocksize_bits;
5630
5631        if (limit && buf->f_blocks > limit) {
5632                curblock = (dquot->dq_dqb.dqb_curspace +
5633                            dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
5634                buf->f_blocks = limit;
5635                buf->f_bfree = buf->f_bavail =
5636                        (buf->f_blocks > curblock) ?
5637                         (buf->f_blocks - curblock) : 0;
5638        }
5639
5640        limit = dquot->dq_dqb.dqb_isoftlimit;
5641        if (dquot->dq_dqb.dqb_ihardlimit &&
5642            (!limit || dquot->dq_dqb.dqb_ihardlimit < limit))
5643                limit = dquot->dq_dqb.dqb_ihardlimit;
5644
5645        if (limit && buf->f_files > limit) {
5646                buf->f_files = limit;
5647                buf->f_ffree =
5648                        (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
5649                         (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
5650        }
5651
5652        spin_unlock(&dquot->dq_dqb_lock);
5653        dqput(dquot);
5654        return 0;
5655}
5656#endif
5657
5658static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5659{
5660        struct super_block *sb = dentry->d_sb;
5661        struct ext4_sb_info *sbi = EXT4_SB(sb);
5662        struct ext4_super_block *es = sbi->s_es;
5663        ext4_fsblk_t overhead = 0, resv_blocks;
5664        u64 fsid;
5665        s64 bfree;
5666        resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5667
5668        if (!test_opt(sb, MINIX_DF))
5669                overhead = sbi->s_overhead;
5670
5671        buf->f_type = EXT4_SUPER_MAGIC;
5672        buf->f_bsize = sb->s_blocksize;
5673        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5674        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5675                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5676        /* prevent underflow in case that few free space is available */
5677        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5678        buf->f_bavail = buf->f_bfree -
5679                        (ext4_r_blocks_count(es) + resv_blocks);
5680        if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5681                buf->f_bavail = 0;
5682        buf->f_files = le32_to_cpu(es->s_inodes_count);
5683        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5684        buf->f_namelen = EXT4_NAME_LEN;
5685        fsid = le64_to_cpup((void *)es->s_uuid) ^
5686               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5687        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5688        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5689
5690#ifdef CONFIG_QUOTA
5691        if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
5692            sb_has_quota_limits_enabled(sb, PRJQUOTA))
5693                ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
5694#endif
5695        return 0;
5696}
5697
5698
5699#ifdef CONFIG_QUOTA
5700
5701/*
5702 * Helper functions so that transaction is started before we acquire dqio_sem
5703 * to keep correct lock ordering of transaction > dqio_sem
5704 */
5705static inline struct inode *dquot_to_inode(struct dquot *dquot)
5706{
5707        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5708}
5709
5710static int ext4_write_dquot(struct dquot *dquot)
5711{
5712        int ret, err;
5713        handle_t *handle;
5714        struct inode *inode;
5715
5716        inode = dquot_to_inode(dquot);
5717        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5718                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5719        if (IS_ERR(handle))
5720                return PTR_ERR(handle);
5721        ret = dquot_commit(dquot);
5722        err = ext4_journal_stop(handle);
5723        if (!ret)
5724                ret = err;
5725        return ret;
5726}
5727
5728static int ext4_acquire_dquot(struct dquot *dquot)
5729{
5730        int ret, err;
5731        handle_t *handle;
5732
5733        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5734                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5735        if (IS_ERR(handle))
5736                return PTR_ERR(handle);
5737        ret = dquot_acquire(dquot);
5738        err = ext4_journal_stop(handle);
5739        if (!ret)
5740                ret = err;
5741        return ret;
5742}
5743
5744static int ext4_release_dquot(struct dquot *dquot)
5745{
5746        int ret, err;
5747        handle_t *handle;
5748
5749        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5750                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5751        if (IS_ERR(handle)) {
5752                /* Release dquot anyway to avoid endless cycle in dqput() */
5753                dquot_release(dquot);
5754                return PTR_ERR(handle);
5755        }
5756        ret = dquot_release(dquot);
5757        err = ext4_journal_stop(handle);
5758        if (!ret)
5759                ret = err;
5760        return ret;
5761}
5762
5763static int ext4_mark_dquot_dirty(struct dquot *dquot)
5764{
5765        struct super_block *sb = dquot->dq_sb;
5766        struct ext4_sb_info *sbi = EXT4_SB(sb);
5767
5768        /* Are we journaling quotas? */
5769        if (ext4_has_feature_quota(sb) ||
5770            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5771                dquot_mark_dquot_dirty(dquot);
5772                return ext4_write_dquot(dquot);
5773        } else {
5774                return dquot_mark_dquot_dirty(dquot);
5775        }
5776}
5777
5778static int ext4_write_info(struct super_block *sb, int type)
5779{
5780        int ret, err;
5781        handle_t *handle;
5782
5783        /* Data block + inode block */
5784        handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
5785        if (IS_ERR(handle))
5786                return PTR_ERR(handle);
5787        ret = dquot_commit_info(sb, type);
5788        err = ext4_journal_stop(handle);
5789        if (!ret)
5790                ret = err;
5791        return ret;
5792}
5793
5794/*
5795 * Turn on quotas during mount time - we need to find
5796 * the quota file and such...
5797 */
5798static int ext4_quota_on_mount(struct super_block *sb, int type)
5799{
5800        return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
5801                                        EXT4_SB(sb)->s_jquota_fmt, type);
5802}
5803
5804static void lockdep_set_quota_inode(struct inode *inode, int subclass)
5805{
5806        struct ext4_inode_info *ei = EXT4_I(inode);
5807
5808        /* The first argument of lockdep_set_subclass has to be
5809         * *exactly* the same as the argument to init_rwsem() --- in
5810         * this case, in init_once() --- or lockdep gets unhappy
5811         * because the name of the lock is set using the
5812         * stringification of the argument to init_rwsem().
5813         */
5814        (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
5815        lockdep_set_subclass(&ei->i_data_sem, subclass);
5816}
5817
5818/*
5819 * Standard function to be called on quota_on
5820 */
5821static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5822                         const struct path *path)
5823{
5824        int err;
5825
5826        if (!test_opt(sb, QUOTA))
5827                return -EINVAL;
5828
5829        /* Quotafile not on the same filesystem? */
5830        if (path->dentry->d_sb != sb)
5831                return -EXDEV;
5832        /* Journaling quota? */
5833        if (EXT4_SB(sb)->s_qf_names[type]) {
5834                /* Quotafile not in fs root? */
5835                if (path->dentry->d_parent != sb->s_root)
5836                        ext4_msg(sb, KERN_WARNING,
5837                                "Quota file not on filesystem root. "
5838                                "Journaled quota will not work");
5839                sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
5840        } else {
5841                /*
5842                 * Clear the flag just in case mount options changed since
5843                 * last time.
5844                 */
5845                sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
5846        }
5847
5848        /*
5849         * When we journal data on quota file, we have to flush journal to see
5850         * all updates to the file when we bypass pagecache...
5851         */
5852        if (EXT4_SB(sb)->s_journal &&
5853            ext4_should_journal_data(d_inode(path->dentry))) {
5854                /*
5855                 * We don't need to lock updates but journal_flush() could
5856                 * otherwise be livelocked...
5857                 */
5858                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5859                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5860                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5861                if (err)
5862                        return err;
5863        }
5864
5865        lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
5866        err = dquot_quota_on(sb, type, format_id, path);
5867        if (err) {
5868                lockdep_set_quota_inode(path->dentry->d_inode,
5869                                             I_DATA_SEM_NORMAL);
5870        } else {
5871                struct inode *inode = d_inode(path->dentry);
5872                handle_t *handle;
5873
5874                /*
5875                 * Set inode flags to prevent userspace from messing with quota
5876                 * files. If this fails, we return success anyway since quotas
5877                 * are already enabled and this is not a hard failure.
5878                 */
5879                inode_lock(inode);
5880                handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5881                if (IS_ERR(handle))
5882                        goto unlock_inode;
5883                EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
5884                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
5885                                S_NOATIME | S_IMMUTABLE);
5886                ext4_mark_inode_dirty(handle, inode);
5887                ext4_journal_stop(handle);
5888        unlock_inode:
5889                inode_unlock(inode);
5890        }
5891        return err;
5892}
5893
5894static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5895                             unsigned int flags)
5896{
5897        int err;
5898        struct inode *qf_inode;
5899        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5900                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5901                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5902                le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5903        };
5904
5905        BUG_ON(!ext4_has_feature_quota(sb));
5906
5907        if (!qf_inums[type])
5908                return -EPERM;
5909
5910        qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
5911        if (IS_ERR(qf_inode)) {
5912                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5913                return PTR_ERR(qf_inode);
5914        }
5915
5916        /* Don't account quota for quota files to avoid recursion */
5917        qf_inode->i_flags |= S_NOQUOTA;
5918        lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
5919        err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
5920        if (err)
5921                lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
5922        iput(qf_inode);
5923
5924        return err;
5925}
5926
5927/* Enable usage tracking for all quota types. */
5928static int ext4_enable_quotas(struct super_block *sb)
5929{
5930        int type, err = 0;
5931        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5932                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5933                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
5934                le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
5935        };
5936        bool quota_mopt[EXT4_MAXQUOTAS] = {
5937                test_opt(sb, USRQUOTA),
5938                test_opt(sb, GRPQUOTA),
5939                test_opt(sb, PRJQUOTA),
5940        };
5941
5942        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
5943        for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5944                if (qf_inums[type]) {
5945                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5946                                DQUOT_USAGE_ENABLED |
5947                                (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
5948                        if (err) {
5949                                ext4_warning(sb,
5950                                        "Failed to enable quota tracking "
5951                                        "(type=%d, err=%d). Please run "
5952                                        "e2fsck to fix.", type, err);
5953                                for (type--; type >= 0; type--)
5954                                        dquot_quota_off(sb, type);
5955
5956                                return err;
5957                        }
5958                }
5959        }
5960        return 0;
5961}
5962
5963static int ext4_quota_off(struct super_block *sb, int type)
5964{
5965        struct inode *inode = sb_dqopt(sb)->files[type];
5966        handle_t *handle;
5967        int err;
5968
5969        /* Force all delayed allocation blocks to be allocated.
5970         * Caller already holds s_umount sem */
5971        if (test_opt(sb, DELALLOC))
5972                sync_filesystem(sb);
5973
5974        if (!inode || !igrab(inode))
5975                goto out;
5976
5977        err = dquot_quota_off(sb, type);
5978        if (err || ext4_has_feature_quota(sb))
5979                goto out_put;
5980
5981        inode_lock(inode);
5982        /*
5983         * Update modification times of quota files when userspace can
5984         * start looking at them. If we fail, we return success anyway since
5985         * this is not a hard failure and quotas are already disabled.
5986         */
5987        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5988        if (IS_ERR(handle))
5989                goto out_unlock;
5990        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
5991        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
5992        inode->i_mtime = inode->i_ctime = current_time(inode);
5993        ext4_mark_inode_dirty(handle, inode);
5994        ext4_journal_stop(handle);
5995out_unlock:
5996        inode_unlock(inode);
5997out_put:
5998        lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
5999        iput(inode);
6000        return err;
6001out:
6002        return dquot_quota_off(sb, type);
6003}
6004
6005/* Read data from quotafile - avoid pagecache and such because we cannot afford
6006 * acquiring the locks... As quota files are never truncated and quota code
6007 * itself serializes the operations (and no one else should touch the files)
6008 * we don't have to be afraid of races */
6009static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
6010                               size_t len, loff_t off)
6011{
6012        struct inode *inode = sb_dqopt(sb)->files[type];
6013        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6014        int offset = off & (sb->s_blocksize - 1);
6015        int tocopy;
6016        size_t toread;
6017        struct buffer_head *bh;
6018        loff_t i_size = i_size_read(inode);
6019
6020        if (off > i_size)
6021                return 0;
6022        if (off+len > i_size)
6023                len = i_size-off;
6024        toread = len;
6025        while (toread > 0) {
6026                tocopy = sb->s_blocksize - offset < toread ?
6027                                sb->s_blocksize - offset : toread;
6028                bh = ext4_bread(NULL, inode, blk, 0);
6029                if (IS_ERR(bh))
6030                        return PTR_ERR(bh);
6031                if (!bh)        /* A hole? */
6032                        memset(data, 0, tocopy);
6033                else
6034                        memcpy(data, bh->b_data+offset, tocopy);
6035                brelse(bh);
6036                offset = 0;
6037                toread -= tocopy;
6038                data += tocopy;
6039                blk++;
6040        }
6041        return len;
6042}
6043
6044/* Write to quotafile (we know the transaction is already started and has
6045 * enough credits) */
6046static ssize_t ext4_quota_write(struct super_block *sb, int type,
6047                                const char *data, size_t len, loff_t off)
6048{
6049        struct inode *inode = sb_dqopt(sb)->files[type];
6050        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6051        int err, offset = off & (sb->s_blocksize - 1);
6052        int retries = 0;
6053        struct buffer_head *bh;
6054        handle_t *handle = journal_current_handle();
6055
6056        if (EXT4_SB(sb)->s_journal && !handle) {
6057                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6058                        " cancelled because transaction is not started",
6059                        (unsigned long long)off, (unsigned long long)len);
6060                return -EIO;
6061        }
6062        /*
6063         * Since we account only one data block in transaction credits,
6064         * then it is impossible to cross a block boundary.
6065         */
6066        if (sb->s_blocksize - offset < len) {
6067                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6068                        " cancelled because not block aligned",
6069                        (unsigned long long)off, (unsigned long long)len);
6070                return -EIO;
6071        }
6072
6073        do {
6074                bh = ext4_bread(handle, inode, blk,
6075                                EXT4_GET_BLOCKS_CREATE |
6076                                EXT4_GET_BLOCKS_METADATA_NOFAIL);
6077        } while (PTR_ERR(bh) == -ENOSPC &&
6078                 ext4_should_retry_alloc(inode->i_sb, &retries));
6079        if (IS_ERR(bh))
6080                return PTR_ERR(bh);
6081        if (!bh)
6082                goto out;
6083        BUFFER_TRACE(bh, "get write access");
6084        err = ext4_journal_get_write_access(handle, bh);
6085        if (err) {
6086                brelse(bh);
6087                return err;
6088        }
6089        lock_buffer(bh);
6090        memcpy(bh->b_data+offset, data, len);
6091        flush_dcache_page(bh->b_page);
6092        unlock_buffer(bh);
6093        err = ext4_handle_dirty_metadata(handle, NULL, bh);
6094        brelse(bh);
6095out:
6096        if (inode->i_size < off + len) {
6097                i_size_write(inode, off + len);
6098                EXT4_I(inode)->i_disksize = inode->i_size;
6099                ext4_mark_inode_dirty(handle, inode);
6100        }
6101        return len;
6102}
6103#endif
6104
6105static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
6106                       const char *dev_name, void *data)
6107{
6108        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
6109}
6110
6111#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
6112static inline void register_as_ext2(void)
6113{
6114        int err = register_filesystem(&ext2_fs_type);
6115        if (err)
6116                printk(KERN_WARNING
6117                       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
6118}
6119
6120static inline void unregister_as_ext2(void)
6121{
6122        unregister_filesystem(&ext2_fs_type);
6123}
6124
/*
 * Check the feature set of @sb against what the ext2 personality accepts.
 *
 * Returns 0 when unknown ext2 incompat features are present.  Otherwise a
 * read-only superblock is always accepted (1), and a writable one is
 * accepted only when it has no unknown ext2 ro_compat features.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;

	/* The ro_compat check is only evaluated for a writable superblock. */
	return sb_rdonly(sb) ||
	       !ext4_has_unknown_ext2_ro_compat_features(sb);
}
6135#else
/*
 * Stubs for configurations that do not satisfy the preprocessor condition
 * above: registration and unregistration do nothing, and no feature set is
 * ever reported as acceptable for ext2.
 */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	return 0;
}
6139#endif
6140
6141static inline void register_as_ext3(void)
6142{
6143        int err = register_filesystem(&ext3_fs_type);
6144        if (err)
6145                printk(KERN_WARNING
6146                       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
6147}
6148
6149static inline void unregister_as_ext3(void)
6150{
6151        unregister_filesystem(&ext3_fs_type);
6152}
6153
/*
 * Check the feature set of @sb against what the ext3 personality accepts.
 *
 * Returns 0 when unknown ext3 incompat features are present or when the
 * journal feature is missing.  Otherwise a read-only superblock is always
 * accepted (1), and a writable one is accepted only when it has no unknown
 * ext3 ro_compat features.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb) ||
	    !ext4_has_feature_journal(sb))
		return 0;

	/* The ro_compat check is only evaluated for a writable superblock. */
	return sb_rdonly(sb) ||
	       !ext4_has_unknown_ext3_ro_compat_features(sb);
}
6166
6167static struct file_system_type ext4_fs_type = {
6168        .owner          = THIS_MODULE,
6169        .name           = "ext4",
6170        .mount          = ext4_mount,
6171        .kill_sb        = kill_block_super,
6172        .fs_flags       = FS_REQUIRES_DEV,
6173};
6174MODULE_ALIAS_FS("ext4");
6175
6176/* Shared across all ext4 file systems */
6177wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
6178
6179static int __init ext4_init_fs(void)
6180{
6181        int i, err;
6182
6183        ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
6184        ext4_li_info = NULL;
6185        mutex_init(&ext4_li_mtx);
6186
6187        /* Build-time check for flags consistency */
6188        ext4_check_flag_values();
6189
6190        for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
6191                init_waitqueue_head(&ext4__ioend_wq[i]);
6192
6193        err = ext4_init_es();
6194        if (err)
6195                return err;
6196
6197        err = ext4_init_pending();
6198        if (err)
6199                goto out7;
6200
6201        err = ext4_init_post_read_processing();
6202        if (err)
6203                goto out6;
6204
6205        err = ext4_init_pageio();
6206        if (err)
6207                goto out5;
6208
6209        err = ext4_init_system_zone();
6210        if (err)
6211                goto out4;
6212
6213        err = ext4_init_sysfs();
6214        if (err)
6215                goto out3;
6216
6217        err = ext4_init_mballoc();
6218        if (err)
6219                goto out2;
6220        err = init_inodecache();
6221        if (err)
6222                goto out1;
6223        register_as_ext3();
6224        register_as_ext2();
6225        err = register_filesystem(&ext4_fs_type);
6226        if (err)
6227                goto out;
6228
6229        return 0;
6230out:
6231        unregister_as_ext2();
6232        unregister_as_ext3();
6233        destroy_inodecache();
6234out1:
6235        ext4_exit_mballoc();
6236out2:
6237        ext4_exit_sysfs();
6238out3:
6239        ext4_exit_system_zone();
6240out4:
6241        ext4_exit_pageio();
6242out5:
6243        ext4_exit_post_read_processing();
6244out6:
6245        ext4_exit_pending();
6246out7:
6247        ext4_exit_es();
6248
6249        return err;
6250}
6251
6252static void __exit ext4_exit_fs(void)
6253{
6254        ext4_destroy_lazyinit_thread();
6255        unregister_as_ext2();
6256        unregister_as_ext3();
6257        unregister_filesystem(&ext4_fs_type);
6258        destroy_inodecache();
6259        ext4_exit_mballoc();
6260        ext4_exit_sysfs();
6261        ext4_exit_system_zone();
6262        ext4_exit_pageio();
6263        ext4_exit_post_read_processing();
6264        ext4_exit_es();
6265        ext4_exit_pending();
6266}
6267
6268MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
6269MODULE_DESCRIPTION("Fourth Extended Filesystem");
6270MODULE_LICENSE("GPL");
6271MODULE_SOFTDEP("pre: crc32c");
6272module_init(ext4_init_fs)
6273module_exit(ext4_exit_fs)
6274