LXR linux/fs/ext4/super.c

   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/jbd2.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/parser.h>
  29#include <linux/buffer_head.h>
  30#include <linux/exportfs.h>
  31#include <linux/vfs.h>
  32#include <linux/random.h>
  33#include <linux/mount.h>
  34#include <linux/namei.h>
  35#include <linux/quotaops.h>
  36#include <linux/seq_file.h>
  37#include <linux/proc_fs.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/dax.h>
  42#include <linux/cleancache.h>
  43#include <asm/uaccess.h>
  44
  45#include <linux/kthread.h>
  46#include <linux/freezer.h>
  47
  48#include "ext4.h"
  49#include "ext4_extents.h"       /* Needed for trace points definition */
  50#include "ext4_jbd2.h"
  51#include "xattr.h"
  52#include "acl.h"
  53#include "mballoc.h"
  54
  55#define CREATE_TRACE_POINTS
  56#include <trace/events/ext4.h>
  57
  58static struct proc_dir_entry *ext4_proc_root;
  59static struct kset *ext4_kset;
  60static struct ext4_lazy_init *ext4_li_info;
  61static struct mutex ext4_li_mtx;
  62static struct ext4_features *ext4_feat;
  63
  64static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  65                             unsigned long journal_devnum);
  66static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  67static int ext4_commit_super(struct super_block *sb, int sync);
  68static void ext4_mark_recovery_complete(struct super_block *sb,
  69                                        struct ext4_super_block *es);
  70static void ext4_clear_journal_err(struct super_block *sb,
  71                                   struct ext4_super_block *es);
  72static int ext4_sync_fs(struct super_block *sb, int wait);
  73static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
  74static int ext4_remount(struct super_block *sb, int *flags, char *data);
  75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  76static int ext4_unfreeze(struct super_block *sb);
  77static int ext4_freeze(struct super_block *sb);
  78static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  79                       const char *dev_name, void *data);
  80static inline int ext2_feature_set_ok(struct super_block *sb);
  81static inline int ext3_feature_set_ok(struct super_block *sb);
  82static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  83static void ext4_destroy_lazyinit_thread(void);
  84static void ext4_unregister_li_request(struct super_block *sb);
  85static void ext4_clear_request_list(void);
  86static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
  87
  88#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
  89static struct file_system_type ext2_fs_type = {
  90        .owner          = THIS_MODULE,
  91        .name           = "ext2",
  92        .mount          = ext4_mount,
  93        .kill_sb        = kill_block_super,
  94        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE,
  95};
  96MODULE_ALIAS_FS("ext2");
  97MODULE_ALIAS("ext2");
  98#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
  99#else
 100#define IS_EXT2_SB(sb) (0)
 101#endif
 102
 103
 104#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 105static struct file_system_type ext3_fs_type = {
 106        .owner          = THIS_MODULE,
 107        .name           = "ext3",
 108        .mount          = ext4_mount,
 109        .kill_sb        = kill_block_super,
 110        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE,
 111};
 112MODULE_ALIAS_FS("ext3");
 113MODULE_ALIAS("ext3");
 114#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 115#else
 116#define IS_EXT3_SB(sb) (0)
 117#endif
 118
 119static int ext4_verify_csum_type(struct super_block *sb,
 120                                 struct ext4_super_block *es)
 121{
 122        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 123                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 124                return 1;
 125
 126        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 127}
 128
 129static __le32 ext4_superblock_csum(struct super_block *sb,
 130                                   struct ext4_super_block *es)
 131{
 132        struct ext4_sb_info *sbi = EXT4_SB(sb);
 133        int offset = offsetof(struct ext4_super_block, s_checksum);
 134        __u32 csum;
 135
 136        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 137
 138        return cpu_to_le32(csum);
 139}
 140
 141static int ext4_superblock_csum_verify(struct super_block *sb,
 142                                       struct ext4_super_block *es)
 143{
 144        if (!ext4_has_metadata_csum(sb))
 145                return 1;
 146
 147        return es->s_checksum == ext4_superblock_csum(sb, es);
 148}
 149
 150void ext4_superblock_csum_set(struct super_block *sb)
 151{
 152        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 153
 154        if (!ext4_has_metadata_csum(sb))
 155                return;
 156
 157        es->s_checksum = ext4_superblock_csum(sb, es);
 158}
 159
 160void *ext4_kvmalloc(size_t size, gfp_t flags)
 161{
 162        void *ret;
 163
 164        ret = kmalloc(size, flags);
 165        if (!ret)
 166                ret = __vmalloc(size, flags, PAGE_KERNEL);
 167        return ret;
 168}
 169
 170void *ext4_kvzalloc(size_t size, gfp_t flags)
 171{
 172        void *ret;
 173
 174        ret = kzalloc(size, flags);
 175        if (!ret)
 176                ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 177        return ret;
 178}
 179
 180void ext4_kvfree(void *ptr)
 181{
 182        if (is_vmalloc_addr(ptr))
 183                vfree(ptr);
 184        else
 185                kfree(ptr);
 186
 187}
 188
 189ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 190                               struct ext4_group_desc *bg)
 191{
 192        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 193                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 194                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 195}
 196
 197ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 198                               struct ext4_group_desc *bg)
 199{
 200        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 201                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 202                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 203}
 204
 205ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 206                              struct ext4_group_desc *bg)
 207{
 208        return le32_to_cpu(bg->bg_inode_table_lo) |
 209                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 210                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 211}
 212
 213__u32 ext4_free_group_clusters(struct super_block *sb,
 214                               struct ext4_group_desc *bg)
 215{
 216        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 217                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 218                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 219}
 220
 221__u32 ext4_free_inodes_count(struct super_block *sb,
 222                              struct ext4_group_desc *bg)
 223{
 224        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 225                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 226                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 227}
 228
 229__u32 ext4_used_dirs_count(struct super_block *sb,
 230                              struct ext4_group_desc *bg)
 231{
 232        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 233                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 234                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 235}
 236
 237__u32 ext4_itable_unused_count(struct super_block *sb,
 238                              struct ext4_group_desc *bg)
 239{
 240        return le16_to_cpu(bg->bg_itable_unused_lo) |
 241                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 242                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 243}
 244
 245void ext4_block_bitmap_set(struct super_block *sb,
 246                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 247{
 248        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 249        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 250                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 251}
 252
 253void ext4_inode_bitmap_set(struct super_block *sb,
 254                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 255{
 256        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 257        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 258                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 259}
 260
 261void ext4_inode_table_set(struct super_block *sb,
 262                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 263{
 264        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 265        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 266                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 267}
 268
 269void ext4_free_group_clusters_set(struct super_block *sb,
 270                                  struct ext4_group_desc *bg, __u32 count)
 271{
 272        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 273        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 274                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 275}
 276
 277void ext4_free_inodes_set(struct super_block *sb,
 278                          struct ext4_group_desc *bg, __u32 count)
 279{
 280        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 281        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 282                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 283}
 284
 285void ext4_used_dirs_set(struct super_block *sb,
 286                          struct ext4_group_desc *bg, __u32 count)
 287{
 288        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 289        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 290                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 291}
 292
 293void ext4_itable_unused_set(struct super_block *sb,
 294                          struct ext4_group_desc *bg, __u32 count)
 295{
 296        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 297        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 298                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 299}
 300
 301
 302static void __save_error_info(struct super_block *sb, const char *func,
 303                            unsigned int line)
 304{
 305        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 306
 307        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 308        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 309        es->s_last_error_time = cpu_to_le32(get_seconds());
 310        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 311        es->s_last_error_line = cpu_to_le32(line);
 312        if (!es->s_first_error_time) {
 313                es->s_first_error_time = es->s_last_error_time;
 314                strncpy(es->s_first_error_func, func,
 315                        sizeof(es->s_first_error_func));
 316                es->s_first_error_line = cpu_to_le32(line);
 317                es->s_first_error_ino = es->s_last_error_ino;
 318                es->s_first_error_block = es->s_last_error_block;
 319        }
 320        /*
 321         * Start the daily error reporting function if it hasn't been
 322         * started already
 323         */
 324        if (!es->s_error_count)
 325                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 326        le32_add_cpu(&es->s_error_count, 1);
 327}
 328
 329static void save_error_info(struct super_block *sb, const char *func,
 330                            unsigned int line)
 331{
 332        __save_error_info(sb, func, line);
 333        ext4_commit_super(sb, 1);
 334}
 335
 336/*
 337 * The del_gendisk() function uninitializes the disk-specific data
 338 * structures, including the bdi structure, without telling anyone
 339 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 340 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 341 * This is a kludge to prevent these oops until we can put in a proper
 342 * hook in del_gendisk() to inform the VFS and file system layers.
 343 */
 344static int block_device_ejected(struct super_block *sb)
 345{
 346        struct inode *bd_inode = sb->s_bdev->bd_inode;
 347        struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 348
 349        return bdi->dev == NULL;
 350}
 351
 352static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 353{
 354        struct super_block              *sb = journal->j_private;
 355        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 356        int                             error = is_journal_aborted(journal);
 357        struct ext4_journal_cb_entry    *jce;
 358
 359        BUG_ON(txn->t_state == T_FINISHED);
 360        spin_lock(&sbi->s_md_lock);
 361        while (!list_empty(&txn->t_private_list)) {
 362                jce = list_entry(txn->t_private_list.next,
 363                                 struct ext4_journal_cb_entry, jce_list);
 364                list_del_init(&jce->jce_list);
 365                spin_unlock(&sbi->s_md_lock);
 366                jce->jce_func(sb, jce, error);
 367                spin_lock(&sbi->s_md_lock);
 368        }
 369        spin_unlock(&sbi->s_md_lock);
 370}
 371
 372static bool system_going_down(void)
 373{
 374        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 375                || system_state == SYSTEM_RESTART;
 376}
 377
 378/* Deal with the reporting of failure conditions on a filesystem such as
 379 * inconsistencies detected or read IO failures.
 380 *
 381 * On ext2, we can store the error state of the filesystem in the
 382 * superblock.  That is not possible on ext4, because we may have other
 383 * write ordering constraints on the superblock which prevent us from
 384 * writing it out straight away; and given that the journal is about to
 385 * be aborted, we can't rely on the current, or future, transactions to
 386 * write out the superblock safely.
 387 *
 388 * We'll just use the jbd2_journal_abort() error code to record an error in
 389 * the journal instead.  On recovery, the journal will complain about
 390 * that error until we've noted it down and cleared it.
 391 */
 392
 393static void ext4_handle_error(struct super_block *sb)
 394{
 395        if (sb->s_flags & MS_RDONLY)
 396                return;
 397
 398        if (!test_opt(sb, ERRORS_CONT)) {
 399                journal_t *journal = EXT4_SB(sb)->s_journal;
 400
 401                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 402                if (journal)
 403                        jbd2_journal_abort(journal, -EIO);
 404        }
 405        /*
 406         * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 407         * could panic during 'reboot -f' as the underlying device got already
 408         * disabled.
 409         */
 410        if (test_opt(sb, ERRORS_RO) || system_going_down()) {
 411                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 412                /*
 413                 * Make sure updated value of ->s_mount_flags will be visible
 414                 * before ->s_flags update
 415                 */
 416                smp_wmb();
 417                sb->s_flags |= MS_RDONLY;
 418        } else if (test_opt(sb, ERRORS_PANIC)) {
 419                if (EXT4_SB(sb)->s_journal &&
 420                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 421                        return;
 422                panic("EXT4-fs (device %s): panic forced after error\n",
 423                        sb->s_id);
 424        }
 425}
 426
 427#define ext4_error_ratelimit(sb)                                        \
 428                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 429                             "EXT4-fs error")
 430
 431void __ext4_error(struct super_block *sb, const char *function,
 432                  unsigned int line, const char *fmt, ...)
 433{
 434        struct va_format vaf;
 435        va_list args;
 436
 437        if (ext4_error_ratelimit(sb)) {
 438                va_start(args, fmt);
 439                vaf.fmt = fmt;
 440                vaf.va = &args;
 441                printk(KERN_CRIT
 442                       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 443                       sb->s_id, function, line, current->comm, &vaf);
 444                va_end(args);
 445        }
 446        save_error_info(sb, function, line);
 447        ext4_handle_error(sb);
 448}
 449
 450void __ext4_error_inode(struct inode *inode, const char *function,
 451                        unsigned int line, ext4_fsblk_t block,
 452                        const char *fmt, ...)
 453{
 454        va_list args;
 455        struct va_format vaf;
 456        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 457
 458        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 459        es->s_last_error_block = cpu_to_le64(block);
 460        if (ext4_error_ratelimit(inode->i_sb)) {
 461                va_start(args, fmt);
 462                vaf.fmt = fmt;
 463                vaf.va = &args;
 464                if (block)
 465                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 466                               "inode #%lu: block %llu: comm %s: %pV\n",
 467                               inode->i_sb->s_id, function, line, inode->i_ino,
 468                               block, current->comm, &vaf);
 469                else
 470                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 471                               "inode #%lu: comm %s: %pV\n",
 472                               inode->i_sb->s_id, function, line, inode->i_ino,
 473                               current->comm, &vaf);
 474                va_end(args);
 475        }
 476        save_error_info(inode->i_sb, function, line);
 477        ext4_handle_error(inode->i_sb);
 478}
 479
 480void __ext4_error_file(struct file *file, const char *function,
 481                       unsigned int line, ext4_fsblk_t block,
 482                       const char *fmt, ...)
 483{
 484        va_list args;
 485        struct va_format vaf;
 486        struct ext4_super_block *es;
 487        struct inode *inode = file_inode(file);
 488        char pathname[80], *path;
 489
 490        es = EXT4_SB(inode->i_sb)->s_es;
 491        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 492        if (ext4_error_ratelimit(inode->i_sb)) {
 493                path = d_path(&(file->f_path), pathname, sizeof(pathname));
 494                if (IS_ERR(path))
 495                        path = "(unknown)";
 496                va_start(args, fmt);
 497                vaf.fmt = fmt;
 498                vaf.va = &args;
 499                if (block)
 500                        printk(KERN_CRIT
 501                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 502                               "block %llu: comm %s: path %s: %pV\n",
 503                               inode->i_sb->s_id, function, line, inode->i_ino,
 504                               block, current->comm, path, &vaf);
 505                else
 506                        printk(KERN_CRIT
 507                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 508                               "comm %s: path %s: %pV\n",
 509                               inode->i_sb->s_id, function, line, inode->i_ino,
 510                               current->comm, path, &vaf);
 511                va_end(args);
 512        }
 513        save_error_info(inode->i_sb, function, line);
 514        ext4_handle_error(inode->i_sb);
 515}
 516
 517const char *ext4_decode_error(struct super_block *sb, int errno,
 518                              char nbuf[16])
 519{
 520        char *errstr = NULL;
 521
 522        switch (errno) {
 523        case -EIO:
 524                errstr = "IO failure";
 525                break;
 526        case -ENOMEM:
 527                errstr = "Out of memory";
 528                break;
 529        case -EROFS:
 530                if (!sb || (EXT4_SB(sb)->s_journal &&
 531                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 532                        errstr = "Journal has aborted";
 533                else
 534                        errstr = "Readonly filesystem";
 535                break;
 536        default:
 537                /* If the caller passed in an extra buffer for unknown
 538                 * errors, textualise them now.  Else we just return
 539                 * NULL. */
 540                if (nbuf) {
 541                        /* Check for truncated error codes... */
 542                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 543                                errstr = nbuf;
 544                }
 545                break;
 546        }
 547
 548        return errstr;
 549}
 550
 551/* __ext4_std_error decodes expected errors from journaling functions
 552 * automatically and invokes the appropriate error response.  */
 553
 554void __ext4_std_error(struct super_block *sb, const char *function,
 555                      unsigned int line, int errno)
 556{
 557        char nbuf[16];
 558        const char *errstr;
 559
 560        /* Special case: if the error is EROFS, and we're not already
 561         * inside a transaction, then there's really no point in logging
 562         * an error. */
 563        if (errno == -EROFS && journal_current_handle() == NULL &&
 564            (sb->s_flags & MS_RDONLY))
 565                return;
 566
 567        if (ext4_error_ratelimit(sb)) {
 568                errstr = ext4_decode_error(sb, errno, nbuf);
 569                printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 570                       sb->s_id, function, line, errstr);
 571        }
 572
 573        save_error_info(sb, function, line);
 574        ext4_handle_error(sb);
 575}
 576
 577/*
 578 * ext4_abort is a much stronger failure handler than ext4_error.  The
 579 * abort function may be used to deal with unrecoverable failures such
 580 * as journal IO errors or ENOMEM at a critical moment in log management.
 581 *
 582 * We unconditionally force the filesystem into an ABORT|READONLY state,
 583 * unless the error response on the fs has been set to panic in which
 584 * case we take the easy way out and panic immediately.
 585 */
 586
 587void __ext4_abort(struct super_block *sb, const char *function,
 588                unsigned int line, const char *fmt, ...)
 589{
 590        va_list args;
 591
 592        save_error_info(sb, function, line);
 593        va_start(args, fmt);
 594        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
 595               function, line);
 596        vprintk(fmt, args);
 597        printk("\n");
 598        va_end(args);
 599
 600        if ((sb->s_flags & MS_RDONLY) == 0) {
 601                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 602                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 603                /*
 604                 * Make sure updated value of ->s_mount_flags will be visible
 605                 * before ->s_flags update
 606                 */
 607                smp_wmb();
 608                sb->s_flags |= MS_RDONLY;
 609                if (EXT4_SB(sb)->s_journal)
 610                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 611                save_error_info(sb, function, line);
 612        }
 613        if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
 614                if (EXT4_SB(sb)->s_journal &&
 615                  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
 616                        return;
 617                panic("EXT4-fs panic from previous error\n");
 618        }
 619}
 620
 621void __ext4_msg(struct super_block *sb,
 622                const char *prefix, const char *fmt, ...)
 623{
 624        struct va_format vaf;
 625        va_list args;
 626
 627        if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 628                return;
 629
 630        va_start(args, fmt);
 631        vaf.fmt = fmt;
 632        vaf.va = &args;
 633        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 634        va_end(args);
 635}
 636
 637void __ext4_warning(struct super_block *sb, const char *function,
 638                    unsigned int line, const char *fmt, ...)
 639{
 640        struct va_format vaf;
 641        va_list args;
 642
 643        if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
 644                          "EXT4-fs warning"))
 645                return;
 646
 647        va_start(args, fmt);
 648        vaf.fmt = fmt;
 649        vaf.va = &args;
 650        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 651               sb->s_id, function, line, &vaf);
 652        va_end(args);
 653}
 654
 655void __ext4_grp_locked_error(const char *function, unsigned int line,
 656                             struct super_block *sb, ext4_group_t grp,
 657                             unsigned long ino, ext4_fsblk_t block,
 658                             const char *fmt, ...)
 659__releases(bitlock)
 660__acquires(bitlock)
 661{
 662        struct va_format vaf;
 663        va_list args;
 664        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 665
 666        es->s_last_error_ino = cpu_to_le32(ino);
 667        es->s_last_error_block = cpu_to_le64(block);
 668        __save_error_info(sb, function, line);
 669
 670        if (ext4_error_ratelimit(sb)) {
 671                va_start(args, fmt);
 672                vaf.fmt = fmt;
 673                vaf.va = &args;
 674                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 675                       sb->s_id, function, line, grp);
 676                if (ino)
 677                        printk(KERN_CONT "inode %lu: ", ino);
 678                if (block)
 679                        printk(KERN_CONT "block %llu:",
 680                               (unsigned long long) block);
 681                printk(KERN_CONT "%pV\n", &vaf);
 682                va_end(args);
 683        }
 684
 685        if (test_opt(sb, ERRORS_CONT)) {
 686                ext4_commit_super(sb, 0);
 687                return;
 688        }
 689
 690        ext4_unlock_group(sb, grp);
 691        ext4_commit_super(sb, 1);
 692        ext4_handle_error(sb);
 693        /*
 694         * We only get here in the ERRORS_RO case; relocking the group
 695         * may be dangerous, but nothing bad will happen since the
 696         * filesystem will have already been marked read/only and the
 697         * journal has been aborted.  We return 1 as a hint to callers
 698         * who might what to use the return value from
 699         * ext4_grp_locked_error() to distinguish between the
 700         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 701         * aggressively from the ext4 function in question, with a
 702         * more appropriate error code.
 703         */
 704        ext4_lock_group(sb, grp);
 705        return;
 706}
 707
 708void ext4_update_dynamic_rev(struct super_block *sb)
 709{
 710        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 711
 712        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 713                return;
 714
 715        ext4_warning(sb,
 716                     "updating to rev %d because of new feature flag, "
 717                     "running e2fsck is recommended",
 718                     EXT4_DYNAMIC_REV);
 719
 720        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 721        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 722        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 723        /* leave es->s_feature_*compat flags alone */
 724        /* es->s_uuid will be set by e2fsck if empty */
 725
 726        /*
 727         * The rest of the superblock fields should be zero, and if not it
 728         * means they are likely already in use, so leave them alone.  We
 729         * can leave it up to e2fsck to clean up any inconsistencies there.
 730         */
 731}
 732
 733/*
 734 * Open the external journal device
 735 */
 736static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 737{
 738        struct block_device *bdev;
 739        char b[BDEVNAME_SIZE];
 740
 741        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 742        if (IS_ERR(bdev))
 743                goto fail;
 744        return bdev;
 745
 746fail:
 747        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 748                        __bdevname(dev, b), PTR_ERR(bdev));
 749        return NULL;
 750}
 751
 752/*
 753 * Release the journal device
 754 */
 755static void ext4_blkdev_put(struct block_device *bdev)
 756{
 757        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 758}
 759
 760static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 761{
 762        struct block_device *bdev;
 763        bdev = sbi->journal_bdev;
 764        if (bdev) {
 765                ext4_blkdev_put(bdev);
 766                sbi->journal_bdev = NULL;
 767        }
 768}
 769
 770static inline struct inode *orphan_list_entry(struct list_head *l)
 771{
 772        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 773}
 774
 775static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 776{
 777        struct list_head *l;
 778
 779        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 780                 le32_to_cpu(sbi->s_es->s_last_orphan));
 781
 782        printk(KERN_ERR "sb_info orphan list:\n");
 783        list_for_each(l, &sbi->s_orphan) {
 784                struct inode *inode = orphan_list_entry(l);
 785                printk(KERN_ERR "  "
 786                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 787                       inode->i_sb->s_id, inode->i_ino, inode,
 788                       inode->i_mode, inode->i_nlink,
 789                       NEXT_ORPHAN(inode));
 790        }
 791}
 792
 793#ifdef CONFIG_QUOTA
 794static int ext4_quota_off(struct super_block *sb, int type);
 795
 796static inline void ext4_quota_off_umount(struct super_block *sb)
 797{
 798        int type;
 799
 800        /* Use our quota_off function to clear inode flags etc. */
 801        for (type = 0; type < MAXQUOTAS; type++)
 802                ext4_quota_off(sb, type);
 803}
 804#else
 805static inline void ext4_quota_off_umount(struct super_block *sb)
 806{
 807}
 808#endif
 809
/*
 * ext4_put_super - tear down the ext4 private state attached to @sb
 * @sb: superblock being shut down
 *
 * Installed as the .put_super operation in both ext4 super_operations
 * tables.  The steps run in a fixed order: quota and the reserved-conversion
 * workqueue are shut down, the journal (if any) is destroyed, allocator /
 * extent / xattr state is released, the superblock is written back on a
 * read-write mount, proc and sysfs entries are removed, group descriptors
 * and counters are freed, block device caches are flushed, and finally the
 * ext4_sb_info itself is freed.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int aborted = 0;
        int i, err;

        ext4_unregister_li_request(sb);
        ext4_quota_off_umount(sb);

        flush_workqueue(sbi->rsv_conversion_wq);
        destroy_workqueue(sbi->rsv_conversion_wq);

        if (sbi->s_journal) {
                /*
                 * Record the abort state before destroying the journal so
                 * that a destroy failure on an already-aborted journal does
                 * not trigger ext4_abort() below.
                 */
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
                if ((err < 0) && !aborted)
                        ext4_abort(sb, "Couldn't clean up the journal");
        }

        ext4_es_unregister_shrinker(sbi);
        del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);

        /*
         * Only a read-write mount whose journal was not aborted gets the
         * RECOVER flag cleared and the saved mount state restored.
         */
        if (!(sb->s_flags & MS_RDONLY) && !aborted) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        /* Any read-write mount, aborted or not, commits the superblock. */
        if (!(sb->s_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);

        if (sbi->s_proc) {
                remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
        kobject_del(&sbi->s_kobj);

        /* Drop each group descriptor buffer, then the arrays themselves. */
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
        ext4_kvfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
#ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->journal_bdev);
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
        brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        /*
         * Wait on s_kobj_unregister before freeing sbi, which embeds the
         * kobject (presumably completed by the kobject's release handler,
         * which is not visible here -- confirm).
         */
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev);
        kfree(sbi);
}
 901
 902static struct kmem_cache *ext4_inode_cachep;
 903
 904/*
 905 * Called inside transaction, so use GFP_NOFS
 906 */
 907static struct inode *ext4_alloc_inode(struct super_block *sb)
 908{
 909        struct ext4_inode_info *ei;
 910
 911        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 912        if (!ei)
 913                return NULL;
 914
 915        ei->vfs_inode.i_version = 1;
 916        spin_lock_init(&ei->i_raw_lock);
 917        INIT_LIST_HEAD(&ei->i_prealloc_list);
 918        spin_lock_init(&ei->i_prealloc_lock);
 919        ext4_es_init_tree(&ei->i_es_tree);
 920        rwlock_init(&ei->i_es_lock);
 921        INIT_LIST_HEAD(&ei->i_es_lru);
 922        ei->i_es_lru_nr = 0;
 923        ei->i_touch_when = 0;
 924        ei->i_reserved_data_blocks = 0;
 925        ei->i_reserved_meta_blocks = 0;
 926        ei->i_allocated_meta_blocks = 0;
 927        ei->i_da_metadata_calc_len = 0;
 928        ei->i_da_metadata_calc_last_lblock = 0;
 929        spin_lock_init(&(ei->i_block_reservation_lock));
 930#ifdef CONFIG_QUOTA
 931        ei->i_reserved_quota = 0;
 932#endif
 933        ei->jinode = NULL;
 934        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
 935        spin_lock_init(&ei->i_completed_io_lock);
 936        ei->i_sync_tid = 0;
 937        ei->i_datasync_tid = 0;
 938        atomic_set(&ei->i_unwritten, 0);
 939        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 940
 941        return &ei->vfs_inode;
 942}
 943
 944static int ext4_drop_inode(struct inode *inode)
 945{
 946        int drop = generic_drop_inode(inode);
 947
 948        trace_ext4_drop_inode(inode, drop);
 949        return drop;
 950}
 951
 952static void ext4_i_callback(struct rcu_head *head)
 953{
 954        struct inode *inode = container_of(head, struct inode, i_rcu);
 955        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 956}
 957
 958static void ext4_destroy_inode(struct inode *inode)
 959{
 960        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 961                ext4_msg(inode->i_sb, KERN_ERR,
 962                         "Inode %lu (%p): orphan list check failed!",
 963                         inode->i_ino, EXT4_I(inode));
 964                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 965                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 966                                true);
 967                dump_stack();
 968        }
 969        call_rcu(&inode->i_rcu, ext4_i_callback);
 970}
 971
 972static void init_once(void *foo)
 973{
 974        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 975
 976        INIT_LIST_HEAD(&ei->i_orphan);
 977        init_rwsem(&ei->xattr_sem);
 978        init_rwsem(&ei->i_data_sem);
 979        init_rwsem(&ei->i_mmap_sem);
 980        inode_init_once(&ei->vfs_inode);
 981}
 982
 983static int __init init_inodecache(void)
 984{
 985        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 986                                             sizeof(struct ext4_inode_info),
 987                                             0, (SLAB_RECLAIM_ACCOUNT|
 988                                                SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 989                                             init_once);
 990        if (ext4_inode_cachep == NULL)
 991                return -ENOMEM;
 992        return 0;
 993}
 994
/*
 * Destroy the ext4 inode slab cache.  ext4_destroy_inode() frees inodes
 * through call_rcu(), so rcu_barrier() is called first, before the cache
 * itself is destroyed.
 */
static void destroy_inodecache(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */

        rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
}
1004
1005void ext4_clear_inode(struct inode *inode)
1006{
1007        invalidate_inode_buffers(inode);
1008        clear_inode(inode);
1009        dquot_drop(inode);
1010        ext4_discard_preallocations(inode);
1011        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1012        ext4_es_lru_del(inode);
1013        if (EXT4_I(inode)->jinode) {
1014                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1015                                               EXT4_I(inode)->jinode);
1016                jbd2_free_inode(EXT4_I(inode)->jinode);
1017                EXT4_I(inode)->jinode = NULL;
1018        }
1019}
1020
1021static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1022                                        u64 ino, u32 generation)
1023{
1024        struct inode *inode;
1025
1026        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1027                return ERR_PTR(-ESTALE);
1028        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1029                return ERR_PTR(-ESTALE);
1030
1031        /* iget isn't really right if the inode is currently unallocated!!
1032         *
1033         * ext4_read_inode will return a bad_inode if the inode had been
1034         * deleted, so we should be safe.
1035         *
1036         * Currently we don't know the generation for parent directory, so
1037         * a generation of 0 means "accept any"
1038         */
1039        inode = ext4_iget_normal(sb, ino);
1040        if (IS_ERR(inode))
1041                return ERR_CAST(inode);
1042        if (generation && inode->i_generation != generation) {
1043                iput(inode);
1044                return ERR_PTR(-ESTALE);
1045        }
1046
1047        return inode;
1048}
1049
/*
 * .fh_to_dentry export operation: forwards all arguments unchanged to
 * generic_fh_to_dentry(), supplying ext4_nfs_get_inode() as the inode
 * lookup callback, and returns whatever that helper returns.
 */
static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
}
1056
/*
 * .fh_to_parent export operation: forwards all arguments unchanged to
 * generic_fh_to_parent(), supplying ext4_nfs_get_inode() as the inode
 * lookup callback, and returns whatever that helper returns.
 */
static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
}
1063
1064/*
1065 * Try to release metadata pages (indirect blocks, directories) which are
1066 * mapped via the block device.  Since these pages could have journal heads
1067 * which would prevent try_to_free_buffers() from freeing them, we must use
1068 * jbd2 layer's try_to_free_buffers() function to release them.
1069 */
1070static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1071                                 gfp_t wait)
1072{
1073        journal_t *journal = EXT4_SB(sb)->s_journal;
1074
1075        WARN_ON(PageChecked(page));
1076        if (!page_has_buffers(page))
1077                return 0;
1078        if (journal)
1079                return jbd2_journal_try_to_free_buffers(journal, page,
1080                                                        wait & ~__GFP_WAIT);
1081        return try_to_free_buffers(page);
1082}
1083
#ifdef CONFIG_QUOTA
/* Printable name of a quota type: "user" for USRQUOTA, "group" otherwise. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Select the identifier <on>USRJQUOTA or <on>GRPJQUOTA according to quota
 * type @t.  @on is an identifier prefix and is pasted directly: the ##
 * operator acts on the tokens next to it, so wrapping @on in parentheses
 * would paste ")" with the suffix, which is not a valid token.
 */
#define QTYPE2MOPT(on, t) \
        ((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))

/* Forward declarations for the quota hooks defined later in this file. */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
static int ext4_quota_on_sysfile(struct super_block *sb, int type,
                                 int format_id);
static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);

/*
 * Per-dquot operations: ext4 versions for write/acquire/release/dirty/info,
 * generic dquot_alloc()/dquot_destroy() for allocation and destruction.
 */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space = ext4_get_reserved_space,
        .write_dquot    = ext4_write_dquot,
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
};

/*
 * quotactl operations that turn quota on through .quota_on (which takes a
 * path) and off through ext4_quota_off().
 */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on       = ext4_quota_on,
        .quota_off      = ext4_quota_off,
        .quota_sync     = dquot_quota_sync,
        .get_info       = dquot_get_dqinfo,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
};

/*
 * quotactl operations that use the *_sysfile on/off handlers via
 * .quota_on_meta; the remaining members match ext4_qctl_operations.
 */
static const struct quotactl_ops ext4_qctl_sysfile_operations = {
        .quota_on_meta  = ext4_quota_on_sysfile,
        .quota_off      = ext4_quota_off_sysfile,
        .quota_sync     = dquot_quota_sync,
        .get_info       = dquot_get_dqinfo,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
};
#endif
1138
1139static const struct super_operations ext4_sops = {
1140        .alloc_inode    = ext4_alloc_inode,
1141        .destroy_inode  = ext4_destroy_inode,
1142        .write_inode    = ext4_write_inode,
1143        .dirty_inode    = ext4_dirty_inode,
1144        .drop_inode     = ext4_drop_inode,
1145        .evict_inode    = ext4_evict_inode,
1146        .put_super      = ext4_put_super,
1147        .sync_fs        = ext4_sync_fs,
1148        .freeze_fs      = ext4_freeze,
1149        .unfreeze_fs    = ext4_unfreeze,
1150        .statfs         = ext4_statfs,
1151        .remount_fs     = ext4_remount,
1152        .show_options   = ext4_show_options,
1153#ifdef CONFIG_QUOTA
1154        .quota_read     = ext4_quota_read,
1155        .quota_write    = ext4_quota_write,
1156#endif
1157        .bdev_try_to_free_page = bdev_try_to_free_page,
1158};
1159
1160static const struct super_operations ext4_nojournal_sops = {
1161        .alloc_inode    = ext4_alloc_inode,
1162        .destroy_inode  = ext4_destroy_inode,
1163        .write_inode    = ext4_write_inode,
1164        .dirty_inode    = ext4_dirty_inode,
1165        .drop_inode     = ext4_drop_inode,
1166        .evict_inode    = ext4_evict_inode,
1167        .sync_fs        = ext4_sync_fs_nojournal,
1168        .put_super      = ext4_put_super,
1169        .statfs         = ext4_statfs,
1170        .remount_fs     = ext4_remount,
1171        .show_options   = ext4_show_options,
1172#ifdef CONFIG_QUOTA
1173        .quota_read     = ext4_quota_read,
1174        .quota_write    = ext4_quota_write,
1175#endif
1176        .bdev_try_to_free_page = bdev_try_to_free_page,
1177};
1178
1179static const struct export_operations ext4_export_ops = {
1180        .fh_to_dentry = ext4_fh_to_dentry,
1181        .fh_to_parent = ext4_fh_to_parent,
1182        .get_parent = ext4_get_parent,
1183};
1184
/*
 * Token identifiers for mount options.  They are the first member of each
 * tokens[] entry and the 'token' member of each ext4_mount_opts[] entry.
 * Opt_err is the terminator in both tables; Opt_removed is shared by
 * options that are recognised but ignored.
 *
 * Opt_mblk_io_submit and Opt_nomblk_io_submit are not referenced by
 * tokens[], where the corresponding strings map to Opt_removed.
 */
enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum,
};
1206
/*
 * Mount option patterns and the token each one maps to.  A pattern may
 * carry a conversion such as %u or %s for an argument.  Several patterns
 * can share a token (aliases such as "grpid"/"bsdgroups", or the with- and
 * without-argument forms of "barrier", "auto_da_alloc" and "init_itable").
 * The table ends with the {Opt_err, NULL} entry.
 *
 * NOTE(review): the bare "usrjquota=" / "grpjquota=" entries are listed
 * ahead of their "%s" forms; this presumably relies on the matcher taking
 * the first pattern that fits, so keep the relative order -- confirm
 * against the parser implementation.
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_removed, "oldalloc"},
        {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "norecovery"},
        {Opt_noload, "noload"},
        {Opt_removed, "nobh"},
        {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_path, "journal_path=%s"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_nojournal_checksum, "nojournal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        /* Empty value: handled as "clear the journaled quota file name". */
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        {Opt_dioread_nolock, "dioread_nolock"},
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
};
1289
1290static ext4_fsblk_t get_sb_block(void **data)
1291{
1292        ext4_fsblk_t    sb_block;
1293        char            *options = (char *) *data;
1294
1295        if (!options || strncmp(options, "sb=", 3) != 0)
1296                return 1;       /* Default location */
1297
1298        options += 3;
1299        /* TODO: use simple_strtoll with >32bit ext4 */
1300        sb_block = simple_strtoul(options, &options, 0);
1301        if (*options && *options != ',') {
1302                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1303                       (char *) *data);
1304                return 1;
1305        }
1306        if (*options == ',')
1307                options++;
1308        *data = (void *) options;
1309
1310        return sb_block;
1311}
1312
/* Journal I/O priority default: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/*
 * Format string for the deprecation warning; takes the option name and the
 * release by which it will be removed.  Never modified, hence const.
 */
static const char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1316
#ifdef CONFIG_QUOTA
/*
 * Record the journaled quota file name given by a usrjquota=/grpjquota=
 * mount option.
 *
 * @sb:    superblock being configured
 * @qtype: quota type indexing sbi->s_qf_names[]
 * @args:  matched substring holding the file name
 *
 * Returns 1 when the name was stored or is identical to the one already
 * stored, and -1 on any error: quota already loaded with no name recorded
 * for this type, the QUOTA feature is enabled, out of memory, a different
 * name was already given, or the name contains '/'.
 */
static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        char *new_name;
        int status = -1;

        if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) {
                ext4_msg(sb, KERN_ERR,
                         "Cannot change journaled quota options "
                         "when quota turned on");
                return -1;
        }

        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
                ext4_msg(sb, KERN_ERR,
                         "Cannot set journaled quota options "
                         "when QUOTA feature is enabled");
                return -1;
        }

        new_name = match_strdup(args);
        if (new_name == NULL) {
                ext4_msg(sb, KERN_ERR,
                         "Not enough memory for storing quotafile name");
                return -1;
        }

        if (sbi->s_qf_names[qtype] != NULL) {
                /* Repeating the same name is fine; a different one is not. */
                if (strcmp(sbi->s_qf_names[qtype], new_name) != 0)
                        ext4_msg(sb, KERN_ERR,
                                 "%s quota file already specified",
                                 QTYPE2NAME(qtype));
                else
                        status = 1;
        } else if (strchr(new_name, '/') != NULL) {
                ext4_msg(sb, KERN_ERR,
                         "quotafile must be on filesystem root");
        } else {
                /* Ownership of the duplicated string passes to sbi. */
                sbi->s_qf_names[qtype] = new_name;
                set_opt(sb, QUOTA);
                return 1;
        }

        /* Every path that did not store the copy must free it. */
        kfree(new_name);
        return status;
}

/*
 * Forget the journaled quota file name for @qtype.
 *
 * Returns 1 after freeing and clearing the stored name (also when none was
 * stored), or -1 when quota is currently loaded and a name is recorded for
 * this type.
 */
static int clear_qf_name(struct super_block *sb, int qtype)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);

        if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) {
                ext4_msg(sb, KERN_ERR,
                         "Cannot change journaled quota options "
                         "when quota turned on");
                return -1;
        }

        kfree(sbi->s_qf_names[qtype]);
        sbi->s_qf_names[qtype] = NULL;
        return 1;
}
#endif
1380
/*
 * Flag bits for the 'flags' member of struct mount_opts.  The notes below
 * describe only what the visible part of handle_mount_opt() does with each
 * bit; MOPT_SET, MOPT_CLEAR, MOPT_QFMT and MOPT_DATAJ are acted on in code
 * beyond this view (presumably set/clear the mount_opt bits, select a quota
 * format, and select a data journaling mode -- confirm there).
 */
#define MOPT_SET        0x0001
#define MOPT_CLEAR      0x0002
#define MOPT_NOSUPPORT  0x0004  /* logs "%s option not supported" */
#define MOPT_EXPLICIT   0x0008  /* also sets EXPLICIT_DELALLOC via set_opt2() */
#define MOPT_CLEAR_ERR  0x0010  /* clears ERRORS_MASK before applying */
#define MOPT_GTE0       0x0020  /* a supplied argument must be >= 0 */
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       0x0040
#else
/* Without quota support, quota options fall back to "not supported". */
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      0x0080
#define MOPT_NO_EXT2    0x0100  /* rejected when IS_EXT2_SB() is true */
#define MOPT_NO_EXT3    0x0200  /* rejected when IS_EXT3_SB() is true */
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     0x0400  /* argument is not parsed with match_int() */
1399
/*
 * Per-token handling table for mount options.
 *
 *   token     - one of the Opt_* identifiers
 *   mount_opt - EXT4_MOUNT_* bit(s) for most entries; the jqfmt entries
 *               store a QFMT_* value here instead; 0 when the option
 *               carries a value that is handled by explicit code
 *   flags     - MOPT_* bits describing how the option is validated/applied
 *
 * handle_mount_opt() scans the table linearly for a matching token and
 * stops at the {Opt_err, 0, 0} terminator, which it reports as an
 * unrecognised option.
 */
static const struct mount_opts {
        int     token;
        int     mount_opt;
        int     flags;
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET},
        /* Async commit also turns on journal checksumming. */
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        /*
         * NOTE(review): the two data_err entries carry neither MOPT_SET nor
         * MOPT_CLEAR; presumably they are applied by token-specific code
         * outside this view -- confirm.
         */
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        /* The stored bit is the negative (NO_AUTO_DA_ALLOC), hence the swap. */
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET | MOPT_EXT4_ONLY},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
         MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q},
        {Opt_grpjquota, 0, MOPT_Q},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_err, 0, 0}
};
1485
1486static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1487                            substring_t *args, unsigned long *journal_devnum,
1488                            unsigned int *journal_ioprio, int is_remount)
1489{
1490        struct ext4_sb_info *sbi = EXT4_SB(sb);
1491        const struct mount_opts *m;
1492        kuid_t uid;
1493        kgid_t gid;
1494        int arg = 0;
1495
1496#ifdef CONFIG_QUOTA
1497        if (token == Opt_usrjquota)
1498                return set_qf_name(sb, USRQUOTA, &args[0]);
1499        else if (token == Opt_grpjquota)
1500                return set_qf_name(sb, GRPQUOTA, &args[0]);
1501        else if (token == Opt_offusrjquota)
1502                return clear_qf_name(sb, USRQUOTA);
1503        else if (token == Opt_offgrpjquota)
1504                return clear_qf_name(sb, GRPQUOTA);
1505#endif
1506        switch (token) {
1507        case Opt_noacl:
1508        case Opt_nouser_xattr:
1509                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1510                break;
1511        case Opt_sb:
1512                return 1;       /* handled by get_sb_block() */
1513        case Opt_removed:
1514                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1515                return 1;
1516        case Opt_abort:
1517                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1518                return 1;
1519        case Opt_i_version:
1520                sb->s_flags |= MS_I_VERSION;
1521                return 1;
1522        }
1523
1524        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1525                if (token == m->token)
1526                        break;
1527
1528        if (m->token == Opt_err) {
1529                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1530                         "or missing value", opt);
1531                return -1;
1532        }
1533
1534        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1535                ext4_msg(sb, KERN_ERR,
1536                         "Mount option \"%s\" incompatible with ext2", opt);
1537                return -1;
1538        }
1539        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1540                ext4_msg(sb, KERN_ERR,
1541                         "Mount option \"%s\" incompatible with ext3", opt);
1542                return -1;
1543        }
1544
1545        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1546                return -1;
1547        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1548                return -1;
1549        if (m->flags & MOPT_EXPLICIT)
1550                set_opt2(sb, EXPLICIT_DELALLOC);
1551        if (m->flags & MOPT_CLEAR_ERR)
1552                clear_opt(sb, ERRORS_MASK);
1553        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1554                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1555                         "options when quota turned on");
1556                return -1;
1557        }
1558
1559        if (m->flags & MOPT_NOSUPPORT) {
1560                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1561        } else if (token == Opt_commit) {
1562                if (arg == 0)
1563                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1564                sbi->s_commit_interval = HZ * arg;
1565        } else if (token == Opt_max_batch_time) {
1566                sbi->s_max_batch_time = arg;
1567        } else if (token == Opt_min_batch_time) {
1568                sbi->s_min_batch_time = arg;
1569        } else if (token == Opt_inode_readahead_blks) {
1570                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1571                        ext4_msg(sb, KERN_ERR,
1572                                 "EXT4-fs: inode_readahead_blks must be "
1573                                 "0 or a power of 2 smaller than 2^31");
1574                        return -1;
1575                }
1576                sbi->s_inode_readahead_blks = arg;
1577        } else if (token == Opt_init_itable) {
1578                set_opt(sb, INIT_INODE_TABLE);
1579                if (!args->from)
1580                        arg = EXT4_DEF_LI_WAIT_MULT;
1581                sbi->s_li_wait_mult = arg;
1582        } else if (token == Opt_max_dir_size_kb) {
1583                sbi->s_max_dir_size_kb = arg;
1584        } else if (token == Opt_stripe) {
1585                sbi->s_stripe = arg;
1586        } else if (token == Opt_resuid) {
1587                uid = make_kuid(current_user_ns(), arg);
1588                if (!uid_valid(uid)) {
1589                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1590                        return -1;
1591                }
1592                sbi->s_resuid = uid;
1593        } else if (token == Opt_resgid) {
1594                gid = make_kgid(current_user_ns(), arg);
1595                if (!gid_valid(gid)) {
1596                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1597                        return -1;
1598                }
1599                sbi->s_resgid = gid;
1600        } else if (token == Opt_journal_dev) {
1601                if (is_remount) {
1602                        ext4_msg(sb, KERN_ERR,
1603                                 "Cannot specify journal on remount");
1604                        return -1;
1605                }
1606                *journal_devnum = arg;
1607        } else if (token == Opt_journal_path) {
1608                char *journal_path;
1609                struct inode *journal_inode;
1610                struct path path;
1611                int error;
1612
1613                if (is_remount) {
1614                        ext4_msg(sb, KERN_ERR,
1615                                 "Cannot specify journal on remount");
1616                        return -1;
1617                }
1618                journal_path = match_strdup(&args[0]);
1619                if (!journal_path) {
1620                        ext4_msg(sb, KERN_ERR, "error: could not dup "
1621                                "journal device string");
1622                        return -1;
1623                }
1624
1625                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1626                if (error) {
1627                        ext4_msg(sb, KERN_ERR, "error: could not find "
1628                                "journal device path: error %d", error);
1629                        kfree(journal_path);
1630                        return -1;
1631                }
1632
1633                journal_inode = path.dentry->d_inode;
1634                if (!S_ISBLK(journal_inode->i_mode)) {
1635                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
1636                                "is not a block device", journal_path);
1637                        path_put(&path);
1638                        kfree(journal_path);
1639                        return -1;
1640                }
1641
1642                *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1643                path_put(&path);
1644                kfree(journal_path);
1645        } else if (token == Opt_journal_ioprio) {
1646                if (arg > 7) {
1647                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1648                                 " (must be 0-7)");
1649                        return -1;
1650                }
1651                *journal_ioprio =
1652                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1653        } else if (m->flags & MOPT_DATAJ) {
1654                if (is_remount) {
1655                        if (!sbi->s_journal)
1656                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1657                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1658                                ext4_msg(sb, KERN_ERR,
1659                                         "Cannot change data mode on remount");
1660                                return -1;
1661                        }
1662                } else {
1663                        clear_opt(sb, DATA_FLAGS);
1664                        sbi->s_mount_opt |= m->mount_opt;
1665                }
1666#ifdef CONFIG_QUOTA
1667        } else if (m->flags & MOPT_QFMT) {
1668                if (sb_any_quota_loaded(sb) &&
1669                    sbi->s_jquota_fmt != m->mount_opt) {
1670                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1671                                 "quota options when quota turned on");
1672                        return -1;
1673                }
1674                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1675                                               EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1676                        ext4_msg(sb, KERN_ERR,
1677                                 "Cannot set journaled quota options "
1678                                 "when QUOTA feature is enabled");
1679                        return -1;
1680                }
1681                sbi->s_jquota_fmt = m->mount_opt;
1682#endif
1683        } else if (token == Opt_dax) {
1684#ifdef CONFIG_FS_DAX
1685                ext4_msg(sb, KERN_WARNING,
1686                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1687                        sbi->s_mount_opt |= m->mount_opt;
1688#else
1689                ext4_msg(sb, KERN_INFO, "dax option not supported");
1690                return -1;
1691#endif
1692        } else if (token == Opt_data_err_abort) {
1693                sbi->s_mount_opt |= m->mount_opt;
1694        } else if (token == Opt_data_err_ignore) {
1695                sbi->s_mount_opt &= ~m->mount_opt;
1696        } else {
1697                if (!args->from)
1698                        arg = 1;
1699                if (m->flags & MOPT_CLEAR)
1700                        arg = !arg;
1701                else if (unlikely(!(m->flags & MOPT_SET))) {
1702                        ext4_msg(sb, KERN_WARNING,
1703                                 "buggy handling of option %s", opt);
1704                        WARN_ON(1);
1705                        return -1;
1706                }
1707                if (arg != 0)
1708                        sbi->s_mount_opt |= m->mount_opt;
1709                else
1710                        sbi->s_mount_opt &= ~m->mount_opt;
1711        }
1712        return 1;
1713}
1714
1715static int parse_options(char *options, struct super_block *sb,
1716                         unsigned long *journal_devnum,
1717                         unsigned int *journal_ioprio,
1718                         int is_remount)
1719{
1720        struct ext4_sb_info *sbi = EXT4_SB(sb);
1721        char *p;
1722        substring_t args[MAX_OPT_ARGS];
1723        int token;
1724
1725        if (!options)
1726                return 1;
1727
1728        while ((p = strsep(&options, ",")) != NULL) {
1729                if (!*p)
1730                        continue;
1731                /*
1732                 * Initialize args struct so we know whether arg was
1733                 * found; some options take optional arguments.
1734                 */
1735                args[0].to = args[0].from = NULL;
1736                token = match_token(p, tokens, args);
1737                if (handle_mount_opt(sb, p, token, args, journal_devnum,
1738                                     journal_ioprio, is_remount) < 0)
1739                        return 0;
1740        }
1741#ifdef CONFIG_QUOTA
1742        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
1743            (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
1744                ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
1745                         "feature is enabled");
1746                return 0;
1747        }
1748        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1749                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1750                        clear_opt(sb, USRQUOTA);
1751
1752                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1753                        clear_opt(sb, GRPQUOTA);
1754
1755                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1756                        ext4_msg(sb, KERN_ERR, "old and new quota "
1757                                        "format mixing");
1758                        return 0;
1759                }
1760
1761                if (!sbi->s_jquota_fmt) {
1762                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1763                                        "not specified");
1764                        return 0;
1765                }
1766        }
1767#endif
1768        if (test_opt(sb, DIOREAD_NOLOCK)) {
1769                int blocksize =
1770                        BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1771
1772                if (blocksize < PAGE_CACHE_SIZE) {
1773                        ext4_msg(sb, KERN_ERR, "can't mount with "
1774                                 "dioread_nolock if block size != PAGE_SIZE");
1775                        return 0;
1776                }
1777        }
1778        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
1779            test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
1780                ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
1781                         "in data=ordered mode");
1782                return 0;
1783        }
1784        return 1;
1785}
1786
/*
 * Append the journaled-quota related options (",jqfmt=..." and the
 * usrjquota/grpjquota file names) of @sb to @seq.  Compiles to an empty
 * function when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname;

		/* Unknown formats are shown with an empty name. */
		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmtname = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmtname = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmtname = "vfsv1";
		else
			fmtname = "";

		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA] != NULL)
		seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA] != NULL)
		seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1817
1818static const char *token2str(int token)
1819{
1820        const struct match_token *t;
1821
1822        for (t = tokens; t->token != Opt_err; t++)
1823                if (t->token == token && !strchr(t->pattern, '='))
1824                        break;
1825        return t->pattern;
1826}
1827
1828/*
1829 * Show an option if
1830 *  - it's set to a non-default value OR
1831 *  - if the per-sb default is different from the global default
1832 */
1833static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1834                              int nodefs)
1835{
1836        struct ext4_sb_info *sbi = EXT4_SB(sb);
1837        struct ext4_super_block *es = sbi->s_es;
1838        int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1839        const struct mount_opts *m;
1840        char sep = nodefs ? '\n' : ',';
1841
1842#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1843#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1844
1845        if (sbi->s_sb_block != 1)
1846                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1847
1848        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1849                int want_set = m->flags & MOPT_SET;
1850                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1851                    (m->flags & MOPT_CLEAR_ERR))
1852                        continue;
1853                if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1854                        continue; /* skip if same as the default */
1855                if ((want_set &&
1856                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1857                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1858                        continue; /* select Opt_noFoo vs Opt_Foo */
1859                SEQ_OPTS_PRINT("%s", token2str(m->token));
1860        }
1861
1862        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1863            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1864                SEQ_OPTS_PRINT("resuid=%u",
1865                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
1866        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1867            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1868                SEQ_OPTS_PRINT("resgid=%u",
1869                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
1870        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1871        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1872                SEQ_OPTS_PUTS("errors=remount-ro");
1873        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1874                SEQ_OPTS_PUTS("errors=continue");
1875        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1876                SEQ_OPTS_PUTS("errors=panic");
1877        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1878                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1879        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1880                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1881        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1882                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1883        if (sb->s_flags & MS_I_VERSION)
1884                SEQ_OPTS_PUTS("i_version");
1885        if (nodefs || sbi->s_stripe)
1886                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1887        if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1888                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1889                        SEQ_OPTS_PUTS("data=journal");
1890                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1891                        SEQ_OPTS_PUTS("data=ordered");
1892                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1893                        SEQ_OPTS_PUTS("data=writeback");
1894        }
1895        if (nodefs ||
1896            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
1897                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
1898                               sbi->s_inode_readahead_blks);
1899
1900        if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1901                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1902                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1903        if (nodefs || sbi->s_max_dir_size_kb)
1904                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1905        if (test_opt(sb, DATA_ERR_ABORT))
1906                SEQ_OPTS_PUTS("data_err=abort");
1907
1908        ext4_show_quota_options(seq, sb);
1909        return 0;
1910}
1911
1912static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1913{
1914        return _ext4_show_options(seq, root->d_sb, 0);
1915}
1916
1917static int options_seq_show(struct seq_file *seq, void *offset)
1918{
1919        struct super_block *sb = seq->private;
1920        int rc;
1921
1922        seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1923        rc = _ext4_show_options(seq, sb, 1);
1924        seq_puts(seq, "\n");
1925        return rc;
1926}
1927
1928static int options_open_fs(struct inode *inode, struct file *file)
1929{
1930        return single_open(file, options_seq_show, PDE_DATA(inode));
1931}
1932
1933static const struct file_operations ext4_seq_options_fops = {
1934        .owner = THIS_MODULE,
1935        .open = options_open_fs,
1936        .read = seq_read,
1937        .llseek = seq_lseek,
1938        .release = single_release,
1939};
1940
1941static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1942                            int read_only)
1943{
1944        struct ext4_sb_info *sbi = EXT4_SB(sb);
1945        int res = 0;
1946
1947        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1948                ext4_msg(sb, KERN_ERR, "revision level too high, "
1949                         "forcing read-only mode");
1950                res = MS_RDONLY;
1951        }
1952        if (read_only)
1953                goto done;
1954        if (!(sbi->s_mount_state & EXT4_VALID_FS))
1955                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1956                         "running e2fsck is recommended");
1957        else if (sbi->s_mount_state & EXT4_ERROR_FS)
1958                ext4_msg(sb, KERN_WARNING,
1959                         "warning: mounting fs with errors, "
1960                         "running e2fsck is recommended");
1961        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1962                 le16_to_cpu(es->s_mnt_count) >=
1963                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1964                ext4_msg(sb, KERN_WARNING,
1965                         "warning: maximal mount count reached, "
1966                         "running e2fsck is recommended");
1967        else if (le32_to_cpu(es->s_checkinterval) &&
1968                (le32_to_cpu(es->s_lastcheck) +
1969                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1970                ext4_msg(sb, KERN_WARNING,
1971                         "warning: checktime reached, "
1972                         "running e2fsck is recommended");
1973        if (!sbi->s_journal)
1974                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1975        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1976                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1977        le16_add_cpu(&es->s_mnt_count, 1);
1978        es->s_mtime = cpu_to_le32(get_seconds());
1979        ext4_update_dynamic_rev(sb);
1980        if (sbi->s_journal)
1981                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1982
1983        ext4_commit_super(sb, 1);
1984done:
1985        if (test_opt(sb, DEBUG))
1986                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1987                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1988                        sb->s_blocksize,
1989                        sbi->s_groups_count,
1990                        EXT4_BLOCKS_PER_GROUP(sb),
1991                        EXT4_INODES_PER_GROUP(sb),
1992                        sbi->s_mount_opt, sbi->s_mount_opt2);
1993
1994        cleancache_init_fs(sb);
1995        return res;
1996}
1997
1998int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1999{
2000        struct ext4_sb_info *sbi = EXT4_SB(sb);

2001        struct flex_groups *new_groups;
2002        int size;
2003
2004        if (!sbi->s_log_groups_per_flex)
2005                return 0;
2006
2007        size = ext4_flex_group(sbi, ngroup - 1) + 1;
2008        if (size <= sbi->s_flex_groups_allocated)
2009                return 0;
2010
2011        size = roundup_pow_of_two(size * sizeof(struct flex_groups));
2012        new_groups = kvzalloc(size, GFP_KERNEL);
2013        if (!new_groups) {
2014                ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
2015                         size / (int) sizeof(struct flex_groups));
2016                return -ENOMEM;
2017        }
2018
2019        if (sbi->s_flex_groups) {
2020                memcpy(new_groups, sbi->s_flex_groups,
2021                       (sbi->s_flex_groups_allocated *
2022                        sizeof(struct flex_groups)));
2023                ext4_kvfree(sbi->s_flex_groups);
2024        }
2025        sbi->s_flex_groups = new_groups;
2026        sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
2027        return 0;
2028}
2029
2030static int ext4_fill_flex_info(struct super_block *sb)
2031{
2032        struct ext4_sb_info *sbi = EXT4_SB(sb);
2033        struct ext4_group_desc *gdp = NULL;
2034        ext4_group_t flex_group;
2035        int i, err;
2036
2037        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2038        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2039                sbi->s_log_groups_per_flex = 0;
2040                return 1;
2041        }
2042
2043        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2044        if (err)
2045                goto failed;
2046
2047        for (i = 0; i < sbi->s_groups_count; i++) {
2048                gdp = ext4_get_group_desc(sb, i, NULL);
2049
2050                flex_group = ext4_flex_group(sbi, i);
2051                atomic_add(ext4_free_inodes_count(sb, gdp),
2052                           &sbi->s_flex_groups[flex_group].free_inodes);
2053                atomic64_add(ext4_free_group_clusters(sb, gdp),
2054                             &sbi->s_flex_groups[flex_group].free_clusters);
2055                atomic_add(ext4_used_dirs_count(sb, gdp),
2056                           &sbi->s_flex_groups[flex_group].used_dirs);
2057        }
2058
2059        return 1;
2060failed:
2061        return 0;
2062}
2063
2064static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2065                                   struct ext4_group_desc *gdp)
2066{
2067        int offset;
2068        __u16 crc = 0;
2069        __le32 le_group = cpu_to_le32(block_group);
2070
2071        if (ext4_has_metadata_csum(sbi->s_sb)) {
2072                /* Use new metadata_csum algorithm */
2073                __le16 save_csum;
2074                __u32 csum32;
2075
2076                save_csum = gdp->bg_checksum;
2077                gdp->bg_checksum = 0;
2078                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2079                                     sizeof(le_group));
2080                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
2081                                     sbi->s_desc_size);
2082                gdp->bg_checksum = save_csum;
2083
2084                crc = csum32 & 0xFFFF;
2085                goto out;
2086        }
2087
2088        /* old crc16 code */
2089        if (!(sbi->s_es->s_feature_ro_compat &
2090              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
2091                return 0;
2092
2093        offset = offsetof(struct ext4_group_desc, bg_checksum);
2094
2095        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2096        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2097        crc = crc16(crc, (__u8 *)gdp, offset);
2098        offset += sizeof(gdp->bg_checksum); /* skip checksum */
2099        /* for checksum of struct ext4_group_desc do the rest...*/
2100        if ((sbi->s_es->s_feature_incompat &
2101             cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
2102            offset < le16_to_cpu(sbi->s_es->s_desc_size))
2103                crc = crc16(crc, (__u8 *)gdp + offset,
2104                            le16_to_cpu(sbi->s_es->s_desc_size) -
2105                                offset);
2106
2107out:
2108        return cpu_to_le16(crc);
2109}
2110
2111int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2112                                struct ext4_group_desc *gdp)
2113{
2114        if (ext4_has_group_desc_csum(sb) &&
2115            (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
2116                                                      block_group, gdp)))
2117                return 0;
2118
2119        return 1;
2120}
2121
2122void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2123                              struct ext4_group_desc *gdp)
2124{
2125        if (!ext4_has_group_desc_csum(sb))
2126                return;
2127        gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2128}
2129
2130/* Called at mount-time, super-block is locked */
2131static int ext4_check_descriptors(struct super_block *sb,
2132                                  ext4_fsblk_t sb_block,
2133                                  ext4_group_t *first_not_zeroed)
2134{
2135        struct ext4_sb_info *sbi = EXT4_SB(sb);
2136        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2137        ext4_fsblk_t last_block;
2138        ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
2139        ext4_fsblk_t block_bitmap;
2140        ext4_fsblk_t inode_bitmap;
2141        ext4_fsblk_t inode_table;
2142        int flexbg_flag = 0;
2143        ext4_group_t i, grp = sbi->s_groups_count;
2144
2145        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2146                flexbg_flag = 1;
2147
2148        ext4_debug("Checking group descriptors");
2149
2150        for (i = 0; i < sbi->s_groups_count; i++) {
2151                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2152
2153                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2154                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2155                else
2156                        last_block = first_block +
2157                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2158
2159                if ((grp == sbi->s_groups_count) &&
2160                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2161                        grp = i;
2162
2163                block_bitmap = ext4_block_bitmap(sb, gdp);
2164                if (block_bitmap == sb_block) {
2165                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2166                                 "Block bitmap for group %u overlaps "
2167                                 "superblock", i);
2168                        if (!(sb->s_flags & MS_RDONLY))
2169                                return 0;
2170                }
2171                if (block_bitmap >= sb_block + 1 &&
2172                    block_bitmap <= last_bg_block) {
2173                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2174                                 "Block bitmap for group %u overlaps "
2175                                 "block group descriptors", i);
2176                        if (!(sb->s_flags & MS_RDONLY))
2177                                return 0;
2178                }
2179                if (block_bitmap < first_block || block_bitmap > last_block) {
2180                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2181                               "Block bitmap for group %u not in group "
2182                               "(block %llu)!", i, block_bitmap);
2183                        return 0;
2184                }
2185                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2186                if (inode_bitmap == sb_block) {
2187                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2188                                 "Inode bitmap for group %u overlaps "
2189                                 "superblock", i);
2190                        if (!(sb->s_flags & MS_RDONLY))
2191                                return 0;
2192                }
2193                if (inode_bitmap >= sb_block + 1 &&
2194                    inode_bitmap <= last_bg_block) {
2195                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2196                                 "Inode bitmap for group %u overlaps "
2197                                 "block group descriptors", i);
2198                        if (!(sb->s_flags & MS_RDONLY))
2199                                return 0;
2200                }
2201                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2202                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2203                               "Inode bitmap for group %u not in group "
2204                               "(block %llu)!", i, inode_bitmap);
2205                        return 0;
2206                }
2207                inode_table = ext4_inode_table(sb, gdp);
2208                if (inode_table == sb_block) {
2209                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2210                                 "Inode table for group %u overlaps "
2211                                 "superblock", i);
2212                        if (!(sb->s_flags & MS_RDONLY))
2213                                return 0;
2214                }
2215                if (inode_table >= sb_block + 1 &&
2216                    inode_table <= last_bg_block) {
2217                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2218                                 "Inode table for group %u overlaps "
2219                                 "block group descriptors", i);
2220                        if (!(sb->s_flags & MS_RDONLY))
2221                                return 0;
2222                }
2223                if (inode_table < first_block ||
2224                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2225                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2226                               "Inode table for group %u not in group "
2227                               "(block %llu)!", i, inode_table);
2228                        return 0;
2229                }
2230                ext4_lock_group(sb, i);
2231                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2232                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2233                                 "Checksum for group %u failed (%u!=%u)",
2234                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2235                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2236                        if (!(sb->s_flags & MS_RDONLY)) {
2237                                ext4_unlock_group(sb, i);
2238                                return 0;
2239                        }
2240                }
2241                ext4_unlock_group(sb, i);
2242                if (!flexbg_flag)
2243                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2244        }
2245        if (NULL != first_not_zeroed)
2246                *first_not_zeroed = grp;
2247        return 1;
2248}
2249
/*
 * ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
 * the superblock) which were deleted from all directories, but held open by
 * a process at the time of a crash.  We walk the list and try to delete these
 * inodes at recovery time (only with a read-write filesystem).
 *
 * In order to keep the orphan inode chain consistent during traversal (in
 * case of crash during recovery), we link each inode into the superblock
 * orphan list_head and handle it the same way as an inode deletion during
 * normal operation (which journals the operations for us).
 *
 * We only do an iget() and an iput() on each inode, which is very safe if we
 * accidentally point at an in-use or already deleted inode.  The worst that
 * can happen in this case is that we get a "bit already cleared" message from
 * ext4_free_inode().  The only reason we would point at a wrong inode is if
 * e2fsck was run on this filesystem, and it must have already done the orphan
 * inode cleanup for us, so we can safely abort without any further action.
 *
 * @sb: super block being mounted
 * @es: on-disk super block; es->s_last_orphan is the head of the orphan chain
 *
 * Outline of what the code below does:
 *   1. Return early when there is nothing to do or cleanup is not possible
 *      (empty list, read-only block device, feature set that does not allow
 *      a r/w mount, filesystem already flagged with errors).
 *   2. Temporarily clear MS_RDONLY and, with CONFIG_QUOTA, set MS_ACTIVE and
 *      turn on quotas so that usage is accounted while inodes are released.
 *   3. Process one orphan per loop iteration until s_last_orphan becomes 0.
 *   4. Log a summary, turn the quotas back off, and restore sb->s_flags to
 *      the value it had on entry.
 */
static void ext4_orphan_cleanup(struct super_block *sb,
				struct ext4_super_block *es)
{
	/* Mount flags as found on entry; written back just before returning. */
	unsigned int s_flags = sb->s_flags;
	int nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
	/* Set once any quota was enabled here, so it is disabled again below. */
	int quota_update = 0;
	int i;
#endif
	if (!es->s_last_orphan) {
		jbd_debug(4, "no orphan inodes to clean up\n");
		return;
	}

	if (bdev_read_only(sb->s_bdev)) {
		ext4_msg(sb, KERN_ERR, "write access "
			"unavailable, skipping orphan cleanup");
		return;
	}

	/* Check if feature set would not allow a r/w mount */
	if (!ext4_feature_set_ok(sb, 0)) {
		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
			 "unknown ROCOMPAT features");
		return;
	}

	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		/* don't clear list on RO mount w/ errors */
		if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				  "clearing orphan list.\n");
			es->s_last_orphan = 0;
		}
		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
		return;
	}

	/*
	 * From here on the superblock flags are modified; every path below
	 * falls through to the final "sb->s_flags = s_flags" restore.
	 */
	if (s_flags & MS_RDONLY) {
		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
		sb->s_flags &= ~MS_RDONLY;
	}
#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sb->s_flags |= MS_ACTIVE;

	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    (s_flags & MS_RDONLY))
	{
		int ret = ext4_enable_quotas(sb);

		if (!ret)
			quota_update = 1;
		else
			ext4_msg(sb, KERN_ERR,
				"Cannot turn on quotas: error %d", ret);
	}

	/* Turn on journaled quotas used for old style */
	for (i = 0; i < MAXQUOTAS; i++) {
		if (EXT4_SB(sb)->s_qf_names[i]) {
			int ret = ext4_quota_on_mount(sb, i);

			if (!ret)
				quota_update = 1;
			else
				ext4_msg(sb, KERN_ERR,
					"Cannot turn on journaled "
					"quota: type %d: error %d", i, ret);
		}
	}
#endif

	/*
	 * Each iteration handles the inode currently at the head of the
	 * on-disk chain.  The loop condition re-reads es->s_last_orphan, so
	 * progress relies on the head changing as a side effect of the work
	 * done on the inode (or on one of the explicit resets to 0 below).
	 */
	while (es->s_last_orphan) {
		struct inode *inode;

		/*
		 * We may have encountered an error during cleanup; if
		 * so, skip the rest.
		 */
		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
			jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
			es->s_last_orphan = 0;
			break;
		}

		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
		if (IS_ERR(inode)) {
			/* Unusable list head: drop the list and stop. */
			es->s_last_orphan = 0;
			break;
		}

		/* Link the inode into the in-memory orphan list of this sb. */
		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
		dquot_initialize(inode);
		if (inode->i_nlink) {
			/* Still linked: finish an interrupted truncate. */
			if (test_opt(sb, DEBUG))
				ext4_msg(sb, KERN_DEBUG,
					"%s: truncating inode %lu to %lld bytes",
					__func__, inode->i_ino, inode->i_size);
			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
				  inode->i_ino, inode->i_size);
			mutex_lock(&inode->i_mutex);
			truncate_inode_pages(inode->i_mapping, inode->i_size);
			ext4_truncate(inode);
			mutex_unlock(&inode->i_mutex);
			nr_truncates++;
		} else {
			/* No links left: the final iput() below deletes it. */
			if (test_opt(sb, DEBUG))
				ext4_msg(sb, KERN_DEBUG,
					"%s: deleting unreferenced inode %lu",
					__func__, inode->i_ino);
			jbd_debug(2, "deleting unreferenced inode %lu\n",
				  inode->i_ino);
			nr_orphans++;
		}
		iput(inode);  /* The delete magic happens here! */
	}

/* Expands to two arguments: the count and "" or "s" for the "%d ...%s" text. */
#define PLURAL(x) (x), ((x) == 1) ? "" : "s"

	if (nr_orphans)
		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
		       PLURAL(nr_orphans));
	if (nr_truncates)
		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
		       PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
	/* Turn off quotas if they were enabled for orphan cleanup */
	if (quota_update) {
		for (i = 0; i < MAXQUOTAS; i++) {
			if (sb_dqopt(sb)->files[i])
				dquot_quota_off(sb, i);
		}
	}
#endif
	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
}
2408
2409/*
2410 * Maximal extent format file size.
2411 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2412 * extent format containers, within a sector_t, and within i_blocks
2413 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2414 * so that won't be a limiting factor.
2415 *
2416 * However there is other limiting factor. We do store extents in the form
2417 * of starting block and length, hence the resulting length of the extent
2418 * covering maximum file size must fit into on-disk format containers as
2419 * well. Given that length is always by 1 unit bigger than max unit (because
2420 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2421 *
2422 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2423 */
2424static loff_t ext4_max_size(int blkbits, int has_huge_files)
2425{
2426        loff_t res;
2427        loff_t upper_limit = MAX_LFS_FILESIZE;
2428
2429        /* small i_blocks in vfs inode? */
2430        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2431                /*
2432                 * CONFIG_LBDAF is not enabled implies the inode
2433                 * i_block represent total blocks in 512 bytes
2434                 * 32 == size of vfs inode i_blocks * 8
2435                 */
2436                upper_limit = (1LL << 32) - 1;
2437
2438                /* total blocks in file system block size */
2439                upper_limit >>= (blkbits - 9);
2440                upper_limit <<= blkbits;
2441        }
2442
2443        /*
2444         * 32-bit extent-start container, ee_block. We lower the maxbytes
2445         * by one fs block, so ee_len can cover the extent of maximum file
2446         * size
2447         */
2448        res = (1LL << 32) - 1;
2449        res <<= blkbits;
2450
2451        /* Sanity check against vm- & vfs- imposed limits */
2452        if (res > upper_limit)
2453                res = upper_limit;
2454
2455        return res;
2456}
2457
2458/*
2459 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2460 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2461 * We need to be 1 filesystem block less than the 2^48 sector limit.
2462 */
2463static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2464{
2465        loff_t res = EXT4_NDIR_BLOCKS;
2466        int meta_blocks;
2467        loff_t upper_limit;
2468        /* This is calculated to be the largest file size for a dense, block
2469         * mapped file such that the file's total number of 512-byte sectors,
2470         * including data and all indirect blocks, does not exceed (2^48 - 1).
2471         *
2472         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2473         * number of 512-byte sectors of the file.
2474         */
2475
2476        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2477                /*
2478                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2479                 * the inode i_block field represents total file blocks in
2480                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2481                 */
2482                upper_limit = (1LL << 32) - 1;
2483
2484                /* total blocks in file system block size */
2485                upper_limit >>= (bits - 9);
2486
2487        } else {
2488                /*
2489                 * We use 48 bit ext4_inode i_blocks
2490                 * With EXT4_HUGE_FILE_FL set the i_blocks
2491                 * represent total number of blocks in
2492                 * file system block size
2493                 */
2494                upper_limit = (1LL << 48) - 1;
2495
2496        }
2497
2498        /* indirect blocks */
2499        meta_blocks = 1;
2500        /* double indirect blocks */
2501        meta_blocks += 1 + (1LL << (bits-2));
2502        /* tripple indirect blocks */
2503        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2504
2505        upper_limit -= meta_blocks;
2506        upper_limit <<= bits;
2507
2508        res += 1LL << (bits-2);
2509        res += 1LL << (2*(bits-2));
2510        res += 1LL << (3*(bits-2));
2511        res <<= bits;
2512        if (res > upper_limit)
2513                res = upper_limit;
2514
2515        if (res > MAX_LFS_FILESIZE)
2516                res = MAX_LFS_FILESIZE;
2517
2518        return res;
2519}
2520
2521static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2522                                   ext4_fsblk_t logical_sb_block, int nr)
2523{
2524        struct ext4_sb_info *sbi = EXT4_SB(sb);
2525        ext4_group_t bg, first_meta_bg;
2526        int has_super = 0;
2527
2528        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2529
2530        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2531            nr < first_meta_bg)
2532                return logical_sb_block + nr + 1;
2533        bg = sbi->s_desc_per_block * nr;
2534        if (ext4_bg_has_super(sb, bg))
2535                has_super = 1;
2536
2537        /*
2538         * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2539         * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2540         * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2541         * compensate.
2542         */
2543        if (sb->s_blocksize == 1024 && nr == 0 &&
2544            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2545                has_super++;
2546
2547        return (has_super + ext4_group_first_block_no(sb, bg));
2548}
2549
2550/**
2551 * ext4_get_stripe_size: Get the stripe size.
2552 * @sbi: In memory super block info
2553 *
2554 * If we have specified it via mount option, then
2555 * use the mount option value. If the value specified at mount time is
2556 * greater than the blocks per group use the super block value.
2557 * If the super block value is greater than blocks per group return 0.
2558 * Allocator needs it be less than blocks per group.
2559 *
2560 */
2561static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2562{
2563        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2564        unsigned long stripe_width =
2565                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2566        int ret;
2567
2568        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2569                ret = sbi->s_stripe;
2570        else if (stripe_width <= sbi->s_blocks_per_group)
2571                ret = stripe_width;
2572        else if (stride <= sbi->s_blocks_per_group)
2573                ret = stride;
2574        else
2575                ret = 0;
2576
2577        /*
2578         * If the stripe width is 1, this makes no sense and
2579         * we set it to 0 to turn off stripe handling code.
2580         */
2581        if (ret <= 1)
2582                ret = 0;
2583
2584        return ret;
2585}
2586
2587/* sysfs support */
2588
/*
 * Description of one ext4 sysfs attribute.
 *
 * ext4_attr_show() and ext4_attr_store() recover this structure from the
 * embedded struct attribute and dispatch to the callbacks stored here.
 */
struct ext4_attr {
	/* Generic sysfs attribute (name and mode). */
	struct attribute attr;
	/* Formats the value into the buffer; may be NULL. */
	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
	/* Parses and applies a new value; may be NULL. */
	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
			 const char *, size_t);
	union {
		/*
		 * Byte offset of the backing field, relative to struct
		 * ext4_sb_info or struct ext4_super_block depending on
		 * which macro defined the attribute.
		 */
		int offset;
		/* Fixed value printed by sbi_deprecated_show(). */
		int deprecated_val;
	} u;
};
2599
2600static int parse_strtoull(const char *buf,
2601                unsigned long long max, unsigned long long *value)
2602{
2603        int ret;
2604
2605        ret = kstrtoull(skip_spaces(buf), 0, value);
2606        if (!ret && *value > max)
2607                ret = -EINVAL;
2608        return ret;
2609}
2610
2611static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2612                                              struct ext4_sb_info *sbi,
2613                                              char *buf)
2614{
2615        return snprintf(buf, PAGE_SIZE, "%llu\n",
2616                (s64) EXT4_C2B(sbi,
2617                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2618}
2619
2620static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2621                                         struct ext4_sb_info *sbi, char *buf)
2622{
2623        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2624
2625        if (!sb->s_bdev->bd_part)
2626                return snprintf(buf, PAGE_SIZE, "0\n");
2627        return snprintf(buf, PAGE_SIZE, "%lu\n",
2628                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2629                         sbi->s_sectors_written_start) >> 1);
2630}
2631
2632static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2633                                          struct ext4_sb_info *sbi, char *buf)
2634{
2635        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2636
2637        if (!sb->s_bdev->bd_part)
2638                return snprintf(buf, PAGE_SIZE, "0\n");
2639        return snprintf(buf, PAGE_SIZE, "%llu\n",
2640                        (unsigned long long)(sbi->s_kbytes_written +
2641                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2642                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2643}
2644
2645static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2646                                          struct ext4_sb_info *sbi,
2647                                          const char *buf, size_t count)
2648{
2649        unsigned long t;
2650        int ret;
2651
2652        ret = kstrtoul(skip_spaces(buf), 0, &t);
2653        if (ret)
2654                return ret;
2655
2656        if (t && (!is_power_of_2(t) || t > 0x40000000))
2657                return -EINVAL;
2658
2659        sbi->s_inode_readahead_blks = t;
2660        return count;
2661}
2662
2663static ssize_t sbi_ui_show(struct ext4_attr *a,
2664                           struct ext4_sb_info *sbi, char *buf)
2665{
2666        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2667
2668        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2669}
2670
2671static ssize_t sbi_ui_store(struct ext4_attr *a,
2672                            struct ext4_sb_info *sbi,
2673                            const char *buf, size_t count)
2674{
2675        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2676        unsigned long t;
2677        int ret;
2678
2679        ret = kstrtoul(skip_spaces(buf), 0, &t);
2680        if (ret)
2681                return ret;
2682        *ui = t;
2683        return count;
2684}
2685
2686static ssize_t es_ui_show(struct ext4_attr *a,
2687                           struct ext4_sb_info *sbi, char *buf)
2688{
2689
2690        unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2691                           a->u.offset);
2692
2693        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2694}
2695
2696static ssize_t reserved_clusters_show(struct ext4_attr *a,
2697                                  struct ext4_sb_info *sbi, char *buf)
2698{
2699        return snprintf(buf, PAGE_SIZE, "%llu\n",
2700                (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
2701}
2702
2703static ssize_t reserved_clusters_store(struct ext4_attr *a,
2704                                   struct ext4_sb_info *sbi,
2705                                   const char *buf, size_t count)
2706{
2707        unsigned long long val;
2708        int ret;
2709
2710        if (parse_strtoull(buf, -1ULL, &val))
2711                return -EINVAL;
2712        ret = ext4_reserve_clusters(sbi, val);
2713
2714        return ret ? ret : count;
2715}
2716
2717static ssize_t trigger_test_error(struct ext4_attr *a,
2718                                  struct ext4_sb_info *sbi,
2719                                  const char *buf, size_t count)
2720{
2721        int len = count;
2722
2723        if (!capable(CAP_SYS_ADMIN))
2724                return -EPERM;
2725
2726        if (len && buf[len-1] == '\n')
2727                len--;
2728
2729        if (len)
2730                ext4_error(sbi->s_sb, "%.*s", len, buf);
2731        return count;
2732}
2733
2734static ssize_t sbi_deprecated_show(struct ext4_attr *a,
2735                                   struct ext4_sb_info *sbi, char *buf)
2736{
2737        return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
2738}
2739
/*
 * Define "ext4_attr_<_name>", an attribute whose callbacks locate their
 * data through u.offset, here the offset of member _elname inside
 * struct ext4_sb_info.
 */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.u = {							\
		.offset = offsetof(struct ext4_sb_info, _elname),\
	},							\
}

/*
 * Same as EXT4_ATTR_OFFSET, but u.offset is the offset of member _elname
 * inside the on-disk struct ext4_super_block.
 */
#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname)		\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr = {.name = __stringify(_name), .mode = _mode },		\
	.show	= _show,						\
	.store	= _store,						\
	.u = {								\
		.offset = offsetof(struct ext4_super_block, _elname),	\
	},								\
}

/* Define "ext4_attr_<name>" with explicit mode and callbacks, no offset. */
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Shorthands: no callbacks / <name>_show only / <name>_show + <name>_store. */
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)

/* Read-only 32-bit field of the on-disk super block. */
#define EXT4_RO_ATTR_ES_UI(name, elname)	\
	EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
/* Read-write unsigned int field of struct ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)	\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)

/* Entry for an attribute pointer array: address of the embedded attribute. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
/* Read-only attribute that always reports the constant _val. */
#define EXT4_DEPRECATED_ATTR(_name, _val)	\
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = 0444 },	\
	.show	= sbi_deprecated_show,				\
	.u = {							\
		.deprecated_val = _val,				\
	},							\
}
2781
/* Attributes backed by dedicated show/store functions. */
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_RW_ATTR(reserved_clusters);
/* Generic show, but a validating store of its own. */
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
		 inode_readahead_blks_store, s_inode_readahead_blks);
/* Plain unsigned int tunables living in struct ext4_sb_info. */
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
/* Deprecated attribute: always reads back as 128. */
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
/* Write-only (mode 0200) trigger handled by trigger_test_error(). */
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
/* Interval and burst fields of the three ratelimit states. */
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
/* Read-only views of error bookkeeping fields in the on-disk super block. */
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);

/*
 * NULL-terminated list of the attributes defined above; installed as the
 * default attributes of ext4_ktype.
 */
static struct attribute *ext4_attrs[] = {
	ATTR_LIST(delayed_allocation_blocks),
	ATTR_LIST(session_write_kbytes),
	ATTR_LIST(lifetime_write_kbytes),
	ATTR_LIST(reserved_clusters),
	ATTR_LIST(inode_readahead_blks),
	ATTR_LIST(inode_goal),
	ATTR_LIST(mb_stats),
	ATTR_LIST(mb_max_to_scan),
	ATTR_LIST(mb_min_to_scan),
	ATTR_LIST(mb_order2_req),
	ATTR_LIST(mb_stream_req),
	ATTR_LIST(mb_group_prealloc),
	ATTR_LIST(max_writeback_mb_bump),
	ATTR_LIST(extent_max_zeroout_kb),
	ATTR_LIST(trigger_fs_error),
	ATTR_LIST(err_ratelimit_interval_ms),
	ATTR_LIST(err_ratelimit_burst),
	ATTR_LIST(warning_ratelimit_interval_ms),
	ATTR_LIST(warning_ratelimit_burst),
	ATTR_LIST(msg_ratelimit_interval_ms),
	ATTR_LIST(msg_ratelimit_burst),
	ATTR_LIST(errors_count),
	ATTR_LIST(first_error_time),
	ATTR_LIST(last_error_time),
	NULL,
};
2835
/*
 * Features this copy of ext4 supports.
 *
 * These attributes carry no callbacks of their own; reads are answered by
 * ext4_feat_show() through ext4_feat_ktype.
 */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
EXT4_INFO_ATTR(meta_bg_resize);

/* NULL-terminated list installed as default attributes of ext4_feat_ktype. */
static struct attribute *ext4_feat_attrs[] = {
	ATTR_LIST(lazy_itable_init),
	ATTR_LIST(batched_discard),
	ATTR_LIST(meta_bg_resize),
	NULL,
};
2847
2848static ssize_t ext4_attr_show(struct kobject *kobj,
2849                              struct attribute *attr, char *buf)
2850{
2851        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2852                                                s_kobj);
2853        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2854
2855        return a->show ? a->show(a, sbi, buf) : 0;
2856}
2857
2858static ssize_t ext4_attr_store(struct kobject *kobj,
2859                               struct attribute *attr,
2860                               const char *buf, size_t len)
2861{
2862        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2863                                                s_kobj);
2864        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2865
2866        return a->store ? a->store(a, sbi, buf, len) : 0;
2867}
2868
2869static void ext4_sb_release(struct kobject *kobj)
2870{
2871        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2872                                                s_kobj);
2873        complete(&sbi->s_kobj_unregister);
2874}
2875
/* Dispatchers used for every attribute of the per-filesystem kobject. */
static const struct sysfs_ops ext4_attr_ops = {
	.show	= ext4_attr_show,
	.store	= ext4_attr_store,
};

/*
 * kobject type of the per-filesystem kobject (s_kobj embedded in struct
 * ext4_sb_info): default attribute list, dispatchers and release hook.
 */
static struct kobj_type ext4_ktype = {
	.default_attrs	= ext4_attrs,
	.sysfs_ops	= &ext4_attr_ops,
	.release	= ext4_sb_release,
};
2886
2887static void ext4_feat_release(struct kobject *kobj)
2888{
2889        complete(&ext4_feat->f_kobj_unregister);
2890}
2891
2892static ssize_t ext4_feat_show(struct kobject *kobj,
2893                              struct attribute *attr, char *buf)
2894{
2895        return snprintf(buf, PAGE_SIZE, "supported\n");
2896}
2897
/*
 * We can not use ext4_attr_show/store because it relies on the kobject
 * being embedded in the ext4_sb_info structure which is definitely not
 * true in this case.
 *
 * Feature attributes are read-only, hence no store handler.
 */
static const struct sysfs_ops ext4_feat_ops = {
	.show	= ext4_feat_show,
	.store	= NULL,
};

/*
 * kobject type of the features kobject: default attribute list,
 * dispatchers and release hook.
 */
static struct kobj_type ext4_feat_ktype = {
	.default_attrs	= ext4_feat_attrs,
	.sysfs_ops	= &ext4_feat_ops,
	.release	= ext4_feat_release,
};
2913
2914/*
2915 * Check whether this filesystem can be mounted based on
2916 * the features present and the RDONLY/RDWR mount requested.
2917 * Returns 1 if this filesystem can be mounted as requested,
2918 * 0 if it cannot be.
2919 */
2920static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2921{
2922        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2923                ext4_msg(sb, KERN_ERR,
2924                        "Couldn't mount because of "
2925                        "unsupported optional features (%x)",
2926                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2927                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2928                return 0;
2929        }
2930
2931        if (readonly)
2932                return 1;
2933
2934        /* Check that feature set is OK for a read-write mount */
2935        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2936                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2937                         "unsupported optional features (%x)",
2938                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2939                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2940                return 0;
2941        }
2942        /*
2943         * Large file size enabled file system can only be mounted
2944         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2945         */
2946        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2947                if (sizeof(blkcnt_t) < sizeof(u64)) {
2948                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2949                                 "cannot be mounted RDWR without "
2950                                 "CONFIG_LBDAF");
2951                        return 0;
2952                }
2953        }
2954        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2955            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2956                ext4_msg(sb, KERN_ERR,
2957                         "Can't support bigalloc feature without "
2958                         "extents feature\n");
2959                return 0;
2960        }
2961
2962#ifndef CONFIG_QUOTA
2963        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2964            !readonly) {
2965                ext4_msg(sb, KERN_ERR,
2966                         "Filesystem with quota feature cannot be mounted RDWR "
2967                         "without CONFIG_QUOTA");
2968                return 0;
2969        }
2970#endif  /* CONFIG_QUOTA */
2971        return 1;
2972}
2973
2974/*
2975 * This function is called once a day if we have errors logged
2976 * on the file system
2977 */
2978static void print_daily_error_info(unsigned long arg)
2979{
2980        struct super_block *sb = (struct super_block *) arg;
2981        struct ext4_sb_info *sbi;
2982        struct ext4_super_block *es;
2983
2984        sbi = EXT4_SB(sb);
2985        es = sbi->s_es;
2986
2987        if (es->s_error_count)
2988                /* fsck newer than v1.41.13 is needed to clean this condition. */
2989                ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2990                         le32_to_cpu(es->s_error_count));
2991        if (es->s_first_error_time) {
2992                printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2993                       sb->s_id, le32_to_cpu(es->s_first_error_time),
2994                       (int) sizeof(es->s_first_error_func),
2995                       es->s_first_error_func,
2996                       le32_to_cpu(es->s_first_error_line));
2997                if (es->s_first_error_ino)
2998                        printk(": inode %u",
2999                               le32_to_cpu(es->s_first_error_ino));
3000                if (es->s_first_error_block)

3001                        printk(": block %llu", (unsigned long long)
3002                               le64_to_cpu(es->s_first_error_block));
3003                printk("\n");
3004        }
3005        if (es->s_last_error_time) {
3006                printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
3007                       sb->s_id, le32_to_cpu(es->s_last_error_time),
3008                       (int) sizeof(es->s_last_error_func),
3009                       es->s_last_error_func,
3010                       le32_to_cpu(es->s_last_error_line));
3011                if (es->s_last_error_ino)
3012                        printk(": inode %u",
3013                               le32_to_cpu(es->s_last_error_ino));
3014                if (es->s_last_error_block)
3015                        printk(": block %llu", (unsigned long long)
3016                               le64_to_cpu(es->s_last_error_block));
3017                printk("\n");
3018        }
3019        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3020}
3021
3022/* Find next suitable group and run ext4_init_inode_table */
3023static int ext4_run_li_request(struct ext4_li_request *elr)
3024{
3025        struct ext4_group_desc *gdp = NULL;
3026        ext4_group_t group, ngroups;
3027        struct super_block *sb;
3028        unsigned long timeout = 0;
3029        int ret = 0;
3030
3031        sb = elr->lr_super;
3032        ngroups = EXT4_SB(sb)->s_groups_count;
3033
3034        sb_start_write(sb);
3035        for (group = elr->lr_next_group; group < ngroups; group++) {
3036                gdp = ext4_get_group_desc(sb, group, NULL);
3037                if (!gdp) {
3038                        ret = 1;
3039                        break;
3040                }
3041
3042                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3043                        break;
3044        }
3045
3046        if (group >= ngroups)
3047                ret = 1;
3048
3049        if (!ret) {
3050                timeout = jiffies;
3051                ret = ext4_init_inode_table(sb, group,
3052                                            elr->lr_timeout ? 0 : 1);
3053                if (elr->lr_timeout == 0) {
3054                        timeout = (jiffies - timeout) *
3055                                  elr->lr_sbi->s_li_wait_mult;
3056                        elr->lr_timeout = timeout;
3057                }
3058                elr->lr_next_sched = jiffies + elr->lr_timeout;
3059                elr->lr_next_group = group + 1;
3060        }
3061        sb_end_write(sb);
3062
3063        return ret;
3064}
3065
3066/*
3067 * Remove lr_request from the list_request and free the
3068 * request structure. Should be called with li_list_mtx held
3069 */
3070static void ext4_remove_li_request(struct ext4_li_request *elr)
3071{
3072        struct ext4_sb_info *sbi;
3073
3074        if (!elr)
3075                return;
3076
3077        sbi = elr->lr_sbi;
3078
3079        list_del(&elr->lr_request);
3080        sbi->s_li_request = NULL;
3081        kfree(elr);
3082}
3083
3084static void ext4_unregister_li_request(struct super_block *sb)
3085{
3086        mutex_lock(&ext4_li_mtx);
3087        if (!ext4_li_info) {
3088                mutex_unlock(&ext4_li_mtx);
3089                return;
3090        }
3091
3092        mutex_lock(&ext4_li_info->li_list_mtx);
3093        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3094        mutex_unlock(&ext4_li_info->li_list_mtx);
3095        mutex_unlock(&ext4_li_mtx);
3096}
3097
3098static struct task_struct *ext4_lazyinit_task;
3099
3100/*
3101 * This is the function where ext4lazyinit thread lives. It walks
3102 * through the request list searching for next scheduled filesystem.
3103 * When such a fs is found, run the lazy initialization request
3104 * (ext4_rn_li_request) and keep track of the time spend in this
3105 * function. Based on that time we compute next schedule time of
3106 * the request. When walking through the list is complete, compute
3107 * next waking time and put itself into sleep.
3108 */
3109static int ext4_lazyinit_thread(void *arg)
3110{
3111        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
3112        struct list_head *pos, *n;
3113        struct ext4_li_request *elr;
3114        unsigned long next_wakeup, cur;
3115
3116        BUG_ON(NULL == eli);
3117
3118cont_thread:
3119        while (true) {
3120                next_wakeup = MAX_JIFFY_OFFSET;
3121
3122                mutex_lock(&eli->li_list_mtx);
3123                if (list_empty(&eli->li_request_list)) {
3124                        mutex_unlock(&eli->li_list_mtx);
3125                        goto exit_thread;
3126                }
3127
3128                list_for_each_safe(pos, n, &eli->li_request_list) {
3129                        elr = list_entry(pos, struct ext4_li_request,
3130                                         lr_request);
3131
3132                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
3133                                if (ext4_run_li_request(elr) != 0) {
3134                                        /* error, remove the lazy_init job */
3135                                        ext4_remove_li_request(elr);
3136                                        continue;
3137                                }
3138                        }
3139
3140                        if (time_before(elr->lr_next_sched, next_wakeup))
3141                                next_wakeup = elr->lr_next_sched;
3142                }
3143                mutex_unlock(&eli->li_list_mtx);
3144
3145                try_to_freeze();
3146
3147                cur = jiffies;
3148                if ((time_after_eq(cur, next_wakeup)) ||
3149                    (MAX_JIFFY_OFFSET == next_wakeup)) {
3150                        cond_resched();
3151                        continue;
3152                }
3153
3154                schedule_timeout_interruptible(next_wakeup - cur);
3155
3156                if (kthread_should_stop()) {
3157                        ext4_clear_request_list();
3158                        goto exit_thread;
3159                }
3160        }
3161
3162exit_thread:
3163        /*
3164         * It looks like the request list is empty, but we need
3165         * to check it under the li_list_mtx lock, to prevent any
3166         * additions into it, and of course we should lock ext4_li_mtx
3167         * to atomically free the list and ext4_li_info, because at
3168         * this point another ext4 filesystem could be registering
3169         * new one.
3170         */
3171        mutex_lock(&ext4_li_mtx);
3172        mutex_lock(&eli->li_list_mtx);
3173        if (!list_empty(&eli->li_request_list)) {
3174                mutex_unlock(&eli->li_list_mtx);
3175                mutex_unlock(&ext4_li_mtx);
3176                goto cont_thread;
3177        }
3178        mutex_unlock(&eli->li_list_mtx);
3179        kfree(ext4_li_info);
3180        ext4_li_info = NULL;
3181        mutex_unlock(&ext4_li_mtx);
3182
3183        return 0;
3184}
3185
3186static void ext4_clear_request_list(void)
3187{
3188        struct list_head *pos, *n;
3189        struct ext4_li_request *elr;
3190
3191        mutex_lock(&ext4_li_info->li_list_mtx);
3192        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3193                elr = list_entry(pos, struct ext4_li_request,
3194                                 lr_request);
3195                ext4_remove_li_request(elr);
3196        }
3197        mutex_unlock(&ext4_li_info->li_list_mtx);
3198}
3199
3200static int ext4_run_lazyinit_thread(void)
3201{
3202        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3203                                         ext4_li_info, "ext4lazyinit");
3204        if (IS_ERR(ext4_lazyinit_task)) {
3205                int err = PTR_ERR(ext4_lazyinit_task);
3206                ext4_clear_request_list();
3207                kfree(ext4_li_info);
3208                ext4_li_info = NULL;
3209                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3210                                 "initialization thread\n",
3211                                 err);
3212                return err;
3213        }
3214        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3215        return 0;
3216}
3217
3218/*
3219 * Check whether it make sense to run itable init. thread or not.
3220 * If there is at least one uninitialized inode table, return
3221 * corresponding group number, else the loop goes through all
3222 * groups and return total number of groups.
3223 */
3224static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3225{
3226        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3227        struct ext4_group_desc *gdp = NULL;
3228
3229        if (!ext4_has_group_desc_csum(sb))
3230                return ngroups;
3231
3232        for (group = 0; group < ngroups; group++) {
3233                gdp = ext4_get_group_desc(sb, group, NULL);
3234                if (!gdp)
3235                        continue;
3236
3237                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3238                        break;
3239        }
3240
3241        return group;
3242}
3243
3244static int ext4_li_info_new(void)
3245{
3246        struct ext4_lazy_init *eli = NULL;
3247
3248        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3249        if (!eli)
3250                return -ENOMEM;
3251
3252        INIT_LIST_HEAD(&eli->li_request_list);
3253        mutex_init(&eli->li_list_mtx);
3254
3255        eli->li_state |= EXT4_LAZYINIT_QUIT;
3256
3257        ext4_li_info = eli;
3258
3259        return 0;
3260}
3261
3262static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3263                                            ext4_group_t start)
3264{
3265        struct ext4_sb_info *sbi = EXT4_SB(sb);
3266        struct ext4_li_request *elr;
3267
3268        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3269        if (!elr)
3270                return NULL;
3271
3272        elr->lr_super = sb;
3273        elr->lr_sbi = sbi;
3274        elr->lr_next_group = start;
3275
3276        /*
3277         * Randomize first schedule time of the request to
3278         * spread the inode table initialization requests
3279         * better.
3280         */
3281        elr->lr_next_sched = jiffies + (prandom_u32() %
3282                                (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3283        return elr;
3284}
3285
3286int ext4_register_li_request(struct super_block *sb,
3287                             ext4_group_t first_not_zeroed)
3288{
3289        struct ext4_sb_info *sbi = EXT4_SB(sb);
3290        struct ext4_li_request *elr = NULL;
3291        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3292        int ret = 0;
3293
3294        mutex_lock(&ext4_li_mtx);
3295        if (sbi->s_li_request != NULL) {
3296                /*
3297                 * Reset timeout so it can be computed again, because
3298                 * s_li_wait_mult might have changed.
3299                 */
3300                sbi->s_li_request->lr_timeout = 0;
3301                goto out;
3302        }
3303
3304        if (first_not_zeroed == ngroups ||
3305            (sb->s_flags & MS_RDONLY) ||
3306            !test_opt(sb, INIT_INODE_TABLE))
3307                goto out;
3308
3309        elr = ext4_li_request_new(sb, first_not_zeroed);
3310        if (!elr) {
3311                ret = -ENOMEM;
3312                goto out;
3313        }
3314
3315        if (NULL == ext4_li_info) {
3316                ret = ext4_li_info_new();
3317                if (ret)
3318                        goto out;
3319        }
3320
3321        mutex_lock(&ext4_li_info->li_list_mtx);
3322        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3323        mutex_unlock(&ext4_li_info->li_list_mtx);
3324
3325        sbi->s_li_request = elr;
3326        /*
3327         * set elr to NULL here since it has been inserted to
3328         * the request_list and the removal and free of it is
3329         * handled by ext4_clear_request_list from now on.
3330         */
3331        elr = NULL;
3332
3333        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3334                ret = ext4_run_lazyinit_thread();
3335                if (ret)
3336                        goto out;
3337        }
3338out:
3339        mutex_unlock(&ext4_li_mtx);
3340        if (ret)
3341                kfree(elr);
3342        return ret;
3343}
3344
3345/*
3346 * We do not need to lock anything since this is called on
3347 * module unload.
3348 */
3349static void ext4_destroy_lazyinit_thread(void)
3350{
3351        /*
3352         * If thread exited earlier
3353         * there's nothing to be done.
3354         */
3355        if (!ext4_li_info || !ext4_lazyinit_task)
3356                return;
3357
3358        kthread_stop(ext4_lazyinit_task);
3359}
3360
3361static int set_journal_csum_feature_set(struct super_block *sb)
3362{
3363        int ret = 1;
3364        int compat, incompat;
3365        struct ext4_sb_info *sbi = EXT4_SB(sb);
3366
3367        if (ext4_has_metadata_csum(sb)) {
3368                /* journal checksum v3 */
3369                compat = 0;
3370                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3371        } else {
3372                /* journal checksum v1 */
3373                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3374                incompat = 0;
3375        }
3376
3377        jbd2_journal_clear_features(sbi->s_journal,
3378                        JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3379                        JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3380                        JBD2_FEATURE_INCOMPAT_CSUM_V2);
3381        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3382                ret = jbd2_journal_set_features(sbi->s_journal,
3383                                compat, 0,
3384                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3385                                incompat);
3386        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3387                ret = jbd2_journal_set_features(sbi->s_journal,
3388                                compat, 0,
3389                                incompat);
3390                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3391                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3392        } else {
3393                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3394                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3395        }
3396
3397        return ret;
3398}
3399
3400/*
3401 * Note: calculating the overhead so we can be compatible with
3402 * historical BSD practice is quite difficult in the face of
3403 * clusters/bigalloc.  This is because multiple metadata blocks from
3404 * different block group can end up in the same allocation cluster.
3405 * Calculating the exact overhead in the face of clustered allocation
3406 * requires either O(all block bitmaps) in memory or O(number of block
3407 * groups**2) in time.  We will still calculate the superblock for
3408 * older file systems --- and if we come across with a bigalloc file
3409 * system with zero in s_overhead_clusters the estimate will be close to
3410 * correct especially for very large cluster sizes --- but for newer
3411 * file systems, it's better to calculate this figure once at mkfs
3412 * time, and store it in the superblock.  If the superblock value is
3413 * present (even for non-bigalloc file systems), we will use it.
3414 */
3415static int count_overhead(struct super_block *sb, ext4_group_t grp,
3416                          char *buf)
3417{
3418        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3419        struct ext4_group_desc  *gdp;
3420        ext4_fsblk_t            first_block, last_block, b;
3421        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3422        int                     s, j, count = 0;
3423
3424        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3425                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3426                        sbi->s_itb_per_group + 2);
3427
3428        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3429                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3430        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3431        for (i = 0; i < ngroups; i++) {
3432                gdp = ext4_get_group_desc(sb, i, NULL);
3433                b = ext4_block_bitmap(sb, gdp);
3434                if (b >= first_block && b <= last_block) {
3435                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3436                        count++;
3437                }
3438                b = ext4_inode_bitmap(sb, gdp);
3439                if (b >= first_block && b <= last_block) {
3440                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3441                        count++;
3442                }
3443                b = ext4_inode_table(sb, gdp);
3444                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3445                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3446                                int c = EXT4_B2C(sbi, b - first_block);
3447                                ext4_set_bit(c, buf);
3448                                count++;
3449                        }
3450                if (i != grp)
3451                        continue;
3452                s = 0;
3453                if (ext4_bg_has_super(sb, grp)) {
3454                        ext4_set_bit(s++, buf);
3455                        count++;
3456                }
3457                j = ext4_bg_num_gdb(sb, grp);
3458                if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
3459                        ext4_error(sb, "Invalid number of block group "
3460                                   "descriptor blocks: %d", j);
3461                        j = EXT4_BLOCKS_PER_GROUP(sb) - s;
3462                }
3463                count += j;
3464                for (; j > 0; j--)
3465                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3466        }
3467        if (!count)
3468                return 0;
3469        return EXT4_CLUSTERS_PER_GROUP(sb) -
3470                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3471}
3472
3473/*
3474 * Compute the overhead and stash it in sbi->s_overhead
3475 */
3476int ext4_calculate_overhead(struct super_block *sb)
3477{
3478        struct ext4_sb_info *sbi = EXT4_SB(sb);
3479        struct ext4_super_block *es = sbi->s_es;
3480        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3481        ext4_fsblk_t overhead = 0;
3482        char *buf = (char *) get_zeroed_page(GFP_NOFS);
3483
3484        if (!buf)
3485                return -ENOMEM;
3486
3487        /*
3488         * Compute the overhead (FS structures).  This is constant
3489         * for a given filesystem unless the number of block groups
3490         * changes so we cache the previous value until it does.
3491         */
3492
3493        /*
3494         * All of the blocks before first_data_block are overhead
3495         */
3496        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3497
3498        /*
3499         * Add the overhead found in each block group
3500         */
3501        for (i = 0; i < ngroups; i++) {
3502                int blks;
3503
3504                blks = count_overhead(sb, i, buf);
3505                overhead += blks;
3506                if (blks)
3507                        memset(buf, 0, PAGE_SIZE);
3508                cond_resched();
3509        }
3510        /* Add the internal journal blocks as well */
3511        if (sbi->s_journal && !sbi->journal_bdev)
3512                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3513
3514        sbi->s_overhead = overhead;
3515        smp_wmb();
3516        free_page((unsigned long) buf);
3517        return 0;
3518}
3519
3520
3521static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3522{
3523        ext4_fsblk_t resv_clusters;
3524
3525        /*
3526         * There's no need to reserve anything when we aren't using extents.
3527         * The space estimates are exact, there are no unwritten extents,
3528         * hole punching doesn't need new metadata... This is needed especially
3529         * to keep ext2/3 backward compatibility.
3530         */
3531        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3532                return 0;
3533        /*
3534         * By default we reserve 2% or 4096 clusters, whichever is smaller.
3535         * This should cover the situations where we can not afford to run
3536         * out of space like for example punch hole, or converting
3537         * unwritten extents in delalloc path. In most cases such
3538         * allocation would require 1, or 2 blocks, higher numbers are
3539         * very rare.
3540         */
3541        resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3542                        EXT4_SB(sb)->s_cluster_bits;
3543
3544        do_div(resv_clusters, 50);
3545        resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3546
3547        return resv_clusters;
3548}
3549
3550
3551static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
3552{
3553        ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
3554                                sbi->s_cluster_bits;
3555
3556        if (count >= clusters)
3557                return -EINVAL;
3558
3559        atomic64_set(&sbi->s_resv_clusters, count);
3560        return 0;
3561}
3562
3563static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3564{
3565        struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
3566        char *orig_data = kstrdup(data, GFP_KERNEL);
3567        struct buffer_head *bh;
3568        struct ext4_super_block *es = NULL;
3569        struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3570        ext4_fsblk_t block;
3571        ext4_fsblk_t sb_block = get_sb_block(&data);
3572        ext4_fsblk_t logical_sb_block;
3573        unsigned long offset = 0;
3574        unsigned long journal_devnum = 0;
3575        unsigned long def_mount_opts;
3576        struct inode *root;
3577        char *cp;
3578        const char *descr;
3579        int ret = -ENOMEM;
3580        int blocksize, clustersize;
3581        unsigned int db_count;
3582        unsigned int i;
3583        int needs_recovery, has_huge_files, has_bigalloc;
3584        __u64 blocks_count;
3585        int err = 0;
3586        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3587        ext4_group_t first_not_zeroed;
3588
3589        if ((data && !orig_data) || !sbi)
3590                goto out_free_base;
3591
3592        sbi->s_daxdev = dax_dev;
3593        sbi->s_blockgroup_lock =
3594                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3595        if (!sbi->s_blockgroup_lock)
3596                goto out_free_base;
3597
3598        sb->s_fs_info = sbi;
3599        sbi->s_sb = sb;
3600        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3601        sbi->s_sb_block = sb_block;
3602        if (sb->s_bdev->bd_part)
3603                sbi->s_sectors_written_start =
3604                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3605
3606        /* Cleanup superblock name */
3607        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3608                *cp = '!';
3609
3610        /* -EINVAL is default */
3611        ret = -EINVAL;
3612        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3613        if (!blocksize) {
3614                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3615                goto out_fail;
3616        }
3617
3618        /*
3619         * The ext4 superblock will not be buffer aligned for other than 1kB
3620         * block sizes.  We need to calculate the offset from buffer start.
3621         */
3622        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3623                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3624                offset = do_div(logical_sb_block, blocksize);
3625        } else {
3626                logical_sb_block = sb_block;
3627        }
3628
3629        if (!(bh = sb_bread(sb, logical_sb_block))) {
3630                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3631                goto out_fail;
3632        }
3633        /*
3634         * Note: s_es must be initialized as soon as possible because
3635         *       some ext4 macro-instructions depend on its value
3636         */
3637        es = (struct ext4_super_block *) (bh->b_data + offset);
3638        sbi->s_es = es;
3639        sb->s_magic = le16_to_cpu(es->s_magic);
3640        if (sb->s_magic != EXT4_SUPER_MAGIC)
3641                goto cantfind_ext4;
3642        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3643
3644        /* Warn if metadata_csum and gdt_csum are both set. */
3645        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3646                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3647            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3648                ext4_warning(sb, "metadata_csum and uninit_bg are "
3649                             "redundant flags; please run fsck.");
3650
3651        /* Check for a known checksum algorithm */
3652        if (!ext4_verify_csum_type(sb, es)) {
3653                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3654                         "unknown checksum algorithm.");
3655                silent = 1;
3656                goto cantfind_ext4;
3657        }
3658
3659        /* Load the checksum driver */
3660        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3661                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3662                sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3663                if (IS_ERR(sbi->s_chksum_driver)) {
3664                        ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3665                        ret = PTR_ERR(sbi->s_chksum_driver);
3666                        sbi->s_chksum_driver = NULL;
3667                        goto failed_mount;
3668                }
3669        }
3670
3671        /* Check superblock checksum */
3672        if (!ext4_superblock_csum_verify(sb, es)) {
3673                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3674                         "invalid superblock checksum.  Run e2fsck?");
3675                silent = 1;
3676                goto cantfind_ext4;
3677        }
3678
3679        /* Precompute checksum seed for all metadata */
3680        if (ext4_has_metadata_csum(sb))
3681                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3682                                               sizeof(es->s_uuid));
3683
3684        /* Set defaults before we parse the mount options */
3685        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3686        set_opt(sb, INIT_INODE_TABLE);
3687        if (def_mount_opts & EXT4_DEFM_DEBUG)
3688                set_opt(sb, DEBUG);
3689        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3690                set_opt(sb, GRPID);
3691        if (def_mount_opts & EXT4_DEFM_UID16)
3692                set_opt(sb, NO_UID32);
3693        /* xattr user namespace & acls are now defaulted on */
3694        set_opt(sb, XATTR_USER);
3695#ifdef CONFIG_EXT4_FS_POSIX_ACL
3696        set_opt(sb, POSIX_ACL);
3697#endif
3698        /* don't forget to enable journal_csum when metadata_csum is enabled. */
3699        if (ext4_has_metadata_csum(sb))
3700                set_opt(sb, JOURNAL_CHECKSUM);
3701
3702        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3703                set_opt(sb, JOURNAL_DATA);
3704        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3705                set_opt(sb, ORDERED_DATA);
3706        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3707                set_opt(sb, WRITEBACK_DATA);
3708
3709        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3710                set_opt(sb, ERRORS_PANIC);
3711        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3712                set_opt(sb, ERRORS_CONT);
3713        else
3714                set_opt(sb, ERRORS_RO);
3715        if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3716                set_opt(sb, BLOCK_VALIDITY);
3717        if (def_mount_opts & EXT4_DEFM_DISCARD)
3718                set_opt(sb, DISCARD);
3719
3720        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3721        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3722        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3723        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3724        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3725
3726        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3727                set_opt(sb, BARRIER);
3728
3729        /*
3730         * enable delayed allocation by default
3731         * Use -o nodelalloc to turn it off
3732         */
3733        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3734            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3735                set_opt(sb, DELALLOC);
3736
3737        /*
3738         * set default s_li_wait_mult for lazyinit, for the case there is
3739         * no mount option specified.
3740         */
3741        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3742
3743        if (sbi->s_es->s_mount_opts[0]) {
3744                char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
3745                                              sizeof(sbi->s_es->s_mount_opts),
3746                                              GFP_KERNEL);
3747                if (!s_mount_opts)
3748                        goto failed_mount;
3749                if (!parse_options(s_mount_opts, sb, &journal_devnum,
3750                                   &journal_ioprio, 0)) {
3751                        ext4_msg(sb, KERN_WARNING,
3752                                 "failed to parse options in superblock: %s",
3753                                 s_mount_opts);
3754                }
3755                kfree(s_mount_opts);
3756        }
3757        sbi->s_def_mount_opt = sbi->s_mount_opt;
3758        if (!parse_options((char *) data, sb, &journal_devnum,
3759                           &journal_ioprio, 0))
3760                goto failed_mount;
3761
3762        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3763                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3764                            "with data=journal disables delayed "
3765                            "allocation and O_DIRECT support!\n");
3766                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3767                        ext4_msg(sb, KERN_ERR, "can't mount with "
3768                                 "both data=journal and delalloc");
3769                        goto failed_mount;
3770                }
3771                if (test_opt(sb, DIOREAD_NOLOCK)) {
3772                        ext4_msg(sb, KERN_ERR, "can't mount with "
3773                                 "both data=journal and dioread_nolock");
3774                        goto failed_mount;
3775                }
3776                if (test_opt(sb, DAX)) {
3777                        ext4_msg(sb, KERN_ERR, "can't mount with "
3778                                 "both data=journal and dax");
3779                        goto failed_mount;
3780                }
3781                if (test_opt(sb, DELALLOC))
3782                        clear_opt(sb, DELALLOC);
3783        }
3784
3785        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3786                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3787
3788        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3789            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3790             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3791             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3792                ext4_msg(sb, KERN_WARNING,
3793                       "feature flags set on rev 0 fs, "
3794                       "running e2fsck is recommended");
3795
3796        if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3797                set_opt2(sb, HURD_COMPAT);
3798                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3799                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
3800                        ext4_msg(sb, KERN_ERR,
3801                                 "The Hurd can't support 64-bit file systems");
3802                        goto failed_mount;
3803                }
3804        }
3805
3806        if (IS_EXT2_SB(sb)) {
3807                if (ext2_feature_set_ok(sb))
3808                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3809                                 "using the ext4 subsystem");
3810                else {
3811                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3812                                 "to feature incompatibilities");
3813                        goto failed_mount;
3814                }
3815        }
3816
3817        if (IS_EXT3_SB(sb)) {
3818                if (ext3_feature_set_ok(sb))
3819                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3820                                 "using the ext4 subsystem");
3821                else {
3822                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3823                                 "to feature incompatibilities");
3824                        goto failed_mount;
3825                }
3826        }
3827
3828        /*
3829         * Check feature flags regardless of the revision level, since we
3830         * previously didn't change the revision level when setting the flags,
3831         * so there is a chance incompat flags are set on a rev 0 filesystem.
3832         */
3833        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3834                goto failed_mount;
3835
3836        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3837        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3838            blocksize > EXT4_MAX_BLOCK_SIZE) {
3839                ext4_msg(sb, KERN_ERR,
3840                       "Unsupported filesystem blocksize %d (%d log_block_size)",
3841                         blocksize, le32_to_cpu(es->s_log_block_size));
3842                goto failed_mount;
3843        }
3844        if (le32_to_cpu(es->s_log_block_size) >
3845            (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3846                ext4_msg(sb, KERN_ERR,
3847                         "Invalid log block size: %u",
3848                         le32_to_cpu(es->s_log_block_size));
3849                goto failed_mount;
3850        }
3851        if (le32_to_cpu(es->s_log_cluster_size) >
3852            (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3853                ext4_msg(sb, KERN_ERR,
3854                         "Invalid log cluster size: %u",
3855                         le32_to_cpu(es->s_log_cluster_size));
3856                goto failed_mount;
3857        }
3858
3859        if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
3860                ext4_msg(sb, KERN_ERR,
3861                         "Number of reserved GDT blocks insanely large: %d",
3862                         le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
3863                goto failed_mount;
3864        }
3865
3866        if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3867                static bool printed = false;
3868
3869                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3870                                        EXT4_FEATURE_INCOMPAT_INLINE_DATA)) {
3871                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
3872                                        " that may contain inline data");
3873                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
3874                }
3875                if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
3876                        ext4_msg(sb, KERN_ERR,
3877                                "DAX unsupported by block device. Turning off DAX.");
3878                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
3879                }
3880                if (!printed) {
3881                        mark_tech_preview("ext4 direct access (dax)", NULL);
3882                        printed = true;
3883                }
3884        }
3885
3886        if (sb->s_blocksize != blocksize) {
3887                /* Validate the filesystem blocksize */
3888                if (!sb_set_blocksize(sb, blocksize)) {
3889                        ext4_msg(sb, KERN_ERR, "bad block size %d",
3890                                        blocksize);
3891                        goto failed_mount;
3892                }
3893
3894                brelse(bh);
3895                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3896                offset = do_div(logical_sb_block, blocksize);
3897                bh = sb_bread(sb, logical_sb_block);
3898                if (!bh) {
3899                        ext4_msg(sb, KERN_ERR,
3900                               "Can't read superblock on 2nd try");
3901                        goto failed_mount;
3902                }
3903                es = (struct ext4_super_block *)(bh->b_data + offset);
3904                sbi->s_es = es;
3905                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3906                        ext4_msg(sb, KERN_ERR,
3907                               "Magic mismatch, very weird!");
3908                        goto failed_mount;
3909                }
3910        }
3911
3912        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3913                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3914        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3915                                                      has_huge_files);
3916        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3917
3918        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3919                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3920                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3921        } else {
3922                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3923                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3924                if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
3925                        ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
3926                                 sbi->s_first_ino);
3927                        goto failed_mount;
3928                }
3929                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3930                    (!is_power_of_2(sbi->s_inode_size)) ||
3931                    (sbi->s_inode_size > blocksize)) {
3932                        ext4_msg(sb, KERN_ERR,
3933                               "unsupported inode size: %d",
3934                               sbi->s_inode_size);
3935                        goto failed_mount;
3936                }
3937                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3938                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3939        }
3940
3941        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3942        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3943                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3944                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3945                    !is_power_of_2(sbi->s_desc_size)) {
3946                        ext4_msg(sb, KERN_ERR,
3947                               "unsupported descriptor size %lu",
3948                               sbi->s_desc_size);
3949                        goto failed_mount;
3950                }
3951        } else
3952                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3953
3954        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3955        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3956
3957        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3958        if (sbi->s_inodes_per_block == 0)
3959                goto cantfind_ext4;
3960        if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
3961            sbi->s_inodes_per_group > blocksize * 8) {
3962                ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
3963                         sbi->s_blocks_per_group);
3964                goto failed_mount;
3965        }
3966        sbi->s_itb_per_group = sbi->s_inodes_per_group /
3967                                        sbi->s_inodes_per_block;
3968        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3969        sbi->s_sbh = bh;
3970        sbi->s_mount_state = le16_to_cpu(es->s_state);
3971        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3972        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3973
3974        for (i = 0; i < 4; i++)
3975                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3976        sbi->s_def_hash_version = es->s_def_hash_version;
3977        if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
3978                i = le32_to_cpu(es->s_flags);
3979                if (i & EXT2_FLAGS_UNSIGNED_HASH)
3980                        sbi->s_hash_unsigned = 3;
3981                else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3982#ifdef __CHAR_UNSIGNED__
3983                        if (!(sb->s_flags & MS_RDONLY))
3984                                es->s_flags |=
3985                                        cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3986                        sbi->s_hash_unsigned = 3;
3987#else
3988                        if (!(sb->s_flags & MS_RDONLY))
3989                                es->s_flags |=
3990                                        cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3991#endif
3992                }
3993        }
3994
3995        /* Handle clustersize */
3996        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3997        has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3998                                EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3999        if (has_bigalloc) {
4000                if (clustersize < blocksize) {

4001                        ext4_msg(sb, KERN_ERR,
4002                                 "cluster size (%d) smaller than "
4003                                 "block size (%d)", clustersize, blocksize);
4004                        goto failed_mount;
4005                }
4006                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4007                        le32_to_cpu(es->s_log_block_size);
4008                sbi->s_clusters_per_group =
4009                        le32_to_cpu(es->s_clusters_per_group);
4010                if (sbi->s_clusters_per_group > blocksize * 8) {
4011                        ext4_msg(sb, KERN_ERR,
4012                                 "#clusters per group too big: %lu",
4013                                 sbi->s_clusters_per_group);
4014                        goto failed_mount;
4015                }
4016                if (sbi->s_blocks_per_group !=
4017                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4018                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4019                                 "clusters per group (%lu) inconsistent",
4020                                 sbi->s_blocks_per_group,
4021                                 sbi->s_clusters_per_group);
4022                        goto failed_mount;
4023                }
4024        } else {
4025                if (clustersize != blocksize) {
4026                        ext4_msg(sb, KERN_ERR,
4027                                 "fragment/cluster size (%d) != "
4028                                 "block size (%d)", clustersize, blocksize);
4029                        goto failed_mount;
4030                }
4031                if (sbi->s_blocks_per_group > blocksize * 8) {
4032                        ext4_msg(sb, KERN_ERR,
4033                                 "#blocks per group too big: %lu",
4034                                 sbi->s_blocks_per_group);
4035                        goto failed_mount;
4036                }
4037                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4038                sbi->s_cluster_bits = 0;
4039        }
4040        sbi->s_cluster_ratio = clustersize / blocksize;
4041
4042        /* Do we have standard group size of clustersize * 8 blocks ? */
4043        if (sbi->s_blocks_per_group == clustersize << 3)
4044                set_opt2(sb, STD_GROUP_SIZE);
4045
4046        /*
4047         * Test whether we have more sectors than will fit in sector_t,
4048         * and whether the max offset is addressable by the page cache.
4049         */
4050        err = generic_check_addressable(sb->s_blocksize_bits,
4051                                        ext4_blocks_count(es));
4052        if (err) {
4053                ext4_msg(sb, KERN_ERR, "filesystem"
4054                         " too large to mount safely on this system");
4055                if (sizeof(sector_t) < 8)
4056                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
4057                goto failed_mount;
4058        }
4059
4060        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4061                goto cantfind_ext4;
4062
4063        /* check blocks count against device size */
4064        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
4065        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4066                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4067                       "exceeds size of device (%llu blocks)",
4068                       ext4_blocks_count(es), blocks_count);
4069                goto failed_mount;
4070        }
4071
4072        /*
4073         * It makes no sense for the first data block to be beyond the end
4074         * of the filesystem.
4075         */
4076        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4077                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4078                         "block %u is beyond end of filesystem (%llu)",
4079                         le32_to_cpu(es->s_first_data_block),
4080                         ext4_blocks_count(es));
4081                goto failed_mount;
4082        }
4083        if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4084            (sbi->s_cluster_ratio == 1)) {
4085                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4086                         "block is 0 with a 1k block and cluster size");
4087                goto failed_mount;
4088        }
4089
4090        blocks_count = (ext4_blocks_count(es) -
4091                        le32_to_cpu(es->s_first_data_block) +
4092                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
4093        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4094        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4095                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
4096                       "(block count %llu, first data block %u, "
4097                       "blocks per group %lu)", sbi->s_groups_count,
4098                       ext4_blocks_count(es),
4099                       le32_to_cpu(es->s_first_data_block),
4100                       EXT4_BLOCKS_PER_GROUP(sb));
4101                goto failed_mount;
4102        }
4103        sbi->s_groups_count = blocks_count;
4104        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4105                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4106        if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4107            le32_to_cpu(es->s_inodes_count)) {
4108                ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4109                         le32_to_cpu(es->s_inodes_count),
4110                         ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4111                ret = -EINVAL;
4112                goto failed_mount;
4113        }
4114        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4115                   EXT4_DESC_PER_BLOCK(sb);
4116        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG)) {
4117                if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4118                        ext4_msg(sb, KERN_WARNING,
4119                                 "first meta block group too large: %u "
4120                                 "(group descriptor block count %u)",
4121                                 le32_to_cpu(es->s_first_meta_bg), db_count);
4122                        goto failed_mount;
4123                }
4124        }
4125        sbi->s_group_desc = kvmalloc(db_count *
4126                                          sizeof(struct buffer_head *),
4127                                          GFP_KERNEL);
4128        if (sbi->s_group_desc == NULL) {
4129                ext4_msg(sb, KERN_ERR, "not enough memory");
4130                ret = -ENOMEM;
4131                goto failed_mount;
4132        }
4133
4134        if (ext4_proc_root)
4135                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
4136
4137        if (sbi->s_proc)
4138                proc_create_data("options", S_IRUGO, sbi->s_proc,
4139                                 &ext4_seq_options_fops, sb);
4140
4141        bgl_lock_init(sbi->s_blockgroup_lock);
4142
4143        for (i = 0; i < db_count; i++) {
4144                block = descriptor_loc(sb, logical_sb_block, i);
4145                sbi->s_group_desc[i] = sb_bread(sb, block);
4146                if (!sbi->s_group_desc[i]) {
4147                        ext4_msg(sb, KERN_ERR,
4148                               "can't read group descriptor %d", i);
4149                        db_count = i;
4150                        goto failed_mount2;
4151                }
4152        }
4153        sbi->s_gdb_count = db_count;
4154        if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
4155                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4156                goto failed_mount2;
4157        }
4158
4159        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
4160        spin_lock_init(&sbi->s_next_gen_lock);
4161
4162        init_timer(&sbi->s_err_report);
4163        sbi->s_err_report.function = print_daily_error_info;
4164        sbi->s_err_report.data = (unsigned long) sb;
4165
4166        /* Register extent status tree shrinker */
4167        ext4_es_register_shrinker(sbi);
4168
4169        err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
4170        if (err) {
4171                ext4_msg(sb, KERN_ERR, "insufficient memory");
4172                goto failed_mount3;
4173        }
4174
4175        sbi->s_stripe = ext4_get_stripe_size(sbi);
4176        sbi->s_extent_max_zeroout_kb = 32;
4177
4178        /*
4179         * set up enough so that it can read an inode
4180         */
4181        if (!test_opt(sb, NOLOAD) &&
4182            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
4183                sb->s_op = &ext4_sops;
4184        else
4185                sb->s_op = &ext4_nojournal_sops;
4186        sb->s_export_op = &ext4_export_ops;
4187        sb->s_xattr = ext4_xattr_handlers;
4188#ifdef CONFIG_QUOTA
4189        sb->dq_op = &ext4_quota_operations;
4190        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
4191                sb->s_qcop = &ext4_qctl_sysfile_operations;
4192        else
4193                sb->s_qcop = &ext4_qctl_operations;
4194#endif
4195        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
4196
4197        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
4198        mutex_init(&sbi->s_orphan_lock);
4199
4200        sb->s_root = NULL;
4201
4202        needs_recovery = (es->s_last_orphan != 0 ||
4203                          EXT4_HAS_INCOMPAT_FEATURE(sb,
4204                                    EXT4_FEATURE_INCOMPAT_RECOVER));
4205
4206        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
4207            !(sb->s_flags & MS_RDONLY))
4208                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4209                        goto failed_mount3;
4210
4211        /*
4212         * The first inode we look at is the journal inode.  Don't try
4213         * root first: it may be modified in the journal!
4214         */
4215        if (!test_opt(sb, NOLOAD) &&
4216            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4217                err = ext4_load_journal(sb, es, journal_devnum);
4218                if (err)
4219                        goto failed_mount3;
4220        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
4221              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4222                ext4_msg(sb, KERN_ERR, "required journal recovery "
4223                       "suppressed and not mounted read-only");
4224                goto failed_mount_wq;
4225        } else {
4226                clear_opt(sb, DATA_FLAGS);
4227                sbi->s_journal = NULL;
4228                needs_recovery = 0;
4229                goto no_journal;
4230        }
4231
4232        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
4233            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4234                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
4235                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4236                goto failed_mount_wq;
4237        }
4238
4239        if (!set_journal_csum_feature_set(sb)) {
4240                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4241                         "feature set");
4242                goto failed_mount_wq;
4243        }
4244
4245        /* We have now updated the journal if required, so we can
4246         * validate the data journaling mode. */
4247        switch (test_opt(sb, DATA_FLAGS)) {
4248        case 0:
4249                /* No mode set, assume a default based on the journal
4250                 * capabilities: ORDERED_DATA if the journal can
4251                 * cope, else JOURNAL_DATA
4252                 */
4253                if (jbd2_journal_check_available_features
4254                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
4255                        set_opt(sb, ORDERED_DATA);
4256                else
4257                        set_opt(sb, JOURNAL_DATA);
4258                break;
4259
4260        case EXT4_MOUNT_ORDERED_DATA:
4261        case EXT4_MOUNT_WRITEBACK_DATA:
4262                if (!jbd2_journal_check_available_features
4263                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4264                        ext4_msg(sb, KERN_ERR, "Journal does not support "
4265                               "requested data journaling mode");
4266                        goto failed_mount_wq;
4267                }
4268        default:
4269                break;
4270        }
4271        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4272
4273        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4274
4275no_journal:
4276        /*
4277         * Get the # of file system overhead blocks from the
4278         * superblock if present.
4279         */
4280        if (es->s_overhead_clusters)
4281                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4282        else {
4283                err = ext4_calculate_overhead(sb);
4284                if (err)
4285                        goto failed_mount_wq;
4286        }
4287
4288        /*
4289         * The maximum number of concurrent works can be high and
4290         * concurrency isn't really necessary.  Limit it to 1.
4291         */
4292        EXT4_SB(sb)->rsv_conversion_wq =
4293                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4294        if (!EXT4_SB(sb)->rsv_conversion_wq) {
4295                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4296                ret = -ENOMEM;
4297                goto failed_mount4;
4298        }
4299
4300        /*
4301         * The jbd2_journal_load will have done any necessary log recovery,
4302         * so we can safely mount the rest of the filesystem now.
4303         */
4304
4305        root = ext4_iget(sb, EXT4_ROOT_INO);
4306        if (IS_ERR(root)) {
4307                ext4_msg(sb, KERN_ERR, "get root inode failed");
4308                ret = PTR_ERR(root);
4309                root = NULL;
4310                goto failed_mount4;
4311        }
4312        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4313                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4314                iput(root);
4315                goto failed_mount4;
4316        }
4317        sb->s_root = d_make_root(root);
4318        if (!sb->s_root) {
4319                ext4_msg(sb, KERN_ERR, "get root dentry failed");
4320                ret = -ENOMEM;
4321                goto failed_mount4;
4322        }
4323
4324        if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4325                sb->s_flags |= MS_RDONLY;
4326
4327        /* determine the minimum size of new large inodes, if present */
4328        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4329                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4330                                                     EXT4_GOOD_OLD_INODE_SIZE;
4331                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4332                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
4333                        if (sbi->s_want_extra_isize <
4334                            le16_to_cpu(es->s_want_extra_isize))
4335                                sbi->s_want_extra_isize =
4336                                        le16_to_cpu(es->s_want_extra_isize);
4337                        if (sbi->s_want_extra_isize <
4338                            le16_to_cpu(es->s_min_extra_isize))
4339                                sbi->s_want_extra_isize =
4340                                        le16_to_cpu(es->s_min_extra_isize);
4341                }
4342        }
4343        /* Check if enough inode space is available */
4344        if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4345                                                        sbi->s_inode_size) {
4346                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4347                                                       EXT4_GOOD_OLD_INODE_SIZE;
4348                ext4_msg(sb, KERN_INFO, "required extra inode space not"
4349                         "available");
4350        }
4351
4352        err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4353        if (err) {
4354                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4355                         "reserved pool", ext4_calculate_resv_clusters(sb));
4356                goto failed_mount4a;
4357        }
4358
4359        err = ext4_setup_system_zone(sb);
4360        if (err) {
4361                ext4_msg(sb, KERN_ERR, "failed to initialize system "
4362                         "zone (%d)", err);
4363                goto failed_mount4a;
4364        }
4365
4366        ext4_ext_init(sb);
4367        err = ext4_mb_init(sb);
4368        if (err) {
4369                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4370                         err);
4371                goto failed_mount5;
4372        }
4373
4374        block = ext4_count_free_clusters(sb);
4375        ext4_free_blocks_count_set(sbi->s_es, 
4376                                   EXT4_C2B(sbi, block));
4377        ext4_superblock_csum_set(sb);
4378        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4379                                  GFP_KERNEL);
4380        if (!err) {
4381                unsigned long freei = ext4_count_free_inodes(sb);
4382                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4383                ext4_superblock_csum_set(sb);
4384                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4385                                          GFP_KERNEL);
4386        }
4387        if (!err)
4388                err = percpu_counter_init(&sbi->s_dirs_counter,
4389                                          ext4_count_dirs(sb), GFP_KERNEL);
4390        if (!err)
4391                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4392                                          GFP_KERNEL);
4393        if (err) {
4394                ext4_msg(sb, KERN_ERR, "insufficient memory");
4395                goto failed_mount6;
4396        }
4397
4398        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4399                if (!ext4_fill_flex_info(sb)) {
4400                        ext4_msg(sb, KERN_ERR,
4401                               "unable to initialize "
4402                               "flex_bg meta info!");
4403                        goto failed_mount6;
4404                }
4405
4406        err = ext4_register_li_request(sb, first_not_zeroed);
4407        if (err)
4408                goto failed_mount6;
4409
4410        sbi->s_kobj.kset = ext4_kset;
4411        init_completion(&sbi->s_kobj_unregister);
4412        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
4413                                   "%s", sb->s_id);
4414        if (err)
4415                goto failed_mount7;
4416
4417#ifdef CONFIG_QUOTA
4418        /* Enable quota usage during mount. */
4419        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4420            !(sb->s_flags & MS_RDONLY)) {
4421                err = ext4_enable_quotas(sb);
4422                if (err)
4423                        goto failed_mount8;
4424        }
4425#endif  /* CONFIG_QUOTA */
4426
4427        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4428        ext4_orphan_cleanup(sb, es);
4429        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4430        if (needs_recovery) {
4431                ext4_msg(sb, KERN_INFO, "recovery complete");
4432                ext4_mark_recovery_complete(sb, es);
4433        }
4434        if (EXT4_SB(sb)->s_journal) {
4435                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4436                        descr = " journalled data mode";
4437                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4438                        descr = " ordered data mode";
4439                else
4440                        descr = " writeback data mode";
4441        } else
4442                descr = "out journal";
4443
4444        if (test_opt(sb, DISCARD)) {
4445                struct request_queue *q = bdev_get_queue(sb->s_bdev);
4446                if (!blk_queue_discard(q))
4447                        ext4_msg(sb, KERN_WARNING,
4448                                 "mounting with \"discard\" option, but "
4449                                 "the device does not support discard");
4450        }
4451
4452        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4453                 "Opts: %.*s%s%s", descr,
4454                 (int) sizeof(sbi->s_es->s_mount_opts),
4455                 sbi->s_es->s_mount_opts,
4456                 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4457
4458        if (es->s_error_count)
4459                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4460
4461        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4462        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4463        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4464        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4465
4466        kfree(orig_data);
4467        return 0;
4468
4469cantfind_ext4:
4470        if (!silent)
4471                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4472        goto failed_mount;
4473
4474#ifdef CONFIG_QUOTA
4475failed_mount8:
4476        kobject_del(&sbi->s_kobj);
4477#endif
4478failed_mount7:
4479        ext4_unregister_li_request(sb);
4480failed_mount6:
4481        ext4_mb_release(sb);
4482        if (sbi->s_flex_groups)
4483                ext4_kvfree(sbi->s_flex_groups);
4484        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4485        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4486        percpu_counter_destroy(&sbi->s_dirs_counter);
4487        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4488failed_mount5:
4489        ext4_ext_release(sb);
4490        ext4_release_system_zone(sb);
4491failed_mount4a:
4492        dput(sb->s_root);
4493        sb->s_root = NULL;
4494failed_mount4:
4495        ext4_msg(sb, KERN_ERR, "mount failed");
4496        if (EXT4_SB(sb)->rsv_conversion_wq)
4497                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4498failed_mount_wq:
4499        if (sbi->s_journal) {
4500                jbd2_journal_destroy(sbi->s_journal);
4501                sbi->s_journal = NULL;
4502        }
4503failed_mount3:
4504        ext4_es_unregister_shrinker(sbi);
4505        del_timer_sync(&sbi->s_err_report);
4506        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4507        if (sbi->s_mmp_tsk)
4508                kthread_stop(sbi->s_mmp_tsk);
4509failed_mount2:
4510        for (i = 0; i < db_count; i++)
4511                brelse(sbi->s_group_desc[i]);
4512        ext4_kvfree(sbi->s_group_desc);
4513failed_mount:
4514        if (sbi->s_chksum_driver)
4515                crypto_free_shash(sbi->s_chksum_driver);
4516        if (sbi->s_proc) {
4517                remove_proc_entry("options", sbi->s_proc);
4518                remove_proc_entry(sb->s_id, ext4_proc_root);
4519        }
4520#ifdef CONFIG_QUOTA
4521        for (i = 0; i < MAXQUOTAS; i++)
4522                kfree(sbi->s_qf_names[i]);
4523#endif
4524        ext4_blkdev_remove(sbi);
4525        brelse(bh);
4526out_fail:
4527        sb->s_fs_info = NULL;
4528        kfree(sbi->s_blockgroup_lock);
4529out_free_base:
4530        kfree(sbi);
4531        kfree(orig_data);
4532        fs_put_dax(dax_dev);
4533        return err ? err : ret;
4534}
4535
4536/*
4537 * Setup any per-fs journal parameters now.  We'll do this both on
4538 * initial mount, once the journal has been initialised but before we've
4539 * done any recovery; and again on any subsequent remount.
4540 */
4541static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4542{
4543        struct ext4_sb_info *sbi = EXT4_SB(sb);
4544
4545        journal->j_commit_interval = sbi->s_commit_interval;
4546        journal->j_min_batch_time = sbi->s_min_batch_time;
4547        journal->j_max_batch_time = sbi->s_max_batch_time;
4548
4549        write_lock(&journal->j_state_lock);
4550        if (test_opt(sb, BARRIER))
4551                journal->j_flags |= JBD2_BARRIER;
4552        else
4553                journal->j_flags &= ~JBD2_BARRIER;
4554        if (test_opt(sb, DATA_ERR_ABORT))
4555                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4556        else
4557                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4558        write_unlock(&journal->j_state_lock);
4559}
4560
4561static journal_t *ext4_get_journal(struct super_block *sb,
4562                                   unsigned int journal_inum)
4563{
4564        struct inode *journal_inode;
4565        journal_t *journal;
4566
4567        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4568
4569        /* First, test for the existence of a valid inode on disk.  Bad
4570         * things happen if we iget() an unused inode, as the subsequent
4571         * iput() will try to delete it. */
4572
4573        journal_inode = ext4_iget(sb, journal_inum);
4574        if (IS_ERR(journal_inode)) {
4575                ext4_msg(sb, KERN_ERR, "no journal found");
4576                return NULL;
4577        }
4578        if (!journal_inode->i_nlink) {
4579                make_bad_inode(journal_inode);
4580                iput(journal_inode);
4581                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4582                return NULL;
4583        }
4584
4585        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4586                  journal_inode, journal_inode->i_size);
4587        if (!S_ISREG(journal_inode->i_mode)) {
4588                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4589                iput(journal_inode);
4590                return NULL;
4591        }
4592
4593        journal = jbd2_journal_init_inode(journal_inode);
4594        if (!journal) {
4595                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4596                iput(journal_inode);
4597                return NULL;
4598        }
4599        journal->j_private = sb;
4600        ext4_init_journal_params(sb, journal);
4601        return journal;
4602}
4603
4604static journal_t *ext4_get_dev_journal(struct super_block *sb,
4605                                       dev_t j_dev)
4606{
4607        struct buffer_head *bh;
4608        journal_t *journal;
4609        ext4_fsblk_t start;
4610        ext4_fsblk_t len;
4611        int hblock, blocksize;
4612        ext4_fsblk_t sb_block;
4613        unsigned long offset;
4614        struct ext4_super_block *es;
4615        struct block_device *bdev;
4616
4617        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4618
4619        bdev = ext4_blkdev_get(j_dev, sb);
4620        if (bdev == NULL)
4621                return NULL;
4622
4623        blocksize = sb->s_blocksize;
4624        hblock = bdev_logical_block_size(bdev);
4625        if (blocksize < hblock) {
4626                ext4_msg(sb, KERN_ERR,
4627                        "blocksize too small for journal device");
4628                goto out_bdev;
4629        }
4630
4631        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4632        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4633        set_blocksize(bdev, blocksize);
4634        if (!(bh = __bread(bdev, sb_block, blocksize))) {
4635                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4636                       "external journal");
4637                goto out_bdev;
4638        }
4639
4640        es = (struct ext4_super_block *) (bh->b_data + offset);
4641        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4642            !(le32_to_cpu(es->s_feature_incompat) &
4643              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4644                ext4_msg(sb, KERN_ERR, "external journal has "
4645                                        "bad superblock");
4646                brelse(bh);
4647                goto out_bdev;
4648        }
4649
4650        if ((le32_to_cpu(es->s_feature_ro_compat) &
4651             EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4652            es->s_checksum != ext4_superblock_csum(sb, es)) {
4653                ext4_msg(sb, KERN_ERR, "external journal has "
4654                                       "corrupt superblock");
4655                brelse(bh);
4656                goto out_bdev;
4657        }
4658
4659        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4660                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4661                brelse(bh);
4662                goto out_bdev;
4663        }
4664
4665        len = ext4_blocks_count(es);
4666        start = sb_block + 1;
4667        brelse(bh);     /* we're done with the superblock */
4668
4669        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4670                                        start, len, blocksize);
4671        if (!journal) {
4672                ext4_msg(sb, KERN_ERR, "failed to create device journal");
4673                goto out_bdev;
4674        }
4675        journal->j_private = sb;
4676        ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4677        wait_on_buffer(journal->j_sb_buffer);
4678        if (!buffer_uptodate(journal->j_sb_buffer)) {
4679                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4680                goto out_journal;
4681        }
4682        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4683                ext4_msg(sb, KERN_ERR, "External journal has more than one "
4684                                        "user (unsupported) - %d",
4685                        be32_to_cpu(journal->j_superblock->s_nr_users));
4686                goto out_journal;
4687        }
4688        EXT4_SB(sb)->journal_bdev = bdev;
4689        ext4_init_journal_params(sb, journal);
4690        return journal;
4691
4692out_journal:
4693        jbd2_journal_destroy(journal);
4694out_bdev:
4695        ext4_blkdev_put(bdev);
4696        return NULL;
4697}
4698
4699static int ext4_load_journal(struct super_block *sb,
4700                             struct ext4_super_block *es,
4701                             unsigned long journal_devnum)
4702{
4703        journal_t *journal;
4704        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4705        dev_t journal_dev;
4706        int err = 0;
4707        int really_read_only;
4708
4709        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4710
4711        if (journal_devnum &&
4712            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4713                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4714                        "numbers have changed");
4715                journal_dev = new_decode_dev(journal_devnum);
4716        } else
4717                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4718
4719        really_read_only = bdev_read_only(sb->s_bdev);
4720
4721        /*
4722         * Are we loading a blank journal or performing recovery after a
4723         * crash?  For recovery, we need to check in advance whether we
4724         * can get read-write access to the device.
4725         */
4726        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4727                if (sb->s_flags & MS_RDONLY) {
4728                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
4729                                        "required on readonly filesystem");
4730                        if (really_read_only) {
4731                                ext4_msg(sb, KERN_ERR, "write access "
4732                                        "unavailable, cannot proceed");
4733                                return -EROFS;
4734                        }
4735                        ext4_msg(sb, KERN_INFO, "write access will "
4736                               "be enabled during recovery");
4737                }
4738        }
4739
4740        if (journal_inum && journal_dev) {
4741                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4742                       "and inode journals!");
4743                return -EINVAL;
4744        }
4745
4746        if (journal_inum) {
4747                if (!(journal = ext4_get_journal(sb, journal_inum)))
4748                        return -EINVAL;
4749        } else {
4750                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4751                        return -EINVAL;
4752        }
4753
4754        if (!(journal->j_flags & JBD2_BARRIER))
4755                ext4_msg(sb, KERN_INFO, "barriers disabled");
4756
4757        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
4758                err = jbd2_journal_wipe(journal, !really_read_only);
4759        if (!err) {
4760                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4761                if (save)
4762                        memcpy(save, ((char *) es) +
4763                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4764                err = jbd2_journal_load(journal);
4765                if (save)
4766                        memcpy(((char *) es) + EXT4_S_ERR_START,
4767                               save, EXT4_S_ERR_LEN);
4768                kfree(save);
4769        }
4770
4771        if (err) {
4772                ext4_msg(sb, KERN_ERR, "error loading journal");
4773                jbd2_journal_destroy(journal);
4774                return err;
4775        }
4776
4777        EXT4_SB(sb)->s_journal = journal;
4778        ext4_clear_journal_err(sb, es);
4779
4780        if (!really_read_only && journal_devnum &&
4781            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4782                es->s_journal_dev = cpu_to_le32(journal_devnum);
4783
4784                /* Make sure we flush the recovery flag to disk. */
4785                ext4_commit_super(sb, 1);
4786        }
4787
4788        return 0;
4789}
4790
4791static int ext4_commit_super(struct super_block *sb, int sync)
4792{
4793        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4794        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4795        int error = 0;
4796
4797        if (!sbh || block_device_ejected(sb))
4798                return error;
4799
4800        /*
4801         * The superblock bh should be mapped, but it might not be if the
4802         * device was hot-removed. Not much we can do but fail the I/O.
4803         */
4804        if (!buffer_mapped(sbh))
4805                return error;
4806
4807        /*
4808         * If the file system is mounted read-only, don't update the
4809         * superblock write time.  This avoids updating the superblock
4810         * write time when we are mounting the root file system
4811         * read/only but we need to replay the journal; at that point,
4812         * for people who are east of GMT and who make their clock
4813         * tick in localtime for Windows bug-for-bug compatibility,
4814         * the clock is set in the future, and this will cause e2fsck
4815         * to complain and force a full file system check.
4816         */
4817        if (!(sb->s_flags & MS_RDONLY))
4818                es->s_wtime = cpu_to_le32(get_seconds());
4819        if (sb->s_bdev->bd_part)
4820                es->s_kbytes_written =
4821                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4822                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4823                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
4824        else
4825                es->s_kbytes_written =
4826                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4827        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4828                ext4_free_blocks_count_set(es,
4829                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4830                                &EXT4_SB(sb)->s_freeclusters_counter)));
4831        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4832                es->s_free_inodes_count =
4833                        cpu_to_le32(percpu_counter_sum_positive(
4834                                &EXT4_SB(sb)->s_freeinodes_counter));
4835        BUFFER_TRACE(sbh, "marking dirty");
4836        ext4_superblock_csum_set(sb);
4837        if (sync)
4838                lock_buffer(sbh);
4839        if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
4840                /*
4841                 * Oh, dear.  A previous attempt to write the
4842                 * superblock failed.  This could happen because the
4843                 * USB device was yanked out.  Or it could happen to
4844                 * be a transient write error and maybe the block will
4845                 * be remapped.  Nothing we can do but to retry the
4846                 * write and hope for the best.
4847                 */
4848                ext4_msg(sb, KERN_ERR, "previous I/O error to "
4849                       "superblock detected");
4850                clear_buffer_write_io_error(sbh);
4851                set_buffer_uptodate(sbh);
4852        }
4853        mark_buffer_dirty(sbh);
4854        if (sync) {
4855                unlock_buffer(sbh);
4856                error = sync_dirty_buffer(sbh);
4857                if (error)
4858                        return error;
4859
4860                error = buffer_write_io_error(sbh);
4861                if (error) {
4862                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
4863                               "superblock");
4864                        clear_buffer_write_io_error(sbh);
4865                        set_buffer_uptodate(sbh);
4866                }
4867        }
4868        return error;
4869}
4870
4871/*
4872 * Have we just finished recovery?  If so, and if we are mounting (or
4873 * remounting) the filesystem readonly, then we will end up with a
4874 * consistent fs on disk.  Record that fact.
4875 */
4876static void ext4_mark_recovery_complete(struct super_block *sb,
4877                                        struct ext4_super_block *es)
4878{
4879        journal_t *journal = EXT4_SB(sb)->s_journal;
4880
4881        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4882                BUG_ON(journal != NULL);
4883                return;
4884        }
4885        jbd2_journal_lock_updates(journal);
4886        if (jbd2_journal_flush(journal) < 0)
4887                goto out;
4888
4889        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
4890            sb->s_flags & MS_RDONLY) {
4891                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4892                ext4_commit_super(sb, 1);
4893        }
4894
4895out:
4896        jbd2_journal_unlock_updates(journal);
4897}
4898
4899/*
4900 * If we are mounting (or read-write remounting) a filesystem whose journal
4901 * has recorded an error from a previous lifetime, move that error to the
4902 * main filesystem now.
4903 */
4904static void ext4_clear_journal_err(struct super_block *sb,
4905                                   struct ext4_super_block *es)
4906{
4907        journal_t *journal;
4908        int j_errno;
4909        const char *errstr;
4910
4911        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4912
4913        journal = EXT4_SB(sb)->s_journal;
4914
4915        /*
4916         * Now check for any error status which may have been recorded in the
4917         * journal by a prior ext4_error() or ext4_abort()
4918         */
4919
4920        j_errno = jbd2_journal_errno(journal);
4921        if (j_errno) {
4922                char nbuf[16];
4923
4924                errstr = ext4_decode_error(sb, j_errno, nbuf);
4925                ext4_warning(sb, "Filesystem error recorded "
4926                             "from previous mount: %s", errstr);
4927                ext4_warning(sb, "Marking fs in need of filesystem check.");
4928
4929                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4930                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4931                ext4_commit_super(sb, 1);
4932
4933                jbd2_journal_clear_err(journal);
4934                jbd2_journal_update_sb_errno(journal);
4935        }
4936}
4937
4938/*
4939 * Force the running and committing transactions to commit,
4940 * and wait on the commit.
4941 */
4942int ext4_force_commit(struct super_block *sb)
4943{
4944        journal_t *journal;
4945
4946        if (sb->s_flags & MS_RDONLY)
4947                return 0;
4948
4949        journal = EXT4_SB(sb)->s_journal;
4950        return ext4_journal_force_commit(journal);
4951}
4952
4953static int ext4_sync_fs(struct super_block *sb, int wait)
4954{
4955        int ret = 0;
4956        tid_t target;
4957        bool needs_barrier = false;
4958        struct ext4_sb_info *sbi = EXT4_SB(sb);
4959
4960        trace_ext4_sync_fs(sb, wait);
4961        flush_workqueue(sbi->rsv_conversion_wq);
4962        /*
4963         * Writeback quota in non-journalled quota case - journalled quota has
4964         * no dirty dquots
4965         */
4966        dquot_writeback_dquots(sb, -1);
4967        /*
4968         * Data writeback is possible w/o journal transaction, so barrier must
4969         * being sent at the end of the function. But we can skip it if
4970         * transaction_commit will do it for us.
4971         */
4972        target = jbd2_get_latest_transaction(sbi->s_journal);
4973        if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4974            !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4975                needs_barrier = true;
4976
4977        if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4978                if (wait)
4979                        ret = jbd2_log_wait_commit(sbi->s_journal, target);
4980        }
4981        if (needs_barrier) {
4982                int err;
4983                err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4984                if (!ret)
4985                        ret = err;
4986        }
4987
4988        return ret;
4989}
4990
4991static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
4992{
4993        int ret = 0;
4994
4995        trace_ext4_sync_fs(sb, wait);
4996        flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4997        dquot_writeback_dquots(sb, -1);
4998        if (wait && test_opt(sb, BARRIER))
4999                ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
5000

5001        return ret;
5002}
5003
5004/*
5005 * LVM calls this function before a (read-only) snapshot is created.  This
5006 * gives us a chance to flush the journal completely and mark the fs clean.
5007 *
5008 * Note that only this function cannot bring a filesystem to be in a clean
5009 * state independently. It relies on upper layer to stop all data & metadata
5010 * modifications.
5011 */
5012static int ext4_freeze(struct super_block *sb)
5013{
5014        int error = 0;
5015        journal_t *journal;
5016
5017        if (sb->s_flags & MS_RDONLY)
5018                return 0;
5019
5020        journal = EXT4_SB(sb)->s_journal;
5021
5022        /* Now we set up the journal barrier. */
5023        jbd2_journal_lock_updates(journal);
5024
5025        /*
5026         * Don't clear the needs_recovery flag if we failed to flush
5027         * the journal.
5028         */
5029        error = jbd2_journal_flush(journal);
5030        if (error < 0)
5031                goto out;
5032
5033        /* Journal blocked and flushed, clear needs_recovery flag. */
5034        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
5035        error = ext4_commit_super(sb, 1);
5036out:
5037        /* we rely on upper layer to stop further updates */
5038        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5039        return error;
5040}
5041
5042/*
5043 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
5044 * flag here, even though the filesystem is not technically dirty yet.
5045 */
5046static int ext4_unfreeze(struct super_block *sb)
5047{
5048        if (sb->s_flags & MS_RDONLY)
5049                return 0;
5050
5051        /* Reset the needs_recovery flag before the fs is unlocked. */
5052        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
5053        ext4_commit_super(sb, 1);
5054        return 0;
5055}
5056
5057/*
5058 * Structure to save mount options for ext4_remount's benefit
5059 */
5060struct ext4_mount_options {
5061        unsigned long s_mount_opt;
5062        unsigned long s_mount_opt2;
5063        kuid_t s_resuid;
5064        kgid_t s_resgid;
5065        unsigned long s_commit_interval;
5066        u32 s_min_batch_time, s_max_batch_time;
5067#ifdef CONFIG_QUOTA
5068        int s_jquota_fmt;
5069        char *s_qf_names[MAXQUOTAS];
5070#endif
5071};
5072
5073static int ext4_remount(struct super_block *sb, int *flags, char *data)
5074{
5075        struct ext4_super_block *es;
5076        struct ext4_sb_info *sbi = EXT4_SB(sb);
5077        unsigned long old_sb_flags;
5078        struct ext4_mount_options old_opts;
5079        int enable_quota = 0;
5080        ext4_group_t g;
5081        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5082        int err = 0;
5083#ifdef CONFIG_QUOTA
5084        int i, j;
5085#endif
5086        char *orig_data = kstrdup(data, GFP_KERNEL);
5087
5088        /* Store the original options */
5089        old_sb_flags = sb->s_flags;
5090        old_opts.s_mount_opt = sbi->s_mount_opt;
5091        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
5092        old_opts.s_resuid = sbi->s_resuid;
5093        old_opts.s_resgid = sbi->s_resgid;
5094        old_opts.s_commit_interval = sbi->s_commit_interval;
5095        old_opts.s_min_batch_time = sbi->s_min_batch_time;
5096        old_opts.s_max_batch_time = sbi->s_max_batch_time;
5097#ifdef CONFIG_QUOTA
5098        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
5099        for (i = 0; i < MAXQUOTAS; i++)
5100                if (sbi->s_qf_names[i]) {
5101                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
5102                                                         GFP_KERNEL);
5103                        if (!old_opts.s_qf_names[i]) {
5104                                for (j = 0; j < i; j++)
5105                                        kfree(old_opts.s_qf_names[j]);
5106                                kfree(orig_data);
5107                                return -ENOMEM;
5108                        }
5109                } else
5110                        old_opts.s_qf_names[i] = NULL;
5111#endif
5112        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
5113                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
5114
5115        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
5116                err = -EINVAL;
5117                goto restore_opts;
5118        }
5119
5120        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
5121            test_opt(sb, JOURNAL_CHECKSUM)) {
5122                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
5123                         "during remount not supported; ignoring");
5124                sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
5125        }
5126
5127        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
5128                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
5129                        ext4_msg(sb, KERN_ERR, "can't mount with "
5130                                 "both data=journal and delalloc");
5131                        err = -EINVAL;
5132                        goto restore_opts;
5133                }
5134                if (test_opt(sb, DIOREAD_NOLOCK)) {
5135                        ext4_msg(sb, KERN_ERR, "can't mount with "
5136                                 "both data=journal and dioread_nolock");
5137                        err = -EINVAL;
5138                        goto restore_opts;
5139                }
5140                if (test_opt(sb, DAX)) {
5141                        ext4_msg(sb, KERN_ERR, "can't mount with "
5142                                 "both data=journal and dax");
5143                        err = -EINVAL;
5144                        goto restore_opts;
5145                }
5146        }
5147
5148        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
5149                ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
5150                        "dax flag with busy inodes while remounting");
5151                sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
5152        }
5153
5154        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
5155                ext4_abort(sb, "Abort forced by user");
5156
5157        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5158                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
5159
5160        es = sbi->s_es;
5161
5162        if (sbi->s_journal) {
5163                ext4_init_journal_params(sb, sbi->s_journal);
5164                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
5165        }
5166
5167        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5168                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
5169                        err = -EROFS;
5170                        goto restore_opts;
5171                }
5172
5173                if (*flags & MS_RDONLY) {
5174                        err = dquot_suspend(sb, -1);
5175                        if (err < 0)
5176                                goto restore_opts;
5177
5178                        /*
5179                         * First of all, the unconditional stuff we have to do
5180                         * to disable replay of the journal when we next remount
5181                         */
5182                        sb->s_flags |= MS_RDONLY;
5183
5184                        /*
5185                         * OK, test if we are remounting a valid rw partition
5186                         * readonly, and if so set the rdonly flag and then
5187                         * mark the partition as valid again.
5188                         */
5189                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5190                            (sbi->s_mount_state & EXT4_VALID_FS))
5191                                es->s_state = cpu_to_le16(sbi->s_mount_state);
5192
5193                        if (sbi->s_journal)
5194                                ext4_mark_recovery_complete(sb, es);
5195                } else {
5196                        /* Make sure we can mount this feature set readwrite */
5197                        if (!ext4_feature_set_ok(sb, 0)) {
5198                                err = -EROFS;
5199                                goto restore_opts;
5200                        }
5201                        /*
5202                         * Make sure the group descriptor checksums
5203                         * are sane.  If they aren't, refuse to remount r/w.
5204                         */
5205                        for (g = 0; g < sbi->s_groups_count; g++) {
5206                                struct ext4_group_desc *gdp =
5207                                        ext4_get_group_desc(sb, g, NULL);
5208
5209                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5210                                        ext4_msg(sb, KERN_ERR,
5211               "ext4_remount: Checksum for group %u failed (%u!=%u)",
5212                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
5213                                               le16_to_cpu(gdp->bg_checksum));
5214                                        err = -EINVAL;
5215                                        goto restore_opts;
5216                                }
5217                        }
5218
5219                        /*
5220                         * If we have an unprocessed orphan list hanging
5221                         * around from a previously readonly bdev mount,
5222                         * require a full umount/remount for now.
5223                         */
5224                        if (es->s_last_orphan) {
5225                                ext4_msg(sb, KERN_WARNING, "Couldn't "
5226                                       "remount RDWR because of unprocessed "
5227                                       "orphan inode list.  Please "
5228                                       "umount/remount instead");
5229                                err = -EINVAL;
5230                                goto restore_opts;
5231                        }
5232
5233                        /*
5234                         * Mounting a RDONLY partition read-write, so reread
5235                         * and store the current valid flag.  (It may have
5236                         * been changed by e2fsck since we originally mounted
5237                         * the partition.)
5238                         */
5239                        if (sbi->s_journal)
5240                                ext4_clear_journal_err(sb, es);
5241                        sbi->s_mount_state = le16_to_cpu(es->s_state);
5242                        if (!ext4_setup_super(sb, es, 0))
5243                                sb->s_flags &= ~MS_RDONLY;
5244                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
5245                                                     EXT4_FEATURE_INCOMPAT_MMP))
5246                                if (ext4_multi_mount_protect(sb,
5247                                                le64_to_cpu(es->s_mmp_block))) {
5248                                        err = -EROFS;
5249                                        goto restore_opts;
5250                                }
5251                        enable_quota = 1;
5252                }
5253        }
5254
5255        /*
5256         * Reinitialize lazy itable initialization thread based on
5257         * current settings
5258         */
5259        if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
5260                ext4_unregister_li_request(sb);
5261        else {
5262                ext4_group_t first_not_zeroed;
5263                first_not_zeroed = ext4_has_uninit_itable(sb);
5264                ext4_register_li_request(sb, first_not_zeroed);
5265        }
5266
5267        ext4_setup_system_zone(sb);
5268        if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
5269                ext4_commit_super(sb, 1);
5270
5271#ifdef CONFIG_QUOTA
5272        /* Release old quota file names */
5273        for (i = 0; i < MAXQUOTAS; i++)
5274                kfree(old_opts.s_qf_names[i]);
5275        if (enable_quota) {
5276                if (sb_any_quota_suspended(sb))
5277                        dquot_resume(sb, -1);
5278                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5279                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
5280                        err = ext4_enable_quotas(sb);
5281                        if (err)
5282                                goto restore_opts;
5283                }
5284        }
5285#endif
5286
5287        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5288        kfree(orig_data);
5289        return 0;
5290
5291restore_opts:
5292        sb->s_flags = old_sb_flags;
5293        sbi->s_mount_opt = old_opts.s_mount_opt;
5294        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5295        sbi->s_resuid = old_opts.s_resuid;
5296        sbi->s_resgid = old_opts.s_resgid;
5297        sbi->s_commit_interval = old_opts.s_commit_interval;
5298        sbi->s_min_batch_time = old_opts.s_min_batch_time;
5299        sbi->s_max_batch_time = old_opts.s_max_batch_time;
5300#ifdef CONFIG_QUOTA
5301        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5302        for (i = 0; i < MAXQUOTAS; i++) {
5303                kfree(sbi->s_qf_names[i]);
5304                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5305        }
5306#endif
5307        kfree(orig_data);
5308        return err;
5309}
5310
5311static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5312{
5313        struct super_block *sb = dentry->d_sb;
5314        struct ext4_sb_info *sbi = EXT4_SB(sb);
5315        struct ext4_super_block *es = sbi->s_es;
5316        ext4_fsblk_t overhead = 0, resv_blocks;
5317        u64 fsid;
5318        s64 bfree;
5319        resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5320
5321        if (!test_opt(sb, MINIX_DF))
5322                overhead = sbi->s_overhead;
5323
5324        buf->f_type = EXT4_SUPER_MAGIC;
5325        buf->f_bsize = sb->s_blocksize;
5326        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5327        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5328                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5329        /* prevent underflow in case that few free space is available */
5330        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5331        buf->f_bavail = buf->f_bfree -
5332                        (ext4_r_blocks_count(es) + resv_blocks);
5333        if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5334                buf->f_bavail = 0;
5335        buf->f_files = le32_to_cpu(es->s_inodes_count);
5336        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5337        buf->f_namelen = EXT4_NAME_LEN;
5338        fsid = le64_to_cpup((void *)es->s_uuid) ^
5339               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5340        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5341        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5342
5343        return 0;
5344}
5345
5346/* Helper function for writing quotas on sync - we need to start transaction
5347 * before quota file is locked for write. Otherwise there are possible deadlocks:
5348 * Process 1                         Process 2
5349 * ext4_create()                     quota_sync()
5350 *   jbd2_journal_start()                  write_dquot()
5351 *   dquot_initialize()                         down(dqio_mutex)
5352 *     down(dqio_mutex)                    jbd2_journal_start()
5353 *
5354 */
5355
5356#ifdef CONFIG_QUOTA
5357
5358static inline struct inode *dquot_to_inode(struct dquot *dquot)
5359{
5360        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5361}
5362
5363static int ext4_write_dquot(struct dquot *dquot)
5364{
5365        int ret, err;
5366        handle_t *handle;
5367        struct inode *inode;
5368
5369        inode = dquot_to_inode(dquot);
5370        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5371                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5372        if (IS_ERR(handle))
5373                return PTR_ERR(handle);
5374        ret = dquot_commit(dquot);
5375        err = ext4_journal_stop(handle);
5376        if (!ret)
5377                ret = err;
5378        return ret;
5379}
5380
5381static int ext4_acquire_dquot(struct dquot *dquot)
5382{
5383        int ret, err;
5384        handle_t *handle;
5385
5386        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5387                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5388        if (IS_ERR(handle))
5389                return PTR_ERR(handle);
5390        ret = dquot_acquire(dquot);
5391        err = ext4_journal_stop(handle);
5392        if (!ret)
5393                ret = err;
5394        return ret;
5395}
5396
5397static int ext4_release_dquot(struct dquot *dquot)
5398{
5399        int ret, err;
5400        handle_t *handle;
5401
5402        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5403                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5404        if (IS_ERR(handle)) {
5405                /* Release dquot anyway to avoid endless cycle in dqput() */
5406                dquot_release(dquot);
5407                return PTR_ERR(handle);
5408        }
5409        ret = dquot_release(dquot);
5410        err = ext4_journal_stop(handle);
5411        if (!ret)
5412                ret = err;
5413        return ret;
5414}
5415
5416static int ext4_mark_dquot_dirty(struct dquot *dquot)
5417{
5418        struct super_block *sb = dquot->dq_sb;
5419        struct ext4_sb_info *sbi = EXT4_SB(sb);
5420
5421        /* Are we journaling quotas? */
5422        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
5423            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5424                dquot_mark_dquot_dirty(dquot);
5425                return ext4_write_dquot(dquot);
5426        } else {
5427                return dquot_mark_dquot_dirty(dquot);
5428        }
5429}
5430
5431static int ext4_write_info(struct super_block *sb, int type)
5432{
5433        int ret, err;
5434        handle_t *handle;
5435
5436        /* Data block + inode block */
5437        handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
5438        if (IS_ERR(handle))
5439                return PTR_ERR(handle);
5440        ret = dquot_commit_info(sb, type);
5441        err = ext4_journal_stop(handle);
5442        if (!ret)
5443                ret = err;
5444        return ret;
5445}
5446
5447/*
5448 * Turn on quotas during mount time - we need to find
5449 * the quota file and such...
5450 */
5451static int ext4_quota_on_mount(struct super_block *sb, int type)
5452{
5453        return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5454                                        EXT4_SB(sb)->s_jquota_fmt, type);
5455}
5456
5457static void lockdep_set_quota_inode(struct inode *inode, int subclass)
5458{
5459        struct ext4_inode_info *ei = EXT4_I(inode);
5460
5461        /* The first argument of lockdep_set_subclass has to be
5462         * *exactly* the same as the argument to init_rwsem() --- in
5463         * this case, in init_once() --- or lockdep gets unhappy
5464         * because the name of the lock is set using the
5465         * stringification of the argument to init_rwsem().
5466         */
5467        (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
5468        lockdep_set_subclass(&ei->i_data_sem, subclass);
5469}
5470
5471/*
5472 * Standard function to be called on quota_on
5473 */
5474static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5475                         struct path *path)
5476{
5477        int err;
5478
5479        if (!test_opt(sb, QUOTA))
5480                return -EINVAL;
5481
5482        /* Quotafile not on the same filesystem? */
5483        if (path->dentry->d_sb != sb)
5484                return -EXDEV;
5485        /* Journaling quota? */
5486        if (EXT4_SB(sb)->s_qf_names[type]) {
5487                /* Quotafile not in fs root? */
5488                if (path->dentry->d_parent != sb->s_root)
5489                        ext4_msg(sb, KERN_WARNING,
5490                                "Quota file not on filesystem root. "
5491                                "Journaled quota will not work");
5492        }
5493
5494        /*
5495         * When we journal data on quota file, we have to flush journal to see
5496         * all updates to the file when we bypass pagecache...
5497         */
5498        if (EXT4_SB(sb)->s_journal &&
5499            ext4_should_journal_data(path->dentry->d_inode)) {
5500                /*
5501                 * We don't need to lock updates but journal_flush() could
5502                 * otherwise be livelocked...
5503                 */
5504                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5505                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5506                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5507                if (err)
5508                        return err;
5509        }
5510
5511        lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
5512        err = dquot_quota_on(sb, type, format_id, path);
5513        if (err) {
5514                lockdep_set_quota_inode(path->dentry->d_inode,
5515                                             I_DATA_SEM_NORMAL);
5516        } else {
5517                struct inode *inode = d_inode(path->dentry);
5518                handle_t *handle;
5519
5520                inode_lock(inode);
5521                handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5522                if (IS_ERR(handle))
5523                        goto unlock_inode;
5524                EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
5525                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
5526                                S_NOATIME | S_IMMUTABLE);
5527                ext4_mark_inode_dirty(handle, inode);
5528                ext4_journal_stop(handle);
5529        unlock_inode:
5530                inode_unlock(inode);
5531        }
5532        return err;
5533}
5534
5535static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5536                             unsigned int flags)
5537{
5538        int err;
5539        struct inode *qf_inode;
5540        unsigned long qf_inums[MAXQUOTAS] = {
5541                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5542                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5543        };
5544
5545        BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
5546
5547        if (!qf_inums[type])
5548                return -EPERM;
5549
5550        qf_inode = ext4_iget(sb, qf_inums[type]);
5551        if (IS_ERR(qf_inode)) {
5552                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5553                return PTR_ERR(qf_inode);
5554        }
5555
5556        /* Don't account quota for quota files to avoid recursion */
5557        qf_inode->i_flags |= S_NOQUOTA;
5558        lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
5559        err = dquot_enable(qf_inode, type, format_id, flags);
5560        if (err)
5561                lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
5562        iput(qf_inode);
5563
5564        return err;
5565}
5566
5567/* Enable usage tracking for all quota types. */
5568static int ext4_enable_quotas(struct super_block *sb)
5569{
5570        int type, err = 0;
5571        unsigned long qf_inums[MAXQUOTAS] = {
5572                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5573                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5574        };
5575
5576        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5577        for (type = 0; type < MAXQUOTAS; type++) {
5578                if (qf_inums[type]) {
5579                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5580                                                DQUOT_USAGE_ENABLED);
5581                        if (err) {
5582                                for (type--; type >= 0; type--)
5583                                        dquot_quota_off(sb, type);
5584
5585                                ext4_warning(sb,
5586                                        "Failed to enable quota tracking "
5587                                        "(type=%d, err=%d). Please run "
5588                                        "e2fsck to fix.", type, err);
5589                                return err;
5590                        }
5591                }
5592        }
5593        return 0;
5594}
5595
5596/*
5597 * quota_on function that is used when QUOTA feature is set.
5598 */
5599static int ext4_quota_on_sysfile(struct super_block *sb, int type,
5600                                 int format_id)
5601{
5602        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5603                return -EINVAL;
5604
5605        /*
5606         * USAGE was enabled at mount time. Only need to enable LIMITS now.
5607         */
5608        return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
5609}
5610
5611static int ext4_quota_off(struct super_block *sb, int type)
5612{
5613        struct inode *inode = sb_dqopt(sb)->files[type];
5614        handle_t *handle;
5615        int err;
5616
5617        /* Force all delayed allocation blocks to be allocated.
5618         * Caller already holds s_umount sem */
5619        if (test_opt(sb, DELALLOC))
5620                sync_filesystem(sb);
5621
5622        if (!inode || !igrab(inode))
5623                goto out;
5624
5625        err = dquot_quota_off(sb, type);
5626        if (err || EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5627                goto out_put;
5628
5629        inode_lock(inode);
5630        /* Update modification times of quota files when userspace can
5631         * start looking at them */
5632        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5633        if (IS_ERR(handle))
5634                goto out_unlock;
5635        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
5636        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
5637        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5638        ext4_mark_inode_dirty(handle, inode);
5639        ext4_journal_stop(handle);
5640out_unlock:
5641        inode_unlock(inode);
5642out_put:
5643        lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
5644        iput(inode);
5645        return err;
5646out:
5647        return dquot_quota_off(sb, type);
5648}
5649
5650/*
5651 * quota_off function that is used when QUOTA feature is set.
5652 */
5653static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5654{
5655        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5656                return -EINVAL;
5657
5658        /* Disable only the limits. */
5659        return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5660}
5661
5662/* Read data from quotafile - avoid pagecache and such because we cannot afford
5663 * acquiring the locks... As quota files are never truncated and quota code
5664 * itself serializes the operations (and no one else should touch the files)
5665 * we don't have to be afraid of races */
5666static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5667                               size_t len, loff_t off)
5668{
5669        struct inode *inode = sb_dqopt(sb)->files[type];
5670        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5671        int err = 0;
5672        int offset = off & (sb->s_blocksize - 1);
5673        int tocopy;
5674        size_t toread;
5675        struct buffer_head *bh;
5676        loff_t i_size = i_size_read(inode);
5677
5678        if (off > i_size)
5679                return 0;
5680        if (off+len > i_size)
5681                len = i_size-off;
5682        toread = len;
5683        while (toread > 0) {
5684                tocopy = sb->s_blocksize - offset < toread ?
5685                                sb->s_blocksize - offset : toread;
5686                bh = ext4_bread(NULL, inode, blk, 0, &err);
5687                if (err)
5688                        return err;
5689                if (!bh)        /* A hole? */
5690                        memset(data, 0, tocopy);
5691                else
5692                        memcpy(data, bh->b_data+offset, tocopy);
5693                brelse(bh);
5694                offset = 0;
5695                toread -= tocopy;
5696                data += tocopy;
5697                blk++;
5698        }
5699        return len;
5700}
5701
5702/* Write to quotafile (we know the transaction is already started and has
5703 * enough credits) */
5704static ssize_t ext4_quota_write(struct super_block *sb, int type,
5705                                const char *data, size_t len, loff_t off)
5706{
5707        struct inode *inode = sb_dqopt(sb)->files[type];
5708        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5709        int err = 0;
5710        int offset = off & (sb->s_blocksize - 1);
5711        struct buffer_head *bh;
5712        handle_t *handle = journal_current_handle();
5713
5714        if (EXT4_SB(sb)->s_journal && !handle) {
5715                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5716                        " cancelled because transaction is not started",
5717                        (unsigned long long)off, (unsigned long long)len);
5718                return -EIO;
5719        }
5720        /*
5721         * Since we account only one data block in transaction credits,
5722         * then it is impossible to cross a block boundary.
5723         */
5724        if (sb->s_blocksize - offset < len) {
5725                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5726                        " cancelled because not block aligned",
5727                        (unsigned long long)off, (unsigned long long)len);
5728                return -EIO;
5729        }
5730
5731        bh = ext4_bread(handle, inode, blk, 1, &err);
5732        if (!bh)
5733                goto out;
5734        BUFFER_TRACE(bh, "get write access");
5735        err = ext4_journal_get_write_access(handle, bh);
5736        if (err) {
5737                brelse(bh);
5738                goto out;
5739        }
5740        lock_buffer(bh);
5741        memcpy(bh->b_data+offset, data, len);
5742        flush_dcache_page(bh->b_page);
5743        unlock_buffer(bh);
5744        err = ext4_handle_dirty_metadata(handle, NULL, bh);
5745        brelse(bh);
5746out:
5747        if (err)
5748                return err;
5749        if (inode->i_size < off + len) {
5750                i_size_write(inode, off + len);
5751                EXT4_I(inode)->i_disksize = inode->i_size;
5752                ext4_mark_inode_dirty(handle, inode);
5753        }
5754        return len;
5755}
5756
5757#endif
5758
5759static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5760                       const char *dev_name, void *data)
5761{
5762        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5763}
5764
/*
 * ext2 compatibility helpers.  The real versions are built only when no
 * ext2 driver is configured and CONFIG_EXT4_USE_FOR_EXT23 is set;
 * otherwise they collapse to no-op stubs.
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
/* Register ext2_fs_type; a failure is only logged. */
static inline void register_as_ext2(void)
{
        int err = register_filesystem(&ext2_fs_type);

        if (!err)
                return;

        printk(KERN_WARNING
               "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

/* Undo register_as_ext2(). */
static inline void unregister_as_ext2(void)
{
        unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 if the feature set of @sb is within the EXT2_FEATURE_*_SUPP
 * masks, 0 otherwise.  RO_COMPAT features are ignored on read-only
 * mounts.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
                return 0;
        if (sb->s_flags & MS_RDONLY)
                return 1;

        return !EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP);
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5794
/*
 * ext3 compatibility helpers.  The real versions are built only when no
 * ext3 driver is configured and CONFIG_EXT4_USE_FOR_EXT23 is set;
 * otherwise they collapse to no-op stubs.
 */
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
/* Register ext3_fs_type; a failure is only logged. */
static inline void register_as_ext3(void)
{
        int err = register_filesystem(&ext3_fs_type);

        if (!err)
                return;

        printk(KERN_WARNING
               "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

/* Undo register_as_ext3(). */
static inline void unregister_as_ext3(void)
{
        unregister_filesystem(&ext3_fs_type);
}

/*
 * Return 1 if the feature set of @sb is within the EXT3_FEATURE_*_SUPP
 * masks and the HAS_JOURNAL feature is set, 0 otherwise.  RO_COMPAT
 * features are ignored on read-only mounts.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
                return 0;
        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
                return 0;
        if (sb->s_flags & MS_RDONLY)
                return 1;

        return !EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
}
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5826
5827static struct file_system_type ext4_fs_type = {
5828        .owner          = THIS_MODULE,
5829        .name           = "ext4",
5830        .mount          = ext4_mount,
5831        .kill_sb        = kill_block_super,
5832        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE |
5833                          FS_HAS_DIO_IODONE2,
5834};
5835MODULE_ALIAS_FS("ext4");
5836
5837static int __init ext4_init_feat_adverts(void)
5838{
5839        struct ext4_features *ef;
5840        int ret = -ENOMEM;
5841
5842        ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5843        if (!ef)
5844                goto out;
5845
5846        ef->f_kobj.kset = ext4_kset;
5847        init_completion(&ef->f_kobj_unregister);
5848        ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5849                                   "features");
5850        if (ret) {
5851                kfree(ef);
5852                goto out;
5853        }
5854
5855        ext4_feat = ef;
5856        ret = 0;
5857out:
5858        return ret;
5859}
5860
5861static void ext4_exit_feat_adverts(void)
5862{
5863        kobject_put(&ext4_feat->f_kobj);
5864        wait_for_completion(&ext4_feat->f_kobj_unregister);
5865        kfree(ext4_feat);
5866}
5867
5868/* Shared across all ext4 file systems */
5869wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5870struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5871
5872static int __init ext4_init_fs(void)
5873{
5874        int i, err;
5875
5876        ext4_li_info = NULL;
5877        mutex_init(&ext4_li_mtx);
5878
5879        /* Build-time check for flags consistency */
5880        ext4_check_flag_values();
5881
5882        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
5883                mutex_init(&ext4__aio_mutex[i]);
5884                init_waitqueue_head(&ext4__ioend_wq[i]);
5885        }
5886
5887        err = ext4_init_es();
5888        if (err)
5889                return err;
5890
5891        err = ext4_init_pageio();
5892        if (err)
5893                goto out7;
5894
5895        err = ext4_init_system_zone();
5896        if (err)
5897                goto out6;
5898        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5899        if (!ext4_kset) {
5900                err = -ENOMEM;
5901                goto out5;
5902        }
5903        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5904
5905        err = ext4_init_feat_adverts();
5906        if (err)
5907                goto out4;
5908
5909        err = ext4_init_mballoc();
5910        if (err)
5911                goto out3;
5912
5913        err = ext4_init_xattr();
5914        if (err)
5915                goto out2;
5916        err = init_inodecache();
5917        if (err)
5918                goto out1;
5919        err = register_fo_extend(&ext4_file_operations);
5920        if (err)
5921                goto out_inodecache;
5922        register_as_ext3();
5923        register_as_ext2();
5924        err = register_filesystem(&ext4_fs_type);
5925        if (err)
5926                goto out;
5927
5928        return 0;
5929out:
5930        unregister_as_ext2();
5931        unregister_as_ext3();
5932        unregister_fo_extend(&ext4_file_operations);
5933out_inodecache:
5934        destroy_inodecache();
5935out1:
5936        ext4_exit_xattr();
5937out2:
5938        ext4_exit_mballoc();
5939out3:
5940        ext4_exit_feat_adverts();
5941out4:
5942        if (ext4_proc_root)
5943                remove_proc_entry("fs/ext4", NULL);
5944        kset_unregister(ext4_kset);
5945out5:
5946        ext4_exit_system_zone();
5947out6:
5948        ext4_exit_pageio();
5949out7:
5950        ext4_exit_es();
5951
5952        return err;
5953}
5954
5955static void __exit ext4_exit_fs(void)
5956{
5957        ext4_destroy_lazyinit_thread();
5958        unregister_as_ext2();
5959        unregister_as_ext3();
5960        unregister_filesystem(&ext4_fs_type);
5961        unregister_fo_extend(&ext4_file_operations);
5962        destroy_inodecache();
5963        ext4_exit_xattr();
5964        ext4_exit_mballoc();
5965        ext4_exit_feat_adverts();
5966        remove_proc_entry("fs/ext4", NULL);
5967        kset_unregister(ext4_kset);
5968        ext4_exit_system_zone();
5969        ext4_exit_pageio();
5970        ext4_exit_es();
5971}
5972
5973MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
5974MODULE_DESCRIPTION("Fourth Extended Filesystem");
5975MODULE_LICENSE("GPL");
5976module_init(ext4_init_fs)
5977module_exit(ext4_exit_fs)
5978