/* (removed web-viewer navigation chrome that was not part of the source file) */
   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/jbd2.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/parser.h>
  29#include <linux/buffer_head.h>
  30#include <linux/exportfs.h>
  31#include <linux/vfs.h>
  32#include <linux/random.h>
  33#include <linux/mount.h>
  34#include <linux/namei.h>
  35#include <linux/quotaops.h>
  36#include <linux/seq_file.h>
  37#include <linux/proc_fs.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/dax.h>
  42#include <linux/cleancache.h>
  43#include <asm/uaccess.h>
  44
  45#include <linux/kthread.h>
  46#include <linux/freezer.h>
  47
  48#include "ext4.h"
  49#include "ext4_extents.h"       /* Needed for trace points definition */
  50#include "ext4_jbd2.h"
  51#include "xattr.h"
  52#include "acl.h"
  53#include "mballoc.h"
  54
  55#define CREATE_TRACE_POINTS
  56#include <trace/events/ext4.h>
  57
  58static struct proc_dir_entry *ext4_proc_root;
  59static struct kset *ext4_kset;
  60static struct ext4_lazy_init *ext4_li_info;
  61static struct mutex ext4_li_mtx;
  62static struct ext4_features *ext4_feat;
  63
  64static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  65                             unsigned long journal_devnum);
  66static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  67static int ext4_commit_super(struct super_block *sb, int sync);
  68static void ext4_mark_recovery_complete(struct super_block *sb,
  69                                        struct ext4_super_block *es);
  70static void ext4_clear_journal_err(struct super_block *sb,
  71                                   struct ext4_super_block *es);
  72static int ext4_sync_fs(struct super_block *sb, int wait);
  73static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
  74static int ext4_remount(struct super_block *sb, int *flags, char *data);
  75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  76static int ext4_unfreeze(struct super_block *sb);
  77static int ext4_freeze(struct super_block *sb);
  78static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  79                       const char *dev_name, void *data);
  80static inline int ext2_feature_set_ok(struct super_block *sb);
  81static inline int ext3_feature_set_ok(struct super_block *sb);
  82static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  83static void ext4_destroy_lazyinit_thread(void);
  84static void ext4_unregister_li_request(struct super_block *sb);
  85static void ext4_clear_request_list(void);
  86static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
  87
/*
 * When ext2 is not built (neither built-in nor as a module) and
 * CONFIG_EXT4_USE_FOR_EXT23 is enabled, ext4 registers itself to also
 * service "ext2" mounts via this alias filesystem type.  IS_EXT2_SB()
 * then identifies superblocks that were mounted through the alias by
 * checking the block device's holder.
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext2",
	.mount		= ext4_mount,		/* same mount path as ext4 */
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
/* ext2 proper is present (or aliasing disabled): never an aliased mount. */
#define IS_EXT2_SB(sb) (0)
#endif
 102
 103
/*
 * As with ext2 above: when ext3 is not built and aliasing is enabled,
 * ext4 services "ext3" mounts through this alias type, and IS_EXT3_SB()
 * detects such superblocks via the block device holder.
 */
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext3",
	.mount		= ext4_mount,		/* same mount path as ext4 */
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
/* ext3 proper is present (or aliasing disabled): never an aliased mount. */
#define IS_EXT3_SB(sb) (0)
#endif
 118
 119static int ext4_verify_csum_type(struct super_block *sb,
 120                                 struct ext4_super_block *es)
 121{
 122        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 123                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 124                return 1;
 125
 126        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 127}
 128
 129static __le32 ext4_superblock_csum(struct super_block *sb,
 130                                   struct ext4_super_block *es)
 131{
 132        struct ext4_sb_info *sbi = EXT4_SB(sb);
 133        int offset = offsetof(struct ext4_super_block, s_checksum);
 134        __u32 csum;
 135
 136        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 137
 138        return cpu_to_le32(csum);
 139}
 140
 141static int ext4_superblock_csum_verify(struct super_block *sb,
 142                                       struct ext4_super_block *es)
 143{
 144        if (!ext4_has_metadata_csum(sb))
 145                return 1;
 146
 147        return es->s_checksum == ext4_superblock_csum(sb, es);
 148}
 149
 150void ext4_superblock_csum_set(struct super_block *sb)
 151{
 152        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 153
 154        if (!ext4_has_metadata_csum(sb))
 155                return;
 156
 157        es->s_checksum = ext4_superblock_csum(sb, es);
 158}
 159
 160void *ext4_kvmalloc(size_t size, gfp_t flags)
 161{
 162        void *ret;
 163
 164        ret = kmalloc(size, flags);
 165        if (!ret)
 166                ret = __vmalloc(size, flags, PAGE_KERNEL);
 167        return ret;
 168}
 169
 170void *ext4_kvzalloc(size_t size, gfp_t flags)
 171{
 172        void *ret;
 173
 174        ret = kzalloc(size, flags);
 175        if (!ret)
 176                ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 177        return ret;
 178}
 179
 180void ext4_kvfree(void *ptr)
 181{
 182        if (is_vmalloc_addr(ptr))
 183                vfree(ptr);
 184        else
 185                kfree(ptr);
 186
 187}
 188
 189ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 190                               struct ext4_group_desc *bg)
 191{
 192        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 193                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 194                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 195}
 196
 197ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 198                               struct ext4_group_desc *bg)
 199{
 200        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 201                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 202                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 203}
 204
 205ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 206                              struct ext4_group_desc *bg)
 207{
 208        return le32_to_cpu(bg->bg_inode_table_lo) |
 209                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 210                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 211}
 212
 213__u32 ext4_free_group_clusters(struct super_block *sb,
 214                               struct ext4_group_desc *bg)
 215{
 216        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 217                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 218                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 219}
 220
 221__u32 ext4_free_inodes_count(struct super_block *sb,
 222                              struct ext4_group_desc *bg)
 223{
 224        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 225                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 226                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 227}
 228
 229__u32 ext4_used_dirs_count(struct super_block *sb,
 230                              struct ext4_group_desc *bg)
 231{
 232        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 233                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 234                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 235}
 236
 237__u32 ext4_itable_unused_count(struct super_block *sb,
 238                              struct ext4_group_desc *bg)
 239{
 240        return le16_to_cpu(bg->bg_itable_unused_lo) |
 241                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 242                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 243}
 244
 245void ext4_block_bitmap_set(struct super_block *sb,
 246                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 247{
 248        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 249        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 250                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 251}
 252
 253void ext4_inode_bitmap_set(struct super_block *sb,
 254                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 255{
 256        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 257        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 258                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 259}
 260
 261void ext4_inode_table_set(struct super_block *sb,
 262                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 263{
 264        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 265        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 266                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 267}
 268
 269void ext4_free_group_clusters_set(struct super_block *sb,
 270                                  struct ext4_group_desc *bg, __u32 count)
 271{
 272        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 273        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 274                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 275}
 276
 277void ext4_free_inodes_set(struct super_block *sb,
 278                          struct ext4_group_desc *bg, __u32 count)
 279{
 280        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 281        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 282                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 283}
 284
 285void ext4_used_dirs_set(struct super_block *sb,
 286                          struct ext4_group_desc *bg, __u32 count)
 287{
 288        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 289        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 290                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 291}
 292
 293void ext4_itable_unused_set(struct super_block *sb,
 294                          struct ext4_group_desc *bg, __u32 count)
 295{
 296        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 297        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 298                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 299}
 300
 301
/*
 * Record an error in the superblock's on-disk error tracking fields.
 *
 * Updates the "last error" function/line/time fields, and — on the first
 * error since the fields were cleared — snapshots them into the "first
 * error" fields as well.  Arms the daily error-report timer when this is
 * the first counted error.  The caller is responsible for actually
 * writing the superblock out (see save_error_info()).
 */
static void __save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	es->s_last_error_time = cpu_to_le32(get_seconds());
	/* NOTE(review): strncpy may leave the fixed-size on-disk field
	 * without a NUL terminator when func is long; readers appear to
	 * treat it as a fixed-width buffer — confirm before changing. */
	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
	es->s_last_error_line = cpu_to_le32(line);
	if (!es->s_first_error_time) {
		/* First error since the fields were cleared: copy the
		 * "last error" data into the "first error" slots.  The
		 * ino/block fields are filled in by our callers before
		 * they invoke us. */
		es->s_first_error_time = es->s_last_error_time;
		strncpy(es->s_first_error_func, func,
			sizeof(es->s_first_error_func));
		es->s_first_error_line = cpu_to_le32(line);
		es->s_first_error_ino = es->s_last_error_ino;
		es->s_first_error_block = es->s_last_error_block;
	}
	/*
	 * Start the daily error reporting function if it hasn't been
	 * started already
	 */
	if (!es->s_error_count)
		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
	le32_add_cpu(&es->s_error_count, 1);
}
 328
 329static void save_error_info(struct super_block *sb, const char *func,
 330                            unsigned int line)
 331{
 332        __save_error_info(sb, func, line);
 333        ext4_commit_super(sb, 1);
 334}
 335
 336/*
 337 * The del_gendisk() function uninitializes the disk-specific data
 338 * structures, including the bdi structure, without telling anyone
 339 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 340 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 341 * This is a kludge to prevent these oops until we can put in a proper
 342 * hook in del_gendisk() to inform the VFS and file system layers.
 343 */
 344static int block_device_ejected(struct super_block *sb)
 345{
 346        struct inode *bd_inode = sb->s_bdev->bd_inode;
 347        struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 348
 349        return bdi->dev == NULL;
 350}
 351
/*
 * Invoked by jbd2 when a transaction commits: run every callback queued
 * on the transaction's t_private_list (via ext4_journal_callback_add()).
 *
 * Entries are drained one at a time so s_md_lock can be dropped around
 * each jce_func() call; each entry is removed with list_del_init() while
 * the lock is still held, so a callback never observes itself queued.
 */
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block		*sb = journal->j_private;
	struct ext4_sb_info		*sbi = EXT4_SB(sb);
	int				error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry	*jce;

	/* jbd2 must not call us on an already-finished transaction. */
	BUG_ON(txn->t_state == T_FINISHED);
	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}
 371
 372static bool system_going_down(void)
 373{
 374        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 375                || system_state == SYSTEM_RESTART;
 376}
 377
 378/* Deal with the reporting of failure conditions on a filesystem such as
 379 * inconsistencies detected or read IO failures.
 380 *
 381 * On ext2, we can store the error state of the filesystem in the
 382 * superblock.  That is not possible on ext4, because we may have other
 383 * write ordering constraints on the superblock which prevent us from
 384 * writing it out straight away; and given that the journal is about to
 385 * be aborted, we can't rely on the current, or future, transactions to
 386 * write out the superblock safely.
 387 *
 388 * We'll just use the jbd2_journal_abort() error code to record an error in
 389 * the journal instead.  On recovery, the journal will complain about
 390 * that error until we've noted it down and cleared it.
 391 */
 392
/*
 * React to a filesystem error according to the errors= mount option:
 * abort the journal (unless errors=continue), remount read-only
 * (errors=remount-ro, or forced when the system is shutting down), or
 * panic (errors=panic).  Already-read-only filesystems need no action.
 */
static void ext4_handle_error(struct super_block *sb)
{
	if (sb->s_flags & MS_RDONLY)
		return;

	if (!test_opt(sb, ERRORS_CONT)) {
		journal_t *journal = EXT4_SB(sb)->s_journal;

		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}
	/*
	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
	 * could panic during 'reboot -f' as the underlying device got already
	 * disabled.
	 */
	if (test_opt(sb, ERRORS_RO) || system_going_down()) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		/*
		 * Make sure updated value of ->s_mount_flags will be visible
		 * before ->s_flags update
		 */
		smp_wmb();
		sb->s_flags |= MS_RDONLY;
	} else if (test_opt(sb, ERRORS_PANIC)) {
		/* With JBD2_REC_ERR the error is recorded in the journal,
		 * so skip the panic and let recovery handle it. */
		if (EXT4_SB(sb)->s_journal &&
		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
			return;
		panic("EXT4-fs (device %s): panic forced after error\n",
			sb->s_id);
	}
}
 426
/* Rate-limit error reporting using the per-superblock ratelimit state;
 * evaluates true when this message may still be printed. */
#define ext4_error_ratelimit(sb)					\
		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
			     "EXT4-fs error")
 430
/*
 * Report a filesystem error: print a (rate-limited) message, record the
 * error location in the superblock, and apply the errors= policy via
 * ext4_handle_error().  Callers use the ext4_error() wrapper macro.
 */
void __ext4_error(struct super_block *sb, const char *function,
		  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
		       sb->s_id, function, line, current->comm, &vaf);
		va_end(args);
	}
	/* Record and handle the error even when the message was suppressed. */
	save_error_info(sb, function, line);
	ext4_handle_error(sb);
}
 449
/*
 * Like __ext4_error() but for errors tied to a specific inode (and
 * optionally a block).  The inode/block are stashed in the superblock's
 * "last error" fields before __save_error_info() snapshots them.
 * A block number of 0 means "no block involved".
 */
void __ext4_error_inode(struct inode *inode, const char *function,
			unsigned int line, ext4_fsblk_t block,
			const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;

	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	es->s_last_error_block = cpu_to_le64(block);
	if (ext4_error_ratelimit(inode->i_sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: block %llu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, &vaf);
		else
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, &vaf);
		va_end(args);
	}
	save_error_info(inode->i_sb, function, line);
	ext4_handle_error(inode->i_sb);
}
 479
/*
 * Like __ext4_error_inode() but for errors tied to an open file: the
 * message additionally includes the file's path (best-effort; falls back
 * to "(unknown)" if d_path() fails or the path exceeds the 80-byte
 * scratch buffer).  A block number of 0 means "no block involved".
 */
void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct ext4_super_block *es;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	es = EXT4_SB(inode->i_sb)->s_es;
	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = d_path(&(file->f_path), pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	save_error_info(inode->i_sb, function, line);
	ext4_handle_error(inode->i_sb);
}
 516
 517const char *ext4_decode_error(struct super_block *sb, int errno,
 518                              char nbuf[16])
 519{
 520        char *errstr = NULL;
 521
 522        switch (errno) {
 523        case -EIO:
 524                errstr = "IO failure";
 525                break;
 526        case -ENOMEM:
 527                errstr = "Out of memory";
 528                break;
 529        case -EROFS:
 530                if (!sb || (EXT4_SB(sb)->s_journal &&
 531                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 532                        errstr = "Journal has aborted";
 533                else
 534                        errstr = "Readonly filesystem";
 535                break;
 536        default:
 537                /* If the caller passed in an extra buffer for unknown
 538                 * errors, textualise them now.  Else we just return
 539                 * NULL. */
 540                if (nbuf) {
 541                        /* Check for truncated error codes... */
 542                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 543                                errstr = nbuf;
 544                }
 545                break;
 546        }
 547
 548        return errstr;
 549}
 550
 551/* __ext4_std_error decodes expected errors from journaling functions
 552 * automatically and invokes the appropriate error response.  */
 553
/*
 * Report an error identified only by an errno (typically returned from a
 * journaling call): decode it to text, log, record, and apply the
 * errors= policy.  See the comment above for the EROFS special case.
 */
void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
{
	char nbuf[16];
	const char *errstr;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL &&
	    (sb->s_flags & MS_RDONLY))
		return;

	if (ext4_error_ratelimit(sb)) {
		errstr = ext4_decode_error(sb, errno, nbuf);
		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
		       sb->s_id, function, line, errstr);
	}

	/* Record and handle the error even when the message was suppressed. */
	save_error_info(sb, function, line);
	ext4_handle_error(sb);
}
 576
 577/*
 578 * ext4_abort is a much stronger failure handler than ext4_error.  The
 579 * abort function may be used to deal with unrecoverable failures such
 580 * as journal IO errors or ENOMEM at a critical moment in log management.
 581 *
 582 * We unconditionally force the filesystem into an ABORT|READONLY state,
 583 * unless the error response on the fs has been set to panic in which
 584 * case we take the easy way out and panic immediately.
 585 */
 586
/*
 * Unconditionally force ABORT|READONLY (or panic under errors=panic) —
 * see the block comment above.  The error info is saved twice: once
 * before the message (so it is recorded even if we panic), and again
 * after the RDONLY/abort state changes so the committed superblock
 * reflects them.
 */
void __ext4_abort(struct super_block *sb, const char *function,
		unsigned int line, const char *fmt, ...)
{
	va_list args;

	save_error_info(sb, function, line);
	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
	       function, line);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if ((sb->s_flags & MS_RDONLY) == 0) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		/*
		 * Make sure updated value of ->s_mount_flags will be visible
		 * before ->s_flags update
		 */
		smp_wmb();
		sb->s_flags |= MS_RDONLY;
		if (EXT4_SB(sb)->s_journal)
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
		save_error_info(sb, function, line);
	}
	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
		/* With JBD2_REC_ERR the error is recorded in the journal,
		 * so skip the panic and let recovery handle it. */
		if (EXT4_SB(sb)->s_journal &&
		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
			return;
		panic("EXT4-fs panic from previous error\n");
	}
}
 620
/*
 * Print a rate-limited informational message for this filesystem.
 * @prefix is a printk level string (e.g. KERN_INFO).  Unlike the error
 * reporters above, this records nothing in the superblock.
 */
void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	va_end(args);
}
 636
/*
 * Print a rate-limited warning with the reporting function and line.
 * Warnings, unlike errors, do not mark the filesystem as having errors.
 */
void __ext4_warning(struct super_block *sb, const char *function,
		    unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			  "EXT4-fs warning"))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
	va_end(args);
}
 654
/*
 * Report an error detected while holding a block-group lock.  Under
 * errors=continue the superblock is committed asynchronously and the
 * group lock is never dropped; otherwise the group must be unlocked
 * around the synchronous commit and error handling, then re-locked so
 * the caller's locking state is unchanged on return.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	es->s_last_error_ino = cpu_to_le32(ino);
	es->s_last_error_block = cpu_to_le64(block);
	__save_error_info(sb, function, line);

	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		/* Async commit: safe while still holding the group lock. */
		ext4_commit_super(sb, 0);
		return;
	}

	ext4_unlock_group(sb, grp);
	ext4_commit_super(sb, 1);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  (NOTE(review): the comment below
	 * about returning 1 is stale — this function returns void; the
	 * ERRORS_CONT/ERRORS_RO distinction is no longer signalled via a
	 * return value.)
	 */
	ext4_lock_group(sb, grp);
	return;
}
 707
/*
 * Upgrade an old (GOOD_OLD_REV) superblock to EXT4_DYNAMIC_REV when a
 * new feature flag requires it, filling in the fields the dynamic
 * revision expects.  A no-op when the superblock is already dynamic.
 */
void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}
 732
 733/*
 734 * Open the external journal device
 735 */
 736static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 737{
 738        struct block_device *bdev;
 739        char b[BDEVNAME_SIZE];
 740
 741        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 742        if (IS_ERR(bdev))
 743                goto fail;
 744        return bdev;
 745
 746fail:
 747        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 748                        __bdevname(dev, b), PTR_ERR(bdev));
 749        return NULL;
 750}
 751
 752/*
 753 * Release the journal device
 754 */
static void ext4_blkdev_put(struct block_device *bdev)
{
	/* Mode flags must match those passed to blkdev_get_by_dev() in
	 * ext4_blkdev_get(). */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
 759
 760static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 761{
 762        struct block_device *bdev;
 763        bdev = sbi->journal_bdev;
 764        if (bdev) {
 765                ext4_blkdev_put(bdev);
 766                sbi->journal_bdev = NULL;
 767        }
 768}
 769
/* Map an entry on the per-sb orphan list back to its VFS inode. */
static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}
 774
/*
 * Debug dump of the in-memory orphan list (and the on-disk orphan head)
 * to the kernel log; used when the list is unexpectedly non-empty at
 * unmount time.
 */
static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}
 792
#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

/* Disable quotas of every type at unmount time. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
	int type;

	/* Use our quota_off function to clear inode flags etc. */
	for (type = 0; type < MAXQUOTAS; type++)
		ext4_quota_off(sb, type);
}
#else
/* No-op when quota support is compiled out. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
}
#endif
 809
/*
 * Tear down an ext4 superblock at unmount time.
 *
 * The ordering below matters: quotas and the lazy-init thread are stopped
 * first, pending unwritten-extent conversions are flushed, then the
 * journal is destroyed before the remaining in-memory state (mballoc,
 * extent status tree, group descriptors, counters) is released.  Finally
 * the sysfs kobject is dropped and the sbi itself is freed.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int aborted = 0;
        int i, err;

        ext4_unregister_li_request(sb);
        ext4_quota_off_umount(sb);

        /* Make sure all delayed extent conversion work has completed
         * before the workqueue goes away. */
        flush_workqueue(sbi->rsv_conversion_wq);
        destroy_workqueue(sbi->rsv_conversion_wq);

        if (sbi->s_journal) {
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
                /* Don't double-report: if the journal was already
                 * aborted, an error has been logged elsewhere. */
                if ((err < 0) && !aborted)
                        ext4_abort(sb, "Couldn't clean up the journal");
        }

        ext4_es_unregister_shrinker(sbi);
        del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);

        /* On a clean r/w unmount, mark the fs as no longer needing
         * journal recovery and write back the saved mount state. */
        if (!(sb->s_flags & MS_RDONLY) && !aborted) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        if (!(sb->s_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);

        if (sbi->s_proc) {
                remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
        kobject_del(&sbi->s_kobj);

        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
        ext4_kvfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
#ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->journal_bdev);
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
        brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev);
        kfree(sbi);
}
 901
 902static struct kmem_cache *ext4_inode_cachep;
 903
/*
 * Called inside transaction, so use GFP_NOFS
 *
 * Allocate and initialize the ext4-private part of a new in-core inode.
 * Returns the embedded VFS inode, or NULL on allocation failure.  Fields
 * not initialized here are set up once per slab object in init_once().
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
        struct ext4_inode_info *ei;

        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;

        ei->vfs_inode.i_version = 1;
        spin_lock_init(&ei->i_raw_lock);
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
        /* Empty extent status tree; populated lazily on lookup. */
        ext4_es_init_tree(&ei->i_es_tree);
        rwlock_init(&ei->i_es_lock);
        INIT_LIST_HEAD(&ei->i_es_list);
        ei->i_es_shk_nr = 0;
        ei->i_es_shrink_lblk = 0;
        /* Delayed-allocation reservation accounting starts at zero. */
        ei->i_reserved_data_blocks = 0;
        ei->i_reserved_meta_blocks = 0;
        ei->i_allocated_meta_blocks = 0;
        ei->i_da_metadata_calc_len = 0;
        ei->i_da_metadata_calc_last_lblock = 0;
        spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
#endif
        /* jbd2 inode is attached on first journalled data access. */
        ei->jinode = NULL;
        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
        spin_lock_init(&ei->i_completed_io_lock);
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
        atomic_set(&ei->i_unwritten, 0);
        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);

        return &ei->vfs_inode;
}
 943
 944static int ext4_drop_inode(struct inode *inode)
 945{
 946        int drop = generic_drop_inode(inode);
 947
 948        trace_ext4_drop_inode(inode, drop);
 949        return drop;
 950}
 951
/*
 * RCU callback that actually frees an inode back to the slab cache,
 * once all RCU readers (e.g. RCU-walk path lookup) are done with it.
 */
static void ext4_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);
        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
 957
 958static void ext4_destroy_inode(struct inode *inode)
 959{
 960        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 961                ext4_msg(inode->i_sb, KERN_ERR,
 962                         "Inode %lu (%p): orphan list check failed!",
 963                         inode->i_ino, EXT4_I(inode));
 964                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 965                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 966                                true);
 967                dump_stack();
 968        }
 969        call_rcu(&inode->i_rcu, ext4_i_callback);
 970}
 971
/*
 * Slab constructor: runs once per slab object (not per allocation), so
 * only state that survives free/realloc cycles belongs here.
 */
static void init_once(void *foo)
{
        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;

        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
        init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
}
 982
 983static int __init init_inodecache(void)
 984{
 985        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 986                                             sizeof(struct ext4_inode_info),
 987                                             0, (SLAB_RECLAIM_ACCOUNT|
 988                                                SLAB_MEM_SPREAD|SLAB_ACCOUNT),
 989                                             init_once);
 990        if (ext4_inode_cachep == NULL)
 991                return -ENOMEM;
 992        return 0;
 993}
 994
/* Destroy the inode slab cache at module unload. */
static void destroy_inodecache(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
}
1004
/*
 * Final per-inode teardown on eviction: drop cached buffers and quota
 * references, discard any block preallocations and the extent status
 * cache, and detach the inode from the jbd2 journal if it was attached.
 */
void ext4_clear_inode(struct inode *inode)
{
        invalidate_inode_buffers(inode);
        clear_inode(inode);
        dquot_drop(inode);
        ext4_discard_preallocations(inode);
        /* Drop the whole extent status range for this inode. */
        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
        if (EXT4_I(inode)->jinode) {
                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
                                               EXT4_I(inode)->jinode);
                jbd2_free_inode(EXT4_I(inode)->jinode);
                EXT4_I(inode)->jinode = NULL;
        }
}
1019
1020static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1021                                        u64 ino, u32 generation)
1022{
1023        struct inode *inode;
1024
1025        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1026                return ERR_PTR(-ESTALE);
1027        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1028                return ERR_PTR(-ESTALE);
1029
1030        /* iget isn't really right if the inode is currently unallocated!!
1031         *
1032         * ext4_read_inode will return a bad_inode if the inode had been
1033         * deleted, so we should be safe.
1034         *
1035         * Currently we don't know the generation for parent directory, so
1036         * a generation of 0 means "accept any"
1037         */
1038        inode = ext4_iget_normal(sb, ino);
1039        if (IS_ERR(inode))
1040                return ERR_CAST(inode);
1041        if (generation && inode->i_generation != generation) {
1042                iput(inode);
1043                return ERR_PTR(-ESTALE);
1044        }
1045
1046        return inode;
1047}
1048
/* Export op: decode an NFS file handle into a dentry. */
static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
}
1055
/* Export op: decode the parent directory from an NFS file handle. */
static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
}
1062
1063/*
1064 * Try to release metadata pages (indirect blocks, directories) which are
1065 * mapped via the block device.  Since these pages could have journal heads
1066 * which would prevent try_to_free_buffers() from freeing them, we must use
1067 * jbd2 layer's try_to_free_buffers() function to release them.
1068 */
1069static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1070                                 gfp_t wait)
1071{
1072        journal_t *journal = EXT4_SB(sb)->s_journal;
1073
1074        WARN_ON(PageChecked(page));
1075        if (!page_has_buffers(page))
1076                return 0;
1077        if (journal)
1078                return jbd2_journal_try_to_free_buffers(journal, page,
1079                                                        wait & ~__GFP_WAIT);
1080        return try_to_free_buffers(page);
1081}
1082
#ifdef CONFIG_QUOTA
/* Human-readable name for a quota type, used in error messages. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/* Map a quota type to the matching journaled-quota mount option token. */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))

/* Forward declarations for the quota operation tables below. */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
static int ext4_quota_on_sysfile(struct super_block *sb, int type,
                                 int format_id);
static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
1105
/* dquot callbacks: journal quota updates through ext4's own wrappers. */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space = ext4_get_reserved_space,
        .write_dquot    = ext4_write_dquot,
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
};
1116
/* quotactl ops for visible quota files (quota stored in regular files). */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on       = ext4_quota_on,
        .quota_off      = ext4_quota_off,
        .quota_sync     = dquot_quota_sync,
        .get_info       = dquot_get_dqinfo,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
};
1126
/* quotactl ops when the QUOTA feature stores quota in hidden inodes. */
static const struct quotactl_ops ext4_qctl_sysfile_operations = {
        .quota_on_meta  = ext4_quota_on_sysfile,
        .quota_off      = ext4_quota_off_sysfile,
        .quota_sync     = dquot_quota_sync,
        .get_info       = dquot_get_dqinfo,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
};
1136#endif
1137
/* Superblock operations used when a jbd2 journal is present. */
static const struct super_operations ext4_sops = {
        .alloc_inode    = ext4_alloc_inode,
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
        .freeze_fs      = ext4_freeze,
        .unfreeze_fs    = ext4_unfreeze,
        .statfs         = ext4_statfs,
        .remount_fs     = ext4_remount,
        .show_options   = ext4_show_options,
#ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
#endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
};
1158
/*
 * Superblock operations for journal-less mounts: no freeze/unfreeze and
 * a different sync_fs implementation; otherwise identical to ext4_sops.
 */
static const struct super_operations ext4_nojournal_sops = {
        .alloc_inode    = ext4_alloc_inode,
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .sync_fs        = ext4_sync_fs_nojournal,
        .put_super      = ext4_put_super,
        .statfs         = ext4_statfs,
        .remount_fs     = ext4_remount,
        .show_options   = ext4_show_options,
#ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
#endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
};
1177
/* NFS export operations (file-handle encoding/decoding). */
static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
        .get_parent = ext4_get_parent,
};
1183
/*
 * Mount option tokens.  Opt_removed collects options that were once
 * valid (ext2/3 or older ext4) and are now accepted-but-ignored so old
 * fstabs keep working.  Opt_err must remain the table terminator.
 */
enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum,
};
1205
/*
 * match_token() pattern table mapping mount option strings (with %u/%s
 * argument captures) to tokens.  Several aliases map to the same token
 * (e.g. "bsdgroups" -> Opt_grpid); {Opt_err, NULL} terminates the table.
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_removed, "oldalloc"},
        {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "norecovery"},
        {Opt_noload, "noload"},
        {Opt_removed, "nobh"},
        {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_path, "journal_path=%s"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_nojournal_checksum, "nojournal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        {Opt_dioread_nolock, "dioread_nolock"},
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
};
1288
1289static ext4_fsblk_t get_sb_block(void **data)
1290{
1291        ext4_fsblk_t    sb_block;
1292        char            *options = (char *) *data;
1293
1294        if (!options || strncmp(options, "sb=", 3) != 0)
1295                return 1;       /* Default location */
1296
1297        options += 3;
1298        /* TODO: use simple_strtoll with >32bit ext4 */
1299        sb_block = simple_strtoul(options, &options, 0);
1300        if (*options && *options != ',') {
1301                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1302                       (char *) *data);
1303                return 1;
1304        }
1305        if (*options == ',')
1306                options++;
1307        *data = (void *) options;
1308
1309        return sb_block;
1310}
1311
/* Default I/O priority for the jbd2 commit thread (best-effort, level 3). */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/* Warning template for mount options scheduled for removal. */
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1315
1316#ifdef CONFIG_QUOTA
1317static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1318{
1319        struct ext4_sb_info *sbi = EXT4_SB(sb);
1320        char *qname;
1321        int ret = -1;
1322
1323        if (sb_any_quota_loaded(sb) &&
1324                !sbi->s_qf_names[qtype]) {
1325                ext4_msg(sb, KERN_ERR,
1326                        "Cannot change journaled "
1327                        "quota options when quota turned on");
1328                return -1;
1329        }
1330        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1331                ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
1332                         "when QUOTA feature is enabled");
1333                return -1;
1334        }
1335        qname = match_strdup(args);
1336        if (!qname) {
1337                ext4_msg(sb, KERN_ERR,
1338                        "Not enough memory for storing quotafile name");
1339                return -1;
1340        }
1341        if (sbi->s_qf_names[qtype]) {
1342                if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1343                        ret = 1;
1344                else
1345                        ext4_msg(sb, KERN_ERR,
1346                                 "%s quota file already specified",
1347                                 QTYPE2NAME(qtype));
1348                goto errout;
1349        }
1350        if (strchr(qname, '/')) {
1351                ext4_msg(sb, KERN_ERR,
1352                        "quotafile must be on filesystem root");
1353                goto errout;
1354        }
1355        sbi->s_qf_names[qtype] = qname;
1356        set_opt(sb, QUOTA);
1357        return 1;
1358errout:
1359        kfree(qname);
1360        return ret;
1361}
1362
/*
 * Forget the journaled quota file name for the given quota type
 * ("usrjquota="/"grpjquota=" with an empty value).  Refused while that
 * quota type is active.  Returns 1 on success, -1 on error.
 */
static int clear_qf_name(struct super_block *sb, int qtype)
{

        struct ext4_sb_info *sbi = EXT4_SB(sb);

        if (sb_any_quota_loaded(sb) &&
                sbi->s_qf_names[qtype]) {
                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
                        " when quota turned on");
                return -1;
        }
        /* kfree(NULL) is a no-op, so no need to check for a stored name. */
        kfree(sbi->s_qf_names[qtype]);
        sbi->s_qf_names[qtype] = NULL;
        return 1;
}
1378#endif
1379
/* Flags describing how each entry in ext4_mount_opts is handled. */
#define MOPT_SET        0x0001  /* set mount_opt bits */
#define MOPT_CLEAR      0x0002  /* clear mount_opt bits */
#define MOPT_NOSUPPORT  0x0004  /* recognized but not supported */
#define MOPT_EXPLICIT   0x0008  /* remember option was given explicitly */
#define MOPT_CLEAR_ERR  0x0010  /* clears the errors=... behavior bits */
#define MOPT_GTE0       0x0020  /* numeric argument must be >= 0 */
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       0x0040  /* selects journaled quota format */
#else
/* Without CONFIG_QUOTA all quota options are unsupported. */
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      0x0080  /* data=... journalling mode option */
#define MOPT_NO_EXT2    0x0100  /* invalid when mounted as ext2 */
#define MOPT_NO_EXT3    0x0200  /* invalid when mounted as ext3 */
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     0x0400  /* takes a string, not an int, argument */
1398
/*
 * Per-token handling table consumed by handle_mount_opt(): which
 * mount_opt bits to set/clear and the MOPT_* flags controlling argument
 * validation and ext2/ext3 compatibility.  {Opt_err, 0, 0} terminates.
 */
static const struct mount_opts {
        int     token;
        int     mount_opt;
        int     flags;
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET},
        /* async commit implies journal checksumming. */
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET | MOPT_EXT4_ONLY},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
         MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q},
        {Opt_grpjquota, 0, MOPT_Q},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_err, 0, 0}
};
1484
1485static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1486                            substring_t *args, unsigned long *journal_devnum,
1487                            unsigned int *journal_ioprio, int is_remount)
1488{
1489        struct ext4_sb_info *sbi = EXT4_SB(sb);
1490        const struct mount_opts *m;
1491        kuid_t uid;
1492        kgid_t gid;
1493        int arg = 0;
1494
1495#ifdef CONFIG_QUOTA
1496        if (token == Opt_usrjquota)
1497                return set_qf_name(sb, USRQUOTA, &args[0]);
1498        else if (token == Opt_grpjquota)
1499                return set_qf_name(sb, GRPQUOTA, &args[0]);
1500        else if (token == Opt_offusrjquota)
1501                return clear_qf_name(sb, USRQUOTA);
1502        else if (token == Opt_offgrpjquota)
1503                return clear_qf_name(sb, GRPQUOTA);
1504#endif
1505        switch (token) {
1506        case Opt_noacl:
1507        case Opt_nouser_xattr:
1508                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1509                break;
1510        case Opt_sb:
1511                return 1;       /* handled by get_sb_block() */
1512        case Opt_removed:
1513                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1514                return 1;
1515        case Opt_abort:
1516                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1517                return 1;
1518        case Opt_i_version:
1519                sb->s_flags |= MS_I_VERSION;
1520                return 1;
1521        }
1522
1523        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1524                if (token == m->token)
1525                        break;
1526
1527        if (m->token == Opt_err) {
1528                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1529                         "or missing value", opt);
1530                return -1;
1531        }
1532
1533        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1534                ext4_msg(sb, KERN_ERR,
1535                         "Mount option \"%s\" incompatible with ext2", opt);
1536                return -1;
1537        }
1538        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1539                ext4_msg(sb, KERN_ERR,
1540                         "Mount option \"%s\" incompatible with ext3", opt);
1541                return -1;
1542        }
1543
1544        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1545                return -1;
1546        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1547                return -1;
1548        if (m->flags & MOPT_EXPLICIT)
1549                set_opt2(sb, EXPLICIT_DELALLOC);
1550        if (m->flags & MOPT_CLEAR_ERR)
1551                clear_opt(sb, ERRORS_MASK);
1552        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1553                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1554                         "options when quota turned on");
1555                return -1;
1556        }
1557
1558        if (m->flags & MOPT_NOSUPPORT) {
1559                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1560        } else if (token == Opt_commit) {
1561                if (arg == 0)
1562                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1563                sbi->s_commit_interval = HZ * arg;
1564        } else if (token == Opt_max_batch_time) {
1565                sbi->s_max_batch_time = arg;
1566        } else if (token == Opt_min_batch_time) {
1567                sbi->s_min_batch_time = arg;
1568        } else if (token == Opt_inode_readahead_blks) {
1569                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1570                        ext4_msg(sb, KERN_ERR,
1571                                 "EXT4-fs: inode_readahead_blks must be "
1572                                 "0 or a power of 2 smaller than 2^31");
1573                        return -1;
1574                }
1575                sbi->s_inode_readahead_blks = arg;
1576        } else if (token == Opt_init_itable) {
1577                set_opt(sb, INIT_INODE_TABLE);
1578                if (!args->from)
1579                        arg = EXT4_DEF_LI_WAIT_MULT;
1580                sbi->s_li_wait_mult = arg;
1581        } else if (token == Opt_max_dir_size_kb) {
1582                sbi->s_max_dir_size_kb = arg;
1583        } else if (token == Opt_stripe) {
1584                sbi->s_stripe = arg;
1585        } else if (token == Opt_resuid) {
1586                uid = make_kuid(current_user_ns(), arg);
1587                if (!uid_valid(uid)) {
1588                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1589                        return -1;
1590                }
1591                sbi->s_resuid = uid;
1592        } else if (token == Opt_resgid) {
1593                gid = make_kgid(current_user_ns(), arg);
1594                if (!gid_valid(gid)) {
1595                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1596                        return -1;
1597                }
1598                sbi->s_resgid = gid;
1599        } else if (token == Opt_journal_dev) {
1600                if (is_remount) {
1601                        ext4_msg(sb, KERN_ERR,
1602                                 "Cannot specify journal on remount");
1603                        return -1;
1604                }
1605                *journal_devnum = arg;
1606        } else if (token == Opt_journal_path) {
1607                char *journal_path;
1608                struct inode *journal_inode;
1609                struct path path;
1610                int error;
1611
1612                if (is_remount) {
1613                        ext4_msg(sb, KERN_ERR,
1614                                 "Cannot specify journal on remount");
1615                        return -1;
1616                }
1617                journal_path = match_strdup(&args[0]);
1618                if (!journal_path) {
1619                        ext4_msg(sb, KERN_ERR, "error: could not dup "
1620                                "journal device string");
1621                        return -1;
1622                }
1623
1624                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1625                if (error) {
1626                        ext4_msg(sb, KERN_ERR, "error: could not find "
1627                                "journal device path: error %d", error);
1628                        kfree(journal_path);
1629                        return -1;
1630                }
1631
1632                journal_inode = path.dentry->d_inode;
1633                if (!S_ISBLK(journal_inode->i_mode)) {
1634                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
1635                                "is not a block device", journal_path);
1636                        path_put(&path);
1637                        kfree(journal_path);
1638                        return -1;
1639                }
1640
1641                *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1642                path_put(&path);
1643                kfree(journal_path);
1644        } else if (token == Opt_journal_ioprio) {
1645                if (arg > 7) {
1646                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1647                                 " (must be 0-7)");
1648                        return -1;
1649                }
1650                *journal_ioprio =
1651                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1652        } else if (m->flags & MOPT_DATAJ) {
1653                if (is_remount) {
1654                        if (!sbi->s_journal)
1655                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1656                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1657                                ext4_msg(sb, KERN_ERR,
1658                                         "Cannot change data mode on remount");
1659                                return -1;
1660                        }
1661                } else {
1662                        clear_opt(sb, DATA_FLAGS);
1663                        sbi->s_mount_opt |= m->mount_opt;
1664                }
1665#ifdef CONFIG_QUOTA
1666        } else if (m->flags & MOPT_QFMT) {
1667                if (sb_any_quota_loaded(sb) &&
1668                    sbi->s_jquota_fmt != m->mount_opt) {
1669                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1670                                 "quota options when quota turned on");
1671                        return -1;
1672                }
1673                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1674                                               EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1675                        ext4_msg(sb, KERN_ERR,
1676                                 "Cannot set journaled quota options "
1677                                 "when QUOTA feature is enabled");
1678                        return -1;
1679                }
1680                sbi->s_jquota_fmt = m->mount_opt;
1681#endif
1682        } else if (token == Opt_dax) {
1683#ifdef CONFIG_FS_DAX
1684                ext4_msg(sb, KERN_WARNING,
1685                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1686                        sbi->s_mount_opt |= m->mount_opt;
1687#else
1688                ext4_msg(sb, KERN_INFO, "dax option not supported");
1689                return -1;
1690#endif
1691        } else if (token == Opt_data_err_abort) {
1692                sbi->s_mount_opt |= m->mount_opt;
1693        } else if (token == Opt_data_err_ignore) {
1694                sbi->s_mount_opt &= ~m->mount_opt;
1695        } else {
1696                if (!args->from)
1697                        arg = 1;
1698                if (m->flags & MOPT_CLEAR)
1699                        arg = !arg;
1700                else if (unlikely(!(m->flags & MOPT_SET))) {
1701                        ext4_msg(sb, KERN_WARNING,
1702                                 "buggy handling of option %s", opt);
1703                        WARN_ON(1);
1704                        return -1;
1705                }
1706                if (arg != 0)
1707                        sbi->s_mount_opt |= m->mount_opt;
1708                else
1709                        sbi->s_mount_opt &= ~m->mount_opt;
1710        }
1711        return 1;
1712}
1713
/*
 * Parse the comma-separated mount option string.  Each token is handed to
 * handle_mount_opt(); afterwards, combinations that can only be checked once
 * all options are known (quota format mixing, dioread_nolock block size,
 * journal_async_commit vs data=ordered) are validated.
 *
 * Returns 1 on success, 0 on any parse or consistency failure.
 */
static int parse_options(char *options, struct super_block *sb,
			 unsigned long *journal_devnum,
			 unsigned int *journal_ioprio,
			 int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int token;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		/*
		 * Initialize args struct so we know whether arg was
		 * found; some options take optional arguments.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, tokens, args);
		if (handle_mount_opt(sb, p, token, args, journal_devnum,
				     journal_ioprio, is_remount) < 0)
			return 0;
	}
#ifdef CONFIG_QUOTA
	/*
	 * The QUOTA ro-compat feature (internal quota files) excludes the
	 * old-style usrquota/grpquota mount options.
	 */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
		ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
			 "feature is enabled");
		return 0;
	}
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		/* Journaled quota (usrjquota=/grpjquota=) supersedes the
		 * matching old-style option for the same quota type... */
		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
			clear_opt(sb, USRQUOTA);

		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
			clear_opt(sb, GRPQUOTA);

		/* ...but mixing the two formats across quota types is an
		 * error. */
		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
			ext4_msg(sb, KERN_ERR, "old and new quota "
					"format mixing");
			return 0;
		}

		if (!sbi->s_jquota_fmt) {
			ext4_msg(sb, KERN_ERR, "journaled quota format "
					"not specified");
			return 0;
		}
	}
#endif
	if (test_opt(sb, DIOREAD_NOLOCK)) {
		int blocksize =
			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);

		/* dioread_nolock requires fs block size >= page size */
		if (blocksize < PAGE_CACHE_SIZE) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "dioread_nolock if block size != PAGE_SIZE");
			return 0;
		}
	}
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
			 "in data=ordered mode");
		return 0;
	}
	return 1;
}
1785
/*
 * Emit the quota-related mount options (jqfmt=, usrjquota=, grpjquota=)
 * into the seq_file.  Compiled out when CONFIG_QUOTA is disabled.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int fmt = sbi->s_jquota_fmt;

	if (fmt) {
		const char *name;

		if (fmt == QFMT_VFS_OLD)
			name = "vfsold";
		else if (fmt == QFMT_VFS_V0)
			name = "vfsv0";
		else if (fmt == QFMT_VFS_V1)
			name = "vfsv1";
		else
			name = "";
		seq_printf(seq, ",jqfmt=%s", name);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
	if (sbi->s_qf_names[GRPQUOTA])
		seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1816
1817static const char *token2str(int token)
1818{
1819        const struct match_token *t;
1820
1821        for (t = tokens; t->token != Opt_err; t++)
1822                if (t->token == token && !strchr(t->pattern, '='))
1823                        break;
1824        return t->pattern;
1825}
1826
/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
			      int nodefs)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	/* nodefs != 0: dump everything (per-sb options file); otherwise only
	 * what differs from the per-sb defaults (for /proc/mounts) */
	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
	const struct mount_opts *m;
	char sep = nodefs ? '\n' : ',';

/* Emit "<sep>opt" or "<sep>opt=arg"; sep is ',' or '\n' as chosen above */
#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)

	if (sbi->s_sb_block != 1)	/* non-default superblock location */
		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);

	/* Boolean options driven by the MOPT_SET/MOPT_CLEAR table */
	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
		int want_set = m->flags & MOPT_SET;
		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
		    (m->flags & MOPT_CLEAR_ERR))
			continue;
		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
			continue; /* skip if same as the default */
		if ((want_set &&
		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
		    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
			continue; /* select Opt_noFoo vs Opt_Foo */
		SEQ_OPTS_PRINT("%s", token2str(m->token));
	}

	/* resuid/resgid when different from both compiled-in and on-disk
	 * defaults */
	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
		SEQ_OPTS_PRINT("resuid=%u",
				from_kuid_munged(&init_user_ns, sbi->s_resuid));
	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
		SEQ_OPTS_PRINT("resgid=%u",
				from_kgid_munged(&init_user_ns, sbi->s_resgid));
	/* def_errors == -1 forces all errors= variants to be shown */
	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
		SEQ_OPTS_PUTS("errors=remount-ro");
	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
		SEQ_OPTS_PUTS("errors=continue");
	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
		SEQ_OPTS_PUTS("errors=panic");
	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
	if (sb->s_flags & MS_I_VERSION)
		SEQ_OPTS_PUTS("i_version");
	if (nodefs || sbi->s_stripe)
		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
	/* data= only when the journaling mode differs from the default */
	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			SEQ_OPTS_PUTS("data=journal");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			SEQ_OPTS_PUTS("data=ordered");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
			SEQ_OPTS_PUTS("data=writeback");
	}
	if (nodefs ||
	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
			       sbi->s_inode_readahead_blks);

	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
	if (nodefs || sbi->s_max_dir_size_kb)
		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
	if (test_opt(sb, DATA_ERR_ABORT))
		SEQ_OPTS_PUTS("data_err=abort");

	ext4_show_quota_options(seq, sb);
	return 0;
}
1910
1911static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1912{
1913        return _ext4_show_options(seq, root->d_sb, 0);
1914}
1915
1916static int options_seq_show(struct seq_file *seq, void *offset)
1917{
1918        struct super_block *sb = seq->private;
1919        int rc;
1920
1921        seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1922        rc = _ext4_show_options(seq, sb, 1);
1923        seq_puts(seq, "\n");
1924        return rc;
1925}
1926
1927static int options_open_fs(struct inode *inode, struct file *file)
1928{
1929        return single_open(file, options_seq_show, PDE_DATA(inode));
1930}
1931
/* File operations for the per-sb procfs "options" file (single_open seq). */
static const struct file_operations ext4_seq_options_fops = {
	.owner = THIS_MODULE,
	.open = options_open_fs,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1939
/*
 * Finish setting up the superblock at mount time: warn about states that
 * merit an e2fsck run, bump the mount count and mount time, and (when a
 * journal is present) set the RECOVER incompat feature.  Returns 0, or
 * MS_RDONLY when the revision level forces a read-only mount.
 */
static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
			    int read_only)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int res = 0;

	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
		ext4_msg(sb, KERN_ERR, "revision level too high, "
			 "forcing read-only mode");
		res = MS_RDONLY;
	}
	if (read_only)
		goto done;	/* read-only: don't modify the superblock */
	if (!(sbi->s_mount_state & EXT4_VALID_FS))
		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
			 "running e2fsck is recommended");
	else if (sbi->s_mount_state & EXT4_ERROR_FS)
		ext4_msg(sb, KERN_WARNING,
			 "warning: mounting fs with errors, "
			 "running e2fsck is recommended");
	/* s_max_mnt_count is signed on disk; <= 0 disables the check */
	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
		ext4_msg(sb, KERN_WARNING,
			 "warning: maximal mount count reached, "
			 "running e2fsck is recommended");
	else if (le32_to_cpu(es->s_checkinterval) &&
		(le32_to_cpu(es->s_lastcheck) +
			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
		ext4_msg(sb, KERN_WARNING,
			 "warning: checktime reached, "
			 "running e2fsck is recommended");
	if (!sbi->s_journal)
		/* no journal: clear VALID_FS so a crash forces a check */
		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
	le16_add_cpu(&es->s_mnt_count, 1);
	es->s_mtime = cpu_to_le32(get_seconds());
	ext4_update_dynamic_rev(sb);
	if (sbi->s_journal)
		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);

	ext4_commit_super(sb, 1);
done:
	if (test_opt(sb, DEBUG))
		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
			sb->s_blocksize,
			sbi->s_groups_count,
			EXT4_BLOCKS_PER_GROUP(sb),
			EXT4_INODES_PER_GROUP(sb),
			sbi->s_mount_opt, sbi->s_mount_opt2);

	cleancache_init_fs(sb);
	return res;
}
1996
1997int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1998{
1999        struct ext4_sb_info *sbi = EXT4_SB(sb);
2000        struct flex_groups *new_groups;
2001        int size;
2002
2003        if (!sbi->s_log_groups_per_flex)
2004                return 0;
2005
2006        size = ext4_flex_group(sbi, ngroup - 1) + 1;
2007        if (size <= sbi->s_flex_groups_allocated)
2008                return 0;
2009
2010        size = roundup_pow_of_two(size * sizeof(struct flex_groups));
2011        new_groups = kvzalloc(size, GFP_KERNEL);
2012        if (!new_groups) {
2013                ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
2014                         size / (int) sizeof(struct flex_groups));
2015                return -ENOMEM;
2016        }
2017
2018        if (sbi->s_flex_groups) {
2019                memcpy(new_groups, sbi->s_flex_groups,
2020                       (sbi->s_flex_groups_allocated *
2021                        sizeof(struct flex_groups)));
2022                ext4_kvfree(sbi->s_flex_groups);
2023        }
2024        sbi->s_flex_groups = new_groups;
2025        sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
2026        return 0;
2027}
2028
2029static int ext4_fill_flex_info(struct super_block *sb)
2030{
2031        struct ext4_sb_info *sbi = EXT4_SB(sb);
2032        struct ext4_group_desc *gdp = NULL;
2033        ext4_group_t flex_group;
2034        int i, err;
2035
2036        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2037        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2038                sbi->s_log_groups_per_flex = 0;
2039                return 1;
2040        }
2041
2042        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2043        if (err)
2044                goto failed;
2045
2046        for (i = 0; i < sbi->s_groups_count; i++) {
2047                gdp = ext4_get_group_desc(sb, i, NULL);
2048
2049                flex_group = ext4_flex_group(sbi, i);
2050                atomic_add(ext4_free_inodes_count(sb, gdp),
2051                           &sbi->s_flex_groups[flex_group].free_inodes);
2052                atomic64_add(ext4_free_group_clusters(sb, gdp),
2053                             &sbi->s_flex_groups[flex_group].free_clusters);
2054                atomic_add(ext4_used_dirs_count(sb, gdp),
2055                           &sbi->s_flex_groups[flex_group].used_dirs);
2056        }
2057
2058        return 1;
2059failed:
2060        return 0;
2061}
2062
/*
 * Compute the checksum of a block group descriptor.  With metadata_csum the
 * checksum is the low 16 bits of the crc32c over (fs csum seed, group
 * number, descriptor with bg_checksum zeroed); otherwise, with GDT_CSUM,
 * it is a crc16 over the fs uuid, group number and descriptor, skipping
 * the checksum field itself.  Returns 0 when neither feature is enabled.
 */
static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
				   struct ext4_group_desc *gdp)
{
	int offset;
	__u16 crc = 0;
	__le32 le_group = cpu_to_le32(block_group);

	if (ext4_has_metadata_csum(sbi->s_sb)) {
		/* Use new metadata_csum algorithm */
		__le16 save_csum;
		__u32 csum32;

		/* zero bg_checksum so it doesn't checksum itself, then
		 * restore it afterwards */
		save_csum = gdp->bg_checksum;
		gdp->bg_checksum = 0;
		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				     sizeof(le_group));
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
				     sbi->s_desc_size);
		gdp->bg_checksum = save_csum;

		crc = csum32 & 0xFFFF;
		goto out;
	}

	/* old crc16 code */
	if (!(sbi->s_es->s_feature_ro_compat &
	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
		return 0;

	offset = offsetof(struct ext4_group_desc, bg_checksum);

	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
	crc = crc16(crc, (__u8 *)gdp, offset);
	offset += sizeof(gdp->bg_checksum); /* skip checksum */
	/* for checksum of struct ext4_group_desc do the rest...*/
	if ((sbi->s_es->s_feature_incompat &
	     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
	    offset < le16_to_cpu(sbi->s_es->s_desc_size))
		crc = crc16(crc, (__u8 *)gdp + offset,
			    le16_to_cpu(sbi->s_es->s_desc_size) -
				offset);

out:
	return cpu_to_le16(crc);
}
2109
2110int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2111                                struct ext4_group_desc *gdp)
2112{
2113        if (ext4_has_group_desc_csum(sb) &&
2114            (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
2115                                                      block_group, gdp)))
2116                return 0;
2117
2118        return 1;
2119}
2120
2121void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2122                              struct ext4_group_desc *gdp)
2123{
2124        if (!ext4_has_group_desc_csum(sb))
2125                return;
2126        gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2127}
2128
/*
 * Called at mount-time, super-block is locked.
 *
 * Sanity-check every group descriptor: the block bitmap, inode bitmap and
 * inode table must lie inside their group and must not overlap the
 * superblock or the group descriptor blocks, and the descriptor checksum
 * must verify.  Overlap/checksum problems are fatal only for a read-write
 * mount.  On return, *first_not_zeroed (if non-NULL) receives the first
 * group whose inode table has not been zeroed (used by lazy itable init).
 * Returns 1 when the descriptors look sane, 0 otherwise.
 */
static int ext4_check_descriptors(struct super_block *sb,
				  ext4_fsblk_t sb_block,
				  ext4_group_t *first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
	ext4_fsblk_t last_block;
	/* last block occupied by the group descriptor table */
	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
	ext4_fsblk_t block_bitmap;
	ext4_fsblk_t inode_bitmap;
	ext4_fsblk_t inode_table;
	int flexbg_flag = 0;
	ext4_group_t i, grp = sbi->s_groups_count;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
		flexbg_flag = 1;

	ext4_debug("Checking group descriptors");

	for (i = 0; i < sbi->s_groups_count; i++) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);

		/* With flex_bg, metadata may live anywhere in the fs, so
		 * the valid range is the whole filesystem. */
		if (i == sbi->s_groups_count - 1 || flexbg_flag)
			last_block = ext4_blocks_count(sbi->s_es) - 1;
		else
			last_block = first_block +
				(EXT4_BLOCKS_PER_GROUP(sb) - 1);

		/* Remember the first group whose itable isn't zeroed yet */
		if ((grp == sbi->s_groups_count) &&
		   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			grp = i;

		block_bitmap = ext4_block_bitmap(sb, gdp);
		if (block_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "superblock", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (block_bitmap >= sb_block + 1 &&
		    block_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (block_bitmap < first_block || block_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Block bitmap for group %u not in group "
			       "(block %llu)!", i, block_bitmap);
			return 0;
		}
		inode_bitmap = ext4_inode_bitmap(sb, gdp);
		if (inode_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "superblock", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (inode_bitmap >= sb_block + 1 &&
		    inode_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (inode_bitmap < first_block || inode_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode bitmap for group %u not in group "
			       "(block %llu)!", i, inode_bitmap);
			return 0;
		}
		inode_table = ext4_inode_table(sb, gdp);
		if (inode_table == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "superblock", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (inode_table >= sb_block + 1 &&
		    inode_table <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "block group descriptors", i);
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
		}
		if (inode_table < first_block ||
		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode table for group %u not in group "
			       "(block %llu)!", i, inode_table);
			return 0;
		}
		/* Checksum verification needs the group lock held */
		ext4_lock_group(sb, i);
		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Checksum for group %u failed (%u!=%u)",
				 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
				     gdp)), le16_to_cpu(gdp->bg_checksum));
			if (!(sb->s_flags & MS_RDONLY)) {
				ext4_unlock_group(sb, i);
				return 0;
			}
		}
		ext4_unlock_group(sb, i);
		if (!flexbg_flag)
			first_block += EXT4_BLOCKS_PER_GROUP(sb);
	}
	if (NULL != first_not_zeroed)
		*first_not_zeroed = grp;
	return 1;
}
2248
2249/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2250 * the superblock) which were deleted from all directories, but held open by
2251 * a process at the time of a crash.  We walk the list and try to delete these
2252 * inodes at recovery time (only with a read-write filesystem).
2253 *
2254 * In order to keep the orphan inode chain consistent during traversal (in
2255 * case of crash during recovery), we link each inode into the superblock
2256 * orphan list_head and handle it the same way as an inode deletion during
2257 * normal operation (which journals the operations for us).
2258 *
2259 * We only do an iget() and an iput() on each inode, which is very safe if we
2260 * accidentally point at an in-use or already deleted inode.  The worst that
2261 * can happen in this case is that we get a "bit already cleared" message from
2262 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2263 * e2fsck was run on this filesystem, and it must have already done the orphan
2264 * inode cleanup for us, so we can safely abort without any further action.
2265 */
static void ext4_orphan_cleanup(struct super_block *sb,
				struct ext4_super_block *es)
{
	unsigned int s_flags = sb->s_flags;	/* saved to restore MS_RDONLY */
	int nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
	int quota_update = 0;	/* set if any quota was enabled only for cleanup */
	int i;
#endif
	if (!es->s_last_orphan) {
		jbd_debug(4, "no orphan inodes to clean up\n");
		return;
	}

	if (bdev_read_only(sb->s_bdev)) {
		ext4_msg(sb, KERN_ERR, "write access "
			"unavailable, skipping orphan cleanup");
		return;
	}

	/* Check if feature set would not allow a r/w mount */
	if (!ext4_feature_set_ok(sb, 0)) {
		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
			 "unknown ROCOMPAT features");
		return;
	}

	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
		/* don't clear list on RO mount w/ errors */
		if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
				  "clearing orphan list.\n");
			es->s_last_orphan = 0;
		}
		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
		return;
	}

	/* Temporarily go read-write; the saved s_flags is restored on exit. */
	if (s_flags & MS_RDONLY) {
		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
		sb->s_flags &= ~MS_RDONLY;
	}
#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sb->s_flags |= MS_ACTIVE;

	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
	 */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    (s_flags & MS_RDONLY))
	{
		int ret = ext4_enable_quotas(sb);

		if (!ret)
			quota_update = 1;
		else
			ext4_msg(sb, KERN_ERR,
				"Cannot turn on quotas: error %d", ret);
	}

	/* Turn on journaled quotas used for old style */
	for (i = 0; i < MAXQUOTAS; i++) {
		if (EXT4_SB(sb)->s_qf_names[i]) {
			int ret = ext4_quota_on_mount(sb, i);

			if (!ret)
				quota_update = 1;
			else
				ext4_msg(sb, KERN_ERR,
					"Cannot turn on journaled "
					"quota: type %d: error %d", i, ret);
		}
	}
#endif

	/*
	 * NOTE(review): each pass handles the inode at the head of the
	 * on-disk orphan chain; the iput() below is presumably what
	 * advances es->s_last_orphan (via orphan deletion) -- confirm
	 * against ext4_orphan_del().
	 */
	while (es->s_last_orphan) {
		struct inode *inode;

		/*
		 * We may have encountered an error during cleanup; if
		 * so, skip the rest.
		 */
		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
			jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
			es->s_last_orphan = 0;
			break;
		}

		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
		if (IS_ERR(inode)) {
			es->s_last_orphan = 0;
			break;
		}

		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
		dquot_initialize(inode);
		if (inode->i_nlink) {
			/* Still linked: this orphan is a pending truncate. */
			if (test_opt(sb, DEBUG))
				ext4_msg(sb, KERN_DEBUG,
					"%s: truncating inode %lu to %lld bytes",
					__func__, inode->i_ino, inode->i_size);
			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
				  inode->i_ino, inode->i_size);
			mutex_lock(&inode->i_mutex);
			truncate_inode_pages(inode->i_mapping, inode->i_size);
			ext4_truncate(inode);
			mutex_unlock(&inode->i_mutex);
			nr_truncates++;
		} else {
			/* Unlinked: dropping the last reference deletes it. */
			if (test_opt(sb, DEBUG))
				ext4_msg(sb, KERN_DEBUG,
					"%s: deleting unreferenced inode %lu",
					__func__, inode->i_ino);
			jbd_debug(2, "deleting unreferenced inode %lu\n",
				  inode->i_ino);
			nr_orphans++;
		}
		iput(inode);  /* The delete magic happens here! */
	}

#define PLURAL(x) (x), ((x) == 1) ? "" : "s"

	if (nr_orphans)
		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
		       PLURAL(nr_orphans));
	if (nr_truncates)
		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
		       PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
	/* Turn off quotas if they were enabled for orphan cleanup */
	if (quota_update) {
		for (i = 0; i < MAXQUOTAS; i++) {
			if (sb_dqopt(sb)->files[i])
				dquot_quota_off(sb, i);
		}
	}
#endif
	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
}
2407
2408/*
2409 * Maximal extent format file size.
2410 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2411 * extent format containers, within a sector_t, and within i_blocks
2412 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2413 * so that won't be a limiting factor.
2414 *
2415 * However there is other limiting factor. We do store extents in the form
2416 * of starting block and length, hence the resulting length of the extent
2417 * covering maximum file size must fit into on-disk format containers as
2418 * well. Given that length is always by 1 unit bigger than max unit (because
2419 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2420 *
2421 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2422 */
2423static loff_t ext4_max_size(int blkbits, int has_huge_files)
2424{
2425        loff_t res;
2426        loff_t upper_limit = MAX_LFS_FILESIZE;
2427
2428        /* small i_blocks in vfs inode? */
2429        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2430                /*
2431                 * CONFIG_LBDAF is not enabled implies the inode
2432                 * i_block represent total blocks in 512 bytes
2433                 * 32 == size of vfs inode i_blocks * 8
2434                 */
2435                upper_limit = (1LL << 32) - 1;
2436
2437                /* total blocks in file system block size */
2438                upper_limit >>= (blkbits - 9);
2439                upper_limit <<= blkbits;
2440        }
2441
2442        /*
2443         * 32-bit extent-start container, ee_block. We lower the maxbytes
2444         * by one fs block, so ee_len can cover the extent of maximum file
2445         * size
2446         */
2447        res = (1LL << 32) - 1;
2448        res <<= blkbits;
2449
2450        /* Sanity check against vm- & vfs- imposed limits */
2451        if (res > upper_limit)
2452                res = upper_limit;
2453
2454        return res;
2455}
2456
2457/*
2458 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2459 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2460 * We need to be 1 filesystem block less than the 2^48 sector limit.
2461 */
2462static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2463{
2464        loff_t res = EXT4_NDIR_BLOCKS;
2465        int meta_blocks;
2466        loff_t upper_limit;
2467        /* This is calculated to be the largest file size for a dense, block
2468         * mapped file such that the file's total number of 512-byte sectors,
2469         * including data and all indirect blocks, does not exceed (2^48 - 1).
2470         *
2471         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2472         * number of 512-byte sectors of the file.
2473         */
2474
2475        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2476                /*
2477                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2478                 * the inode i_block field represents total file blocks in
2479                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2480                 */
2481                upper_limit = (1LL << 32) - 1;
2482
2483                /* total blocks in file system block size */
2484                upper_limit >>= (bits - 9);
2485
2486        } else {
2487                /*
2488                 * We use 48 bit ext4_inode i_blocks
2489                 * With EXT4_HUGE_FILE_FL set the i_blocks
2490                 * represent total number of blocks in
2491                 * file system block size
2492                 */
2493                upper_limit = (1LL << 48) - 1;
2494
2495        }
2496
2497        /* indirect blocks */
2498        meta_blocks = 1;
2499        /* double indirect blocks */
2500        meta_blocks += 1 + (1LL << (bits-2));
2501        /* tripple indirect blocks */
2502        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2503
2504        upper_limit -= meta_blocks;
2505        upper_limit <<= bits;
2506
2507        res += 1LL << (bits-2);
2508        res += 1LL << (2*(bits-2));
2509        res += 1LL << (3*(bits-2));
2510        res <<= bits;
2511        if (res > upper_limit)
2512                res = upper_limit;
2513
2514        if (res > MAX_LFS_FILESIZE)
2515                res = MAX_LFS_FILESIZE;
2516
2517        return res;
2518}
2519
2520static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2521                                   ext4_fsblk_t logical_sb_block, int nr)
2522{
2523        struct ext4_sb_info *sbi = EXT4_SB(sb);
2524        ext4_group_t bg, first_meta_bg;
2525        int has_super = 0;
2526
2527        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2528
2529        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2530            nr < first_meta_bg)
2531                return logical_sb_block + nr + 1;
2532        bg = sbi->s_desc_per_block * nr;
2533        if (ext4_bg_has_super(sb, bg))
2534                has_super = 1;
2535
2536        /*
2537         * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2538         * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2539         * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2540         * compensate.
2541         */
2542        if (sb->s_blocksize == 1024 && nr == 0 &&
2543            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2544                has_super++;
2545
2546        return (has_super + ext4_group_first_block_no(sb, bg));
2547}
2548
2549/**
2550 * ext4_get_stripe_size: Get the stripe size.
2551 * @sbi: In memory super block info
2552 *
2553 * If we have specified it via mount option, then
2554 * use the mount option value. If the value specified at mount time is
2555 * greater than the blocks per group use the super block value.
2556 * If the super block value is greater than blocks per group return 0.
2557 * Allocator needs it be less than blocks per group.
2558 *
2559 */
2560static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2561{
2562        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2563        unsigned long stripe_width =
2564                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2565        int ret;
2566
2567        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2568                ret = sbi->s_stripe;
2569        else if (stripe_width <= sbi->s_blocks_per_group)
2570                ret = stripe_width;
2571        else if (stride <= sbi->s_blocks_per_group)
2572                ret = stride;
2573        else
2574                ret = 0;
2575
2576        /*
2577         * If the stripe width is 1, this makes no sense and
2578         * we set it to 0 to turn off stripe handling code.
2579         */
2580        if (ret <= 1)
2581                ret = 0;
2582
2583        return ret;
2584}
2585
/* sysfs support */
2587
/*
 * One ext4 sysfs attribute.  The show()/store() handlers receive the
 * owning ext4_sb_info; @u carries either a byte offset locating the
 * backing field (used by sbi_ui_* / es_ui_* handlers) or a fixed value
 * reported for deprecated attributes.
 */
struct ext4_attr {
	struct attribute attr;
	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
			 const char *, size_t);
	union {
		int offset;		/* offsetof() the backing field */
		int deprecated_val;	/* canned value to report */
	} u;
};
2598
2599static int parse_strtoull(const char *buf,
2600                unsigned long long max, unsigned long long *value)
2601{
2602        int ret;
2603
2604        ret = kstrtoull(skip_spaces(buf), 0, value);
2605        if (!ret && *value > max)
2606                ret = -EINVAL;
2607        return ret;
2608}
2609
2610static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2611                                              struct ext4_sb_info *sbi,
2612                                              char *buf)
2613{
2614        return snprintf(buf, PAGE_SIZE, "%llu\n",
2615                (s64) EXT4_C2B(sbi,
2616                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2617}
2618
2619static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2620                                         struct ext4_sb_info *sbi, char *buf)
2621{
2622        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2623
2624        if (!sb->s_bdev->bd_part)
2625                return snprintf(buf, PAGE_SIZE, "0\n");
2626        return snprintf(buf, PAGE_SIZE, "%lu\n",
2627                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2628                         sbi->s_sectors_written_start) >> 1);
2629}
2630
2631static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2632                                          struct ext4_sb_info *sbi, char *buf)
2633{
2634        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2635
2636        if (!sb->s_bdev->bd_part)
2637                return snprintf(buf, PAGE_SIZE, "0\n");
2638        return snprintf(buf, PAGE_SIZE, "%llu\n",
2639                        (unsigned long long)(sbi->s_kbytes_written +
2640                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2641                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2642}
2643
2644static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2645                                          struct ext4_sb_info *sbi,
2646                                          const char *buf, size_t count)
2647{
2648        unsigned long t;
2649        int ret;
2650
2651        ret = kstrtoul(skip_spaces(buf), 0, &t);
2652        if (ret)
2653                return ret;
2654
2655        if (t && (!is_power_of_2(t) || t > 0x40000000))
2656                return -EINVAL;
2657
2658        sbi->s_inode_readahead_blks = t;
2659        return count;
2660}
2661
2662static ssize_t sbi_ui_show(struct ext4_attr *a,
2663                           struct ext4_sb_info *sbi, char *buf)
2664{
2665        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2666
2667        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2668}
2669
2670static ssize_t sbi_ui_store(struct ext4_attr *a,
2671                            struct ext4_sb_info *sbi,
2672                            const char *buf, size_t count)
2673{
2674        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2675        unsigned long t;
2676        int ret;
2677
2678        ret = kstrtoul(skip_spaces(buf), 0, &t);
2679        if (ret)
2680                return ret;
2681        *ui = t;
2682        return count;
2683}
2684
2685static ssize_t es_ui_show(struct ext4_attr *a,
2686                           struct ext4_sb_info *sbi, char *buf)
2687{
2688
2689        unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2690                           a->u.offset);
2691
2692        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2693}
2694
2695static ssize_t reserved_clusters_show(struct ext4_attr *a,
2696                                  struct ext4_sb_info *sbi, char *buf)
2697{
2698        return snprintf(buf, PAGE_SIZE, "%llu\n",
2699                (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
2700}
2701
2702static ssize_t reserved_clusters_store(struct ext4_attr *a,
2703                                   struct ext4_sb_info *sbi,
2704                                   const char *buf, size_t count)
2705{
2706        unsigned long long val;
2707        int ret;
2708
2709        if (parse_strtoull(buf, -1ULL, &val))
2710                return -EINVAL;
2711        ret = ext4_reserve_clusters(sbi, val);
2712
2713        return ret ? ret : count;
2714}
2715
2716static ssize_t trigger_test_error(struct ext4_attr *a,
2717                                  struct ext4_sb_info *sbi,
2718                                  const char *buf, size_t count)
2719{
2720        int len = count;
2721
2722        if (!capable(CAP_SYS_ADMIN))
2723                return -EPERM;
2724
2725        if (len && buf[len-1] == '\n')
2726                len--;
2727
2728        if (len)
2729                ext4_error(sbi->s_sb, "%.*s", len, buf);
2730        return count;
2731}
2732
/* Deprecated attributes report a fixed value (a->u.deprecated_val). */
static ssize_t sbi_deprecated_show(struct ext4_attr *a,
				   struct ext4_sb_info *sbi, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
}
2738
/*
 * Declare an attribute whose handlers locate their field via a byte
 * offset into struct ext4_sb_info.
 */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.u = {							\
		.offset = offsetof(struct ext4_sb_info, _elname),\
	},							\
}

/*
 * Same as EXT4_ATTR_OFFSET, but the offset is into the on-disk
 * superblock (struct ext4_super_block) instead of ext4_sb_info.
 */
#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname)		\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr = {.name = __stringify(_name), .mode = _mode },		\
	.show	= _show,						\
	.store	= _store,						\
	.u = {								\
		.offset = offsetof(struct ext4_super_block, _elname),	\
	},								\
}

/* Plain attribute with explicit show/store handlers. */
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Presence-only attribute (no handlers; used for feature flags). */
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
/* Read-only attribute backed by name##_show(). */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
/* Read-write attribute backed by name##_show()/name##_store(). */
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)

/* Read-only unsigned int field of the on-disk superblock. */
#define EXT4_RO_ATTR_ES_UI(name, elname)	\
	EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
/* Read-write unsigned int field of struct ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)	\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)

/* Address of the embedded struct attribute, for attribute lists. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
/* Read-only attribute that always reports the fixed value _val. */
#define EXT4_DEPRECATED_ATTR(_name, _val)	\
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = 0444 },	\
	.show	= sbi_deprecated_show,				\
	.u = {							\
		.deprecated_val = _val,				\
	},							\
}
2780
/*
 * Attribute instances: read-only counters first, then tunables backed
 * by struct ext4_sb_info fields, then superblock error statistics.
 */
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_RW_ATTR(reserved_clusters);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
		 inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);

/* NULL-terminated list wired into ext4_ktype.default_attrs below. */
static struct attribute *ext4_attrs[] = {
	ATTR_LIST(delayed_allocation_blocks),
	ATTR_LIST(session_write_kbytes),
	ATTR_LIST(lifetime_write_kbytes),
	ATTR_LIST(reserved_clusters),
	ATTR_LIST(inode_readahead_blks),
	ATTR_LIST(inode_goal),
	ATTR_LIST(mb_stats),
	ATTR_LIST(mb_max_to_scan),
	ATTR_LIST(mb_min_to_scan),
	ATTR_LIST(mb_order2_req),
	ATTR_LIST(mb_stream_req),
	ATTR_LIST(mb_group_prealloc),
	ATTR_LIST(max_writeback_mb_bump),
	ATTR_LIST(extent_max_zeroout_kb),
	ATTR_LIST(trigger_fs_error),
	ATTR_LIST(err_ratelimit_interval_ms),
	ATTR_LIST(err_ratelimit_burst),
	ATTR_LIST(warning_ratelimit_interval_ms),
	ATTR_LIST(warning_ratelimit_burst),
	ATTR_LIST(msg_ratelimit_interval_ms),
	ATTR_LIST(msg_ratelimit_burst),
	ATTR_LIST(errors_count),
	ATTR_LIST(first_error_time),
	ATTR_LIST(last_error_time),
	NULL,
};
2834
/* Features this copy of ext4 supports */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
EXT4_INFO_ATTR(meta_bg_resize);

/* NULL-terminated list exposed through ext4_feat_ktype below. */
static struct attribute *ext4_feat_attrs[] = {
	ATTR_LIST(lazy_itable_init),
	ATTR_LIST(batched_discard),
	ATTR_LIST(meta_bg_resize),
	NULL,
};
2846
2847static ssize_t ext4_attr_show(struct kobject *kobj,
2848                              struct attribute *attr, char *buf)
2849{
2850        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2851                                                s_kobj);
2852        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2853
2854        return a->show ? a->show(a, sbi, buf) : 0;
2855}
2856
2857static ssize_t ext4_attr_store(struct kobject *kobj,
2858                               struct attribute *attr,
2859                               const char *buf, size_t len)
2860{
2861        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2862                                                s_kobj);
2863        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2864
2865        return a->store ? a->store(a, sbi, buf, len) : 0;
2866}
2867
/* kobject release callback: wake any waiter on s_kobj_unregister. */
static void ext4_sb_release(struct kobject *kobj)
{
	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
						s_kobj);
	complete(&sbi->s_kobj_unregister);
}
2874
/* show/store dispatch for per-filesystem sysfs attributes */
static const struct sysfs_ops ext4_attr_ops = {
	.show	= ext4_attr_show,
	.store	= ext4_attr_store,
};

/* kobject type for the kobject embedded in ext4_sb_info (s_kobj) */
static struct kobj_type ext4_ktype = {
	.default_attrs	= ext4_attrs,
	.sysfs_ops	= &ext4_attr_ops,
	.release	= ext4_sb_release,
};
2885
/* kobject release callback for the file-scope ext4_feat object:
 * signal completion of its teardown. */
static void ext4_feat_release(struct kobject *kobj)
{
	complete(&ext4_feat->f_kobj_unregister);
}
2890
/* Every feature attribute reads as the literal string "supported". */
static ssize_t ext4_feat_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "supported\n");
}
2896
/*
 * We can not use ext4_attr_show/store because it relies on the kobject
 * being embedded in the ext4_sb_info structure which is definitely not
 * true in this case.
 */
static const struct sysfs_ops ext4_feat_ops = {
	.show	= ext4_feat_show,
	.store	= NULL,		/* feature attributes are read-only */
};

/* kobject type for the module-wide feature list (ext4_feat_attrs) */
static struct kobj_type ext4_feat_ktype = {
	.default_attrs	= ext4_feat_attrs,
	.sysfs_ops	= &ext4_feat_ops,
	.release	= ext4_feat_release,
};
2912
2913/*
2914 * Check whether this filesystem can be mounted based on
2915 * the features present and the RDONLY/RDWR mount requested.
2916 * Returns 1 if this filesystem can be mounted as requested,
2917 * 0 if it cannot be.
2918 */
2919static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2920{
2921        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2922                ext4_msg(sb, KERN_ERR,
2923                        "Couldn't mount because of "
2924                        "unsupported optional features (%x)",
2925                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2926                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2927                return 0;
2928        }
2929
2930        if (readonly)
2931                return 1;
2932
2933        /* Check that feature set is OK for a read-write mount */
2934        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2935                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2936                         "unsupported optional features (%x)",
2937                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2938                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2939                return 0;
2940        }
2941        /*
2942         * Large file size enabled file system can only be mounted
2943         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2944         */
2945        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2946                if (sizeof(blkcnt_t) < sizeof(u64)) {
2947                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2948                                 "cannot be mounted RDWR without "
2949                                 "CONFIG_LBDAF");
2950                        return 0;
2951                }
2952        }
2953        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2954            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2955                ext4_msg(sb, KERN_ERR,
2956                         "Can't support bigalloc feature without "
2957                         "extents feature\n");
2958                return 0;
2959        }
2960
2961#ifndef CONFIG_QUOTA
2962        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2963            !readonly) {
2964                ext4_msg(sb, KERN_ERR,
2965                         "Filesystem with quota feature cannot be mounted RDWR "
2966                         "without CONFIG_QUOTA");
2967                return 0;
2968        }
2969#endif  /* CONFIG_QUOTA */
2970        return 1;
2971}
2972
/*
 * This function is called once a day if we have errors logged
 * on the file system
 */
static void print_daily_error_info(unsigned long arg)
{
	/* Timer callback: @arg is the super_block, cast at arming time. */
	struct super_block *sb = (struct super_block *) arg;
	struct ext4_sb_info *sbi;
	struct ext4_super_block *es;

	sbi = EXT4_SB(sb);
	es = sbi->s_es;

	if (es->s_error_count)
		/* fsck newer than v1.41.13 is needed to clean this condition. */
		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
			 le32_to_cpu(es->s_error_count));
	if (es->s_first_error_time) {
		/*
		 * The following printk() calls build one logical line in
		 * pieces; the level-less continuations rely on printk
		 * continuation semantics.
		 */
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk("\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk("\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}
3020
/* Find next suitable group and run ext4_init_inode_table */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t group, ngroups;
	struct super_block *sb;
	unsigned long timeout = 0;
	int ret = 0;	/* non-zero: no more groups (or descriptor read failed) */

	sb = elr->lr_super;
	ngroups = EXT4_SB(sb)->s_groups_count;

	/* Hold write access to the fs for the duration of the zeroing. */
	sb_start_write(sb);
	for (group = elr->lr_next_group; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		/* Stop at the first group whose inode table isn't zeroed. */
		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	if (group >= ngroups)
		ret = 1;

	if (!ret) {
		timeout = jiffies;
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		/*
		 * First run: derive the per-group pacing delay from how
		 * long the initialization took, scaled by s_li_wait_mult.
		 */
		if (elr->lr_timeout == 0) {
			timeout = (jiffies - timeout) *
				  elr->lr_sbi->s_li_wait_mult;
			elr->lr_timeout = timeout;
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}
	sb_end_write(sb);

	return ret;
}
3064
3065/*
3066 * Remove lr_request from the list_request and free the
3067 * request structure. Should be called with li_list_mtx held
3068 */
3069static void ext4_remove_li_request(struct ext4_li_request *elr)
3070{
3071        struct ext4_sb_info *sbi;
3072
3073        if (!elr)
3074                return;
3075
3076        sbi = elr->lr_sbi;
3077
3078        list_del(&elr->lr_request);
3079        sbi->s_li_request = NULL;
3080        kfree(elr);
3081}
3082
3083static void ext4_unregister_li_request(struct super_block *sb)
3084{
3085        mutex_lock(&ext4_li_mtx);
3086        if (!ext4_li_info) {
3087                mutex_unlock(&ext4_li_mtx);
3088                return;
3089        }
3090
3091        mutex_lock(&ext4_li_info->li_list_mtx);
3092        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3093        mutex_unlock(&ext4_li_info->li_list_mtx);
3094        mutex_unlock(&ext4_li_mtx);
3095}
3096
3097static struct task_struct *ext4_lazyinit_task;
3098
/*
 * This is the function where the ext4lazyinit thread lives. It walks
 * through the request list searching for the next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_run_li_request) and keep track of the time spent in this
 * function. Based on that time we compute the next schedule time of
 * the request. When walking through the list is complete, compute
 * next waking time and put itself into sleep.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);

cont_thread:
	while (true) {
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}

		list_for_each_safe(pos, n, &eli->li_request_list) {
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			/* Run every request whose deadline has passed.
			 * A non-zero return from ext4_run_li_request()
			 * means the fs is finished (or failed), so the
			 * request is dropped from the list. */
			if (time_after_eq(jiffies, elr->lr_next_sched)) {
				if (ext4_run_li_request(elr) != 0) {
					/* error, remove the lazy_init job */
					ext4_remove_li_request(elr);
					continue;
				}
			}

			/* Track the earliest pending deadline so we can
			 * sleep until then. */
			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		/* Already past the earliest deadline, or nothing got
		 * scheduled at all: loop again without sleeping. */
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		/* New work arrived while we were shutting down. */
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}
3184
3185static void ext4_clear_request_list(void)
3186{
3187        struct list_head *pos, *n;
3188        struct ext4_li_request *elr;
3189
3190        mutex_lock(&ext4_li_info->li_list_mtx);
3191        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3192                elr = list_entry(pos, struct ext4_li_request,
3193                                 lr_request);
3194                ext4_remove_li_request(elr);
3195        }
3196        mutex_unlock(&ext4_li_info->li_list_mtx);
3197}
3198
3199static int ext4_run_lazyinit_thread(void)
3200{
3201        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3202                                         ext4_li_info, "ext4lazyinit");
3203        if (IS_ERR(ext4_lazyinit_task)) {
3204                int err = PTR_ERR(ext4_lazyinit_task);
3205                ext4_clear_request_list();
3206                kfree(ext4_li_info);
3207                ext4_li_info = NULL;
3208                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3209                                 "initialization thread\n",
3210                                 err);
3211                return err;
3212        }
3213        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3214        return 0;
3215}
3216
3217/*
3218 * Check whether it make sense to run itable init. thread or not.
3219 * If there is at least one uninitialized inode table, return
3220 * corresponding group number, else the loop goes through all
3221 * groups and return total number of groups.
3222 */
3223static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3224{
3225        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3226        struct ext4_group_desc *gdp = NULL;
3227
3228        if (!ext4_has_group_desc_csum(sb))
3229                return ngroups;
3230
3231        for (group = 0; group < ngroups; group++) {
3232                gdp = ext4_get_group_desc(sb, group, NULL);
3233                if (!gdp)
3234                        continue;
3235
3236                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3237                        break;
3238        }
3239
3240        return group;
3241}
3242
3243static int ext4_li_info_new(void)
3244{
3245        struct ext4_lazy_init *eli = NULL;
3246
3247        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3248        if (!eli)
3249                return -ENOMEM;
3250
3251        INIT_LIST_HEAD(&eli->li_request_list);
3252        mutex_init(&eli->li_list_mtx);
3253
3254        eli->li_state |= EXT4_LAZYINIT_QUIT;
3255
3256        ext4_li_info = eli;
3257
3258        return 0;
3259}
3260
3261static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3262                                            ext4_group_t start)
3263{
3264        struct ext4_sb_info *sbi = EXT4_SB(sb);
3265        struct ext4_li_request *elr;
3266
3267        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3268        if (!elr)
3269                return NULL;
3270
3271        elr->lr_super = sb;
3272        elr->lr_sbi = sbi;
3273        elr->lr_next_group = start;
3274
3275        /*
3276         * Randomize first schedule time of the request to
3277         * spread the inode table initialization requests
3278         * better.
3279         */
3280        elr->lr_next_sched = jiffies + (prandom_u32() %
3281                                (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3282        return elr;
3283}
3284
/*
 * Register an inode-table lazy-init request for @sb, creating the
 * global lazy-init state and the ext4lazyinit thread on first use.
 *
 * @first_not_zeroed: first group whose inode table still needs
 *	zeroing (see ext4_has_uninit_itable()); equal to the group
 *	count when there is nothing to do.
 *
 * Returns 0 on success, including the no-op cases (already
 * registered, read-only mount, noinit_itable, or nothing to zero);
 * a negative errno otherwise.  Serialized by ext4_li_mtx.
 */
int ext4_register_li_request(struct super_block *sb,
			     ext4_group_t first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr = NULL;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	int ret = 0;

	mutex_lock(&ext4_li_mtx);
	if (sbi->s_li_request != NULL) {
		/*
		 * Reset timeout so it can be computed again, because
		 * s_li_wait_mult might have changed.
		 */
		sbi->s_li_request->lr_timeout = 0;
		goto out;
	}

	/* No work for fully-initialized, read-only, or noinit_itable
	 * mounts. */
	if (first_not_zeroed == ngroups ||
	    (sb->s_flags & MS_RDONLY) ||
	    !test_opt(sb, INIT_INODE_TABLE))
		goto out;

	elr = ext4_li_request_new(sb, first_not_zeroed);
	if (!elr) {
		ret = -ENOMEM;
		goto out;
	}

	if (NULL == ext4_li_info) {
		ret = ext4_li_info_new();
		if (ret)
			goto out;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
	mutex_unlock(&ext4_li_info->li_list_mtx);

	sbi->s_li_request = elr;
	/*
	 * set elr to NULL here since it has been inserted to
	 * the request_list and the removal and free of it is
	 * handled by ext4_clear_request_list from now on.
	 */
	elr = NULL;

	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
		ret = ext4_run_lazyinit_thread();
		if (ret)
			goto out;
	}
out:
	mutex_unlock(&ext4_li_mtx);
	/* On error, free elr only if ownership was not yet handed to
	 * the request list (elr is NULL once it was). */
	if (ret)
		kfree(elr);
	return ret;
}
3343
3344/*
3345 * We do not need to lock anything since this is called on
3346 * module unload.
3347 */
3348static void ext4_destroy_lazyinit_thread(void)
3349{
3350        /*
3351         * If thread exited earlier
3352         * there's nothing to be done.
3353         */
3354        if (!ext4_li_info || !ext4_lazyinit_task)
3355                return;
3356
3357        kthread_stop(ext4_lazyinit_task);
3358}
3359
3360static int set_journal_csum_feature_set(struct super_block *sb)
3361{
3362        int ret = 1;
3363        int compat, incompat;
3364        struct ext4_sb_info *sbi = EXT4_SB(sb);
3365
3366        if (ext4_has_metadata_csum(sb)) {
3367                /* journal checksum v3 */
3368                compat = 0;
3369                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3370        } else {
3371                /* journal checksum v1 */
3372                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3373                incompat = 0;
3374        }
3375
3376        jbd2_journal_clear_features(sbi->s_journal,
3377                        JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3378                        JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3379                        JBD2_FEATURE_INCOMPAT_CSUM_V2);
3380        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3381                ret = jbd2_journal_set_features(sbi->s_journal,
3382                                compat, 0,
3383                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3384                                incompat);
3385        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3386                ret = jbd2_journal_set_features(sbi->s_journal,
3387                                compat, 0,
3388                                incompat);
3389                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3390                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3391        } else {
3392                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3393                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3394        }
3395
3396        return ret;
3397}
3398
/*
 * Note: calculating the overhead so we can be compatible with
 * historical BSD practice is quite difficult in the face of
 * clusters/bigalloc.  This is because multiple metadata blocks from
 * different block group can end up in the same allocation cluster.
 * Calculating the exact overhead in the face of clustered allocation
 * requires either O(all block bitmaps) in memory or O(number of block
 * groups**2) in time.  We will still calculate the superblock for
 * older file systems --- and if we come across with a bigalloc file
 * system with zero in s_overhead_clusters the estimate will be close to
 * correct especially for very large cluster sizes --- but for newer
 * file systems, it's better to calculate this figure once at mkfs
 * time, and store it in the superblock.  If the superblock value is
 * present (even for non-bigalloc file systems), we will use it.
 */
/*
 * Count the metadata overhead (in clusters) attributable to group
 * @grp.  @buf is a caller-supplied zeroed bitmap page used to
 * de-duplicate clusters shared by several metadata blocks.
 */
static int count_overhead(struct super_block *sb, ext4_group_t grp,
			  char *buf)
{
	struct ext4_sb_info	*sbi = EXT4_SB(sb);
	struct ext4_group_desc	*gdp;
	ext4_fsblk_t		first_block, last_block, b;
	ext4_group_t		i, ngroups = ext4_get_groups_count(sb);
	int			s, j, count = 0;

	/* Without bigalloc every metadata block is its own cluster,
	 * so the overhead is a simple sum: superblock backup, group
	 * descriptor blocks, inode table, plus the two bitmaps. */
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
		return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
			sbi->s_itb_per_group + 2);

	/* Block range covered by group @grp. */
	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
		(grp * EXT4_BLOCKS_PER_GROUP(sb));
	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	/* Mark in @buf the cluster of every metadata block — from any
	 * group — that physically falls inside @grp's block range. */
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		b = ext4_block_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_table(sb, gdp);
		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				int c = EXT4_B2C(sbi, b - first_block);
				ext4_set_bit(c, buf);
				count++;
			}
		if (i != grp)
			continue;
		/* For @grp itself also account the superblock backup
		 * and group descriptor blocks at the group's start. */
		s = 0;
		if (ext4_bg_has_super(sb, grp)) {
			ext4_set_bit(s++, buf);
			count++;
		}
		j = ext4_bg_num_gdb(sb, grp);
		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
			ext4_error(sb, "Invalid number of block group "
				   "descriptor blocks: %d", j);
			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
		}
		count += j;
		for (; j > 0; j--)
			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
	}
	if (!count)
		return 0;
	/* Clusters per group minus the still-clear bits in @buf gives
	 * the number of distinct clusters marked as metadata. */
	return EXT4_CLUSTERS_PER_GROUP(sb) -
		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
}
3471
3472/*
3473 * Compute the overhead and stash it in sbi->s_overhead
3474 */
3475int ext4_calculate_overhead(struct super_block *sb)
3476{
3477        struct ext4_sb_info *sbi = EXT4_SB(sb);
3478        struct ext4_super_block *es = sbi->s_es;
3479        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3480        ext4_fsblk_t overhead = 0;
3481        char *buf = (char *) get_zeroed_page(GFP_NOFS);
3482
3483        if (!buf)
3484                return -ENOMEM;
3485
3486        /*
3487         * Compute the overhead (FS structures).  This is constant
3488         * for a given filesystem unless the number of block groups
3489         * changes so we cache the previous value until it does.
3490         */
3491
3492        /*
3493         * All of the blocks before first_data_block are overhead
3494         */
3495        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3496
3497        /*
3498         * Add the overhead found in each block group
3499         */
3500        for (i = 0; i < ngroups; i++) {
3501                int blks;
3502
3503                blks = count_overhead(sb, i, buf);
3504                overhead += blks;
3505                if (blks)
3506                        memset(buf, 0, PAGE_SIZE);
3507                cond_resched();
3508        }
3509        /* Add the internal journal blocks as well */
3510        if (sbi->s_journal && !sbi->journal_bdev)
3511                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3512
3513        sbi->s_overhead = overhead;
3514        smp_wmb();
3515        free_page((unsigned long) buf);
3516        return 0;
3517}
3518
3519
3520static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3521{
3522        ext4_fsblk_t resv_clusters;
3523
3524        /*
3525         * There's no need to reserve anything when we aren't using extents.
3526         * The space estimates are exact, there are no unwritten extents,
3527         * hole punching doesn't need new metadata... This is needed especially
3528         * to keep ext2/3 backward compatibility.
3529         */
3530        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3531                return 0;
3532        /*
3533         * By default we reserve 2% or 4096 clusters, whichever is smaller.
3534         * This should cover the situations where we can not afford to run
3535         * out of space like for example punch hole, or converting
3536         * unwritten extents in delalloc path. In most cases such
3537         * allocation would require 1, or 2 blocks, higher numbers are
3538         * very rare.
3539         */
3540        resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3541                        EXT4_SB(sb)->s_cluster_bits;
3542
3543        do_div(resv_clusters, 50);
3544        resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3545
3546        return resv_clusters;
3547}
3548
3549
3550static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
3551{
3552        ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
3553                                sbi->s_cluster_bits;
3554
3555        if (count >= clusters)
3556                return -EINVAL;
3557
3558        atomic64_set(&sbi->s_resv_clusters, count);
3559        return 0;
3560}
3561
3562static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3563{
3564        struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
3565        char *orig_data = kstrdup(data, GFP_KERNEL);
3566        struct buffer_head *bh;
3567        struct ext4_super_block *es = NULL;
3568        struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3569        ext4_fsblk_t block;
3570        ext4_fsblk_t sb_block = get_sb_block(&data);
3571        ext4_fsblk_t logical_sb_block;
3572        unsigned long offset = 0;
3573        unsigned long journal_devnum = 0;
3574        unsigned long def_mount_opts;
3575        struct inode *root;
3576        char *cp;
3577        const char *descr;
3578        int ret = -ENOMEM;
3579        int blocksize, clustersize;
3580        unsigned int db_count;
3581        unsigned int i;
3582        int needs_recovery, has_huge_files, has_bigalloc;
3583        __u64 blocks_count;
3584        int err = 0;
3585        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3586        ext4_group_t first_not_zeroed;
3587
3588        if ((data && !orig_data) || !sbi)
3589                goto out_free_base;
3590
3591        sbi->s_daxdev = dax_dev;
3592        sbi->s_blockgroup_lock =
3593                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3594        if (!sbi->s_blockgroup_lock)
3595                goto out_free_base;
3596
3597        sb->s_fs_info = sbi;
3598        sbi->s_sb = sb;
3599        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3600        sbi->s_sb_block = sb_block;
3601        if (sb->s_bdev->bd_part)
3602                sbi->s_sectors_written_start =
3603                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3604
3605        /* Cleanup superblock name */
3606        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3607                *cp = '!';
3608
3609        /* -EINVAL is default */
3610        ret = -EINVAL;
3611        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3612        if (!blocksize) {
3613                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3614                goto out_fail;
3615        }
3616
3617        /*
3618         * The ext4 superblock will not be buffer aligned for other than 1kB
3619         * block sizes.  We need to calculate the offset from buffer start.
3620         */
3621        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3622                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3623                offset = do_div(logical_sb_block, blocksize);
3624        } else {
3625                logical_sb_block = sb_block;
3626        }
3627
3628        if (!(bh = sb_bread(sb, logical_sb_block))) {
3629                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3630                goto out_fail;
3631        }
3632        /*
3633         * Note: s_es must be initialized as soon as possible because
3634         *       some ext4 macro-instructions depend on its value
3635         */
3636        es = (struct ext4_super_block *) (bh->b_data + offset);
3637        sbi->s_es = es;
3638        sb->s_magic = le16_to_cpu(es->s_magic);
3639        if (sb->s_magic != EXT4_SUPER_MAGIC)
3640                goto cantfind_ext4;
3641        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3642
3643        /* Warn if metadata_csum and gdt_csum are both set. */
3644        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3645                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3646            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3647                ext4_warning(sb, "metadata_csum and uninit_bg are "
3648                             "redundant flags; please run fsck.");
3649
3650        /* Check for a known checksum algorithm */
3651        if (!ext4_verify_csum_type(sb, es)) {
3652                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3653                         "unknown checksum algorithm.");
3654                silent = 1;
3655                goto cantfind_ext4;
3656        }
3657
3658        /* Load the checksum driver */
3659        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3660                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3661                sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3662                if (IS_ERR(sbi->s_chksum_driver)) {
3663                        ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3664                        ret = PTR_ERR(sbi->s_chksum_driver);
3665                        sbi->s_chksum_driver = NULL;
3666                        goto failed_mount;
3667                }
3668        }
3669
3670        /* Check superblock checksum */
3671        if (!ext4_superblock_csum_verify(sb, es)) {
3672                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3673                         "invalid superblock checksum.  Run e2fsck?");
3674                silent = 1;
3675                goto cantfind_ext4;
3676        }
3677
3678        /* Precompute checksum seed for all metadata */
3679        if (ext4_has_metadata_csum(sb))
3680                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3681                                               sizeof(es->s_uuid));
3682
3683        /* Set defaults before we parse the mount options */
3684        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3685        set_opt(sb, INIT_INODE_TABLE);
3686        if (def_mount_opts & EXT4_DEFM_DEBUG)
3687                set_opt(sb, DEBUG);
3688        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3689                set_opt(sb, GRPID);
3690        if (def_mount_opts & EXT4_DEFM_UID16)
3691                set_opt(sb, NO_UID32);
3692        /* xattr user namespace & acls are now defaulted on */
3693        set_opt(sb, XATTR_USER);
3694#ifdef CONFIG_EXT4_FS_POSIX_ACL
3695        set_opt(sb, POSIX_ACL);
3696#endif
3697        /* don't forget to enable journal_csum when metadata_csum is enabled. */
3698        if (ext4_has_metadata_csum(sb))
3699                set_opt(sb, JOURNAL_CHECKSUM);
3700
3701        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3702                set_opt(sb, JOURNAL_DATA);
3703        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3704                set_opt(sb, ORDERED_DATA);
3705        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3706                set_opt(sb, WRITEBACK_DATA);
3707
3708        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3709                set_opt(sb, ERRORS_PANIC);
3710        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3711                set_opt(sb, ERRORS_CONT);
3712        else
3713                set_opt(sb, ERRORS_RO);
3714        if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3715                set_opt(sb, BLOCK_VALIDITY);
3716        if (def_mount_opts & EXT4_DEFM_DISCARD)
3717                set_opt(sb, DISCARD);
3718
3719        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3720        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3721        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3722        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3723        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3724
3725        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3726                set_opt(sb, BARRIER);
3727
3728        /*
3729         * enable delayed allocation by default
3730         * Use -o nodelalloc to turn it off
3731         */
3732        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3733            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3734                set_opt(sb, DELALLOC);
3735
3736        /*
3737         * set default s_li_wait_mult for lazyinit, for the case there is
3738         * no mount option specified.
3739         */
3740        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3741
3742        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3743        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3744            blocksize > EXT4_MAX_BLOCK_SIZE) {
3745                ext4_msg(sb, KERN_ERR,
3746                       "Unsupported filesystem blocksize %d (%d log_block_size)",
3747                         blocksize, le32_to_cpu(es->s_log_block_size));
3748                goto failed_mount;
3749        }
3750
3751        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3752                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3753                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3754        } else {
3755                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3756                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3757                if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
3758                        ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
3759                                 sbi->s_first_ino);
3760                        goto failed_mount;
3761                }
3762                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3763                    (!is_power_of_2(sbi->s_inode_size)) ||
3764                    (sbi->s_inode_size > blocksize)) {
3765                        ext4_msg(sb, KERN_ERR,
3766                               "unsupported inode size: %d",
3767                               sbi->s_inode_size);
3768                        ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
3769                        goto failed_mount;
3770                }
3771                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3772                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3773        }
3774        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
3775                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3776                        EXT4_GOOD_OLD_INODE_SIZE;
3777                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3778                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
3779                        unsigned v, max = (sbi->s_inode_size -
3780                                           EXT4_GOOD_OLD_INODE_SIZE);
3781
3782                        v = le16_to_cpu(es->s_want_extra_isize);
3783                        if (v > max) {
3784                                ext4_msg(sb, KERN_ERR,
3785                                         "bad s_want_extra_isize: %d", v);
3786                                goto failed_mount;
3787                        }
3788                        if (sbi->s_want_extra_isize < v)
3789                                sbi->s_want_extra_isize = v;
3790
3791                        v = le16_to_cpu(es->s_min_extra_isize);
3792                        if (v > max) {
3793                                ext4_msg(sb, KERN_ERR,
3794                                         "bad s_min_extra_isize: %d", v);
3795                                goto failed_mount;
3796                        }
3797                        if (sbi->s_want_extra_isize < v)
3798                                sbi->s_want_extra_isize = v;
3799                }
3800        }
3801
3802        if (sbi->s_es->s_mount_opts[0]) {
3803                char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
3804                                              sizeof(sbi->s_es->s_mount_opts),
3805                                              GFP_KERNEL);
3806                if (!s_mount_opts)
3807                        goto failed_mount;
3808                if (!parse_options(s_mount_opts, sb, &journal_devnum,
3809                                   &journal_ioprio, 0)) {
3810                        ext4_msg(sb, KERN_WARNING,
3811                                 "failed to parse options in superblock: %s",
3812                                 s_mount_opts);
3813                }
3814                kfree(s_mount_opts);
3815        }
3816        sbi->s_def_mount_opt = sbi->s_mount_opt;
3817        if (!parse_options((char *) data, sb, &journal_devnum,
3818                           &journal_ioprio, 0))
3819                goto failed_mount;
3820
3821        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3822                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3823                            "with data=journal disables delayed "
3824                            "allocation and O_DIRECT support!\n");
3825                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3826                        ext4_msg(sb, KERN_ERR, "can't mount with "
3827                                 "both data=journal and delalloc");
3828                        goto failed_mount;
3829                }
3830                if (test_opt(sb, DIOREAD_NOLOCK)) {
3831                        ext4_msg(sb, KERN_ERR, "can't mount with "
3832                                 "both data=journal and dioread_nolock");
3833                        goto failed_mount;
3834                }
3835                if (test_opt(sb, DAX)) {
3836                        ext4_msg(sb, KERN_ERR, "can't mount with "
3837                                 "both data=journal and dax");
3838                        goto failed_mount;
3839                }
3840                if (test_opt(sb, DELALLOC))
3841                        clear_opt(sb, DELALLOC);
3842        }
3843
3844        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3845                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3846
3847        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3848            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3849             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3850             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3851                ext4_msg(sb, KERN_WARNING,
3852                       "feature flags set on rev 0 fs, "
3853                       "running e2fsck is recommended");
3854
3855        if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3856                set_opt2(sb, HURD_COMPAT);
3857                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3858                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
3859                        ext4_msg(sb, KERN_ERR,
3860                                 "The Hurd can't support 64-bit file systems");
3861                        goto failed_mount;
3862                }
3863        }
3864
3865        if (IS_EXT2_SB(sb)) {
3866                if (ext2_feature_set_ok(sb))
3867                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3868                                 "using the ext4 subsystem");
3869                else {
3870                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3871                                 "to feature incompatibilities");
3872                        goto failed_mount;
3873                }
3874        }
3875
3876        if (IS_EXT3_SB(sb)) {
3877                if (ext3_feature_set_ok(sb))
3878                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3879                                 "using the ext4 subsystem");
3880                else {
3881                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3882                                 "to feature incompatibilities");
3883                        goto failed_mount;
3884                }
3885        }
3886
3887        /*
3888         * Check feature flags regardless of the revision level, since we
3889         * previously didn't change the revision level when setting the flags,
3890         * so there is a chance incompat flags are set on a rev 0 filesystem.
3891         */
3892        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3893                goto failed_mount;
3894
3895        if (le32_to_cpu(es->s_log_block_size) >
3896            (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3897                ext4_msg(sb, KERN_ERR,
3898                         "Invalid log block size: %u",
3899                         le32_to_cpu(es->s_log_block_size));
3900                goto failed_mount;
3901        }
3902        if (le32_to_cpu(es->s_log_cluster_size) >
3903            (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
3904                ext4_msg(sb, KERN_ERR,
3905                         "Invalid log cluster size: %u",
3906                         le32_to_cpu(es->s_log_cluster_size));
3907                goto failed_mount;
3908        }
3909
3910        if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
3911                ext4_msg(sb, KERN_ERR,
3912                         "Number of reserved GDT blocks insanely large: %d",
3913                         le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
3914                goto failed_mount;
3915        }
3916
3917        if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3918                static bool printed = false;
3919
3920                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3921                                        EXT4_FEATURE_INCOMPAT_INLINE_DATA)) {
3922                        ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
3923                                        " that may contain inline data");
3924                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
3925                }
3926                if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
3927                        ext4_msg(sb, KERN_ERR,
3928                                "DAX unsupported by block device. Turning off DAX.");
3929                        sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
3930                }
3931                if (!printed) {
3932                        mark_tech_preview("ext4 direct access (dax)", NULL);
3933                        printed = true;
3934                }
3935        }
3936
3937        if (sb->s_blocksize != blocksize) {
3938                /* Validate the filesystem blocksize */
3939                if (!sb_set_blocksize(sb, blocksize)) {
3940                        ext4_msg(sb, KERN_ERR, "bad block size %d",
3941                                        blocksize);
3942                        goto failed_mount;
3943                }
3944
3945                brelse(bh);
3946                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3947                offset = do_div(logical_sb_block, blocksize);
3948                bh = sb_bread(sb, logical_sb_block);
3949                if (!bh) {
3950                        ext4_msg(sb, KERN_ERR,
3951                               "Can't read superblock on 2nd try");
3952                        goto failed_mount;
3953                }
3954                es = (struct ext4_super_block *)(bh->b_data + offset);
3955                sbi->s_es = es;
3956                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3957                        ext4_msg(sb, KERN_ERR,
3958                               "Magic mismatch, very weird!");
3959                        goto failed_mount;
3960                }
3961        }
3962
3963        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3964                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3965        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3966                                                      has_huge_files);
3967        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3968
3969        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3970        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3971                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3972                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3973                    !is_power_of_2(sbi->s_desc_size)) {
3974                        ext4_msg(sb, KERN_ERR,
3975                               "unsupported descriptor size %lu",
3976                               sbi->s_desc_size);
3977                        goto failed_mount;
3978                }
3979        } else
3980                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3981
3982        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3983        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3984
3985        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3986        if (sbi->s_inodes_per_block == 0)
3987                goto cantfind_ext4;
3988        if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
3989            sbi->s_inodes_per_group > blocksize * 8) {
3990                ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
3991                         sbi->s_blocks_per_group);
3992                goto failed_mount;
3993        }
3994        sbi->s_itb_per_group = sbi->s_inodes_per_group /
3995                                        sbi->s_inodes_per_block;
3996        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3997        sbi->s_sbh = bh;
3998        sbi->s_mount_state = le16_to_cpu(es->s_state);
3999        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4000        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
4001
4002        for (i = 0; i < 4; i++)
4003                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4004        sbi->s_def_hash_version = es->s_def_hash_version;
4005        if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
4006                i = le32_to_cpu(es->s_flags);
4007                if (i & EXT2_FLAGS_UNSIGNED_HASH)
4008                        sbi->s_hash_unsigned = 3;
4009                else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
4010#ifdef __CHAR_UNSIGNED__
4011                        if (!(sb->s_flags & MS_RDONLY))
4012                                es->s_flags |=
4013                                        cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4014                        sbi->s_hash_unsigned = 3;
4015#else
4016                        if (!(sb->s_flags & MS_RDONLY))
4017                                es->s_flags |=
4018                                        cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
4019#endif
4020                }
4021        }
4022
4023        /* Handle clustersize */
4024        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4025        has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
4026                                EXT4_FEATURE_RO_COMPAT_BIGALLOC);
4027        if (has_bigalloc) {
4028                if (clustersize < blocksize) {
4029                        ext4_msg(sb, KERN_ERR,
4030                                 "cluster size (%d) smaller than "
4031                                 "block size (%d)", clustersize, blocksize);
4032                        goto failed_mount;
4033                }
4034                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4035                        le32_to_cpu(es->s_log_block_size);
4036                sbi->s_clusters_per_group =
4037                        le32_to_cpu(es->s_clusters_per_group);
4038                if (sbi->s_clusters_per_group > blocksize * 8) {
4039                        ext4_msg(sb, KERN_ERR,
4040                                 "#clusters per group too big: %lu",
4041                                 sbi->s_clusters_per_group);
4042                        goto failed_mount;
4043                }
4044                if (sbi->s_blocks_per_group !=
4045                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4046                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4047                                 "clusters per group (%lu) inconsistent",
4048                                 sbi->s_blocks_per_group,
4049                                 sbi->s_clusters_per_group);
4050                        goto failed_mount;
4051                }
4052        } else {
4053                if (clustersize != blocksize) {
4054                        ext4_msg(sb, KERN_ERR,
4055                                 "fragment/cluster size (%d) != "
4056                                 "block size (%d)", clustersize, blocksize);
4057                        goto failed_mount;
4058                }
4059                if (sbi->s_blocks_per_group > blocksize * 8) {
4060                        ext4_msg(sb, KERN_ERR,
4061                                 "#blocks per group too big: %lu",
4062                                 sbi->s_blocks_per_group);
4063                        goto failed_mount;
4064                }
4065                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4066                sbi->s_cluster_bits = 0;
4067        }
4068        sbi->s_cluster_ratio = clustersize / blocksize;
4069
4070        /* Do we have standard group size of clustersize * 8 blocks ? */
4071        if (sbi->s_blocks_per_group == clustersize << 3)
4072                set_opt2(sb, STD_GROUP_SIZE);
4073
4074        /*
4075         * Test whether we have more sectors than will fit in sector_t,
4076         * and whether the max offset is addressable by the page cache.
4077         */
4078        err = generic_check_addressable(sb->s_blocksize_bits,
4079                                        ext4_blocks_count(es));
4080        if (err) {
4081                ext4_msg(sb, KERN_ERR, "filesystem"
4082                         " too large to mount safely on this system");
4083                if (sizeof(sector_t) < 8)
4084                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
4085                goto failed_mount;
4086        }
4087
4088        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4089                goto cantfind_ext4;
4090
4091        /* check blocks count against device size */
4092        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
4093        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4094                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4095                       "exceeds size of device (%llu blocks)",
4096                       ext4_blocks_count(es), blocks_count);
4097                goto failed_mount;
4098        }
4099
4100        /*
4101         * It makes no sense for the first data block to be beyond the end
4102         * of the filesystem.
4103         */
4104        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4105                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4106                         "block %u is beyond end of filesystem (%llu)",
4107                         le32_to_cpu(es->s_first_data_block),
4108                         ext4_blocks_count(es));
4109                goto failed_mount;
4110        }
4111        if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4112            (sbi->s_cluster_ratio == 1)) {
4113                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4114                         "block is 0 with a 1k block and cluster size");
4115                goto failed_mount;
4116        }
4117
4118        blocks_count = (ext4_blocks_count(es) -
4119                        le32_to_cpu(es->s_first_data_block) +
4120                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
4121        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4122        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4123                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
4124                       "(block count %llu, first data block %u, "
4125                       "blocks per group %lu)", sbi->s_groups_count,
4126                       ext4_blocks_count(es),
4127                       le32_to_cpu(es->s_first_data_block),
4128                       EXT4_BLOCKS_PER_GROUP(sb));
4129                goto failed_mount;
4130        }
4131        sbi->s_groups_count = blocks_count;
4132        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4133                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4134        if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4135            le32_to_cpu(es->s_inodes_count)) {
4136                ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4137                         le32_to_cpu(es->s_inodes_count),
4138                         ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4139                ret = -EINVAL;
4140                goto failed_mount;
4141        }
4142        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4143                   EXT4_DESC_PER_BLOCK(sb);
4144        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG)) {
4145                if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4146                        ext4_msg(sb, KERN_WARNING,
4147                                 "first meta block group too large: %u "
4148                                 "(group descriptor block count %u)",
4149                                 le32_to_cpu(es->s_first_meta_bg), db_count);
4150                        goto failed_mount;
4151                }
4152        }
4153        sbi->s_group_desc = kvmalloc(db_count *
4154                                          sizeof(struct buffer_head *),
4155                                          GFP_KERNEL);
4156        if (sbi->s_group_desc == NULL) {
4157                ext4_msg(sb, KERN_ERR, "not enough memory");
4158                ret = -ENOMEM;
4159                goto failed_mount;
4160        }
4161
4162        if (ext4_proc_root)
4163                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
4164
4165        if (sbi->s_proc)
4166                proc_create_data("options", S_IRUGO, sbi->s_proc,
4167                                 &ext4_seq_options_fops, sb);
4168
4169        bgl_lock_init(sbi->s_blockgroup_lock);
4170
4171        for (i = 0; i < db_count; i++) {
4172                block = descriptor_loc(sb, logical_sb_block, i);
4173                sbi->s_group_desc[i] = sb_bread(sb, block);
4174                if (!sbi->s_group_desc[i]) {
4175                        ext4_msg(sb, KERN_ERR,
4176                               "can't read group descriptor %d", i);
4177                        db_count = i;
4178                        goto failed_mount2;
4179                }
4180        }
4181        sbi->s_gdb_count = db_count;
4182        if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
4183                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4184                goto failed_mount2;
4185        }
4186
4187        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
4188        spin_lock_init(&sbi->s_next_gen_lock);
4189
4190        init_timer(&sbi->s_err_report);
4191        sbi->s_err_report.function = print_daily_error_info;
4192        sbi->s_err_report.data = (unsigned long) sb;
4193
4194        /* Register extent status tree shrinker */
4195        ext4_es_register_shrinker(sbi);
4196
4197        err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
4198        if (err) {
4199                ext4_msg(sb, KERN_ERR, "insufficient memory");
4200                goto failed_mount3;
4201        }
4202
4203        sbi->s_stripe = ext4_get_stripe_size(sbi);
4204        sbi->s_extent_max_zeroout_kb = 32;
4205
4206        /*
4207         * set up enough so that it can read an inode
4208         */
4209        if (!test_opt(sb, NOLOAD) &&
4210            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
4211                sb->s_op = &ext4_sops;
4212        else
4213                sb->s_op = &ext4_nojournal_sops;
4214        sb->s_export_op = &ext4_export_ops;
4215        sb->s_xattr = ext4_xattr_handlers;
4216#ifdef CONFIG_QUOTA
4217        sb->dq_op = &ext4_quota_operations;
4218        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
4219                sb->s_qcop = &ext4_qctl_sysfile_operations;
4220        else
4221                sb->s_qcop = &ext4_qctl_operations;
4222#endif
4223        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
4224
4225        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
4226        mutex_init(&sbi->s_orphan_lock);
4227
4228        sb->s_root = NULL;
4229
4230        needs_recovery = (es->s_last_orphan != 0 ||
4231                          EXT4_HAS_INCOMPAT_FEATURE(sb,
4232                                    EXT4_FEATURE_INCOMPAT_RECOVER));
4233
4234        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
4235            !(sb->s_flags & MS_RDONLY))
4236                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4237                        goto failed_mount3;
4238
4239        /*
4240         * The first inode we look at is the journal inode.  Don't try
4241         * root first: it may be modified in the journal!
4242         */
4243        if (!test_opt(sb, NOLOAD) &&
4244            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4245                err = ext4_load_journal(sb, es, journal_devnum);
4246                if (err)
4247                        goto failed_mount3;
4248        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
4249              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4250                ext4_msg(sb, KERN_ERR, "required journal recovery "
4251                       "suppressed and not mounted read-only");
4252                goto failed_mount_wq;
4253        } else {
4254                clear_opt(sb, DATA_FLAGS);
4255                sbi->s_journal = NULL;
4256                needs_recovery = 0;
4257                goto no_journal;
4258        }
4259
4260        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
4261            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4262                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
4263                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4264                goto failed_mount_wq;
4265        }
4266
4267        if (!set_journal_csum_feature_set(sb)) {
4268                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4269                         "feature set");
4270                goto failed_mount_wq;
4271        }
4272
4273        /* We have now updated the journal if required, so we can
4274         * validate the data journaling mode. */
4275        switch (test_opt(sb, DATA_FLAGS)) {
4276        case 0:
4277                /* No mode set, assume a default based on the journal
4278                 * capabilities: ORDERED_DATA if the journal can
4279                 * cope, else JOURNAL_DATA
4280                 */
4281                if (jbd2_journal_check_available_features
4282                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
4283                        set_opt(sb, ORDERED_DATA);
4284                else
4285                        set_opt(sb, JOURNAL_DATA);
4286                break;
4287
4288        case EXT4_MOUNT_ORDERED_DATA:
4289        case EXT4_MOUNT_WRITEBACK_DATA:
4290                if (!jbd2_journal_check_available_features
4291                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4292                        ext4_msg(sb, KERN_ERR, "Journal does not support "
4293                               "requested data journaling mode");
4294                        goto failed_mount_wq;
4295                }
4296        default:
4297                break;
4298        }
4299        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4300
4301        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4302
4303no_journal:
4304        /*
4305         * Get the # of file system overhead blocks from the
4306         * superblock if present.
4307         */
4308        if (es->s_overhead_clusters)
4309                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4310        else {
4311                err = ext4_calculate_overhead(sb);
4312                if (err)
4313                        goto failed_mount_wq;
4314        }
4315
4316        /*
4317         * The maximum number of concurrent works can be high and
4318         * concurrency isn't really necessary.  Limit it to 1.
4319         */
4320        EXT4_SB(sb)->rsv_conversion_wq =
4321                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4322        if (!EXT4_SB(sb)->rsv_conversion_wq) {
4323                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4324                ret = -ENOMEM;
4325                goto failed_mount4;
4326        }
4327
4328        /*
4329         * The jbd2_journal_load will have done any necessary log recovery,
4330         * so we can safely mount the rest of the filesystem now.
4331         */
4332
4333        root = ext4_iget(sb, EXT4_ROOT_INO);
4334        if (IS_ERR(root)) {
4335                ext4_msg(sb, KERN_ERR, "get root inode failed");
4336                ret = PTR_ERR(root);
4337                root = NULL;
4338                goto failed_mount4;
4339        }
4340        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4341                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4342                iput(root);
4343                goto failed_mount4;
4344        }
4345        sb->s_root = d_make_root(root);
4346        if (!sb->s_root) {
4347                ext4_msg(sb, KERN_ERR, "get root dentry failed");
4348                ret = -ENOMEM;
4349                goto failed_mount4;
4350        }
4351
4352        if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4353                sb->s_flags |= MS_RDONLY;
4354
4355        err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4356        if (err) {
4357                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4358                         "reserved pool", ext4_calculate_resv_clusters(sb));
4359                goto failed_mount4a;
4360        }
4361
4362        err = ext4_setup_system_zone(sb);
4363        if (err) {
4364                ext4_msg(sb, KERN_ERR, "failed to initialize system "
4365                         "zone (%d)", err);
4366                goto failed_mount4a;
4367        }
4368
4369        ext4_ext_init(sb);
4370        err = ext4_mb_init(sb);
4371        if (err) {
4372                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4373                         err);
4374                goto failed_mount5;
4375        }
4376
4377        block = ext4_count_free_clusters(sb);
4378        ext4_free_blocks_count_set(sbi->s_es, 
4379                                   EXT4_C2B(sbi, block));
4380        ext4_superblock_csum_set(sb);
4381        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4382                                  GFP_KERNEL);
4383        if (!err) {
4384                unsigned long freei = ext4_count_free_inodes(sb);
4385                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4386                ext4_superblock_csum_set(sb);
4387                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4388                                          GFP_KERNEL);
4389        }
4390        if (!err)
4391                err = percpu_counter_init(&sbi->s_dirs_counter,
4392                                          ext4_count_dirs(sb), GFP_KERNEL);
4393        if (!err)
4394                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4395                                          GFP_KERNEL);
4396        if (err) {
4397                ext4_msg(sb, KERN_ERR, "insufficient memory");
4398                goto failed_mount6;
4399        }
4400
4401        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4402                if (!ext4_fill_flex_info(sb)) {
4403                        ext4_msg(sb, KERN_ERR,
4404                               "unable to initialize "
4405                               "flex_bg meta info!");
4406                        goto failed_mount6;
4407                }
4408
4409        err = ext4_register_li_request(sb, first_not_zeroed);
4410        if (err)
4411                goto failed_mount6;
4412
4413        sbi->s_kobj.kset = ext4_kset;
4414        init_completion(&sbi->s_kobj_unregister);
4415        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
4416                                   "%s", sb->s_id);
4417        if (err)
4418                goto failed_mount7;
4419
4420#ifdef CONFIG_QUOTA
4421        /* Enable quota usage during mount. */
4422        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4423            !(sb->s_flags & MS_RDONLY)) {
4424                err = ext4_enable_quotas(sb);
4425                if (err)
4426                        goto failed_mount8;
4427        }
4428#endif  /* CONFIG_QUOTA */
4429
4430        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4431        ext4_orphan_cleanup(sb, es);
4432        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4433        if (needs_recovery) {
4434                ext4_msg(sb, KERN_INFO, "recovery complete");
4435                ext4_mark_recovery_complete(sb, es);
4436        }
4437        if (EXT4_SB(sb)->s_journal) {
4438                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4439                        descr = " journalled data mode";
4440                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4441                        descr = " ordered data mode";
4442                else
4443                        descr = " writeback data mode";
4444        } else
4445                descr = "out journal";
4446
4447        if (test_opt(sb, DISCARD)) {
4448                struct request_queue *q = bdev_get_queue(sb->s_bdev);
4449                if (!blk_queue_discard(q))
4450                        ext4_msg(sb, KERN_WARNING,
4451                                 "mounting with \"discard\" option, but "
4452                                 "the device does not support discard");
4453        }
4454
4455        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4456                 "Opts: %.*s%s%s", descr,
4457                 (int) sizeof(sbi->s_es->s_mount_opts),
4458                 sbi->s_es->s_mount_opts,
4459                 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4460
4461        if (es->s_error_count)
4462                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4463
4464        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4465        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4466        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4467        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4468
4469        kfree(orig_data);
4470        return 0;
4471
4472cantfind_ext4:
4473        if (!silent)
4474                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4475        goto failed_mount;
4476
4477#ifdef CONFIG_QUOTA
4478failed_mount8:
4479        kobject_del(&sbi->s_kobj);
4480#endif
4481failed_mount7:
4482        ext4_unregister_li_request(sb);
4483failed_mount6:
4484        ext4_mb_release(sb);
4485        if (sbi->s_flex_groups)
4486                ext4_kvfree(sbi->s_flex_groups);
4487        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4488        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4489        percpu_counter_destroy(&sbi->s_dirs_counter);
4490        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4491failed_mount5:
4492        ext4_ext_release(sb);
4493        ext4_release_system_zone(sb);
4494failed_mount4a:
4495        dput(sb->s_root);
4496        sb->s_root = NULL;
4497failed_mount4:
4498        ext4_msg(sb, KERN_ERR, "mount failed");
4499        if (EXT4_SB(sb)->rsv_conversion_wq)
4500                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4501failed_mount_wq:
4502        if (sbi->s_journal) {
4503                jbd2_journal_destroy(sbi->s_journal);
4504                sbi->s_journal = NULL;
4505        }
4506failed_mount3:
4507        ext4_es_unregister_shrinker(sbi);
4508        del_timer_sync(&sbi->s_err_report);
4509        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4510        if (sbi->s_mmp_tsk)
4511                kthread_stop(sbi->s_mmp_tsk);
4512failed_mount2:
4513        for (i = 0; i < db_count; i++)
4514                brelse(sbi->s_group_desc[i]);
4515        ext4_kvfree(sbi->s_group_desc);
4516failed_mount:
4517        if (sbi->s_chksum_driver)
4518                crypto_free_shash(sbi->s_chksum_driver);
4519        if (sbi->s_proc) {
4520                remove_proc_entry("options", sbi->s_proc);
4521                remove_proc_entry(sb->s_id, ext4_proc_root);
4522        }
4523#ifdef CONFIG_QUOTA
4524        for (i = 0; i < MAXQUOTAS; i++)
4525                kfree(sbi->s_qf_names[i]);
4526#endif
4527        ext4_blkdev_remove(sbi);
4528        brelse(bh);
4529out_fail:
4530        sb->s_fs_info = NULL;
4531        kfree(sbi->s_blockgroup_lock);
4532out_free_base:
4533        kfree(sbi);
4534        kfree(orig_data);
4535        fs_put_dax(dax_dev);
4536        return err ? err : ret;
4537}
4538
4539/*
4540 * Setup any per-fs journal parameters now.  We'll do this both on
4541 * initial mount, once the journal has been initialised but before we've
4542 * done any recovery; and again on any subsequent remount.
4543 */
4544static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4545{
4546        struct ext4_sb_info *sbi = EXT4_SB(sb);
4547
4548        journal->j_commit_interval = sbi->s_commit_interval;
4549        journal->j_min_batch_time = sbi->s_min_batch_time;
4550        journal->j_max_batch_time = sbi->s_max_batch_time;
4551
4552        write_lock(&journal->j_state_lock);
4553        if (test_opt(sb, BARRIER))
4554                journal->j_flags |= JBD2_BARRIER;
4555        else
4556                journal->j_flags &= ~JBD2_BARRIER;
4557        if (test_opt(sb, DATA_ERR_ABORT))
4558                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4559        else
4560                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4561        write_unlock(&journal->j_state_lock);
4562}
4563
4564static journal_t *ext4_get_journal(struct super_block *sb,
4565                                   unsigned int journal_inum)
4566{
4567        struct inode *journal_inode;
4568        journal_t *journal;
4569
4570        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4571
4572        /* First, test for the existence of a valid inode on disk.  Bad
4573         * things happen if we iget() an unused inode, as the subsequent
4574         * iput() will try to delete it. */
4575
4576        journal_inode = ext4_iget(sb, journal_inum);
4577        if (IS_ERR(journal_inode)) {
4578                ext4_msg(sb, KERN_ERR, "no journal found");
4579                return NULL;
4580        }
4581        if (!journal_inode->i_nlink) {
4582                make_bad_inode(journal_inode);
4583                iput(journal_inode);
4584                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4585                return NULL;
4586        }
4587
4588        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4589                  journal_inode, journal_inode->i_size);
4590        if (!S_ISREG(journal_inode->i_mode)) {
4591                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4592                iput(journal_inode);
4593                return NULL;
4594        }
4595
4596        journal = jbd2_journal_init_inode(journal_inode);
4597        if (!journal) {
4598                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4599                iput(journal_inode);
4600                return NULL;
4601        }
4602        journal->j_private = sb;
4603        ext4_init_journal_params(sb, journal);
4604        return journal;
4605}
4606
/*
 * Open an external journal device (identified by @j_dev), validate its
 * on-disk superblock, and return an initialized journal_t for it.
 * Returns NULL on any failure; the block device reference is dropped on
 * the error paths.
 */
static journal_t *ext4_get_dev_journal(struct super_block *sb,
				       dev_t j_dev)
{
	struct buffer_head *bh;
	journal_t *journal;
	ext4_fsblk_t start;
	ext4_fsblk_t len;
	int hblock, blocksize;
	ext4_fsblk_t sb_block;
	unsigned long offset;
	struct ext4_super_block *es;
	struct block_device *bdev;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	bdev = ext4_blkdev_get(j_dev, sb);
	if (bdev == NULL)
		return NULL;

	/*
	 * The journal device must be addressable with the filesystem's
	 * block size; a larger hardware sector size cannot be subdivided.
	 */
	blocksize = sb->s_blocksize;
	hblock = bdev_logical_block_size(bdev);
	if (blocksize < hblock) {
		ext4_msg(sb, KERN_ERR,
			"blocksize too small for journal device");
		goto out_bdev;
	}

	/*
	 * The journal superblock sits at byte offset EXT4_MIN_BLOCK_SIZE
	 * (1024); compute which fs-block that falls in and the offset
	 * within it.
	 */
	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
	set_blocksize(bdev, blocksize);
	if (!(bh = __bread(bdev, sb_block, blocksize))) {
		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
		       "external journal");
		goto out_bdev;
	}

	/* Must look like an ext4 superblock with the JOURNAL_DEV feature. */
	es = (struct ext4_super_block *) (bh->b_data + offset);
	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
	    !(le32_to_cpu(es->s_feature_incompat) &
	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
					"bad superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* Verify the superblock checksum when metadata_csum is enabled. */
	if ((le32_to_cpu(es->s_feature_ro_compat) &
	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
	    es->s_checksum != ext4_superblock_csum(sb, es)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
				       "corrupt superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* The journal's UUID must match the one recorded in our own sb. */
	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
		brelse(bh);
		goto out_bdev;
	}

	len = ext4_blocks_count(es);
	start = sb_block + 1;
	brelse(bh);	/* we're done with the superblock */

	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
					start, len, blocksize);
	if (!journal) {
		ext4_msg(sb, KERN_ERR, "failed to create device journal");
		goto out_bdev;
	}
	journal->j_private = sb;
	/* Read the journal superblock synchronously before trusting it. */
	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
	wait_on_buffer(journal->j_sb_buffer);
	if (!buffer_uptodate(journal->j_sb_buffer)) {
		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
		goto out_journal;
	}
	/* Sharing one external journal between filesystems is unsupported. */
	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
		ext4_msg(sb, KERN_ERR, "External journal has more than one "
					"user (unsupported) - %d",
			be32_to_cpu(journal->j_superblock->s_nr_users));
		goto out_journal;
	}
	EXT4_SB(sb)->journal_bdev = bdev;
	ext4_init_journal_params(sb, journal);
	return journal;

out_journal:
	jbd2_journal_destroy(journal);
out_bdev:
	ext4_blkdev_put(bdev);
	return NULL;
}
4701
/*
 * Locate, open and replay the filesystem's journal (either the internal
 * journal inode or an external journal device) at mount time.
 * @journal_devnum is a journal_dev= mount-option override; non-zero and
 * different from the on-disk value means the user supplied a new device.
 * Returns 0 on success or a negative errno.
 */
static int ext4_load_journal(struct super_block *sb,
			     struct ext4_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int really_read_only;

	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));

	/* A journal_dev= option overrides the device recorded on disk. */
	if (journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
			"numbers have changed");
		journal_dev = new_decode_dev(journal_devnum);
	} else
		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));

	really_read_only = bdev_read_only(sb->s_bdev);

	/*
	 * Are we loading a blank journal or performing recovery after a
	 * crash?  For recovery, we need to check in advance whether we
	 * can get read-write access to the device.
	 */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
		if (sb->s_flags & MS_RDONLY) {
			ext4_msg(sb, KERN_INFO, "INFO: recovery "
					"required on readonly filesystem");
			if (really_read_only) {
				/* Device itself is RO: replay is impossible */
				ext4_msg(sb, KERN_ERR, "write access "
					"unavailable, cannot proceed");
				return -EROFS;
			}
			ext4_msg(sb, KERN_INFO, "write access will "
			       "be enabled during recovery");
		}
	}

	/* Exactly one of the two journal locations may be set. */
	if (journal_inum && journal_dev) {
		ext4_msg(sb, KERN_ERR, "filesystem has both journal "
		       "and inode journals!");
		return -EINVAL;
	}

	if (journal_inum) {
		if (!(journal = ext4_get_journal(sb, journal_inum)))
			return -EINVAL;
	} else {
		if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
			return -EINVAL;
	}

	if (!(journal->j_flags & JBD2_BARRIER))
		ext4_msg(sb, KERN_INFO, "barriers disabled");

	/* No recovery pending: wipe any stale journal contents. */
	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
		err = jbd2_journal_wipe(journal, !really_read_only);
	if (!err) {
		/*
		 * Journal replay may overwrite the superblock's error
		 * history (s_error area); save it across the load and
		 * copy it back afterwards.  Best-effort: a failed
		 * allocation just skips the preservation.
		 */
		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
		if (save)
			memcpy(save, ((char *) es) +
			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
		err = jbd2_journal_load(journal);
		if (save)
			memcpy(((char *) es) + EXT4_S_ERR_START,
			       save, EXT4_S_ERR_LEN);
		kfree(save);
	}

	if (err) {
		ext4_msg(sb, KERN_ERR, "error loading journal");
		jbd2_journal_destroy(journal);
		return err;
	}

	EXT4_SB(sb)->s_journal = journal;
	ext4_clear_journal_err(sb, es);

	/* Persist a user-supplied journal device number for next mount. */
	if (!really_read_only && journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		es->s_journal_dev = cpu_to_le32(journal_devnum);

		/* Make sure we flush the recovery flag to disk. */
		ext4_commit_super(sb, 1);
	}

	return 0;
}
4793
/*
 * Write the in-memory superblock out to disk.  When @sync is non-zero
 * the write is performed synchronously (under the buffer lock) and I/O
 * errors are reported; otherwise the buffer is only marked dirty.
 * Returns 0 on success or a negative errno from the synchronous write.
 */
static int ext4_commit_super(struct super_block *sb, int sync)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
	int error = 0;

	/* Nothing to write to if the device is gone; report success. */
	if (!sbh || block_device_ejected(sb))
		return error;

	/*
	 * The superblock bh should be mapped, but it might not be if the
	 * device was hot-removed. Not much we can do but fail the I/O.
	 */
	if (!buffer_mapped(sbh))
		return error;

	/*
	 * If the file system is mounted read-only, don't update the
	 * superblock write time.  This avoids updating the superblock
	 * write time when we are mounting the root file system
	 * read/only but we need to replay the journal; at that point,
	 * for people who are east of GMT and who make their clock
	 * tick in localtime for Windows bug-for-bug compatibility,
	 * the clock is set in the future, and this will cause e2fsck
	 * to complain and force a full file system check.
	 */
	if (!(sb->s_flags & MS_RDONLY))
		es->s_wtime = cpu_to_le32(get_seconds());
	/* Lifetime write stats: sectors written since mount, in KiB. */
	if (sb->s_bdev->bd_part)
		es->s_kbytes_written =
			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
	else
		es->s_kbytes_written =
			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
	/*
	 * Refresh the free block/inode counts from the percpu counters,
	 * but only if they have been initialized (they may not be on
	 * early mount-failure paths).
	 */
	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
		ext4_free_blocks_count_set(es,
			EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeclusters_counter)));
	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
		es->s_free_inodes_count =
			cpu_to_le32(percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeinodes_counter));
	BUFFER_TRACE(sbh, "marking dirty");
	ext4_superblock_csum_set(sb);
	if (sync)
		lock_buffer(sbh);
	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
		/*
		 * Oh, dear.  A previous attempt to write the
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		ext4_msg(sb, KERN_ERR, "previous I/O error to "
		       "superblock detected");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	mark_buffer_dirty(sbh);
	if (sync) {
		unlock_buffer(sbh);
		error = sync_dirty_buffer(sbh);
		if (error)
			return error;

		/* The write may also fail asynchronously; check again. */
		error = buffer_write_io_error(sbh);
		if (error) {
			ext4_msg(sb, KERN_ERR, "I/O error while writing "
			       "superblock");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}
	}
	return error;
}
4873
/*
 * Have we just finished recovery?  If so, and if we are mounting (or
 * remounting) the filesystem readonly, then we will end up with a
 * consistent fs on disk.  Record that fact.
 */
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;

	/* A journal-less filesystem must never have a journal attached. */
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
		BUG_ON(journal != NULL);
		return;
	}
	jbd2_journal_lock_updates(journal);
	/* Don't claim a clean fs if the journal couldn't be flushed. */
	if (jbd2_journal_flush(journal) < 0)
		goto out;

	/*
	 * Read-only with an empty, flushed journal: clear the RECOVER
	 * feature flag and write the superblock so e2fsck/next mount see
	 * a clean filesystem.
	 */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
	    sb->s_flags & MS_RDONLY) {
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
		ext4_commit_super(sb, 1);
	}

out:
	jbd2_journal_unlock_updates(journal);
}
4901
4902/*
4903 * If we are mounting (or read-write remounting) a filesystem whose journal
4904 * has recorded an error from a previous lifetime, move that error to the
4905 * main filesystem now.
4906 */
4907static void ext4_clear_journal_err(struct super_block *sb,
4908                                   struct ext4_super_block *es)
4909{
4910        journal_t *journal;
4911        int j_errno;
4912        const char *errstr;
4913
4914        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4915
4916        journal = EXT4_SB(sb)->s_journal;
4917
4918        /*
4919         * Now check for any error status which may have been recorded in the
4920         * journal by a prior ext4_error() or ext4_abort()
4921         */
4922
4923        j_errno = jbd2_journal_errno(journal);
4924        if (j_errno) {
4925                char nbuf[16];
4926
4927                errstr = ext4_decode_error(sb, j_errno, nbuf);
4928                ext4_warning(sb, "Filesystem error recorded "
4929                             "from previous mount: %s", errstr);
4930                ext4_warning(sb, "Marking fs in need of filesystem check.");
4931
4932                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4933                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4934                ext4_commit_super(sb, 1);
4935
4936                jbd2_journal_clear_err(journal);
4937                jbd2_journal_update_sb_errno(journal);
4938        }
4939}
4940
4941/*
4942 * Force the running and committing transactions to commit,
4943 * and wait on the commit.
4944 */
4945int ext4_force_commit(struct super_block *sb)
4946{
4947        journal_t *journal;
4948
4949        if (sb->s_flags & MS_RDONLY)
4950                return 0;
4951
4952        journal = EXT4_SB(sb)->s_journal;
4953        return ext4_journal_force_commit(journal);
4954}
4955
/*
 * VFS ->sync_fs for the journalled case: flush pending reserved-space
 * conversions and dquots, commit the latest transaction, and issue a
 * cache-flush barrier if the commit won't do that for us.
 * Returns 0 or the first error encountered.
 */
static int ext4_sync_fs(struct super_block *sb, int wait)
{
	int ret = 0;
	tid_t target;
	bool needs_barrier = false;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	trace_ext4_sync_fs(sb, wait);
	flush_workqueue(sbi->rsv_conversion_wq);
	/*
	 * Writeback quota in non-journalled quota case - journalled quota has
	 * no dirty dquots
	 */
	dquot_writeback_dquots(sb, -1);
	/*
	 * Data writeback is possible w/o journal transaction, so barrier must
	 * being sent at the end of the function. But we can skip it if
	 * transaction_commit will do it for us.
	 */
	target = jbd2_get_latest_transaction(sbi->s_journal);
	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
	    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
		needs_barrier = true;

	/* Kick off the commit; only wait for it when asked to. */
	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
		if (wait)
			ret = jbd2_log_wait_commit(sbi->s_journal, target);
	}
	if (needs_barrier) {
		int err;
		/* Explicit device cache flush since the commit skipped it. */
		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
		if (!ret)
			ret = err;
	}

	return ret;
}
4993
4994static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
4995{
4996        int ret = 0;
4997
4998        trace_ext4_sync_fs(sb, wait);
4999        flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5000        dquot_writeback_dquots(sb, -1);
5001        if (wait && test_opt(sb, BARRIER))
5002                ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
5003
5004        return ret;
5005}
5006
5007/*
5008 * LVM calls this function before a (read-only) snapshot is created.  This
5009 * gives us a chance to flush the journal completely and mark the fs clean.
5010 *
5011 * Note that only this function cannot bring a filesystem to be in a clean
5012 * state independently. It relies on upper layer to stop all data & metadata
5013 * modifications.
5014 */
5015static int ext4_freeze(struct super_block *sb)
5016{
5017        int error = 0;
5018        journal_t *journal;
5019
5020        if (sb->s_flags & MS_RDONLY)
5021                return 0;
5022
5023        journal = EXT4_SB(sb)->s_journal;
5024
5025        /* Now we set up the journal barrier. */
5026        jbd2_journal_lock_updates(journal);
5027
5028        /*
5029         * Don't clear the needs_recovery flag if we failed to flush
5030         * the journal.
5031         */
5032        error = jbd2_journal_flush(journal);
5033        if (error < 0)
5034                goto out;
5035
5036        /* Journal blocked and flushed, clear needs_recovery flag. */
5037        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
5038        error = ext4_commit_super(sb, 1);
5039out:
5040        /* we rely on upper layer to stop further updates */
5041        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5042        return error;
5043}
5044
5045/*
5046 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
5047 * flag here, even though the filesystem is not technically dirty yet.
5048 */
5049static int ext4_unfreeze(struct super_block *sb)
5050{
5051        if (sb->s_flags & MS_RDONLY)
5052                return 0;
5053
5054        /* Reset the needs_recovery flag before the fs is unlocked. */
5055        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
5056        ext4_commit_super(sb, 1);
5057        return 0;
5058}
5059
/*
 * Structure to save mount options for ext4_remount's benefit
 */
struct ext4_mount_options {
	unsigned long s_mount_opt;		/* saved sbi->s_mount_opt */
	unsigned long s_mount_opt2;		/* saved sbi->s_mount_opt2 */
	kuid_t s_resuid;			/* saved reserved-blocks uid */
	kgid_t s_resgid;			/* saved reserved-blocks gid */
	unsigned long s_commit_interval;	/* saved journal commit interval */
	u32 s_min_batch_time, s_max_batch_time;	/* saved journal batch times */
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;			/* saved journalled-quota format */
	char *s_qf_names[MAXQUOTAS];		/* kstrdup'd quota file names */
#endif
};
5075
5076static int ext4_remount(struct super_block *sb, int *flags, char *data)
5077{
5078        struct ext4_super_block *es;
5079        struct ext4_sb_info *sbi = EXT4_SB(sb);
5080        unsigned long old_sb_flags;
5081        struct ext4_mount_options old_opts;
5082        int enable_quota = 0;
5083        ext4_group_t g;
5084        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5085        int err = 0;
5086#ifdef CONFIG_QUOTA
5087        int i, j;
5088#endif
5089        char *orig_data = kstrdup(data, GFP_KERNEL);
5090
5091        /* Store the original options */
5092        old_sb_flags = sb->s_flags;
5093        old_opts.s_mount_opt = sbi->s_mount_opt;
5094        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
5095        old_opts.s_resuid = sbi->s_resuid;
5096        old_opts.s_resgid = sbi->s_resgid;
5097        old_opts.s_commit_interval = sbi->s_commit_interval;
5098        old_opts.s_min_batch_time = sbi->s_min_batch_time;
5099        old_opts.s_max_batch_time = sbi->s_max_batch_time;
5100#ifdef CONFIG_QUOTA
5101        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
5102        for (i = 0; i < MAXQUOTAS; i++)
5103                if (sbi->s_qf_names[i]) {
5104                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
5105                                                         GFP_KERNEL);
5106                        if (!old_opts.s_qf_names[i]) {
5107                                for (j = 0; j < i; j++)
5108                                        kfree(old_opts.s_qf_names[j]);
5109                                kfree(orig_data);
5110                                return -ENOMEM;
5111                        }
5112                } else
5113                        old_opts.s_qf_names[i] = NULL;
5114#endif
5115        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
5116                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
5117
5118        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
5119                err = -EINVAL;
5120                goto restore_opts;
5121        }
5122
5123        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
5124            test_opt(sb, JOURNAL_CHECKSUM)) {
5125                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
5126                         "during remount not supported; ignoring");
5127                sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
5128        }
5129
5130        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
5131                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
5132                        ext4_msg(sb, KERN_ERR, "can't mount with "
5133                                 "both data=journal and delalloc");
5134                        err = -EINVAL;
5135                        goto restore_opts;
5136                }
5137                if (test_opt(sb, DIOREAD_NOLOCK)) {
5138                        ext4_msg(sb, KERN_ERR, "can't mount with "
5139                                 "both data=journal and dioread_nolock");
5140                        err = -EINVAL;
5141                        goto restore_opts;
5142                }
5143                if (test_opt(sb, DAX)) {
5144                        ext4_msg(sb, KERN_ERR, "can't mount with "
5145                                 "both data=journal and dax");
5146                        err = -EINVAL;
5147                        goto restore_opts;
5148                }
5149        }
5150
5151        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
5152                ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
5153                        "dax flag with busy inodes while remounting");
5154                sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
5155        }
5156
5157        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
5158                ext4_abort(sb, "Abort forced by user");
5159
5160        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5161                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
5162
5163        es = sbi->s_es;
5164
5165        if (sbi->s_journal) {
5166                ext4_init_journal_params(sb, sbi->s_journal);
5167                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
5168        }
5169
5170        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5171                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
5172                        err = -EROFS;
5173                        goto restore_opts;
5174                }
5175
5176                if (*flags & MS_RDONLY) {
5177                        err = dquot_suspend(sb, -1);
5178                        if (err < 0)
5179                                goto restore_opts;
5180
5181                        /*
5182                         * First of all, the unconditional stuff we have to do
5183                         * to disable replay of the journal when we next remount
5184                         */
5185                        sb->s_flags |= MS_RDONLY;
5186
5187                        /*
5188                         * OK, test if we are remounting a valid rw partition
5189                         * readonly, and if so set the rdonly flag and then
5190                         * mark the partition as valid again.
5191                         */
5192                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5193                            (sbi->s_mount_state & EXT4_VALID_FS))
5194                                es->s_state = cpu_to_le16(sbi->s_mount_state);
5195
5196                        if (sbi->s_journal)
5197                                ext4_mark_recovery_complete(sb, es);
5198                } else {
5199                        /* Make sure we can mount this feature set readwrite */
5200                        if (!ext4_feature_set_ok(sb, 0)) {
5201                                err = -EROFS;
5202                                goto restore_opts;
5203                        }
5204                        /*
5205                         * Make sure the group descriptor checksums
5206                         * are sane.  If they aren't, refuse to remount r/w.
5207                         */
5208                        for (g = 0; g < sbi->s_groups_count; g++) {
5209                                struct ext4_group_desc *gdp =
5210                                        ext4_get_group_desc(sb, g, NULL);
5211
5212                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5213                                        ext4_msg(sb, KERN_ERR,
5214               "ext4_remount: Checksum for group %u failed (%u!=%u)",
5215                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
5216                                               le16_to_cpu(gdp->bg_checksum));
5217                                        err = -EINVAL;
5218                                        goto restore_opts;
5219                                }
5220                        }
5221
5222                        /*
5223                         * If we have an unprocessed orphan list hanging
5224                         * around from a previously readonly bdev mount,
5225                         * require a full umount/remount for now.
5226                         */
5227                        if (es->s_last_orphan) {
5228                                ext4_msg(sb, KERN_WARNING, "Couldn't "
5229                                       "remount RDWR because of unprocessed "
5230                                       "orphan inode list.  Please "
5231                                       "umount/remount instead");
5232                                err = -EINVAL;
5233                                goto restore_opts;
5234                        }
5235
5236                        /*
5237                         * Mounting a RDONLY partition read-write, so reread
5238                         * and store the current valid flag.  (It may have
5239                         * been changed by e2fsck since we originally mounted
5240                         * the partition.)
5241                         */
5242                        if (sbi->s_journal)
5243                                ext4_clear_journal_err(sb, es);
5244                        sbi->s_mount_state = le16_to_cpu(es->s_state);
5245                        if (!ext4_setup_super(sb, es, 0))
5246                                sb->s_flags &= ~MS_RDONLY;
5247                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
5248                                                     EXT4_FEATURE_INCOMPAT_MMP))
5249                                if (ext4_multi_mount_protect(sb,
5250                                                le64_to_cpu(es->s_mmp_block))) {
5251                                        err = -EROFS;
5252                                        goto restore_opts;
5253                                }
5254                        enable_quota = 1;
5255                }
5256        }
5257
5258        /*
5259         * Reinitialize lazy itable initialization thread based on
5260         * current settings
5261         */
5262        if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
5263                ext4_unregister_li_request(sb);
5264        else {
5265                ext4_group_t first_not_zeroed;
5266                first_not_zeroed = ext4_has_uninit_itable(sb);
5267                ext4_register_li_request(sb, first_not_zeroed);
5268        }
5269
5270        ext4_setup_system_zone(sb);
5271        if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
5272                ext4_commit_super(sb, 1);
5273
5274#ifdef CONFIG_QUOTA
5275        /* Release old quota file names */
5276        for (i = 0; i < MAXQUOTAS; i++)
5277                kfree(old_opts.s_qf_names[i]);
5278        if (enable_quota) {
5279                if (sb_any_quota_suspended(sb))
5280                        dquot_resume(sb, -1);
5281                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5282                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
5283                        err = ext4_enable_quotas(sb);
5284                        if (err)
5285                                goto restore_opts;
5286                }
5287        }
5288#endif
5289
5290        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5291        kfree(orig_data);
5292        return 0;
5293
5294restore_opts:
5295        sb->s_flags = old_sb_flags;
5296        sbi->s_mount_opt = old_opts.s_mount_opt;
5297        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5298        sbi->s_resuid = old_opts.s_resuid;
5299        sbi->s_resgid = old_opts.s_resgid;
5300        sbi->s_commit_interval = old_opts.s_commit_interval;
5301        sbi->s_min_batch_time = old_opts.s_min_batch_time;
5302        sbi->s_max_batch_time = old_opts.s_max_batch_time;
5303#ifdef CONFIG_QUOTA
5304        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5305        for (i = 0; i < MAXQUOTAS; i++) {
5306                kfree(sbi->s_qf_names[i]);
5307                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5308        }
5309#endif
5310        kfree(orig_data);
5311        return err;
5312}
5313
5314static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5315{
5316        struct super_block *sb = dentry->d_sb;
5317        struct ext4_sb_info *sbi = EXT4_SB(sb);
5318        struct ext4_super_block *es = sbi->s_es;
5319        ext4_fsblk_t overhead = 0, resv_blocks;
5320        u64 fsid;
5321        s64 bfree;
5322        resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5323
5324        if (!test_opt(sb, MINIX_DF))
5325                overhead = sbi->s_overhead;
5326
5327        buf->f_type = EXT4_SUPER_MAGIC;
5328        buf->f_bsize = sb->s_blocksize;
5329        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5330        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5331                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5332        /* prevent underflow in case that few free space is available */
5333        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5334        buf->f_bavail = buf->f_bfree -
5335                        (ext4_r_blocks_count(es) + resv_blocks);
5336        if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5337                buf->f_bavail = 0;
5338        buf->f_files = le32_to_cpu(es->s_inodes_count);
5339        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5340        buf->f_namelen = EXT4_NAME_LEN;
5341        fsid = le64_to_cpup((void *)es->s_uuid) ^
5342               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5343        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5344        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5345
5346        return 0;
5347}
5348
/* Helper function for writing quotas on sync - we need to start transaction
 * before quota file is locked for write. Otherwise there are possible deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                  write_dquot()
 *   dquot_initialize()                         down(dqio_mutex)
 *     down(dqio_mutex)                    jbd2_journal_start()
 *
 */
5358
5359#ifdef CONFIG_QUOTA
5360
5361static inline struct inode *dquot_to_inode(struct dquot *dquot)
5362{
5363        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5364}
5365
5366static int ext4_write_dquot(struct dquot *dquot)
5367{
5368        int ret, err;
5369        handle_t *handle;
5370        struct inode *inode;
5371
5372        inode = dquot_to_inode(dquot);
5373        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5374                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5375        if (IS_ERR(handle))
5376                return PTR_ERR(handle);
5377        ret = dquot_commit(dquot);
5378        err = ext4_journal_stop(handle);
5379        if (!ret)
5380                ret = err;
5381        return ret;
5382}
5383
5384static int ext4_acquire_dquot(struct dquot *dquot)
5385{
5386        int ret, err;
5387        handle_t *handle;
5388
5389        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5390                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5391        if (IS_ERR(handle))
5392                return PTR_ERR(handle);
5393        ret = dquot_acquire(dquot);
5394        err = ext4_journal_stop(handle);
5395        if (!ret)
5396                ret = err;
5397        return ret;
5398}
5399
5400static int ext4_release_dquot(struct dquot *dquot)
5401{
5402        int ret, err;
5403        handle_t *handle;
5404
5405        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5406                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5407        if (IS_ERR(handle)) {
5408                /* Release dquot anyway to avoid endless cycle in dqput() */
5409                dquot_release(dquot);
5410                return PTR_ERR(handle);
5411        }
5412        ret = dquot_release(dquot);
5413        err = ext4_journal_stop(handle);
5414        if (!ret)
5415                ret = err;
5416        return ret;
5417}
5418
5419static int ext4_mark_dquot_dirty(struct dquot *dquot)
5420{
5421        struct super_block *sb = dquot->dq_sb;
5422        struct ext4_sb_info *sbi = EXT4_SB(sb);
5423
5424        /* Are we journaling quotas? */
5425        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
5426            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5427                dquot_mark_dquot_dirty(dquot);
5428                return ext4_write_dquot(dquot);
5429        } else {
5430                return dquot_mark_dquot_dirty(dquot);
5431        }
5432}
5433
5434static int ext4_write_info(struct super_block *sb, int type)
5435{
5436        int ret, err;
5437        handle_t *handle;
5438
5439        /* Data block + inode block */
5440        handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
5441        if (IS_ERR(handle))
5442                return PTR_ERR(handle);
5443        ret = dquot_commit_info(sb, type);
5444        err = ext4_journal_stop(handle);
5445        if (!ret)
5446                ret = err;
5447        return ret;
5448}
5449
5450/*
5451 * Turn on quotas during mount time - we need to find
5452 * the quota file and such...
5453 */
5454static int ext4_quota_on_mount(struct super_block *sb, int type)
5455{
5456        return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5457                                        EXT4_SB(sb)->s_jquota_fmt, type);
5458}
5459
/*
 * Re-key the lockdep class of an inode's i_data_sem so that lockdep can
 * distinguish quota-file inodes (I_DATA_SEM_QUOTA) from regular inodes
 * (I_DATA_SEM_NORMAL); callers flip the subclass when quota is turned
 * on or off for the inode.
 */
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* The first argument of lockdep_set_subclass has to be
	 * *exactly* the same as the argument to init_rwsem() --- in
	 * this case, in init_once() --- or lockdep gets unhappy
	 * because the name of the lock is set using the
	 * stringification of the argument to init_rwsem().
	 */
	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&ei->i_data_sem, subclass);
}
5473
/*
 * Standard function to be called on quota_on
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem? */
	if (path->dentry->d_sb != sb)
		return -EXDEV;
	/* Journaling quota? */
	if (EXT4_SB(sb)->s_qf_names[type]) {
		/* Quotafile not in fs root? */
		if (path->dentry->d_parent != sb->s_root)
			ext4_msg(sb, KERN_WARNING,
				"Quota file not on filesystem root. "
				"Journaled quota will not work");
	}

	/*
	 * When we journal data on quota file, we have to flush journal to see
	 * all updates to the file when we bypass pagecache...
	 */
	if (EXT4_SB(sb)->s_journal &&
	    ext4_should_journal_data(path->dentry->d_inode)) {
		/*
		 * We don't need to lock updates but journal_flush() could
		 * otherwise be livelocked...
		 */
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		if (err)
			return err;
	}

	/* Re-key i_data_sem for lockdep before dquot code starts taking it
	 * in quota-file lock order; undo on failure. */
	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
	err = dquot_quota_on(sb, type, format_id, path);
	if (err) {
		lockdep_set_quota_inode(path->dentry->d_inode,
					     I_DATA_SEM_NORMAL);
	} else {
		struct inode *inode = d_inode(path->dentry);
		handle_t *handle;

		/*
		 * Mark the quota file noatime and immutable so it cannot be
		 * modified behind quota's back.  This part is best-effort:
		 * if starting the handle fails the flags simply stay unset
		 * and err (0 from the successful dquot_quota_on) is kept.
		 */
		inode_lock(inode);
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
		if (IS_ERR(handle))
			goto unlock_inode;
		EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
		inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
				S_NOATIME | S_IMMUTABLE);
		ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	unlock_inode:
		inode_unlock(inode);
	}
	return err;
}
5537
5538static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5539                             unsigned int flags)
5540{
5541        int err;
5542        struct inode *qf_inode;
5543        unsigned long qf_inums[MAXQUOTAS] = {
5544                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5545                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5546        };
5547
5548        BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
5549
5550        if (!qf_inums[type])
5551                return -EPERM;
5552
5553        qf_inode = ext4_iget(sb, qf_inums[type]);
5554        if (IS_ERR(qf_inode)) {
5555                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5556                return PTR_ERR(qf_inode);
5557        }
5558
5559        /* Don't account quota for quota files to avoid recursion */
5560        qf_inode->i_flags |= S_NOQUOTA;
5561        lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
5562        err = dquot_enable(qf_inode, type, format_id, flags);
5563        if (err)
5564                lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
5565        iput(qf_inode);
5566
5567        return err;
5568}
5569
5570/* Enable usage tracking for all quota types. */
5571static int ext4_enable_quotas(struct super_block *sb)
5572{
5573        int type, err = 0;
5574        unsigned long qf_inums[MAXQUOTAS] = {
5575                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5576                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5577        };
5578
5579        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5580        for (type = 0; type < MAXQUOTAS; type++) {
5581                if (qf_inums[type]) {
5582                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5583                                                DQUOT_USAGE_ENABLED);
5584                        if (err) {
5585                                for (type--; type >= 0; type--)
5586                                        dquot_quota_off(sb, type);
5587
5588                                ext4_warning(sb,
5589                                        "Failed to enable quota tracking "
5590                                        "(type=%d, err=%d). Please run "
5591                                        "e2fsck to fix.", type, err);
5592                                return err;
5593                        }
5594                }
5595        }
5596        return 0;
5597}
5598
5599/*
5600 * quota_on function that is used when QUOTA feature is set.
5601 */
5602static int ext4_quota_on_sysfile(struct super_block *sb, int type,
5603                                 int format_id)
5604{
5605        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5606                return -EINVAL;
5607
5608        /*
5609         * USAGE was enabled at mount time. Only need to enable LIMITS now.
5610         */
5611        return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
5612}
5613
5614static int ext4_quota_off(struct super_block *sb, int type)
5615{
5616        struct inode *inode = sb_dqopt(sb)->files[type];
5617        handle_t *handle;
5618        int err;
5619
5620        /* Force all delayed allocation blocks to be allocated.
5621         * Caller already holds s_umount sem */
5622        if (test_opt(sb, DELALLOC))
5623                sync_filesystem(sb);
5624
5625        if (!inode || !igrab(inode))
5626                goto out;
5627
5628        err = dquot_quota_off(sb, type);
5629        if (err || EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5630                goto out_put;
5631
5632        inode_lock(inode);
5633        /* Update modification times of quota files when userspace can
5634         * start looking at them */
5635        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5636        if (IS_ERR(handle))
5637                goto out_unlock;
5638        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
5639        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
5640        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5641        ext4_mark_inode_dirty(handle, inode);
5642        ext4_journal_stop(handle);
5643out_unlock:
5644        inode_unlock(inode);
5645out_put:
5646        lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
5647        iput(inode);
5648        return err;
5649out:
5650        return dquot_quota_off(sb, type);
5651}
5652
5653/*
5654 * quota_off function that is used when QUOTA feature is set.
5655 */
5656static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5657{
5658        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5659                return -EINVAL;
5660
5661        /* Disable only the limits. */
5662        return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5663}
5664
5665/* Read data from quotafile - avoid pagecache and such because we cannot afford
5666 * acquiring the locks... As quota files are never truncated and quota code
5667 * itself serializes the operations (and no one else should touch the files)
5668 * we don't have to be afraid of races */
5669static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5670                               size_t len, loff_t off)
5671{
5672        struct inode *inode = sb_dqopt(sb)->files[type];
5673        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5674        int err = 0;
5675        int offset = off & (sb->s_blocksize - 1);
5676        int tocopy;
5677        size_t toread;
5678        struct buffer_head *bh;
5679        loff_t i_size = i_size_read(inode);
5680
5681        if (off > i_size)
5682                return 0;
5683        if (off+len > i_size)
5684                len = i_size-off;
5685        toread = len;
5686        while (toread > 0) {
5687                tocopy = sb->s_blocksize - offset < toread ?
5688                                sb->s_blocksize - offset : toread;
5689                bh = ext4_bread(NULL, inode, blk, 0, &err);
5690                if (err)
5691                        return err;
5692                if (!bh)        /* A hole? */
5693                        memset(data, 0, tocopy);
5694                else
5695                        memcpy(data, bh->b_data+offset, tocopy);
5696                brelse(bh);
5697                offset = 0;
5698                toread -= tocopy;
5699                data += tocopy;
5700                blk++;
5701        }
5702        return len;
5703}
5704
/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0;
	int offset = off & (sb->s_blocksize - 1);
	int retries = 0;
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	/* On a journaled fs, refuse to write outside a running transaction */
	if (EXT4_SB(sb)->s_journal && !handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * then it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	/* Map (and if needed allocate) the target block; retry allocation
	 * on ENOSPC in case a journal commit frees some space */
	do {
		bh = ext4_bread(handle, inode, blk,
				EXT4_GET_BLOCKS_CREATE |
				EXT4_GET_BLOCKS_METADATA_NOFAIL,
				&err);
	} while (!bh && (err == -ENOSPC) &&
		 ext4_should_retry_alloc(inode->i_sb, &retries));
	if (!bh)
		goto out;
	/* Get journal write access before modifying the buffer */
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		goto out;
	}
	lock_buffer(bh);
	memcpy(bh->b_data+offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	if (err)
		return err;
	/* Extend the quota file's size if we wrote past EOF */
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		ext4_mark_inode_dirty(handle, inode);
	}
	return len;
}
5766
5767#endif
5768
/*
 * Mount entry point for the "ext4" filesystem type: delegate to the
 * generic block-device mount helper with ext4_fill_super.
 */
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
5774
/*
 * ext2 compatibility: when the ext2 driver is not built but
 * CONFIG_EXT4_USE_FOR_EXT23 is set, ext4 registers itself as "ext2"
 * so ext2 filesystems can still be mounted.  Otherwise these helpers
 * become no-ops and ext2_feature_set_ok() always rejects.
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

/* Check that the on-disk feature set fits within what ext2 supports;
 * read-only mounts only need the incompat features to match. */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
		return 0;
	return 1;
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5804
/*
 * ext3 compatibility: as above, but for "ext3".  An ext3 mount
 * additionally requires the HAS_JOURNAL compat feature.
 */
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext3(void)
{
	int err = register_filesystem(&ext3_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

/* Check that the on-disk feature set fits within what ext3 supports;
 * a journal must be present, and read-only mounts skip the ro-compat
 * feature check. */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
		return 0;
	return 1;
}
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
#endif
5836
/* The "ext4" filesystem type registered with the VFS. */
static struct file_system_type ext4_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext4",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_INVALIDATE_RANGE |
			  FS_HAS_DIO_IODONE2,
};
MODULE_ALIAS_FS("ext4");
5846
5847static int __init ext4_init_feat_adverts(void)
5848{
5849        struct ext4_features *ef;
5850        int ret = -ENOMEM;
5851
5852        ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5853        if (!ef)
5854                goto out;
5855
5856        ef->f_kobj.kset = ext4_kset;
5857        init_completion(&ef->f_kobj_unregister);
5858        ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5859                                   "features");
5860        if (ret) {
5861                kfree(ef);
5862                goto out;
5863        }
5864
5865        ext4_feat = ef;
5866        ret = 0;
5867out:
5868        return ret;
5869}
5870
5871static void ext4_exit_feat_adverts(void)
5872{
5873        kobject_put(&ext4_feat->f_kobj);
5874        wait_for_completion(&ext4_feat->f_kobj_unregister);
5875        kfree(ext4_feat);
5876}
5877
/* Shared across all ext4 file systems: hashed wait queues for I/O-end
 * waiters and per-hash-bucket AIO mutexes, initialized in
 * ext4_init_fs(). */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5881
/*
 * Module init: set up all ext4 subsystems in order (extent-status
 * cache, page I/O, block-validity zone, sysfs kset, procfs dir,
 * feature adverts, mballoc, xattr, inode cache), then register the
 * filesystem types.  On any failure, tear down in exact reverse order
 * via the numbered labels.
 */
static int __init ext4_init_fs(void)
{
	int i, err;

	ext4_li_info = NULL;
	mutex_init(&ext4_li_mtx);

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
		mutex_init(&ext4__aio_mutex[i]);
		init_waitqueue_head(&ext4__ioend_wq[i]);
	}

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pageio();
	if (err)
		goto out7;

	err = ext4_init_system_zone();
	if (err)
		goto out6;
	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
	if (!ext4_kset) {
		err = -ENOMEM;
		goto out5;
	}
	/* proc_mkdir() may fail; the NULL result is tolerated and checked
	 * again on the unwind path */
	ext4_proc_root = proc_mkdir("fs/ext4", NULL);

	err = ext4_init_feat_adverts();
	if (err)
		goto out4;

	err = ext4_init_mballoc();
	if (err)
		goto out3;

	err = ext4_init_xattr();
	if (err)
		goto out2;
	err = init_inodecache();
	if (err)
		goto out1;
	err = register_fo_extend(&ext4_file_operations);
	if (err)
		goto out_inodecache;
	/* Register compat aliases first so a failure of the real "ext4"
	 * registration unwinds them too */
	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_fo_extend(&ext4_file_operations);
out_inodecache:
	destroy_inodecache();
out1:
	ext4_exit_xattr();
out2:
	ext4_exit_mballoc();
out3:
	ext4_exit_feat_adverts();
out4:
	if (ext4_proc_root)
		remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
out5:
	ext4_exit_system_zone();
out6:
	ext4_exit_pageio();
out7:
	ext4_exit_es();

	return err;
}
5964
/*
 * Module exit: stop the lazy-init thread, then tear everything down in
 * the reverse of the order ext4_init_fs() set it up.
 */
static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	unregister_fo_extend(&ext4_file_operations);
	destroy_inodecache();
	ext4_exit_xattr();
	ext4_exit_mballoc();
	ext4_exit_feat_adverts();
	remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_es();
}
5982
/* Module metadata and init/exit entry points */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)
5988