linux/fs/ext4/super.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/slab.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/backing-dev.h>
  28#include <linux/parser.h>
  29#include <linux/buffer_head.h>
  30#include <linux/exportfs.h>
  31#include <linux/vfs.h>
  32#include <linux/random.h>
  33#include <linux/mount.h>
  34#include <linux/namei.h>
  35#include <linux/quotaops.h>
  36#include <linux/seq_file.h>
  37#include <linux/proc_fs.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/cleancache.h>
  42#include <asm/uaccess.h>
  43
  44#include <linux/kthread.h>
  45#include <linux/freezer.h>
  46
  47#include "ext4.h"
  48#include "ext4_extents.h"       /* Needed for trace points definition */
  49#include "ext4_jbd2.h"
  50#include "xattr.h"
  51#include "acl.h"
  52#include "mballoc.h"
  53
  54#define CREATE_TRACE_POINTS
  55#include <trace/events/ext4.h>
  56
  57static struct proc_dir_entry *ext4_proc_root;
  58static struct kset *ext4_kset;
  59static struct ext4_lazy_init *ext4_li_info;
  60static struct mutex ext4_li_mtx;
  61static struct ext4_features *ext4_feat;
  62static int ext4_mballoc_ready;
  63static struct ratelimit_state ext4_mount_msg_ratelimit;
  64
  65static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  66                             unsigned long journal_devnum);
  67static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  68static int ext4_commit_super(struct super_block *sb, int sync);
  69static void ext4_mark_recovery_complete(struct super_block *sb,
  70                                        struct ext4_super_block *es);
  71static void ext4_clear_journal_err(struct super_block *sb,
  72                                   struct ext4_super_block *es);
  73static int ext4_sync_fs(struct super_block *sb, int wait);
  74static int ext4_remount(struct super_block *sb, int *flags, char *data);
  75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  76static int ext4_unfreeze(struct super_block *sb);
  77static int ext4_freeze(struct super_block *sb);
  78static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
  79                       const char *dev_name, void *data);
  80static inline int ext2_feature_set_ok(struct super_block *sb);
  81static inline int ext3_feature_set_ok(struct super_block *sb);
  82static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  83static void ext4_destroy_lazyinit_thread(void);
  84static void ext4_unregister_li_request(struct super_block *sb);
  85static void ext4_clear_request_list(void);
  86static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
  87
  88#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
  89static struct file_system_type ext2_fs_type = {
  90        .owner          = THIS_MODULE,
  91        .name           = "ext2",
  92        .mount          = ext4_mount,
  93        .kill_sb        = kill_block_super,
  94        .fs_flags       = FS_REQUIRES_DEV,
  95};
  96MODULE_ALIAS_FS("ext2");
  97MODULE_ALIAS("ext2");
  98#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
  99#else
 100#define IS_EXT2_SB(sb) (0)
 101#endif
 102
 103
 104static struct file_system_type ext3_fs_type = {
 105        .owner          = THIS_MODULE,
 106        .name           = "ext3",
 107        .mount          = ext4_mount,
 108        .kill_sb        = kill_block_super,
 109        .fs_flags       = FS_REQUIRES_DEV,
 110};
 111MODULE_ALIAS_FS("ext3");
 112MODULE_ALIAS("ext3");
 113#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 114
 115static int ext4_verify_csum_type(struct super_block *sb,
 116                                 struct ext4_super_block *es)
 117{
 118        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 119                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 120                return 1;
 121
 122        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 123}
 124
 125static __le32 ext4_superblock_csum(struct super_block *sb,
 126                                   struct ext4_super_block *es)
 127{
 128        struct ext4_sb_info *sbi = EXT4_SB(sb);
 129        int offset = offsetof(struct ext4_super_block, s_checksum);
 130        __u32 csum;
 131
 132        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 133
 134        return cpu_to_le32(csum);
 135}
 136
 137static int ext4_superblock_csum_verify(struct super_block *sb,
 138                                       struct ext4_super_block *es)
 139{
 140        if (!ext4_has_metadata_csum(sb))
 141                return 1;
 142
 143        return es->s_checksum == ext4_superblock_csum(sb, es);
 144}
 145
 146void ext4_superblock_csum_set(struct super_block *sb)
 147{
 148        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 149
 150        if (!ext4_has_metadata_csum(sb))
 151                return;
 152
 153        es->s_checksum = ext4_superblock_csum(sb, es);
 154}
 155
 156void *ext4_kvmalloc(size_t size, gfp_t flags)
 157{
 158        void *ret;
 159
 160        ret = kmalloc(size, flags | __GFP_NOWARN);
 161        if (!ret)
 162                ret = __vmalloc(size, flags, PAGE_KERNEL);
 163        return ret;
 164}
 165
 166void *ext4_kvzalloc(size_t size, gfp_t flags)
 167{
 168        void *ret;
 169
 170        ret = kzalloc(size, flags | __GFP_NOWARN);
 171        if (!ret)
 172                ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 173        return ret;
 174}
 175
 176ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 177                               struct ext4_group_desc *bg)
 178{
 179        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 180                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 181                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 182}
 183
 184ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 185                               struct ext4_group_desc *bg)
 186{
 187        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 188                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 189                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 190}
 191
 192ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 193                              struct ext4_group_desc *bg)
 194{
 195        return le32_to_cpu(bg->bg_inode_table_lo) |
 196                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 197                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 198}
 199
 200__u32 ext4_free_group_clusters(struct super_block *sb,
 201                               struct ext4_group_desc *bg)
 202{
 203        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 204                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 205                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 206}
 207
 208__u32 ext4_free_inodes_count(struct super_block *sb,
 209                              struct ext4_group_desc *bg)
 210{
 211        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 212                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 213                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 214}
 215
 216__u32 ext4_used_dirs_count(struct super_block *sb,
 217                              struct ext4_group_desc *bg)
 218{
 219        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 220                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 221                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 222}
 223
 224__u32 ext4_itable_unused_count(struct super_block *sb,
 225                              struct ext4_group_desc *bg)
 226{
 227        return le16_to_cpu(bg->bg_itable_unused_lo) |
 228                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 229                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 230}
 231
 232void ext4_block_bitmap_set(struct super_block *sb,
 233                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 234{
 235        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 236        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 237                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 238}
 239
 240void ext4_inode_bitmap_set(struct super_block *sb,
 241                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 242{
 243        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 244        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 245                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 246}
 247
 248void ext4_inode_table_set(struct super_block *sb,
 249                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 250{
 251        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 252        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 253                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 254}
 255
 256void ext4_free_group_clusters_set(struct super_block *sb,
 257                                  struct ext4_group_desc *bg, __u32 count)
 258{
 259        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 260        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 261                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 262}
 263
 264void ext4_free_inodes_set(struct super_block *sb,
 265                          struct ext4_group_desc *bg, __u32 count)
 266{
 267        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 268        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 269                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 270}
 271
 272void ext4_used_dirs_set(struct super_block *sb,
 273                          struct ext4_group_desc *bg, __u32 count)
 274{
 275        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 276        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 277                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 278}
 279
 280void ext4_itable_unused_set(struct super_block *sb,
 281                          struct ext4_group_desc *bg, __u32 count)
 282{
 283        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 284        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 285                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 286}
 287
 288
 289static void __save_error_info(struct super_block *sb, const char *func,
 290                            unsigned int line)
 291{
 292        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 293
 294        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 295        if (bdev_read_only(sb->s_bdev))
 296                return;
 297        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 298        es->s_last_error_time = cpu_to_le32(get_seconds());
 299        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 300        es->s_last_error_line = cpu_to_le32(line);
 301        if (!es->s_first_error_time) {
 302                es->s_first_error_time = es->s_last_error_time;
 303                strncpy(es->s_first_error_func, func,
 304                        sizeof(es->s_first_error_func));
 305                es->s_first_error_line = cpu_to_le32(line);
 306                es->s_first_error_ino = es->s_last_error_ino;
 307                es->s_first_error_block = es->s_last_error_block;
 308        }
 309        /*
 310         * Start the daily error reporting function if it hasn't been
 311         * started already
 312         */
 313        if (!es->s_error_count)
 314                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 315        le32_add_cpu(&es->s_error_count, 1);
 316}
 317
 318static void save_error_info(struct super_block *sb, const char *func,
 319                            unsigned int line)
 320{
 321        __save_error_info(sb, func, line);
 322        ext4_commit_super(sb, 1);
 323}
 324
 325/*
 326 * The del_gendisk() function uninitializes the disk-specific data
 327 * structures, including the bdi structure, without telling anyone
 328 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 329 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 330 * This is a kludge to prevent these oops until we can put in a proper
 331 * hook in del_gendisk() to inform the VFS and file system layers.
 332 */
 333static int block_device_ejected(struct super_block *sb)
 334{
 335        struct inode *bd_inode = sb->s_bdev->bd_inode;
 336        struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 337
 338        return bdi->dev == NULL;
 339}
 340
 341static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 342{
 343        struct super_block              *sb = journal->j_private;
 344        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 345        int                             error = is_journal_aborted(journal);
 346        struct ext4_journal_cb_entry    *jce;
 347
 348        BUG_ON(txn->t_state == T_FINISHED);
 349        spin_lock(&sbi->s_md_lock);
 350        while (!list_empty(&txn->t_private_list)) {
 351                jce = list_entry(txn->t_private_list.next,
 352                                 struct ext4_journal_cb_entry, jce_list);
 353                list_del_init(&jce->jce_list);
 354                spin_unlock(&sbi->s_md_lock);
 355                jce->jce_func(sb, jce, error);
 356                spin_lock(&sbi->s_md_lock);
 357        }
 358        spin_unlock(&sbi->s_md_lock);
 359}
 360
 361/* Deal with the reporting of failure conditions on a filesystem such as
 362 * inconsistencies detected or read IO failures.
 363 *
 364 * On ext2, we can store the error state of the filesystem in the
 365 * superblock.  That is not possible on ext4, because we may have other
 366 * write ordering constraints on the superblock which prevent us from
 367 * writing it out straight away; and given that the journal is about to
 368 * be aborted, we can't rely on the current, or future, transactions to
 369 * write out the superblock safely.
 370 *
 371 * We'll just use the jbd2_journal_abort() error code to record an error in
 372 * the journal instead.  On recovery, the journal will complain about
 373 * that error until we've noted it down and cleared it.
 374 */
 375
 376static void ext4_handle_error(struct super_block *sb)
 377{
 378        if (sb->s_flags & MS_RDONLY)
 379                return;
 380
 381        if (!test_opt(sb, ERRORS_CONT)) {
 382                journal_t *journal = EXT4_SB(sb)->s_journal;
 383
 384                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 385                if (journal)
 386                        jbd2_journal_abort(journal, -EIO);
 387        }
 388        if (test_opt(sb, ERRORS_RO)) {
 389                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 390                /*
 391                 * Make sure updated value of ->s_mount_flags will be visible
 392                 * before ->s_flags update
 393                 */
 394                smp_wmb();
 395                sb->s_flags |= MS_RDONLY;
 396        }
 397        if (test_opt(sb, ERRORS_PANIC))
 398                panic("EXT4-fs (device %s): panic forced after error\n",
 399                        sb->s_id);
 400}
 401
 402#define ext4_error_ratelimit(sb)                                        \
 403                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 404                             "EXT4-fs error")
 405
 406void __ext4_error(struct super_block *sb, const char *function,
 407                  unsigned int line, const char *fmt, ...)
 408{
 409        struct va_format vaf;
 410        va_list args;
 411
 412        if (ext4_error_ratelimit(sb)) {
 413                va_start(args, fmt);
 414                vaf.fmt = fmt;
 415                vaf.va = &args;
 416                printk(KERN_CRIT
 417                       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 418                       sb->s_id, function, line, current->comm, &vaf);
 419                va_end(args);
 420        }
 421        save_error_info(sb, function, line);
 422        ext4_handle_error(sb);
 423}
 424
 425void __ext4_error_inode(struct inode *inode, const char *function,
 426                        unsigned int line, ext4_fsblk_t block,
 427                        const char *fmt, ...)
 428{
 429        va_list args;
 430        struct va_format vaf;
 431        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 432
 433        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 434        es->s_last_error_block = cpu_to_le64(block);
 435        if (ext4_error_ratelimit(inode->i_sb)) {
 436                va_start(args, fmt);
 437                vaf.fmt = fmt;
 438                vaf.va = &args;
 439                if (block)
 440                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 441                               "inode #%lu: block %llu: comm %s: %pV\n",
 442                               inode->i_sb->s_id, function, line, inode->i_ino,
 443                               block, current->comm, &vaf);
 444                else
 445                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 446                               "inode #%lu: comm %s: %pV\n",
 447                               inode->i_sb->s_id, function, line, inode->i_ino,
 448                               current->comm, &vaf);
 449                va_end(args);
 450        }
 451        save_error_info(inode->i_sb, function, line);
 452        ext4_handle_error(inode->i_sb);
 453}
 454
 455void __ext4_error_file(struct file *file, const char *function,
 456                       unsigned int line, ext4_fsblk_t block,
 457                       const char *fmt, ...)
 458{
 459        va_list args;
 460        struct va_format vaf;
 461        struct ext4_super_block *es;
 462        struct inode *inode = file_inode(file);
 463        char pathname[80], *path;
 464
 465        es = EXT4_SB(inode->i_sb)->s_es;
 466        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 467        if (ext4_error_ratelimit(inode->i_sb)) {
 468                path = file_path(file, pathname, sizeof(pathname));
 469                if (IS_ERR(path))
 470                        path = "(unknown)";
 471                va_start(args, fmt);
 472                vaf.fmt = fmt;
 473                vaf.va = &args;
 474                if (block)
 475                        printk(KERN_CRIT
 476                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 477                               "block %llu: comm %s: path %s: %pV\n",
 478                               inode->i_sb->s_id, function, line, inode->i_ino,
 479                               block, current->comm, path, &vaf);
 480                else
 481                        printk(KERN_CRIT
 482                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 483                               "comm %s: path %s: %pV\n",
 484                               inode->i_sb->s_id, function, line, inode->i_ino,
 485                               current->comm, path, &vaf);
 486                va_end(args);
 487        }
 488        save_error_info(inode->i_sb, function, line);
 489        ext4_handle_error(inode->i_sb);
 490}
 491
 492const char *ext4_decode_error(struct super_block *sb, int errno,
 493                              char nbuf[16])
 494{
 495        char *errstr = NULL;
 496
 497        switch (errno) {
 498        case -EIO:
 499                errstr = "IO failure";
 500                break;
 501        case -ENOMEM:
 502                errstr = "Out of memory";
 503                break;
 504        case -EROFS:
 505                if (!sb || (EXT4_SB(sb)->s_journal &&
 506                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 507                        errstr = "Journal has aborted";
 508                else
 509                        errstr = "Readonly filesystem";
 510                break;
 511        default:
 512                /* If the caller passed in an extra buffer for unknown
 513                 * errors, textualise them now.  Else we just return
 514                 * NULL. */
 515                if (nbuf) {
 516                        /* Check for truncated error codes... */
 517                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 518                                errstr = nbuf;
 519                }
 520                break;
 521        }
 522
 523        return errstr;
 524}
 525
 526/* __ext4_std_error decodes expected errors from journaling functions
 527 * automatically and invokes the appropriate error response.  */
 528
 529void __ext4_std_error(struct super_block *sb, const char *function,
 530                      unsigned int line, int errno)
 531{
 532        char nbuf[16];
 533        const char *errstr;
 534
 535        /* Special case: if the error is EROFS, and we're not already
 536         * inside a transaction, then there's really no point in logging
 537         * an error. */
 538        if (errno == -EROFS && journal_current_handle() == NULL &&
 539            (sb->s_flags & MS_RDONLY))
 540                return;
 541
 542        if (ext4_error_ratelimit(sb)) {
 543                errstr = ext4_decode_error(sb, errno, nbuf);
 544                printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 545                       sb->s_id, function, line, errstr);
 546        }
 547
 548        save_error_info(sb, function, line);
 549        ext4_handle_error(sb);
 550}
 551
 552/*
 553 * ext4_abort is a much stronger failure handler than ext4_error.  The
 554 * abort function may be used to deal with unrecoverable failures such
 555 * as journal IO errors or ENOMEM at a critical moment in log management.
 556 *
 557 * We unconditionally force the filesystem into an ABORT|READONLY state,
 558 * unless the error response on the fs has been set to panic in which
 559 * case we take the easy way out and panic immediately.
 560 */
 561
 562void __ext4_abort(struct super_block *sb, const char *function,
 563                unsigned int line, const char *fmt, ...)
 564{
 565        va_list args;
 566
 567        save_error_info(sb, function, line);
 568        va_start(args, fmt);
 569        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
 570               function, line);
 571        vprintk(fmt, args);
 572        printk("\n");
 573        va_end(args);
 574
 575        if ((sb->s_flags & MS_RDONLY) == 0) {
 576                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 577                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 578                /*
 579                 * Make sure updated value of ->s_mount_flags will be visible
 580                 * before ->s_flags update
 581                 */
 582                smp_wmb();
 583                sb->s_flags |= MS_RDONLY;
 584                if (EXT4_SB(sb)->s_journal)
 585                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 586                save_error_info(sb, function, line);
 587        }
 588        if (test_opt(sb, ERRORS_PANIC))
 589                panic("EXT4-fs panic from previous error\n");
 590}
 591
 592void __ext4_msg(struct super_block *sb,
 593                const char *prefix, const char *fmt, ...)
 594{
 595        struct va_format vaf;
 596        va_list args;
 597
 598        if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
 599                return;
 600
 601        va_start(args, fmt);
 602        vaf.fmt = fmt;
 603        vaf.va = &args;
 604        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 605        va_end(args);
 606}
 607
 608#define ext4_warning_ratelimit(sb)                                      \
 609                ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
 610                             "EXT4-fs warning")
 611
 612void __ext4_warning(struct super_block *sb, const char *function,
 613                    unsigned int line, const char *fmt, ...)
 614{
 615        struct va_format vaf;
 616        va_list args;
 617
 618        if (!ext4_warning_ratelimit(sb))
 619                return;
 620
 621        va_start(args, fmt);
 622        vaf.fmt = fmt;
 623        vaf.va = &args;
 624        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 625               sb->s_id, function, line, &vaf);
 626        va_end(args);
 627}
 628
 629void __ext4_warning_inode(const struct inode *inode, const char *function,
 630                          unsigned int line, const char *fmt, ...)
 631{
 632        struct va_format vaf;
 633        va_list args;
 634
 635        if (!ext4_warning_ratelimit(inode->i_sb))
 636                return;
 637
 638        va_start(args, fmt);
 639        vaf.fmt = fmt;
 640        vaf.va = &args;
 641        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 642               "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 643               function, line, inode->i_ino, current->comm, &vaf);
 644        va_end(args);
 645}
 646
 647void __ext4_grp_locked_error(const char *function, unsigned int line,
 648                             struct super_block *sb, ext4_group_t grp,
 649                             unsigned long ino, ext4_fsblk_t block,
 650                             const char *fmt, ...)
 651__releases(bitlock)
 652__acquires(bitlock)
 653{
 654        struct va_format vaf;
 655        va_list args;
 656        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 657
 658        es->s_last_error_ino = cpu_to_le32(ino);
 659        es->s_last_error_block = cpu_to_le64(block);
 660        __save_error_info(sb, function, line);
 661
 662        if (ext4_error_ratelimit(sb)) {
 663                va_start(args, fmt);
 664                vaf.fmt = fmt;
 665                vaf.va = &args;
 666                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 667                       sb->s_id, function, line, grp);
 668                if (ino)
 669                        printk(KERN_CONT "inode %lu: ", ino);
 670                if (block)
 671                        printk(KERN_CONT "block %llu:",
 672                               (unsigned long long) block);
 673                printk(KERN_CONT "%pV\n", &vaf);
 674                va_end(args);
 675        }
 676
 677        if (test_opt(sb, ERRORS_CONT)) {
 678                ext4_commit_super(sb, 0);
 679                return;
 680        }
 681
 682        ext4_unlock_group(sb, grp);
 683        ext4_handle_error(sb);
 684        /*
 685         * We only get here in the ERRORS_RO case; relocking the group
 686         * may be dangerous, but nothing bad will happen since the
 687         * filesystem will have already been marked read/only and the
 688         * journal has been aborted.  We return 1 as a hint to callers
 689         * who might what to use the return value from
 690         * ext4_grp_locked_error() to distinguish between the
 691         * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 692         * aggressively from the ext4 function in question, with a
 693         * more appropriate error code.
 694         */
 695        ext4_lock_group(sb, grp);
 696        return;
 697}
 698
 699void ext4_update_dynamic_rev(struct super_block *sb)
 700{
 701        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 702
 703        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 704                return;
 705
 706        ext4_warning(sb,
 707                     "updating to rev %d because of new feature flag, "
 708                     "running e2fsck is recommended",
 709                     EXT4_DYNAMIC_REV);
 710
 711        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 712        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 713        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 714        /* leave es->s_feature_*compat flags alone */
 715        /* es->s_uuid will be set by e2fsck if empty */
 716
 717        /*
 718         * The rest of the superblock fields should be zero, and if not it
 719         * means they are likely already in use, so leave them alone.  We
 720         * can leave it up to e2fsck to clean up any inconsistencies there.
 721         */
 722}
 723
 724/*
 725 * Open the external journal device
 726 */
 727static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 728{
 729        struct block_device *bdev;
 730        char b[BDEVNAME_SIZE];
 731
 732        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 733        if (IS_ERR(bdev))
 734                goto fail;
 735        return bdev;
 736
 737fail:
 738        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 739                        __bdevname(dev, b), PTR_ERR(bdev));
 740        return NULL;
 741}
 742
 743/*
 744 * Release the journal device
 745 */
 746static void ext4_blkdev_put(struct block_device *bdev)
 747{
 748        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 749}
 750
 751static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
 752{
 753        struct block_device *bdev;
 754        bdev = sbi->journal_bdev;
 755        if (bdev) {
 756                ext4_blkdev_put(bdev);
 757                sbi->journal_bdev = NULL;
 758        }
 759}
 760
 761static inline struct inode *orphan_list_entry(struct list_head *l)
 762{
 763        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 764}
 765
 766static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 767{
 768        struct list_head *l;
 769
 770        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 771                 le32_to_cpu(sbi->s_es->s_last_orphan));
 772
 773        printk(KERN_ERR "sb_info orphan list:\n");
 774        list_for_each(l, &sbi->s_orphan) {
 775                struct inode *inode = orphan_list_entry(l);
 776                printk(KERN_ERR "  "
 777                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 778                       inode->i_sb->s_id, inode->i_ino, inode,
 779                       inode->i_mode, inode->i_nlink,
 780                       NEXT_ORPHAN(inode));
 781        }
 782}
 783
/*
 * ->put_super handler (installed in ext4_sops): tears down everything
 * hanging off EXT4_SB(sb) and finally frees the ext4_sb_info itself.
 *
 * The steps below run in a fixed order; several of them release objects
 * that earlier steps still use, so do not reorder without care.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int i, err;

        ext4_unregister_li_request(sb);
        dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

        /* Drain the rsv_conversion workqueue before destroying it. */
        flush_workqueue(sbi->rsv_conversion_wq);
        destroy_workqueue(sbi->rsv_conversion_wq);

        /* Destroy the journal, if any; a failure is reported via ext4_abort(). */
        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
                if (err < 0)
                        ext4_abort(sb, "Couldn't clean up the journal");
        }

        ext4_es_unregister_shrinker(sbi);
        del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);

        /*
         * Unless mounted read-only: clear the RECOVER incompat feature,
         * copy the saved s_mount_state into the on-disk superblock and
         * commit the superblock.
         */
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        if (!(sb->s_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);

        /* Remove the per-filesystem /proc entries and unlink the kobject. */
        if (sbi->s_proc) {
                remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
        kobject_del(&sbi->s_kobj);

        /* Release the group descriptor buffers, then the arrays and counters. */
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        kvfree(sbi->s_group_desc);
        kvfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
        /* Free the journaled quota file names. */
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->journal_bdev);
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
        if (sbi->s_mb_cache) {
                ext4_xattr_destroy_cache(sbi->s_mb_cache);
                sbi->s_mb_cache = NULL;
        }
        /* Stop the s_mmp_tsk kernel thread if one was started. */
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        /* sbi embeds the kobject: wait for s_kobj_unregister before freeing sbi. */
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
}
 875
/*
 * Slab cache holding struct ext4_inode_info objects; created by
 * init_inodecache() and destroyed by destroy_inodecache().
 */
static struct kmem_cache *ext4_inode_cachep;
 877
 878/*
 879 * Called inside transaction, so use GFP_NOFS
 880 */
 881static struct inode *ext4_alloc_inode(struct super_block *sb)
 882{
 883        struct ext4_inode_info *ei;
 884
 885        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 886        if (!ei)
 887                return NULL;
 888
 889        ei->vfs_inode.i_version = 1;
 890        spin_lock_init(&ei->i_raw_lock);
 891        INIT_LIST_HEAD(&ei->i_prealloc_list);
 892        spin_lock_init(&ei->i_prealloc_lock);
 893        ext4_es_init_tree(&ei->i_es_tree);
 894        rwlock_init(&ei->i_es_lock);
 895        INIT_LIST_HEAD(&ei->i_es_list);
 896        ei->i_es_all_nr = 0;
 897        ei->i_es_shk_nr = 0;
 898        ei->i_es_shrink_lblk = 0;
 899        ei->i_reserved_data_blocks = 0;
 900        ei->i_reserved_meta_blocks = 0;
 901        ei->i_allocated_meta_blocks = 0;
 902        ei->i_da_metadata_calc_len = 0;
 903        ei->i_da_metadata_calc_last_lblock = 0;
 904        spin_lock_init(&(ei->i_block_reservation_lock));
 905#ifdef CONFIG_QUOTA
 906        ei->i_reserved_quota = 0;
 907        memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
 908#endif
 909        ei->jinode = NULL;
 910        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
 911        spin_lock_init(&ei->i_completed_io_lock);
 912        ei->i_sync_tid = 0;
 913        ei->i_datasync_tid = 0;
 914        atomic_set(&ei->i_ioend_count, 0);
 915        atomic_set(&ei->i_unwritten, 0);
 916        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 917#ifdef CONFIG_EXT4_FS_ENCRYPTION
 918        ei->i_crypt_info = NULL;
 919#endif
 920        return &ei->vfs_inode;
 921}
 922
 923static int ext4_drop_inode(struct inode *inode)
 924{
 925        int drop = generic_drop_inode(inode);
 926
 927        trace_ext4_drop_inode(inode, drop);
 928        return drop;
 929}
 930
 931static void ext4_i_callback(struct rcu_head *head)
 932{
 933        struct inode *inode = container_of(head, struct inode, i_rcu);
 934        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 935}
 936
 937static void ext4_destroy_inode(struct inode *inode)
 938{
 939        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 940                ext4_msg(inode->i_sb, KERN_ERR,
 941                         "Inode %lu (%p): orphan list check failed!",
 942                         inode->i_ino, EXT4_I(inode));
 943                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 944                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 945                                true);
 946                dump_stack();
 947        }
 948        call_rcu(&inode->i_rcu, ext4_i_callback);
 949}
 950
 951static void init_once(void *foo)
 952{
 953        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 954
 955        INIT_LIST_HEAD(&ei->i_orphan);
 956        init_rwsem(&ei->xattr_sem);
 957        init_rwsem(&ei->i_data_sem);
 958        inode_init_once(&ei->vfs_inode);
 959}
 960
 961static int __init init_inodecache(void)
 962{
 963        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 964                                             sizeof(struct ext4_inode_info),
 965                                             0, (SLAB_RECLAIM_ACCOUNT|
 966                                                SLAB_MEM_SPREAD),
 967                                             init_once);
 968        if (ext4_inode_cachep == NULL)
 969                return -ENOMEM;
 970        return 0;
 971}
 972
/*
 * Destroy the ext4 inode slab cache.  Inodes are freed through
 * call_rcu() (see ext4_destroy_inode()), so pending callbacks must
 * finish before the cache goes away.
 */
static void destroy_inodecache(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
}
 982
 983void ext4_clear_inode(struct inode *inode)
 984{
 985        invalidate_inode_buffers(inode);
 986        clear_inode(inode);
 987        dquot_drop(inode);
 988        ext4_discard_preallocations(inode);
 989        ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 990        if (EXT4_I(inode)->jinode) {
 991                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 992                                               EXT4_I(inode)->jinode);
 993                jbd2_free_inode(EXT4_I(inode)->jinode);
 994                EXT4_I(inode)->jinode = NULL;
 995        }
 996#ifdef CONFIG_EXT4_FS_ENCRYPTION
 997        if (EXT4_I(inode)->i_crypt_info)
 998                ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
 999#endif
1000}
1001
1002static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1003                                        u64 ino, u32 generation)
1004{
1005        struct inode *inode;
1006
1007        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1008                return ERR_PTR(-ESTALE);
1009        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1010                return ERR_PTR(-ESTALE);
1011
1012        /* iget isn't really right if the inode is currently unallocated!!
1013         *
1014         * ext4_read_inode will return a bad_inode if the inode had been
1015         * deleted, so we should be safe.
1016         *
1017         * Currently we don't know the generation for parent directory, so
1018         * a generation of 0 means "accept any"
1019         */
1020        inode = ext4_iget_normal(sb, ino);
1021        if (IS_ERR(inode))
1022                return ERR_CAST(inode);
1023        if (generation && inode->i_generation != generation) {
1024                iput(inode);
1025                return ERR_PTR(-ESTALE);
1026        }
1027
1028        return inode;
1029}
1030
1031static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1032                                        int fh_len, int fh_type)
1033{
1034        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1035                                    ext4_nfs_get_inode);
1036}
1037
1038static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1039                                        int fh_len, int fh_type)
1040{
1041        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1042                                    ext4_nfs_get_inode);
1043}
1044
1045/*
1046 * Try to release metadata pages (indirect blocks, directories) which are
1047 * mapped via the block device.  Since these pages could have journal heads
1048 * which would prevent try_to_free_buffers() from freeing them, we must use
1049 * jbd2 layer's try_to_free_buffers() function to release them.
1050 */
1051static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1052                                 gfp_t wait)
1053{
1054        journal_t *journal = EXT4_SB(sb)->s_journal;
1055
1056        WARN_ON(PageChecked(page));
1057        if (!page_has_buffers(page))
1058                return 0;
1059        if (journal)
1060                return jbd2_journal_try_to_free_buffers(journal, page,
1061                                                        wait & ~__GFP_WAIT);
1062        return try_to_free_buffers(page);
1063}
1064
1065#ifdef CONFIG_QUOTA
/* Printable name of quota type @t: "user" for USRQUOTA, "group" otherwise. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Select the identifier <on>USRJQUOTA for USRQUOTA, or <on>GRPJQUOTA for
 * any other @t, where @on is an identifier prefix (e.g. Opt_).
 *
 * @on is deliberately not parenthesized: "##" pastes the tokens directly
 * next to it, so "(on)##X" would try to glue ")" onto the identifier,
 * which does not form a valid preprocessing token.
 */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA ? on##USRJQUOTA : on##GRPJQUOTA)
1068
/*
 * Forward declarations of the file-local quota handlers.  They are
 * needed here because the operation tables below (ext4_quota_operations,
 * ext4_qctl_operations, ext4_sops) take their addresses.
 */
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
1085
1086static struct dquot **ext4_get_dquots(struct inode *inode)
1087{
1088        return EXT4_I(inode)->i_dquot;
1089}
1090
/*
 * dquot operations for ext4.  The write/acquire/release/mark_dirty and
 * write_info hooks are ext4-specific functions; allocation and
 * destruction use the generic dquot_alloc()/dquot_destroy().
 */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space = ext4_get_reserved_space,
        .write_dquot    = ext4_write_dquot,
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
        .write_info     = ext4_write_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
};
1101
/*
 * quotactl operations for ext4.  Only quota_on/quota_off are
 * ext4-specific; every other hook points at a generic dquot_* helper.
 */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on       = ext4_quota_on,
        .quota_off      = ext4_quota_off,
        .quota_sync     = dquot_quota_sync,
        .get_state      = dquot_get_state,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk
};
1111#endif
1112
/*
 * Superblock operations table for ext4.  The quota_read, quota_write and
 * get_dquots hooks are only present when CONFIG_QUOTA is enabled.
 */
static const struct super_operations ext4_sops = {
        .alloc_inode    = ext4_alloc_inode,
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
        .freeze_fs      = ext4_freeze,
        .unfreeze_fs    = ext4_unfreeze,
        .statfs         = ext4_statfs,
        .remount_fs     = ext4_remount,
        .show_options   = ext4_show_options,
#ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
        .get_dquots     = ext4_get_dquots,
#endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
};
1134
/*
 * File-handle export operations.  fh_to_dentry and fh_to_parent are thin
 * wrappers around the generic helpers that look inodes up through
 * ext4_nfs_get_inode().
 */
static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
        .get_parent = ext4_get_parent,
};
1140
/*
 * Mount option tokens.  The tokens[] table maps option strings to these
 * values and ext4_mount_opts[] describes how most of them are applied.
 * Opt_err terminates both tables.
 *
 * Opt_mblk_io_submit and Opt_nomblk_io_submit are not referenced by
 * tokens[]; the corresponding option strings map to Opt_removed there.
 */
enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
        Opt_lazytime, Opt_nolazytime,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum,
};
1163
/*
 * Mount option string patterns and the token each one produces.
 *
 * - Several spellings may share one token (e.g. "grpid"/"bsdgroups",
 *   "norecovery"/"noload").
 * - Options that take an optional value (barrier, auto_da_alloc,
 *   init_itable) have two entries: one with "=%u" and one bare.
 * - "usrjquota=" / "grpjquota=" with an empty value yield the Opt_off*
 *   tokens and are listed before the "=%s" forms.
 *   NOTE(review): this presumably relies on the matcher trying entries in
 *   table order -- confirm against match_token().
 * - Obsolete options map to Opt_removed.
 * - The table ends with the {Opt_err, NULL} sentinel.
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_removed, "oldalloc"},
        {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "norecovery"},
        {Opt_noload, "noload"},
        {Opt_removed, "nobh"},
        {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_path, "journal_path=%s"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_nojournal_checksum, "nojournal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_dax, "dax"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_lazytime, "lazytime"},
        {Opt_nolazytime, "nolazytime"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_removed, "mblk_io_submit"},
        {Opt_removed, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        {Opt_dioread_nolock, "dioread_nolock"},
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
};
1249
1250static ext4_fsblk_t get_sb_block(void **data)
1251{
1252        ext4_fsblk_t    sb_block;
1253        char            *options = (char *) *data;
1254
1255        if (!options || strncmp(options, "sb=", 3) != 0)
1256                return 1;       /* Default location */
1257
1258        options += 3;
1259        /* TODO: use simple_strtoll with >32bit ext4 */
1260        sb_block = simple_strtoul(options, &options, 0);
1261        if (*options && *options != ',') {
1262                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1263                       (char *) *data);
1264                return 1;
1265        }
1266        if (*options == ',')
1267                options++;
1268        *data = (void *) options;
1269
1270        return sb_block;
1271}
1272
/* Default journal I/O priority: class IOPRIO_CLASS_BE with priority data 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/*
 * Format string for deprecated mount options.  It takes two %s
 * arguments: the option as given and the release by which it will be
 * removed (see its use in handle_mount_opt()).
 */
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1276
1277#ifdef CONFIG_QUOTA
1278static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1279{
1280        struct ext4_sb_info *sbi = EXT4_SB(sb);
1281        char *qname;
1282        int ret = -1;
1283
1284        if (sb_any_quota_loaded(sb) &&
1285                !sbi->s_qf_names[qtype]) {
1286                ext4_msg(sb, KERN_ERR,
1287                        "Cannot change journaled "
1288                        "quota options when quota turned on");
1289                return -1;
1290        }
1291        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1292                ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
1293                         "when QUOTA feature is enabled");
1294                return -1;
1295        }
1296        qname = match_strdup(args);
1297        if (!qname) {
1298                ext4_msg(sb, KERN_ERR,
1299                        "Not enough memory for storing quotafile name");
1300                return -1;
1301        }
1302        if (sbi->s_qf_names[qtype]) {
1303                if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1304                        ret = 1;
1305                else
1306                        ext4_msg(sb, KERN_ERR,
1307                                 "%s quota file already specified",
1308                                 QTYPE2NAME(qtype));
1309                goto errout;
1310        }
1311        if (strchr(qname, '/')) {
1312                ext4_msg(sb, KERN_ERR,
1313                        "quotafile must be on filesystem root");
1314                goto errout;
1315        }
1316        sbi->s_qf_names[qtype] = qname;
1317        set_opt(sb, QUOTA);
1318        return 1;
1319errout:
1320        kfree(qname);
1321        return ret;
1322}
1323
1324static int clear_qf_name(struct super_block *sb, int qtype)
1325{
1326
1327        struct ext4_sb_info *sbi = EXT4_SB(sb);
1328
1329        if (sb_any_quota_loaded(sb) &&
1330                sbi->s_qf_names[qtype]) {
1331                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1332                        " when quota turned on");
1333                return -1;
1334        }
1335        kfree(sbi->s_qf_names[qtype]);
1336        sbi->s_qf_names[qtype] = NULL;
1337        return 1;
1338}
1339#endif
1340
/*
 * Flag bits for the "flags" field of ext4_mount_opts[] entries.  As used
 * by the visible part of handle_mount_opt():
 *   MOPT_NOSUPPORT - logs "<opt> option not supported"
 *   MOPT_EXPLICIT  - also sets EXPLICIT_DELALLOC in mount_opt2
 *   MOPT_CLEAR_ERR - clears ERRORS_MASK before the option is applied
 *   MOPT_GTE0      - a supplied integer argument must not be negative
 *   MOPT_NO_EXT2 / MOPT_NO_EXT3 - option is rejected on an ext2 / ext3
 *                    superblock; MOPT_EXT4_ONLY combines both
 *   MOPT_STRING    - the argument is a string, not parsed as an integer
 * MOPT_SET, MOPT_CLEAR, MOPT_QFMT and MOPT_DATAJ are consumed further
 * down in handle_mount_opt().
 *
 * Without CONFIG_QUOTA, MOPT_Q and MOPT_QFMT both alias MOPT_NOSUPPORT,
 * so quota options are reported as unsupported; with CONFIG_QUOTA,
 * MOPT_Q is 0 and adds no flag.
 */
#define MOPT_SET        0x0001
#define MOPT_CLEAR      0x0002
#define MOPT_NOSUPPORT  0x0004
#define MOPT_EXPLICIT   0x0008
#define MOPT_CLEAR_ERR  0x0010
#define MOPT_GTE0       0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       0x0040
#else
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      0x0080
#define MOPT_NO_EXT2    0x0100
#define MOPT_NO_EXT3    0x0200
#define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING     0x0400
1359
/*
 * Per-token description of how a mount option is applied.
 *   token     - Opt_* value this entry describes
 *   mount_opt - EXT4_MOUNT_* bit(s) the option affects; for the jqfmt
 *               entries a QFMT_* value; 0 when the option carries a
 *               value handled explicitly in handle_mount_opt()
 *   flags     - MOPT_* bits
 * handle_mount_opt() scans the table linearly for the token and treats
 * reaching the final Opt_err entry as "unrecognized option".
 *
 * Without CONFIG_EXT4_FS_POSIX_ACL the acl/noacl entries are marked
 * MOPT_NOSUPPORT.
 */
static const struct mount_opts {
        int     token;
        int     mount_opt;
        int     flags;
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM),
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2 | MOPT_SET},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
         MOPT_NO_EXT2 | MOPT_CLEAR},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
        {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
        {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
         MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q},
        {Opt_grpjquota, 0, MOPT_Q},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
        {Opt_test_dummy_encryption, 0, MOPT_GTE0},
        {Opt_err, 0, 0}
};
1446
1447static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1448                            substring_t *args, unsigned long *journal_devnum,
1449                            unsigned int *journal_ioprio, int is_remount)
1450{
1451        struct ext4_sb_info *sbi = EXT4_SB(sb);
1452        const struct mount_opts *m;
1453        kuid_t uid;
1454        kgid_t gid;
1455        int arg = 0;
1456
1457#ifdef CONFIG_QUOTA
1458        if (token == Opt_usrjquota)
1459                return set_qf_name(sb, USRQUOTA, &args[0]);
1460        else if (token == Opt_grpjquota)
1461                return set_qf_name(sb, GRPQUOTA, &args[0]);
1462        else if (token == Opt_offusrjquota)
1463                return clear_qf_name(sb, USRQUOTA);
1464        else if (token == Opt_offgrpjquota)
1465                return clear_qf_name(sb, GRPQUOTA);
1466#endif
1467        switch (token) {
1468        case Opt_noacl:
1469        case Opt_nouser_xattr:
1470                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1471                break;
1472        case Opt_sb:
1473                return 1;       /* handled by get_sb_block() */
1474        case Opt_removed:
1475                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1476                return 1;
1477        case Opt_abort:
1478                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1479                return 1;
1480        case Opt_i_version:
1481                sb->s_flags |= MS_I_VERSION;
1482                return 1;
1483        case Opt_lazytime:
1484                sb->s_flags |= MS_LAZYTIME;
1485                return 1;
1486        case Opt_nolazytime:
1487                sb->s_flags &= ~MS_LAZYTIME;
1488                return 1;
1489        }
1490
1491        for (m = ext4_mount_opts; m->token != Opt_err; m++)
1492                if (token == m->token)
1493                        break;
1494
1495        if (m->token == Opt_err) {
1496                ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1497                         "or missing value", opt);
1498                return -1;
1499        }
1500
1501        if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1502                ext4_msg(sb, KERN_ERR,
1503                         "Mount option \"%s\" incompatible with ext2", opt);
1504                return -1;
1505        }
1506        if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1507                ext4_msg(sb, KERN_ERR,
1508                         "Mount option \"%s\" incompatible with ext3", opt);
1509                return -1;
1510        }
1511
1512        if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
1513                return -1;
1514        if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1515                return -1;
1516        if (m->flags & MOPT_EXPLICIT)
1517                set_opt2(sb, EXPLICIT_DELALLOC);
1518        if (m->flags & MOPT_CLEAR_ERR)
1519                clear_opt(sb, ERRORS_MASK);
1520        if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1521                ext4_msg(sb, KERN_ERR, "Cannot change quota "
1522                         "options when quota turned on");
1523                return -1;
1524        }
1525
1526        if (m->flags & MOPT_NOSUPPORT) {
1527                ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1528        } else if (token == Opt_commit) {
1529                if (arg == 0)
1530                        arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1531                sbi->s_commit_interval = HZ * arg;
1532        } else if (token == Opt_max_batch_time) {
1533                sbi->s_max_batch_time = arg;
1534        } else if (token == Opt_min_batch_time) {
1535                sbi->s_min_batch_time = arg;
1536        } else if (token == Opt_inode_readahead_blks) {
1537                if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1538                        ext4_msg(sb, KERN_ERR,
1539                                 "EXT4-fs: inode_readahead_blks must be "
1540                                 "0 or a power of 2 smaller than 2^31");
1541                        return -1;
1542                }
1543                sbi->s_inode_readahead_blks = arg;
1544        } else if (token == Opt_init_itable) {
1545                set_opt(sb, INIT_INODE_TABLE);
1546                if (!args->from)
1547                        arg = EXT4_DEF_LI_WAIT_MULT;
1548                sbi->s_li_wait_mult = arg;
1549        } else if (token == Opt_max_dir_size_kb) {
1550                sbi->s_max_dir_size_kb = arg;
1551        } else if (token == Opt_stripe) {
1552                sbi->s_stripe = arg;
1553        } else if (token == Opt_resuid) {
1554                uid = make_kuid(current_user_ns(), arg);
1555                if (!uid_valid(uid)) {
1556                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1557                        return -1;
1558                }
1559                sbi->s_resuid = uid;
1560        } else if (token == Opt_resgid) {
1561                gid = make_kgid(current_user_ns(), arg);
1562                if (!gid_valid(gid)) {
1563                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1564                        return -1;
1565                }
1566                sbi->s_resgid = gid;
1567        } else if (token == Opt_journal_dev) {
1568                if (is_remount) {
1569                        ext4_msg(sb, KERN_ERR,
1570                                 "Cannot specify journal on remount");
1571                        return -1;
1572                }
1573                *journal_devnum = arg;
1574        } else if (token == Opt_journal_path) {
1575                char *journal_path;
1576                struct inode *journal_inode;
1577                struct path path;
1578                int error;
1579
1580                if (is_remount) {
1581                        ext4_msg(sb, KERN_ERR,
1582                                 "Cannot specify journal on remount");
1583                        return -1;
1584                }
1585                journal_path = match_strdup(&args[0]);
1586                if (!journal_path) {
1587                        ext4_msg(sb, KERN_ERR, "error: could not dup "
1588                                "journal device string");
1589                        return -1;
1590                }
1591
1592                error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1593                if (error) {
1594                        ext4_msg(sb, KERN_ERR, "error: could not find "
1595                                "journal device path: error %d", error);
1596                        kfree(journal_path);
1597                        return -1;
1598                }
1599
1600                journal_inode = d_inode(path.dentry);
1601                if (!S_ISBLK(journal_inode->i_mode)) {
1602                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
1603                                "is not a block device", journal_path);
1604                        path_put(&path);
1605                        kfree(journal_path);
1606                        return -1;
1607                }
1608
1609                *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1610                path_put(&path);
1611                kfree(journal_path);
1612        } else if (token == Opt_journal_ioprio) {
1613                if (arg > 7) {
1614                        ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1615                                 " (must be 0-7)");
1616                        return -1;
1617                }
1618                *journal_ioprio =
1619                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1620        } else if (token == Opt_test_dummy_encryption) {
1621#ifdef CONFIG_EXT4_FS_ENCRYPTION
1622                sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
1623                ext4_msg(sb, KERN_WARNING,
1624                         "Test dummy encryption mode enabled");
1625#else
1626                ext4_msg(sb, KERN_WARNING,
1627                         "Test dummy encryption mount option ignored");
1628#endif
1629        } else if (m->flags & MOPT_DATAJ) {
1630                if (is_remount) {
1631                        if (!sbi->s_journal)
1632                                ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1633                        else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1634                                ext4_msg(sb, KERN_ERR,
1635                                         "Cannot change data mode on remount");
1636                                return -1;
1637                        }
1638                } else {
1639                        clear_opt(sb, DATA_FLAGS);
1640                        sbi->s_mount_opt |= m->mount_opt;
1641                }
1642#ifdef CONFIG_QUOTA
1643        } else if (m->flags & MOPT_QFMT) {
1644                if (sb_any_quota_loaded(sb) &&
1645                    sbi->s_jquota_fmt != m->mount_opt) {
1646                        ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1647                                 "quota options when quota turned on");
1648                        return -1;
1649                }
1650                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1651                                               EXT4_FEATURE_RO_COMPAT_QUOTA)) {
1652                        ext4_msg(sb, KERN_ERR,
1653                                 "Cannot set journaled quota options "
1654                                 "when QUOTA feature is enabled");
1655                        return -1;
1656                }
1657                sbi->s_jquota_fmt = m->mount_opt;
1658#endif
1659#ifndef CONFIG_FS_DAX
1660        } else if (token == Opt_dax) {
1661                ext4_msg(sb, KERN_INFO, "dax option not supported");
1662                return -1;
1663#endif
1664        } else {
1665                if (!args->from)
1666                        arg = 1;
1667                if (m->flags & MOPT_CLEAR)
1668                        arg = !arg;
1669                else if (unlikely(!(m->flags & MOPT_SET))) {
1670                        ext4_msg(sb, KERN_WARNING,
1671                                 "buggy handling of option %s", opt);
1672                        WARN_ON(1);
1673                        return -1;
1674                }
1675                if (arg != 0)
1676                        sbi->s_mount_opt |= m->mount_opt;
1677                else
1678                        sbi->s_mount_opt &= ~m->mount_opt;
1679        }
1680        return 1;
1681}
1682
1683static int parse_options(char *options, struct super_block *sb,
1684                         unsigned long *journal_devnum,
1685                         unsigned int *journal_ioprio,
1686                         int is_remount)
1687{
1688        struct ext4_sb_info *sbi = EXT4_SB(sb);
1689        char *p;
1690        substring_t args[MAX_OPT_ARGS];
1691        int token;
1692
1693        if (!options)
1694                return 1;
1695
1696        while ((p = strsep(&options, ",")) != NULL) {
1697                if (!*p)
1698                        continue;
1699                /*
1700                 * Initialize args struct so we know whether arg was
1701                 * found; some options take optional arguments.
1702                 */
1703                args[0].to = args[0].from = NULL;
1704                token = match_token(p, tokens, args);
1705                if (handle_mount_opt(sb, p, token, args, journal_devnum,
1706                                     journal_ioprio, is_remount) < 0)
1707                        return 0;
1708        }
1709#ifdef CONFIG_QUOTA
1710        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
1711            (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
1712                ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
1713                         "feature is enabled");
1714                return 0;
1715        }
1716        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1717                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1718                        clear_opt(sb, USRQUOTA);
1719
1720                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1721                        clear_opt(sb, GRPQUOTA);
1722
1723                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1724                        ext4_msg(sb, KERN_ERR, "old and new quota "
1725                                        "format mixing");
1726                        return 0;
1727                }
1728
1729                if (!sbi->s_jquota_fmt) {
1730                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1731                                        "not specified");
1732                        return 0;
1733                }
1734        }
1735#endif
1736        if (test_opt(sb, DIOREAD_NOLOCK)) {
1737                int blocksize =
1738                        BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
1739
1740                if (blocksize < PAGE_CACHE_SIZE) {
1741                        ext4_msg(sb, KERN_ERR, "can't mount with "
1742                                 "dioread_nolock if block size != PAGE_SIZE");
1743                        return 0;
1744                }
1745        }
1746        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
1747            test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
1748                ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
1749                         "in data=ordered mode");
1750                return 0;
1751        }
1752        return 1;
1753}
1754
/*
 * Append the journaled-quota related options (jqfmt=, usrjquota=,
 * grpjquota=) of @sb to @seq.  Does nothing when CONFIG_QUOTA is not
 * defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		/* An unrecognised format is printed as an empty name. */
		char *fmtname = "";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmtname = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmtname = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmtname = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA] != NULL)
		seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA] != NULL)
		seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
1785
1786static const char *token2str(int token)
1787{
1788        const struct match_token *t;
1789
1790        for (t = tokens; t->token != Opt_err; t++)
1791                if (t->token == token && !strchr(t->pattern, '='))
1792                        break;
1793        return t->pattern;
1794}
1795
1796/*
1797 * Show an option if
1798 *  - it's set to a non-default value OR
1799 *  - if the per-sb default is different from the global default
1800 */
1801static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1802                              int nodefs)
1803{
1804        struct ext4_sb_info *sbi = EXT4_SB(sb);
1805        struct ext4_super_block *es = sbi->s_es;
1806        int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1807        const struct mount_opts *m;
1808        char sep = nodefs ? '\n' : ',';
1809
1810#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1811#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1812
1813        if (sbi->s_sb_block != 1)
1814                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1815
1816        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1817                int want_set = m->flags & MOPT_SET;
1818                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1819                    (m->flags & MOPT_CLEAR_ERR))
1820                        continue;
1821                if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1822                        continue; /* skip if same as the default */
1823                if ((want_set &&
1824                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1825                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1826                        continue; /* select Opt_noFoo vs Opt_Foo */
1827                SEQ_OPTS_PRINT("%s", token2str(m->token));
1828        }
1829
1830        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1831            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1832                SEQ_OPTS_PRINT("resuid=%u",
1833                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
1834        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1835            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1836                SEQ_OPTS_PRINT("resgid=%u",
1837                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
1838        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1839        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1840                SEQ_OPTS_PUTS("errors=remount-ro");
1841        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1842                SEQ_OPTS_PUTS("errors=continue");
1843        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1844                SEQ_OPTS_PUTS("errors=panic");
1845        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1846                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1847        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1848                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1849        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1850                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1851        if (sb->s_flags & MS_I_VERSION)
1852                SEQ_OPTS_PUTS("i_version");
1853        if (nodefs || sbi->s_stripe)
1854                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1855        if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1856                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1857                        SEQ_OPTS_PUTS("data=journal");
1858                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1859                        SEQ_OPTS_PUTS("data=ordered");
1860                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1861                        SEQ_OPTS_PUTS("data=writeback");
1862        }
1863        if (nodefs ||
1864            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
1865                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
1866                               sbi->s_inode_readahead_blks);
1867
1868        if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1869                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1870                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1871        if (nodefs || sbi->s_max_dir_size_kb)
1872                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1873
1874        ext4_show_quota_options(seq, sb);
1875        return 0;
1876}
1877
1878static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1879{
1880        return _ext4_show_options(seq, root->d_sb, 0);
1881}
1882
1883static int options_seq_show(struct seq_file *seq, void *offset)
1884{
1885        struct super_block *sb = seq->private;
1886        int rc;
1887
1888        seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1889        rc = _ext4_show_options(seq, sb, 1);
1890        seq_puts(seq, "\n");
1891        return rc;
1892}
1893
1894static int options_open_fs(struct inode *inode, struct file *file)
1895{
1896        return single_open(file, options_seq_show, PDE_DATA(inode));
1897}
1898
1899static const struct file_operations ext4_seq_options_fops = {
1900        .owner = THIS_MODULE,
1901        .open = options_open_fs,
1902        .read = seq_read,
1903        .llseek = seq_lseek,
1904        .release = single_release,
1905};
1906
1907static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1908                            int read_only)
1909{
1910        struct ext4_sb_info *sbi = EXT4_SB(sb);
1911        int res = 0;
1912
1913        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1914                ext4_msg(sb, KERN_ERR, "revision level too high, "
1915                         "forcing read-only mode");
1916                res = MS_RDONLY;
1917        }
1918        if (read_only)
1919                goto done;
1920        if (!(sbi->s_mount_state & EXT4_VALID_FS))
1921                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1922                         "running e2fsck is recommended");
1923        else if (sbi->s_mount_state & EXT4_ERROR_FS)
1924                ext4_msg(sb, KERN_WARNING,
1925                         "warning: mounting fs with errors, "
1926                         "running e2fsck is recommended");
1927        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1928                 le16_to_cpu(es->s_mnt_count) >=
1929                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1930                ext4_msg(sb, KERN_WARNING,
1931                         "warning: maximal mount count reached, "
1932                         "running e2fsck is recommended");
1933        else if (le32_to_cpu(es->s_checkinterval) &&
1934                (le32_to_cpu(es->s_lastcheck) +
1935                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1936                ext4_msg(sb, KERN_WARNING,
1937                         "warning: checktime reached, "
1938                         "running e2fsck is recommended");
1939        if (!sbi->s_journal)
1940                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1941        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1942                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1943        le16_add_cpu(&es->s_mnt_count, 1);
1944        es->s_mtime = cpu_to_le32(get_seconds());
1945        ext4_update_dynamic_rev(sb);
1946        if (sbi->s_journal)
1947                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1948
1949        ext4_commit_super(sb, 1);
1950done:
1951        if (test_opt(sb, DEBUG))
1952                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1953                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1954                        sb->s_blocksize,
1955                        sbi->s_groups_count,
1956                        EXT4_BLOCKS_PER_GROUP(sb),
1957                        EXT4_INODES_PER_GROUP(sb),
1958                        sbi->s_mount_opt, sbi->s_mount_opt2);
1959
1960        cleancache_init_fs(sb);
1961        return res;
1962}
1963
1964int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1965{
1966        struct ext4_sb_info *sbi = EXT4_SB(sb);
1967        struct flex_groups *new_groups;
1968        int size;
1969
1970        if (!sbi->s_log_groups_per_flex)
1971                return 0;
1972
1973        size = ext4_flex_group(sbi, ngroup - 1) + 1;
1974        if (size <= sbi->s_flex_groups_allocated)
1975                return 0;
1976
1977        size = roundup_pow_of_two(size * sizeof(struct flex_groups));
1978        new_groups = ext4_kvzalloc(size, GFP_KERNEL);
1979        if (!new_groups) {
1980                ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
1981                         size / (int) sizeof(struct flex_groups));
1982                return -ENOMEM;
1983        }
1984
1985        if (sbi->s_flex_groups) {
1986                memcpy(new_groups, sbi->s_flex_groups,
1987                       (sbi->s_flex_groups_allocated *
1988                        sizeof(struct flex_groups)));
1989                kvfree(sbi->s_flex_groups);
1990        }
1991        sbi->s_flex_groups = new_groups;
1992        sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
1993        return 0;
1994}
1995
1996static int ext4_fill_flex_info(struct super_block *sb)
1997{
1998        struct ext4_sb_info *sbi = EXT4_SB(sb);
1999        struct ext4_group_desc *gdp = NULL;
2000        ext4_group_t flex_group;
2001        int i, err;
2002
2003        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2004        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2005                sbi->s_log_groups_per_flex = 0;
2006                return 1;
2007        }
2008
2009        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
2010        if (err)
2011                goto failed;
2012
2013        for (i = 0; i < sbi->s_groups_count; i++) {
2014                gdp = ext4_get_group_desc(sb, i, NULL);
2015
2016                flex_group = ext4_flex_group(sbi, i);
2017                atomic_add(ext4_free_inodes_count(sb, gdp),
2018                           &sbi->s_flex_groups[flex_group].free_inodes);
2019                atomic64_add(ext4_free_group_clusters(sb, gdp),
2020                             &sbi->s_flex_groups[flex_group].free_clusters);
2021                atomic_add(ext4_used_dirs_count(sb, gdp),
2022                           &sbi->s_flex_groups[flex_group].used_dirs);
2023        }
2024
2025        return 1;
2026failed:
2027        return 0;
2028}
2029
2030static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2031                                   struct ext4_group_desc *gdp)
2032{
2033        int offset;
2034        __u16 crc = 0;
2035        __le32 le_group = cpu_to_le32(block_group);
2036
2037        if (ext4_has_metadata_csum(sbi->s_sb)) {
2038                /* Use new metadata_csum algorithm */
2039                __le16 save_csum;
2040                __u32 csum32;
2041
2042                save_csum = gdp->bg_checksum;
2043                gdp->bg_checksum = 0;
2044                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
2045                                     sizeof(le_group));
2046                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
2047                                     sbi->s_desc_size);
2048                gdp->bg_checksum = save_csum;
2049
2050                crc = csum32 & 0xFFFF;
2051                goto out;
2052        }
2053
2054        /* old crc16 code */
2055        if (!(sbi->s_es->s_feature_ro_compat &
2056              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
2057                return 0;
2058
2059        offset = offsetof(struct ext4_group_desc, bg_checksum);
2060
2061        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
2062        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
2063        crc = crc16(crc, (__u8 *)gdp, offset);
2064        offset += sizeof(gdp->bg_checksum); /* skip checksum */
2065        /* for checksum of struct ext4_group_desc do the rest...*/
2066        if ((sbi->s_es->s_feature_incompat &
2067             cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
2068            offset < le16_to_cpu(sbi->s_es->s_desc_size))
2069                crc = crc16(crc, (__u8 *)gdp + offset,
2070                            le16_to_cpu(sbi->s_es->s_desc_size) -
2071                                offset);
2072
2073out:
2074        return cpu_to_le16(crc);
2075}
2076
2077int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2078                                struct ext4_group_desc *gdp)
2079{
2080        if (ext4_has_group_desc_csum(sb) &&
2081            (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
2082                                                      block_group, gdp)))
2083                return 0;
2084
2085        return 1;
2086}
2087
2088void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2089                              struct ext4_group_desc *gdp)
2090{
2091        if (!ext4_has_group_desc_csum(sb))
2092                return;
2093        gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2094}
2095
2096/* Called at mount-time, super-block is locked */
2097static int ext4_check_descriptors(struct super_block *sb,
2098                                  ext4_group_t *first_not_zeroed)
2099{
2100        struct ext4_sb_info *sbi = EXT4_SB(sb);
2101        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2102        ext4_fsblk_t last_block;
2103        ext4_fsblk_t block_bitmap;
2104        ext4_fsblk_t inode_bitmap;
2105        ext4_fsblk_t inode_table;
2106        int flexbg_flag = 0;
2107        ext4_group_t i, grp = sbi->s_groups_count;
2108
2109        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2110                flexbg_flag = 1;
2111
2112        ext4_debug("Checking group descriptors");
2113
2114        for (i = 0; i < sbi->s_groups_count; i++) {
2115                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2116
2117                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2118                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2119                else
2120                        last_block = first_block +
2121                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2122
2123                if ((grp == sbi->s_groups_count) &&
2124                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2125                        grp = i;
2126
2127                block_bitmap = ext4_block_bitmap(sb, gdp);
2128                if (block_bitmap < first_block || block_bitmap > last_block) {
2129                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2130                               "Block bitmap for group %u not in group "
2131                               "(block %llu)!", i, block_bitmap);
2132                        return 0;
2133                }
2134                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2135                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2136                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2137                               "Inode bitmap for group %u not in group "
2138                               "(block %llu)!", i, inode_bitmap);
2139                        return 0;
2140                }
2141                inode_table = ext4_inode_table(sb, gdp);
2142                if (inode_table < first_block ||
2143                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2144                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2145                               "Inode table for group %u not in group "
2146                               "(block %llu)!", i, inode_table);
2147                        return 0;
2148                }
2149                ext4_lock_group(sb, i);
2150                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2151                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2152                                 "Checksum for group %u failed (%u!=%u)",
2153                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2154                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2155                        if (!(sb->s_flags & MS_RDONLY)) {
2156                                ext4_unlock_group(sb, i);
2157                                return 0;
2158                        }
2159                }
2160                ext4_unlock_group(sb, i);
2161                if (!flexbg_flag)
2162                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2163        }
2164        if (NULL != first_not_zeroed)
2165                *first_not_zeroed = grp;
2166        return 1;
2167}
2168
2169/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2170 * the superblock) which were deleted from all directories, but held open by
2171 * a process at the time of a crash.  We walk the list and try to delete these
2172 * inodes at recovery time (only with a read-write filesystem).
2173 *
2174 * In order to keep the orphan inode chain consistent during traversal (in
2175 * case of crash during recovery), we link each inode into the superblock
2176 * orphan list_head and handle it the same way as an inode deletion during
2177 * normal operation (which journals the operations for us).
2178 *
2179 * We only do an iget() and an iput() on each inode, which is very safe if we
2180 * accidentally point at an in-use or already deleted inode.  The worst that
2181 * can happen in this case is that we get a "bit already cleared" message from
2182 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2183 * e2fsck was run on this filesystem, and it must have already done the orphan
2184 * inode cleanup for us, so we can safely abort without any further action.
2185 */
2186static void ext4_orphan_cleanup(struct super_block *sb,
2187                                struct ext4_super_block *es)
2188{
2189        unsigned int s_flags = sb->s_flags;
2190        int nr_orphans = 0, nr_truncates = 0;
2191#ifdef CONFIG_QUOTA
2192        int i;
2193#endif
2194        if (!es->s_last_orphan) {
2195                jbd_debug(4, "no orphan inodes to clean up\n");
2196                return;
2197        }
2198
2199        if (bdev_read_only(sb->s_bdev)) {
2200                ext4_msg(sb, KERN_ERR, "write access "
2201                        "unavailable, skipping orphan cleanup");
2202                return;
2203        }
2204
2205        /* Check if feature set would not allow a r/w mount */
2206        if (!ext4_feature_set_ok(sb, 0)) {
2207                ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2208                         "unknown ROCOMPAT features");
2209                return;
2210        }
2211
2212        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2213                /* don't clear list on RO mount w/ errors */
2214                if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2215                        ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2216                                  "clearing orphan list.\n");
2217                        es->s_last_orphan = 0;
2218                }
2219                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2220                return;
2221        }
2222
2223        if (s_flags & MS_RDONLY) {
2224                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2225                sb->s_flags &= ~MS_RDONLY;
2226        }
2227#ifdef CONFIG_QUOTA
2228        /* Needed for iput() to work correctly and not trash data */
2229        sb->s_flags |= MS_ACTIVE;
2230        /* Turn on quotas so that they are updated correctly */
2231        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2232                if (EXT4_SB(sb)->s_qf_names[i]) {
2233                        int ret = ext4_quota_on_mount(sb, i);
2234                        if (ret < 0)
2235                                ext4_msg(sb, KERN_ERR,
2236                                        "Cannot turn on journaled "
2237                                        "quota: error %d", ret);
2238                }
2239        }
2240#endif
2241
2242        while (es->s_last_orphan) {
2243                struct inode *inode;
2244
2245                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2246                if (IS_ERR(inode)) {
2247                        es->s_last_orphan = 0;
2248                        break;
2249                }
2250
2251                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2252                dquot_initialize(inode);
2253                if (inode->i_nlink) {
2254                        if (test_opt(sb, DEBUG))
2255                                ext4_msg(sb, KERN_DEBUG,
2256                                        "%s: truncating inode %lu to %lld bytes",
2257                                        __func__, inode->i_ino, inode->i_size);
2258                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2259                                  inode->i_ino, inode->i_size);
2260                        mutex_lock(&inode->i_mutex);
2261                        truncate_inode_pages(inode->i_mapping, inode->i_size);
2262                        ext4_truncate(inode);
2263                        mutex_unlock(&inode->i_mutex);
2264                        nr_truncates++;
2265                } else {
2266                        if (test_opt(sb, DEBUG))
2267                                ext4_msg(sb, KERN_DEBUG,
2268                                        "%s: deleting unreferenced inode %lu",
2269                                        __func__, inode->i_ino);
2270                        jbd_debug(2, "deleting unreferenced inode %lu\n",
2271                                  inode->i_ino);
2272                        nr_orphans++;
2273                }
2274                iput(inode);  /* The delete magic happens here! */
2275        }
2276
2277#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2278
2279        if (nr_orphans)
2280                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2281                       PLURAL(nr_orphans));
2282        if (nr_truncates)
2283                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2284                       PLURAL(nr_truncates));
2285#ifdef CONFIG_QUOTA
2286        /* Turn quotas off */
2287        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2288                if (sb_dqopt(sb)->files[i])
2289                        dquot_quota_off(sb, i);
2290        }
2291#endif
2292        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2293}
2294
2295/*
2296 * Maximal extent format file size.
2297 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2298 * extent format containers, within a sector_t, and within i_blocks
2299 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2300 * so that won't be a limiting factor.
2301 *
2302 * However there is other limiting factor. We do store extents in the form
2303 * of starting block and length, hence the resulting length of the extent
2304 * covering maximum file size must fit into on-disk format containers as
2305 * well. Given that length is always by 1 unit bigger than max unit (because
2306 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2307 *
2308 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2309 */
2310static loff_t ext4_max_size(int blkbits, int has_huge_files)
2311{
2312        loff_t res;
2313        loff_t upper_limit = MAX_LFS_FILESIZE;
2314
2315        /* small i_blocks in vfs inode? */
2316        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2317                /*
2318                 * CONFIG_LBDAF is not enabled implies the inode
2319                 * i_block represent total blocks in 512 bytes
2320                 * 32 == size of vfs inode i_blocks * 8
2321                 */
2322                upper_limit = (1LL << 32) - 1;
2323
2324                /* total blocks in file system block size */
2325                upper_limit >>= (blkbits - 9);
2326                upper_limit <<= blkbits;
2327        }
2328
2329        /*
2330         * 32-bit extent-start container, ee_block. We lower the maxbytes
2331         * by one fs block, so ee_len can cover the extent of maximum file
2332         * size
2333         */
2334        res = (1LL << 32) - 1;
2335        res <<= blkbits;
2336
2337        /* Sanity check against vm- & vfs- imposed limits */
2338        if (res > upper_limit)
2339                res = upper_limit;
2340
2341        return res;
2342}
2343
2344/*
2345 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2346 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2347 * We need to be 1 filesystem block less than the 2^48 sector limit.
2348 */
2349static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2350{
2351        loff_t res = EXT4_NDIR_BLOCKS;
2352        int meta_blocks;
2353        loff_t upper_limit;
2354        /* This is calculated to be the largest file size for a dense, block
2355         * mapped file such that the file's total number of 512-byte sectors,
2356         * including data and all indirect blocks, does not exceed (2^48 - 1).
2357         *
2358         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2359         * number of 512-byte sectors of the file.
2360         */
2361
2362        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2363                /*
2364                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2365                 * the inode i_block field represents total file blocks in
2366                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2367                 */
2368                upper_limit = (1LL << 32) - 1;
2369
2370                /* total blocks in file system block size */
2371                upper_limit >>= (bits - 9);
2372
2373        } else {
2374                /*
2375                 * We use 48 bit ext4_inode i_blocks
2376                 * With EXT4_HUGE_FILE_FL set the i_blocks
2377                 * represent total number of blocks in
2378                 * file system block size
2379                 */
2380                upper_limit = (1LL << 48) - 1;
2381
2382        }
2383
2384        /* indirect blocks */
2385        meta_blocks = 1;
2386        /* double indirect blocks */
2387        meta_blocks += 1 + (1LL << (bits-2));
2388        /* tripple indirect blocks */
2389        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2390
2391        upper_limit -= meta_blocks;
2392        upper_limit <<= bits;
2393
2394        res += 1LL << (bits-2);
2395        res += 1LL << (2*(bits-2));
2396        res += 1LL << (3*(bits-2));
2397        res <<= bits;
2398        if (res > upper_limit)
2399                res = upper_limit;
2400
2401        if (res > MAX_LFS_FILESIZE)
2402                res = MAX_LFS_FILESIZE;
2403
2404        return res;
2405}
2406
2407static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2408                                   ext4_fsblk_t logical_sb_block, int nr)
2409{
2410        struct ext4_sb_info *sbi = EXT4_SB(sb);
2411        ext4_group_t bg, first_meta_bg;
2412        int has_super = 0;
2413
2414        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2415
2416        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2417            nr < first_meta_bg)
2418                return logical_sb_block + nr + 1;
2419        bg = sbi->s_desc_per_block * nr;
2420        if (ext4_bg_has_super(sb, bg))
2421                has_super = 1;
2422
2423        /*
2424         * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2425         * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
2426         * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2427         * compensate.
2428         */
2429        if (sb->s_blocksize == 1024 && nr == 0 &&
2430            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2431                has_super++;
2432
2433        return (has_super + ext4_group_first_block_no(sb, bg));
2434}
2435
2436/**
2437 * ext4_get_stripe_size: Get the stripe size.
2438 * @sbi: In memory super block info
2439 *
2440 * If we have specified it via mount option, then
2441 * use the mount option value. If the value specified at mount time is
2442 * greater than the blocks per group use the super block value.
2443 * If the super block value is greater than blocks per group return 0.
2444 * Allocator needs it be less than blocks per group.
2445 *
2446 */
2447static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2448{
2449        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2450        unsigned long stripe_width =
2451                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2452        int ret;
2453
2454        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2455                ret = sbi->s_stripe;
2456        else if (stripe_width <= sbi->s_blocks_per_group)
2457                ret = stripe_width;
2458        else if (stride <= sbi->s_blocks_per_group)
2459                ret = stride;
2460        else
2461                ret = 0;
2462
2463        /*
2464         * If the stripe width is 1, this makes no sense and
2465         * we set it to 0 to turn off stripe handling code.
2466         */
2467        if (ret <= 1)
2468                ret = 0;
2469
2470        return ret;
2471}
2472
2473/* sysfs supprt */
2474
2475struct ext4_attr {
2476        struct attribute attr;
2477        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2478        ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2479                         const char *, size_t);
2480        union {
2481                int offset;
2482                int deprecated_val;
2483        } u;
2484};
2485
2486static int parse_strtoull(const char *buf,
2487                unsigned long long max, unsigned long long *value)
2488{
2489        int ret;
2490
2491        ret = kstrtoull(skip_spaces(buf), 0, value);
2492        if (!ret && *value > max)
2493                ret = -EINVAL;
2494        return ret;
2495}
2496
2497static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2498                                              struct ext4_sb_info *sbi,
2499                                              char *buf)
2500{
2501        return snprintf(buf, PAGE_SIZE, "%llu\n",
2502                (s64) EXT4_C2B(sbi,
2503                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2504}
2505
2506static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2507                                         struct ext4_sb_info *sbi, char *buf)
2508{
2509        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2510
2511        if (!sb->s_bdev->bd_part)
2512                return snprintf(buf, PAGE_SIZE, "0\n");
2513        return snprintf(buf, PAGE_SIZE, "%lu\n",
2514                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2515                         sbi->s_sectors_written_start) >> 1);
2516}
2517
2518static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2519                                          struct ext4_sb_info *sbi, char *buf)
2520{
2521        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2522
2523        if (!sb->s_bdev->bd_part)
2524                return snprintf(buf, PAGE_SIZE, "0\n");
2525        return snprintf(buf, PAGE_SIZE, "%llu\n",
2526                        (unsigned long long)(sbi->s_kbytes_written +
2527                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2528                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2529}
2530
2531static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2532                                          struct ext4_sb_info *sbi,
2533                                          const char *buf, size_t count)
2534{
2535        unsigned long t;
2536        int ret;
2537
2538        ret = kstrtoul(skip_spaces(buf), 0, &t);
2539        if (ret)
2540                return ret;
2541
2542        if (t && (!is_power_of_2(t) || t > 0x40000000))
2543                return -EINVAL;
2544
2545        sbi->s_inode_readahead_blks = t;
2546        return count;
2547}
2548
2549static ssize_t sbi_ui_show(struct ext4_attr *a,
2550                           struct ext4_sb_info *sbi, char *buf)
2551{
2552        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2553
2554        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2555}
2556
2557static ssize_t sbi_ui_store(struct ext4_attr *a,
2558                            struct ext4_sb_info *sbi,
2559                            const char *buf, size_t count)
2560{
2561        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
2562        unsigned long t;
2563        int ret;
2564
2565        ret = kstrtoul(skip_spaces(buf), 0, &t);
2566        if (ret)
2567                return ret;
2568        *ui = t;
2569        return count;
2570}
2571
2572static ssize_t es_ui_show(struct ext4_attr *a,
2573                           struct ext4_sb_info *sbi, char *buf)
2574{
2575
2576        unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2577                           a->u.offset);
2578
2579        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2580}
2581
2582static ssize_t reserved_clusters_show(struct ext4_attr *a,
2583                                  struct ext4_sb_info *sbi, char *buf)
2584{
2585        return snprintf(buf, PAGE_SIZE, "%llu\n",
2586                (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
2587}
2588
2589static ssize_t reserved_clusters_store(struct ext4_attr *a,
2590                                   struct ext4_sb_info *sbi,
2591                                   const char *buf, size_t count)
2592{
2593        unsigned long long val;
2594        int ret;
2595
2596        if (parse_strtoull(buf, -1ULL, &val))
2597                return -EINVAL;
2598        ret = ext4_reserve_clusters(sbi, val);
2599
2600        return ret ? ret : count;
2601}
2602
2603static ssize_t trigger_test_error(struct ext4_attr *a,
2604                                  struct ext4_sb_info *sbi,
2605                                  const char *buf, size_t count)
2606{
2607        int len = count;
2608
2609        if (!capable(CAP_SYS_ADMIN))
2610                return -EPERM;
2611
2612        if (len && buf[len-1] == '\n')
2613                len--;
2614
2615        if (len)
2616                ext4_error(sbi->s_sb, "%.*s", len, buf);
2617        return count;
2618}
2619
2620static ssize_t sbi_deprecated_show(struct ext4_attr *a,
2621                                   struct ext4_sb_info *sbi, char *buf)
2622{
2623        return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
2624}
2625
/*
 * Define ext4_attr_<_name>, backed by member _elname of
 * struct ext4_sb_info (stored as a byte offset).
 */
#define EXT4_ATTR_OFFSET(_name, _mode, _show, _store, _elname)		\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.show	= _show,						\
	.store	= _store,						\
	.u	= {							\
		.offset = offsetof(struct ext4_sb_info, _elname),	\
	},								\
}

/*
 * Same as EXT4_ATTR_OFFSET, but the offset refers to a member of the
 * on-disk struct ext4_super_block.
 */
#define EXT4_ATTR_OFFSET_ES(_name, _mode, _show, _store, _elname)	\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.show	= _show,						\
	.store	= _store,						\
	.u	= {							\
		.offset = offsetof(struct ext4_super_block, _elname),	\
	},								\
}

/* Define ext4_attr_<name> with explicit mode and handlers. */
#define EXT4_ATTR(name, mode, show, store)				\
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Shorthands: handler names are derived from the attribute name. */
#define EXT4_INFO_ATTR(name)	EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name)	EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name)	EXT4_ATTR(name, 0644, name##_show, name##_store)

/* Read-only unsigned int taken from the on-disk superblock. */
#define EXT4_RO_ATTR_ES_UI(name, elname)				\
	EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
/* Read-write unsigned int taken from struct ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)				\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)

/* Address of the struct attribute inside ext4_attr_<name>. */
#define ATTR_LIST(name) &ext4_attr_##name.attr

/* Read-only attribute that always reports the constant _val. */
#define EXT4_DEPRECATED_ATTR(_name, _val)				\
static struct ext4_attr ext4_attr_##_name = {				\
	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
	.show	= sbi_deprecated_show,					\
	.u	= {							\
		.deprecated_val = _val,					\
	},								\
}
2667
2668EXT4_RO_ATTR(delayed_allocation_blocks);
2669EXT4_RO_ATTR(session_write_kbytes);
2670EXT4_RO_ATTR(lifetime_write_kbytes);
2671EXT4_RW_ATTR(reserved_clusters);
2672EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2673                 inode_readahead_blks_store, s_inode_readahead_blks);
2674EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2675EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2676EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2677EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2678EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2679EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2680EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2681EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
2682EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
2683EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2684EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
2685EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
2686EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
2687EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
2688EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
2689EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
2690EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
2691EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
2692EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
2693
2694static struct attribute *ext4_attrs[] = {
2695        ATTR_LIST(delayed_allocation_blocks),
2696        ATTR_LIST(session_write_kbytes),
2697        ATTR_LIST(lifetime_write_kbytes),
2698        ATTR_LIST(reserved_clusters),
2699        ATTR_LIST(inode_readahead_blks),
2700        ATTR_LIST(inode_goal),
2701        ATTR_LIST(mb_stats),
2702        ATTR_LIST(mb_max_to_scan),
2703        ATTR_LIST(mb_min_to_scan),
2704        ATTR_LIST(mb_order2_req),
2705        ATTR_LIST(mb_stream_req),
2706        ATTR_LIST(mb_group_prealloc),
2707        ATTR_LIST(max_writeback_mb_bump),
2708        ATTR_LIST(extent_max_zeroout_kb),
2709        ATTR_LIST(trigger_fs_error),
2710        ATTR_LIST(err_ratelimit_interval_ms),
2711        ATTR_LIST(err_ratelimit_burst),
2712        ATTR_LIST(warning_ratelimit_interval_ms),
2713        ATTR_LIST(warning_ratelimit_burst),
2714        ATTR_LIST(msg_ratelimit_interval_ms),
2715        ATTR_LIST(msg_ratelimit_burst),
2716        ATTR_LIST(errors_count),
2717        ATTR_LIST(first_error_time),
2718        ATTR_LIST(last_error_time),
2719        NULL,
2720};
2721
2722/* Features this copy of ext4 supports */
2723EXT4_INFO_ATTR(lazy_itable_init);
2724EXT4_INFO_ATTR(batched_discard);
2725EXT4_INFO_ATTR(meta_bg_resize);
2726EXT4_INFO_ATTR(encryption);
2727
2728static struct attribute *ext4_feat_attrs[] = {
2729        ATTR_LIST(lazy_itable_init),
2730        ATTR_LIST(batched_discard),
2731        ATTR_LIST(meta_bg_resize),
2732        ATTR_LIST(encryption),
2733        NULL,
2734};
2735
2736static ssize_t ext4_attr_show(struct kobject *kobj,
2737                              struct attribute *attr, char *buf)
2738{
2739        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2740                                                s_kobj);
2741        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2742
2743        return a->show ? a->show(a, sbi, buf) : 0;
2744}
2745
2746static ssize_t ext4_attr_store(struct kobject *kobj,
2747                               struct attribute *attr,
2748                               const char *buf, size_t len)
2749{
2750        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2751                                                s_kobj);
2752        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2753
2754        return a->store ? a->store(a, sbi, buf, len) : 0;
2755}
2756
2757static void ext4_sb_release(struct kobject *kobj)
2758{
2759        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2760                                                s_kobj);
2761        complete(&sbi->s_kobj_unregister);
2762}
2763
2764static const struct sysfs_ops ext4_attr_ops = {
2765        .show   = ext4_attr_show,
2766        .store  = ext4_attr_store,
2767};
2768
2769static struct kobj_type ext4_ktype = {
2770        .default_attrs  = ext4_attrs,
2771        .sysfs_ops      = &ext4_attr_ops,
2772        .release        = ext4_sb_release,
2773};
2774
2775static void ext4_feat_release(struct kobject *kobj)
2776{
2777        complete(&ext4_feat->f_kobj_unregister);
2778}
2779
2780static ssize_t ext4_feat_show(struct kobject *kobj,
2781                              struct attribute *attr, char *buf)
2782{
2783        return snprintf(buf, PAGE_SIZE, "supported\n");
2784}
2785
2786/*
2787 * We can not use ext4_attr_show/store because it relies on the kobject
2788 * being embedded in the ext4_sb_info structure which is definitely not
2789 * true in this case.
2790 */
2791static const struct sysfs_ops ext4_feat_ops = {
2792        .show   = ext4_feat_show,
2793        .store  = NULL,
2794};
2795
2796static struct kobj_type ext4_feat_ktype = {
2797        .default_attrs  = ext4_feat_attrs,
2798        .sysfs_ops      = &ext4_feat_ops,
2799        .release        = ext4_feat_release,
2800};
2801
2802/*
2803 * Check whether this filesystem can be mounted based on
2804 * the features present and the RDONLY/RDWR mount requested.
2805 * Returns 1 if this filesystem can be mounted as requested,
2806 * 0 if it cannot be.
2807 */
2808static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2809{
2810        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2811                ext4_msg(sb, KERN_ERR,
2812                        "Couldn't mount because of "
2813                        "unsupported optional features (%x)",
2814                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2815                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2816                return 0;
2817        }
2818
2819        if (readonly)
2820                return 1;
2821
2822        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) {
2823                ext4_msg(sb, KERN_INFO, "filesystem is read-only");
2824                sb->s_flags |= MS_RDONLY;
2825                return 1;
2826        }
2827
2828        /* Check that feature set is OK for a read-write mount */
2829        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2830                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2831                         "unsupported optional features (%x)",
2832                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2833                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2834                return 0;
2835        }
2836        /*
2837         * Large file size enabled file system can only be mounted
2838         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2839         */
2840        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2841                if (sizeof(blkcnt_t) < sizeof(u64)) {
2842                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2843                                 "cannot be mounted RDWR without "
2844                                 "CONFIG_LBDAF");
2845                        return 0;
2846                }
2847        }
2848        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2849            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2850                ext4_msg(sb, KERN_ERR,
2851                         "Can't support bigalloc feature without "
2852                         "extents feature\n");
2853                return 0;
2854        }
2855
2856#ifndef CONFIG_QUOTA
2857        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2858            !readonly) {
2859                ext4_msg(sb, KERN_ERR,
2860                         "Filesystem with quota feature cannot be mounted RDWR "
2861                         "without CONFIG_QUOTA");
2862                return 0;
2863        }
2864#endif  /* CONFIG_QUOTA */
2865        return 1;
2866}
2867
2868/*
2869 * This function is called once a day if we have errors logged
2870 * on the file system
2871 */
2872static void print_daily_error_info(unsigned long arg)
2873{
2874        struct super_block *sb = (struct super_block *) arg;
2875        struct ext4_sb_info *sbi;
2876        struct ext4_super_block *es;
2877
2878        sbi = EXT4_SB(sb);
2879        es = sbi->s_es;
2880
2881        if (es->s_error_count)
2882                /* fsck newer than v1.41.13 is needed to clean this condition. */
2883                ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2884                         le32_to_cpu(es->s_error_count));
2885        if (es->s_first_error_time) {
2886                printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2887                       sb->s_id, le32_to_cpu(es->s_first_error_time),
2888                       (int) sizeof(es->s_first_error_func),
2889                       es->s_first_error_func,
2890                       le32_to_cpu(es->s_first_error_line));
2891                if (es->s_first_error_ino)
2892                        printk(": inode %u",
2893                               le32_to_cpu(es->s_first_error_ino));
2894                if (es->s_first_error_block)
2895                        printk(": block %llu", (unsigned long long)
2896                               le64_to_cpu(es->s_first_error_block));
2897                printk("\n");
2898        }
2899        if (es->s_last_error_time) {
2900                printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
2901                       sb->s_id, le32_to_cpu(es->s_last_error_time),
2902                       (int) sizeof(es->s_last_error_func),
2903                       es->s_last_error_func,
2904                       le32_to_cpu(es->s_last_error_line));
2905                if (es->s_last_error_ino)
2906                        printk(": inode %u",
2907                               le32_to_cpu(es->s_last_error_ino));
2908                if (es->s_last_error_block)
2909                        printk(": block %llu", (unsigned long long)
2910                               le64_to_cpu(es->s_last_error_block));
2911                printk("\n");
2912        }
2913        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
2914}
2915
2916/* Find next suitable group and run ext4_init_inode_table */
2917static int ext4_run_li_request(struct ext4_li_request *elr)
2918{
2919        struct ext4_group_desc *gdp = NULL;
2920        ext4_group_t group, ngroups;
2921        struct super_block *sb;
2922        unsigned long timeout = 0;
2923        int ret = 0;
2924
2925        sb = elr->lr_super;
2926        ngroups = EXT4_SB(sb)->s_groups_count;
2927
2928        sb_start_write(sb);
2929        for (group = elr->lr_next_group; group < ngroups; group++) {
2930                gdp = ext4_get_group_desc(sb, group, NULL);
2931                if (!gdp) {
2932                        ret = 1;
2933                        break;
2934                }
2935
2936                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2937                        break;
2938        }
2939
2940        if (group >= ngroups)
2941                ret = 1;
2942
2943        if (!ret) {
2944                timeout = jiffies;
2945                ret = ext4_init_inode_table(sb, group,
2946                                            elr->lr_timeout ? 0 : 1);
2947                if (elr->lr_timeout == 0) {
2948                        timeout = (jiffies - timeout) *
2949                                  elr->lr_sbi->s_li_wait_mult;
2950                        elr->lr_timeout = timeout;
2951                }
2952                elr->lr_next_sched = jiffies + elr->lr_timeout;
2953                elr->lr_next_group = group + 1;
2954        }
2955        sb_end_write(sb);
2956
2957        return ret;
2958}
2959
2960/*
2961 * Remove lr_request from the list_request and free the
2962 * request structure. Should be called with li_list_mtx held
2963 */
2964static void ext4_remove_li_request(struct ext4_li_request *elr)
2965{
2966        struct ext4_sb_info *sbi;
2967
2968        if (!elr)
2969                return;
2970
2971        sbi = elr->lr_sbi;
2972
2973        list_del(&elr->lr_request);
2974        sbi->s_li_request = NULL;
2975        kfree(elr);
2976}
2977
2978static void ext4_unregister_li_request(struct super_block *sb)
2979{
2980        mutex_lock(&ext4_li_mtx);
2981        if (!ext4_li_info) {
2982                mutex_unlock(&ext4_li_mtx);
2983                return;
2984        }
2985
2986        mutex_lock(&ext4_li_info->li_list_mtx);
2987        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2988        mutex_unlock(&ext4_li_info->li_list_mtx);
2989        mutex_unlock(&ext4_li_mtx);
2990}
2991
2992static struct task_struct *ext4_lazyinit_task;
2993
2994/*
2995 * This is the function where ext4lazyinit thread lives. It walks
2996 * through the request list searching for next scheduled filesystem.
2997 * When such a fs is found, run the lazy initialization request
2998 * (ext4_rn_li_request) and keep track of the time spend in this
2999 * function. Based on that time we compute next schedule time of
3000 * the request. When walking through the list is complete, compute
3001 * next waking time and put itself into sleep.
3002 */
3003static int ext4_lazyinit_thread(void *arg)
3004{
3005        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
3006        struct list_head *pos, *n;
3007        struct ext4_li_request *elr;
3008        unsigned long next_wakeup, cur;
3009
3010        BUG_ON(NULL == eli);
3011
3012cont_thread:
3013        while (true) {
3014                next_wakeup = MAX_JIFFY_OFFSET;
3015
3016                mutex_lock(&eli->li_list_mtx);
3017                if (list_empty(&eli->li_request_list)) {
3018                        mutex_unlock(&eli->li_list_mtx);
3019                        goto exit_thread;
3020                }
3021
3022                list_for_each_safe(pos, n, &eli->li_request_list) {
3023                        elr = list_entry(pos, struct ext4_li_request,
3024                                         lr_request);
3025
3026                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
3027                                if (ext4_run_li_request(elr) != 0) {
3028                                        /* error, remove the lazy_init job */
3029                                        ext4_remove_li_request(elr);
3030                                        continue;
3031                                }
3032                        }
3033
3034                        if (time_before(elr->lr_next_sched, next_wakeup))
3035                                next_wakeup = elr->lr_next_sched;
3036                }
3037                mutex_unlock(&eli->li_list_mtx);
3038
3039                try_to_freeze();
3040
3041                cur = jiffies;
3042                if ((time_after_eq(cur, next_wakeup)) ||
3043                    (MAX_JIFFY_OFFSET == next_wakeup)) {
3044                        cond_resched();
3045                        continue;
3046                }
3047
3048                schedule_timeout_interruptible(next_wakeup - cur);
3049
3050                if (kthread_should_stop()) {
3051                        ext4_clear_request_list();
3052                        goto exit_thread;
3053                }
3054        }
3055
3056exit_thread:
3057        /*
3058         * It looks like the request list is empty, but we need
3059         * to check it under the li_list_mtx lock, to prevent any
3060         * additions into it, and of course we should lock ext4_li_mtx
3061         * to atomically free the list and ext4_li_info, because at
3062         * this point another ext4 filesystem could be registering
3063         * new one.
3064         */
3065        mutex_lock(&ext4_li_mtx);
3066        mutex_lock(&eli->li_list_mtx);
3067        if (!list_empty(&eli->li_request_list)) {
3068                mutex_unlock(&eli->li_list_mtx);
3069                mutex_unlock(&ext4_li_mtx);
3070                goto cont_thread;
3071        }
3072        mutex_unlock(&eli->li_list_mtx);
3073        kfree(ext4_li_info);
3074        ext4_li_info = NULL;
3075        mutex_unlock(&ext4_li_mtx);
3076
3077        return 0;
3078}
3079
3080static void ext4_clear_request_list(void)
3081{
3082        struct list_head *pos, *n;
3083        struct ext4_li_request *elr;
3084
3085        mutex_lock(&ext4_li_info->li_list_mtx);
3086        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3087                elr = list_entry(pos, struct ext4_li_request,
3088                                 lr_request);
3089                ext4_remove_li_request(elr);
3090        }
3091        mutex_unlock(&ext4_li_info->li_list_mtx);
3092}
3093
3094static int ext4_run_lazyinit_thread(void)
3095{
3096        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3097                                         ext4_li_info, "ext4lazyinit");
3098        if (IS_ERR(ext4_lazyinit_task)) {
3099                int err = PTR_ERR(ext4_lazyinit_task);
3100                ext4_clear_request_list();
3101                kfree(ext4_li_info);
3102                ext4_li_info = NULL;
3103                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3104                                 "initialization thread\n",
3105                                 err);
3106                return err;
3107        }
3108        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3109        return 0;
3110}
3111
3112/*
3113 * Check whether it make sense to run itable init. thread or not.
3114 * If there is at least one uninitialized inode table, return
3115 * corresponding group number, else the loop goes through all
3116 * groups and return total number of groups.
3117 */
3118static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3119{
3120        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3121        struct ext4_group_desc *gdp = NULL;
3122
3123        for (group = 0; group < ngroups; group++) {
3124                gdp = ext4_get_group_desc(sb, group, NULL);
3125                if (!gdp)
3126                        continue;
3127
3128                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3129                        break;
3130        }
3131
3132        return group;
3133}
3134
3135static int ext4_li_info_new(void)
3136{
3137        struct ext4_lazy_init *eli = NULL;
3138
3139        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3140        if (!eli)
3141                return -ENOMEM;
3142
3143        INIT_LIST_HEAD(&eli->li_request_list);
3144        mutex_init(&eli->li_list_mtx);
3145
3146        eli->li_state |= EXT4_LAZYINIT_QUIT;
3147
3148        ext4_li_info = eli;
3149
3150        return 0;
3151}
3152
3153static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3154                                            ext4_group_t start)
3155{
3156        struct ext4_sb_info *sbi = EXT4_SB(sb);
3157        struct ext4_li_request *elr;
3158
3159        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3160        if (!elr)
3161                return NULL;
3162
3163        elr->lr_super = sb;
3164        elr->lr_sbi = sbi;
3165        elr->lr_next_group = start;
3166
3167        /*
3168         * Randomize first schedule time of the request to
3169         * spread the inode table initialization requests
3170         * better.
3171         */
3172        elr->lr_next_sched = jiffies + (prandom_u32() %
3173                                (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3174        return elr;
3175}
3176
3177int ext4_register_li_request(struct super_block *sb,
3178                             ext4_group_t first_not_zeroed)
3179{
3180        struct ext4_sb_info *sbi = EXT4_SB(sb);
3181        struct ext4_li_request *elr = NULL;
3182        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3183        int ret = 0;
3184
3185        mutex_lock(&ext4_li_mtx);
3186        if (sbi->s_li_request != NULL) {
3187                /*
3188                 * Reset timeout so it can be computed again, because
3189                 * s_li_wait_mult might have changed.
3190                 */
3191                sbi->s_li_request->lr_timeout = 0;
3192                goto out;
3193        }
3194
3195        if (first_not_zeroed == ngroups ||
3196            (sb->s_flags & MS_RDONLY) ||
3197            !test_opt(sb, INIT_INODE_TABLE))
3198                goto out;
3199
3200        elr = ext4_li_request_new(sb, first_not_zeroed);
3201        if (!elr) {
3202                ret = -ENOMEM;
3203                goto out;
3204        }
3205
3206        if (NULL == ext4_li_info) {
3207                ret = ext4_li_info_new();
3208                if (ret)
3209                        goto out;
3210        }
3211
3212        mutex_lock(&ext4_li_info->li_list_mtx);
3213        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3214        mutex_unlock(&ext4_li_info->li_list_mtx);
3215
3216        sbi->s_li_request = elr;
3217        /*
3218         * set elr to NULL here since it has been inserted to
3219         * the request_list and the removal and free of it is
3220         * handled by ext4_clear_request_list from now on.
3221         */
3222        elr = NULL;
3223
3224        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3225                ret = ext4_run_lazyinit_thread();
3226                if (ret)
3227                        goto out;
3228        }
3229out:
3230        mutex_unlock(&ext4_li_mtx);
3231        if (ret)
3232                kfree(elr);
3233        return ret;
3234}
3235
3236/*
3237 * We do not need to lock anything since this is called on
3238 * module unload.
3239 */
3240static void ext4_destroy_lazyinit_thread(void)
3241{
3242        /*
3243         * If thread exited earlier
3244         * there's nothing to be done.
3245         */
3246        if (!ext4_li_info || !ext4_lazyinit_task)
3247                return;
3248
3249        kthread_stop(ext4_lazyinit_task);
3250}
3251
3252static int set_journal_csum_feature_set(struct super_block *sb)
3253{
3254        int ret = 1;
3255        int compat, incompat;
3256        struct ext4_sb_info *sbi = EXT4_SB(sb);
3257
3258        if (ext4_has_metadata_csum(sb)) {
3259                /* journal checksum v3 */
3260                compat = 0;
3261                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3262        } else {
3263                /* journal checksum v1 */
3264                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3265                incompat = 0;
3266        }
3267
3268        jbd2_journal_clear_features(sbi->s_journal,
3269                        JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3270                        JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3271                        JBD2_FEATURE_INCOMPAT_CSUM_V2);
3272        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3273                ret = jbd2_journal_set_features(sbi->s_journal,
3274                                compat, 0,
3275                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3276                                incompat);
3277        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3278                ret = jbd2_journal_set_features(sbi->s_journal,
3279                                compat, 0,
3280                                incompat);
3281                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3282                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3283        } else {
3284                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3285                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3286        }
3287
3288        return ret;
3289}
3290
3291/*
3292 * Note: calculating the overhead so we can be compatible with
3293 * historical BSD practice is quite difficult in the face of
3294 * clusters/bigalloc.  This is because multiple metadata blocks from
3295 * different block group can end up in the same allocation cluster.
3296 * Calculating the exact overhead in the face of clustered allocation
3297 * requires either O(all block bitmaps) in memory or O(number of block
3298 * groups**2) in time.  We will still calculate the superblock for
3299 * older file systems --- and if we come across with a bigalloc file
3300 * system with zero in s_overhead_clusters the estimate will be close to
3301 * correct especially for very large cluster sizes --- but for newer
3302 * file systems, it's better to calculate this figure once at mkfs
3303 * time, and store it in the superblock.  If the superblock value is
3304 * present (even for non-bigalloc file systems), we will use it.
3305 */
3306static int count_overhead(struct super_block *sb, ext4_group_t grp,
3307                          char *buf)
3308{
3309        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3310        struct ext4_group_desc  *gdp;
3311        ext4_fsblk_t            first_block, last_block, b;
3312        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3313        int                     s, j, count = 0;
3314
3315        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3316                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3317                        sbi->s_itb_per_group + 2);
3318
3319        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3320                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3321        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3322        for (i = 0; i < ngroups; i++) {
3323                gdp = ext4_get_group_desc(sb, i, NULL);
3324                b = ext4_block_bitmap(sb, gdp);
3325                if (b >= first_block && b <= last_block) {
3326                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3327                        count++;
3328                }
3329                b = ext4_inode_bitmap(sb, gdp);
3330                if (b >= first_block && b <= last_block) {
3331                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3332                        count++;
3333                }
3334                b = ext4_inode_table(sb, gdp);
3335                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3336                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3337                                int c = EXT4_B2C(sbi, b - first_block);
3338                                ext4_set_bit(c, buf);
3339                                count++;
3340                        }
3341                if (i != grp)
3342                        continue;
3343                s = 0;
3344                if (ext4_bg_has_super(sb, grp)) {
3345                        ext4_set_bit(s++, buf);
3346                        count++;
3347                }
3348                for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
3349                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3350                        count++;
3351                }
3352        }
3353        if (!count)
3354                return 0;
3355        return EXT4_CLUSTERS_PER_GROUP(sb) -
3356                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3357}
3358
3359/*
3360 * Compute the overhead and stash it in sbi->s_overhead
3361 */
3362int ext4_calculate_overhead(struct super_block *sb)
3363{
3364        struct ext4_sb_info *sbi = EXT4_SB(sb);
3365        struct ext4_super_block *es = sbi->s_es;
3366        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3367        ext4_fsblk_t overhead = 0;
3368        char *buf = (char *) get_zeroed_page(GFP_NOFS);
3369
3370        if (!buf)
3371                return -ENOMEM;
3372
3373        /*
3374         * Compute the overhead (FS structures).  This is constant
3375         * for a given filesystem unless the number of block groups
3376         * changes so we cache the previous value until it does.
3377         */
3378
3379        /*
3380         * All of the blocks before first_data_block are overhead
3381         */
3382        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3383
3384        /*
3385         * Add the overhead found in each block group
3386         */
3387        for (i = 0; i < ngroups; i++) {
3388                int blks;
3389
3390                blks = count_overhead(sb, i, buf);
3391                overhead += blks;
3392                if (blks)
3393                        memset(buf, 0, PAGE_SIZE);
3394                cond_resched();
3395        }
3396        /* Add the internal journal blocks as well */
3397        if (sbi->s_journal && !sbi->journal_bdev)
3398                overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
3399
3400        sbi->s_overhead = overhead;
3401        smp_wmb();
3402        free_page((unsigned long) buf);
3403        return 0;
3404}
3405
3406
3407static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3408{
3409        ext4_fsblk_t resv_clusters;
3410
3411        /*
3412         * There's no need to reserve anything when we aren't using extents.
3413         * The space estimates are exact, there are no unwritten extents,
3414         * hole punching doesn't need new metadata... This is needed especially
3415         * to keep ext2/3 backward compatibility.
3416         */
3417        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3418                return 0;
3419        /*
3420         * By default we reserve 2% or 4096 clusters, whichever is smaller.
3421         * This should cover the situations where we can not afford to run
3422         * out of space like for example punch hole, or converting
3423         * unwritten extents in delalloc path. In most cases such
3424         * allocation would require 1, or 2 blocks, higher numbers are
3425         * very rare.
3426         */
3427        resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3428                        EXT4_SB(sb)->s_cluster_bits;
3429
3430        do_div(resv_clusters, 50);
3431        resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
3432
3433        return resv_clusters;
3434}
3435
3436
3437static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
3438{
3439        ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
3440                                sbi->s_cluster_bits;
3441
3442        if (count >= clusters)
3443                return -EINVAL;
3444
3445        atomic64_set(&sbi->s_resv_clusters, count);
3446        return 0;
3447}
3448
3449static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3450{
3451        char *orig_data = kstrdup(data, GFP_KERNEL);
3452        struct buffer_head *bh;
3453        struct ext4_super_block *es = NULL;
3454        struct ext4_sb_info *sbi;
3455        ext4_fsblk_t block;
3456        ext4_fsblk_t sb_block = get_sb_block(&data);
3457        ext4_fsblk_t logical_sb_block;
3458        unsigned long offset = 0;
3459        unsigned long journal_devnum = 0;
3460        unsigned long def_mount_opts;
3461        struct inode *root;
3462        const char *descr;
3463        int ret = -ENOMEM;
3464        int blocksize, clustersize;
3465        unsigned int db_count;
3466        unsigned int i;
3467        int needs_recovery, has_huge_files, has_bigalloc;
3468        __u64 blocks_count;
3469        int err = 0;
3470        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3471        ext4_group_t first_not_zeroed;
3472
3473        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3474        if (!sbi)
3475                goto out_free_orig;
3476
3477        sbi->s_blockgroup_lock =
3478                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3479        if (!sbi->s_blockgroup_lock) {
3480                kfree(sbi);
3481                goto out_free_orig;
3482        }
3483        sb->s_fs_info = sbi;
3484        sbi->s_sb = sb;
3485        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3486        sbi->s_sb_block = sb_block;
3487        if (sb->s_bdev->bd_part)
3488                sbi->s_sectors_written_start =
3489                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3490
3491        /* Cleanup superblock name */
3492        strreplace(sb->s_id, '/', '!');
3493
3494        /* -EINVAL is default */
3495        ret = -EINVAL;
3496        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3497        if (!blocksize) {
3498                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3499                goto out_fail;
3500        }
3501
3502        /*
3503         * The ext4 superblock will not be buffer aligned for other than 1kB
3504         * block sizes.  We need to calculate the offset from buffer start.
3505         */
3506        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3507                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3508                offset = do_div(logical_sb_block, blocksize);
3509        } else {
3510                logical_sb_block = sb_block;
3511        }
3512
3513        if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3514                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3515                goto out_fail;
3516        }
3517        /*
3518         * Note: s_es must be initialized as soon as possible because
3519         *       some ext4 macro-instructions depend on its value
3520         */
3521        es = (struct ext4_super_block *) (bh->b_data + offset);
3522        sbi->s_es = es;
3523        sb->s_magic = le16_to_cpu(es->s_magic);
3524        if (sb->s_magic != EXT4_SUPER_MAGIC)
3525                goto cantfind_ext4;
3526        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3527
3528        /* Warn if metadata_csum and gdt_csum are both set. */
3529        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3530                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3531            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3532                ext4_warning(sb, "metadata_csum and uninit_bg are "
3533                             "redundant flags; please run fsck.");
3534
3535        /* Check for a known checksum algorithm */
3536        if (!ext4_verify_csum_type(sb, es)) {
3537                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3538                         "unknown checksum algorithm.");
3539                silent = 1;
3540                goto cantfind_ext4;
3541        }
3542
3543        /* Load the checksum driver */
3544        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3545                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3546                sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3547                if (IS_ERR(sbi->s_chksum_driver)) {
3548                        ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3549                        ret = PTR_ERR(sbi->s_chksum_driver);
3550                        sbi->s_chksum_driver = NULL;
3551                        goto failed_mount;
3552                }
3553        }
3554
3555        /* Check superblock checksum */
3556        if (!ext4_superblock_csum_verify(sb, es)) {
3557                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3558                         "invalid superblock checksum.  Run e2fsck?");
3559                silent = 1;
3560                goto cantfind_ext4;
3561        }
3562
3563        /* Precompute checksum seed for all metadata */
3564        if (ext4_has_metadata_csum(sb))
3565                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3566                                               sizeof(es->s_uuid));
3567
3568        /* Set defaults before we parse the mount options */
3569        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3570        set_opt(sb, INIT_INODE_TABLE);
3571        if (def_mount_opts & EXT4_DEFM_DEBUG)
3572                set_opt(sb, DEBUG);
3573        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3574                set_opt(sb, GRPID);
3575        if (def_mount_opts & EXT4_DEFM_UID16)
3576                set_opt(sb, NO_UID32);
3577        /* xattr user namespace & acls are now defaulted on */
3578        set_opt(sb, XATTR_USER);
3579#ifdef CONFIG_EXT4_FS_POSIX_ACL
3580        set_opt(sb, POSIX_ACL);
3581#endif
3582        /* don't forget to enable journal_csum when metadata_csum is enabled. */
3583        if (ext4_has_metadata_csum(sb))
3584                set_opt(sb, JOURNAL_CHECKSUM);
3585
3586        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3587                set_opt(sb, JOURNAL_DATA);
3588        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3589                set_opt(sb, ORDERED_DATA);
3590        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3591                set_opt(sb, WRITEBACK_DATA);
3592
3593        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3594                set_opt(sb, ERRORS_PANIC);
3595        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3596                set_opt(sb, ERRORS_CONT);
3597        else
3598                set_opt(sb, ERRORS_RO);
3599        /* block_validity enabled by default; disable with noblock_validity */
3600        set_opt(sb, BLOCK_VALIDITY);
3601        if (def_mount_opts & EXT4_DEFM_DISCARD)
3602                set_opt(sb, DISCARD);
3603
3604        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3605        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3606        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3607        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3608        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3609
3610        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3611                set_opt(sb, BARRIER);
3612
3613        /*
3614         * enable delayed allocation by default
3615         * Use -o nodelalloc to turn it off
3616         */
3617        if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3618            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3619                set_opt(sb, DELALLOC);
3620
3621        /*
3622         * set default s_li_wait_mult for lazyinit, for the case there is
3623         * no mount option specified.
3624         */
3625        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3626
3627        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3628                           &journal_devnum, &journal_ioprio, 0)) {
3629                ext4_msg(sb, KERN_WARNING,
3630                         "failed to parse options in superblock: %s",
3631                         sbi->s_es->s_mount_opts);
3632        }
3633        sbi->s_def_mount_opt = sbi->s_mount_opt;
3634        if (!parse_options((char *) data, sb, &journal_devnum,
3635                           &journal_ioprio, 0))
3636                goto failed_mount;
3637
3638        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3639                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3640                            "with data=journal disables delayed "
3641                            "allocation and O_DIRECT support!\n");
3642                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3643                        ext4_msg(sb, KERN_ERR, "can't mount with "
3644                                 "both data=journal and delalloc");
3645                        goto failed_mount;
3646                }
3647                if (test_opt(sb, DIOREAD_NOLOCK)) {
3648                        ext4_msg(sb, KERN_ERR, "can't mount with "
3649                                 "both data=journal and dioread_nolock");
3650                        goto failed_mount;
3651                }
3652                if (test_opt(sb, DAX)) {
3653                        ext4_msg(sb, KERN_ERR, "can't mount with "
3654                                 "both data=journal and dax");
3655                        goto failed_mount;
3656                }
3657                if (test_opt(sb, DELALLOC))
3658                        clear_opt(sb, DELALLOC);
3659        } else {
3660                sb->s_iflags |= SB_I_CGROUPWB;
3661        }
3662
3663        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3664                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3665
3666        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3667            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3668             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3669             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3670                ext4_msg(sb, KERN_WARNING,
3671                       "feature flags set on rev 0 fs, "
3672                       "running e2fsck is recommended");
3673
3674        if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
3675                set_opt2(sb, HURD_COMPAT);
3676                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
3677                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
3678                        ext4_msg(sb, KERN_ERR,
3679                                 "The Hurd can't support 64-bit file systems");
3680                        goto failed_mount;
3681                }
3682        }
3683
3684        if (IS_EXT2_SB(sb)) {
3685                if (ext2_feature_set_ok(sb))
3686                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3687                                 "using the ext4 subsystem");
3688                else {
3689                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3690                                 "to feature incompatibilities");
3691                        goto failed_mount;
3692                }
3693        }
3694
3695        if (IS_EXT3_SB(sb)) {
3696                if (ext3_feature_set_ok(sb))
3697                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3698                                 "using the ext4 subsystem");
3699                else {
3700                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3701                                 "to feature incompatibilities");
3702                        goto failed_mount;
3703                }
3704        }
3705
3706        /*
3707         * Check feature flags regardless of the revision level, since we
3708         * previously didn't change the revision level when setting the flags,
3709         * so there is a chance incompat flags are set on a rev 0 filesystem.
3710         */
3711        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3712                goto failed_mount;
3713
3714        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3715        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3716            blocksize > EXT4_MAX_BLOCK_SIZE) {
3717                ext4_msg(sb, KERN_ERR,
3718                       "Unsupported filesystem blocksize %d", blocksize);
3719                goto failed_mount;
3720        }
3721
3722        if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
3723                if (blocksize != PAGE_SIZE) {
3724                        ext4_msg(sb, KERN_ERR,
3725                                        "error: unsupported blocksize for dax");
3726                        goto failed_mount;
3727                }
3728                if (!sb->s_bdev->bd_disk->fops->direct_access) {
3729                        ext4_msg(sb, KERN_ERR,
3730                                        "error: device does not support dax");
3731                        goto failed_mount;
3732                }
3733        }
3734
3735        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) &&
3736            es->s_encryption_level) {
3737                ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
3738                         es->s_encryption_level);
3739                goto failed_mount;
3740        }
3741
3742        if (sb->s_blocksize != blocksize) {
3743                /* Validate the filesystem blocksize */
3744                if (!sb_set_blocksize(sb, blocksize)) {
3745                        ext4_msg(sb, KERN_ERR, "bad block size %d",
3746                                        blocksize);
3747                        goto failed_mount;
3748                }
3749
3750                brelse(bh);
3751                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3752                offset = do_div(logical_sb_block, blocksize);
3753                bh = sb_bread_unmovable(sb, logical_sb_block);
3754                if (!bh) {
3755                        ext4_msg(sb, KERN_ERR,
3756                               "Can't read superblock on 2nd try");
3757                        goto failed_mount;
3758                }
3759                es = (struct ext4_super_block *)(bh->b_data + offset);
3760                sbi->s_es = es;
3761                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3762                        ext4_msg(sb, KERN_ERR,
3763                               "Magic mismatch, very weird!");
3764                        goto failed_mount;
3765                }
3766        }
3767
3768        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3769                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3770        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3771                                                      has_huge_files);
3772        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3773
3774        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3775                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3776                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3777        } else {
3778                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3779                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3780                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3781                    (!is_power_of_2(sbi->s_inode_size)) ||
3782                    (sbi->s_inode_size > blocksize)) {
3783                        ext4_msg(sb, KERN_ERR,
3784                               "unsupported inode size: %d",
3785                               sbi->s_inode_size);
3786                        goto failed_mount;
3787                }
3788                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3789                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3790        }
3791
3792        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3793        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3794                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3795                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3796                    !is_power_of_2(sbi->s_desc_size)) {
3797                        ext4_msg(sb, KERN_ERR,
3798                               "unsupported descriptor size %lu",
3799                               sbi->s_desc_size);
3800                        goto failed_mount;
3801                }
3802        } else
3803                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3804
3805        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3806        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3807        if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3808                goto cantfind_ext4;
3809
3810        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3811        if (sbi->s_inodes_per_block == 0)
3812                goto cantfind_ext4;
3813        sbi->s_itb_per_group = sbi->s_inodes_per_group /
3814                                        sbi->s_inodes_per_block;
3815        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3816        sbi->s_sbh = bh;
3817        sbi->s_mount_state = le16_to_cpu(es->s_state);
3818        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3819        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3820
3821        for (i = 0; i < 4; i++)
3822                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3823        sbi->s_def_hash_version = es->s_def_hash_version;
3824        if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
3825                i = le32_to_cpu(es->s_flags);
3826                if (i & EXT2_FLAGS_UNSIGNED_HASH)
3827                        sbi->s_hash_unsigned = 3;
3828                else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3829#ifdef __CHAR_UNSIGNED__
3830                        if (!(sb->s_flags & MS_RDONLY))
3831                                es->s_flags |=
3832                                        cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3833                        sbi->s_hash_unsigned = 3;
3834#else
3835                        if (!(sb->s_flags & MS_RDONLY))
3836                                es->s_flags |=
3837                                        cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3838#endif
3839                }
3840        }
3841
3842        /* Handle clustersize */
3843        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3844        has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3845                                EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3846        if (has_bigalloc) {
3847                if (clustersize < blocksize) {
3848                        ext4_msg(sb, KERN_ERR,
3849                                 "cluster size (%d) smaller than "
3850                                 "block size (%d)", clustersize, blocksize);
3851                        goto failed_mount;
3852                }
3853                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3854                        le32_to_cpu(es->s_log_block_size);
3855                sbi->s_clusters_per_group =
3856                        le32_to_cpu(es->s_clusters_per_group);
3857                if (sbi->s_clusters_per_group > blocksize * 8) {
3858                        ext4_msg(sb, KERN_ERR,
3859                                 "#clusters per group too big: %lu",
3860                                 sbi->s_clusters_per_group);
3861                        goto failed_mount;
3862                }
3863                if (sbi->s_blocks_per_group !=
3864                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3865                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3866                                 "clusters per group (%lu) inconsistent",
3867                                 sbi->s_blocks_per_group,
3868                                 sbi->s_clusters_per_group);
3869                        goto failed_mount;
3870                }
3871        } else {
3872                if (clustersize != blocksize) {
3873                        ext4_warning(sb, "fragment/cluster size (%d) != "
3874                                     "block size (%d)", clustersize,
3875                                     blocksize);
3876                        clustersize = blocksize;
3877                }
3878                if (sbi->s_blocks_per_group > blocksize * 8) {
3879                        ext4_msg(sb, KERN_ERR,
3880                                 "#blocks per group too big: %lu",
3881                                 sbi->s_blocks_per_group);
3882                        goto failed_mount;
3883                }
3884                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3885                sbi->s_cluster_bits = 0;
3886        }
3887        sbi->s_cluster_ratio = clustersize / blocksize;
3888
3889        if (sbi->s_inodes_per_group > blocksize * 8) {
3890                ext4_msg(sb, KERN_ERR,
3891                       "#inodes per group too big: %lu",
3892                       sbi->s_inodes_per_group);
3893                goto failed_mount;
3894        }
3895
3896        /* Do we have standard group size of clustersize * 8 blocks ? */
3897        if (sbi->s_blocks_per_group == clustersize << 3)
3898                set_opt2(sb, STD_GROUP_SIZE);
3899
3900        /*
3901         * Test whether we have more sectors than will fit in sector_t,
3902         * and whether the max offset is addressable by the page cache.
3903         */
3904        err = generic_check_addressable(sb->s_blocksize_bits,
3905                                        ext4_blocks_count(es));
3906        if (err) {
3907                ext4_msg(sb, KERN_ERR, "filesystem"
3908                         " too large to mount safely on this system");
3909                if (sizeof(sector_t) < 8)
3910                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3911                goto failed_mount;
3912        }
3913
3914        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3915                goto cantfind_ext4;
3916
3917        /* check blocks count against device size */
3918        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3919        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3920                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3921                       "exceeds size of device (%llu blocks)",
3922                       ext4_blocks_count(es), blocks_count);
3923                goto failed_mount;
3924        }
3925
3926        /*
3927         * It makes no sense for the first data block to be beyond the end
3928         * of the filesystem.
3929         */
3930        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3931                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3932                         "block %u is beyond end of filesystem (%llu)",
3933                         le32_to_cpu(es->s_first_data_block),
3934                         ext4_blocks_count(es));
3935                goto failed_mount;
3936        }
3937        blocks_count = (ext4_blocks_count(es) -
3938                        le32_to_cpu(es->s_first_data_block) +
3939                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
3940        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3941        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3942                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3943                       "(block count %llu, first data block %u, "
3944                       "blocks per group %lu)", sbi->s_groups_count,
3945                       ext4_blocks_count(es),
3946                       le32_to_cpu(es->s_first_data_block),
3947                       EXT4_BLOCKS_PER_GROUP(sb));
3948                goto failed_mount;
3949        }
3950        sbi->s_groups_count = blocks_count;
3951        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3952                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3953        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3954                   EXT4_DESC_PER_BLOCK(sb);
3955        sbi->s_group_desc = ext4_kvmalloc(db_count *
3956                                          sizeof(struct buffer_head *),
3957                                          GFP_KERNEL);
3958        if (sbi->s_group_desc == NULL) {
3959                ext4_msg(sb, KERN_ERR, "not enough memory");
3960                ret = -ENOMEM;
3961                goto failed_mount;
3962        }
3963
3964        if (ext4_proc_root)
3965                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3966
3967        if (sbi->s_proc)
3968                proc_create_data("options", S_IRUGO, sbi->s_proc,
3969                                 &ext4_seq_options_fops, sb);
3970
3971        bgl_lock_init(sbi->s_blockgroup_lock);
3972
3973        for (i = 0; i < db_count; i++) {
3974                block = descriptor_loc(sb, logical_sb_block, i);
3975                sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3976                if (!sbi->s_group_desc[i]) {
3977                        ext4_msg(sb, KERN_ERR,
3978                               "can't read group descriptor %d", i);
3979                        db_count = i;
3980                        goto failed_mount2;
3981                }
3982        }
3983        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3984                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3985                goto failed_mount2;
3986        }
3987
3988        sbi->s_gdb_count = db_count;
3989        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3990        spin_lock_init(&sbi->s_next_gen_lock);
3991
3992        setup_timer(&sbi->s_err_report, print_daily_error_info,
3993                (unsigned long) sb);
3994
3995        /* Register extent status tree shrinker */
3996        if (ext4_es_register_shrinker(sbi))
3997                goto failed_mount3;
3998
3999        sbi->s_stripe = ext4_get_stripe_size(sbi);
4000        sbi->s_extent_max_zeroout_kb = 32;
4001
4002        /*
4003         * set up enough so that it can read an inode
4004         */
4005        sb->s_op = &ext4_sops;
4006        sb->s_export_op = &ext4_export_ops;
4007        sb->s_xattr = ext4_xattr_handlers;
4008#ifdef CONFIG_QUOTA
4009        sb->dq_op = &ext4_quota_operations;
4010        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
4011                sb->s_qcop = &dquot_quotactl_sysfile_ops;
4012        else
4013                sb->s_qcop = &ext4_qctl_operations;
4014        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
4015#endif
4016        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
4017
4018        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
4019        mutex_init(&sbi->s_orphan_lock);
4020
4021        sb->s_root = NULL;
4022
4023        needs_recovery = (es->s_last_orphan != 0 ||
4024                          EXT4_HAS_INCOMPAT_FEATURE(sb,
4025                                    EXT4_FEATURE_INCOMPAT_RECOVER));
4026
4027        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
4028            !(sb->s_flags & MS_RDONLY))
4029                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
4030                        goto failed_mount3a;
4031
4032        /*
4033         * The first inode we look at is the journal inode.  Don't try
4034         * root first: it may be modified in the journal!
4035         */
4036        if (!test_opt(sb, NOLOAD) &&
4037            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4038                if (ext4_load_journal(sb, es, journal_devnum))
4039                        goto failed_mount3a;
4040        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
4041              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4042                ext4_msg(sb, KERN_ERR, "required journal recovery "
4043                       "suppressed and not mounted read-only");
4044                goto failed_mount_wq;
4045        } else {
4046                clear_opt(sb, DATA_FLAGS);
4047                sbi->s_journal = NULL;
4048                needs_recovery = 0;
4049                goto no_journal;
4050        }
4051
4052        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
4053            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4054                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
4055                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4056                goto failed_mount_wq;
4057        }
4058
4059        if (!set_journal_csum_feature_set(sb)) {
4060                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4061                         "feature set");
4062                goto failed_mount_wq;
4063        }
4064
4065        /* We have now updated the journal if required, so we can
4066         * validate the data journaling mode. */
4067        switch (test_opt(sb, DATA_FLAGS)) {
4068        case 0:
4069                /* No mode set, assume a default based on the journal
4070                 * capabilities: ORDERED_DATA if the journal can
4071                 * cope, else JOURNAL_DATA
4072                 */
4073                if (jbd2_journal_check_available_features
4074                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
4075                        set_opt(sb, ORDERED_DATA);
4076                else
4077                        set_opt(sb, JOURNAL_DATA);
4078                break;
4079
4080        case EXT4_MOUNT_ORDERED_DATA:
4081        case EXT4_MOUNT_WRITEBACK_DATA:
4082                if (!jbd2_journal_check_available_features
4083                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4084                        ext4_msg(sb, KERN_ERR, "Journal does not support "
4085                               "requested data journaling mode");
4086                        goto failed_mount_wq;
4087                }
4088        default:
4089                break;
4090        }
4091        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4092
4093        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4094
4095no_journal:
4096        if (ext4_mballoc_ready) {
4097                sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
4098                if (!sbi->s_mb_cache) {
4099                        ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
4100                        goto failed_mount_wq;
4101                }
4102        }
4103
4104        if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
4105             EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
4106            (blocksize != PAGE_CACHE_SIZE)) {
4107                ext4_msg(sb, KERN_ERR,
4108                         "Unsupported blocksize for fs encryption");
4109                goto failed_mount_wq;
4110        }
4111
4112        if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
4113            !(sb->s_flags & MS_RDONLY) &&
4114            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
4115                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
4116                ext4_commit_super(sb, 1);
4117        }
4118
4119        /*
4120         * Get the # of file system overhead blocks from the
4121         * superblock if present.
4122         */
4123        if (es->s_overhead_clusters)
4124                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
4125        else {
4126                err = ext4_calculate_overhead(sb);
4127                if (err)
4128                        goto failed_mount_wq;
4129        }
4130
4131        /*
4132         * The maximum number of concurrent works can be high and
4133         * concurrency isn't really necessary.  Limit it to 1.
4134         */
4135        EXT4_SB(sb)->rsv_conversion_wq =
4136                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
4137        if (!EXT4_SB(sb)->rsv_conversion_wq) {
4138                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
4139                ret = -ENOMEM;
4140                goto failed_mount4;
4141        }
4142
4143        /*
4144         * The jbd2_journal_load will have done any necessary log recovery,
4145         * so we can safely mount the rest of the filesystem now.
4146         */
4147
4148        root = ext4_iget(sb, EXT4_ROOT_INO);
4149        if (IS_ERR(root)) {
4150                ext4_msg(sb, KERN_ERR, "get root inode failed");
4151                ret = PTR_ERR(root);
4152                root = NULL;
4153                goto failed_mount4;
4154        }
4155        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
4156                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
4157                iput(root);
4158                goto failed_mount4;
4159        }
4160        sb->s_root = d_make_root(root);
4161        if (!sb->s_root) {
4162                ext4_msg(sb, KERN_ERR, "get root dentry failed");
4163                ret = -ENOMEM;
4164                goto failed_mount4;
4165        }
4166
4167        if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
4168                sb->s_flags |= MS_RDONLY;
4169
4170        /* determine the minimum size of new large inodes, if present */
4171        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4172                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4173                                                     EXT4_GOOD_OLD_INODE_SIZE;
4174                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4175                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
4176                        if (sbi->s_want_extra_isize <
4177                            le16_to_cpu(es->s_want_extra_isize))
4178                                sbi->s_want_extra_isize =
4179                                        le16_to_cpu(es->s_want_extra_isize);
4180                        if (sbi->s_want_extra_isize <
4181                            le16_to_cpu(es->s_min_extra_isize))
4182                                sbi->s_want_extra_isize =
4183                                        le16_to_cpu(es->s_min_extra_isize);
4184                }
4185        }
4186        /* Check if enough inode space is available */
4187        if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
4188                                                        sbi->s_inode_size) {
4189                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4190                                                       EXT4_GOOD_OLD_INODE_SIZE;
4191                ext4_msg(sb, KERN_INFO, "required extra inode space not"
4192                         "available");
4193        }
4194
4195        err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4196        if (err) {
4197                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4198                         "reserved pool", ext4_calculate_resv_clusters(sb));
4199                goto failed_mount4a;
4200        }
4201
4202        err = ext4_setup_system_zone(sb);
4203        if (err) {
4204                ext4_msg(sb, KERN_ERR, "failed to initialize system "
4205                         "zone (%d)", err);
4206                goto failed_mount4a;
4207        }
4208
4209        ext4_ext_init(sb);
4210        err = ext4_mb_init(sb);
4211        if (err) {
4212                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4213                         err);
4214                goto failed_mount5;
4215        }
4216
4217        block = ext4_count_free_clusters(sb);
4218        ext4_free_blocks_count_set(sbi->s_es, 
4219                                   EXT4_C2B(sbi, block));
4220        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
4221                                  GFP_KERNEL);
4222        if (!err) {
4223                unsigned long freei = ext4_count_free_inodes(sb);
4224                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4225                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
4226                                          GFP_KERNEL);
4227        }
4228        if (!err)
4229                err = percpu_counter_init(&sbi->s_dirs_counter,
4230                                          ext4_count_dirs(sb), GFP_KERNEL);
4231        if (!err)
4232                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
4233                                          GFP_KERNEL);
4234        if (err) {
4235                ext4_msg(sb, KERN_ERR, "insufficient memory");
4236                goto failed_mount6;
4237        }
4238
4239        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4240                if (!ext4_fill_flex_info(sb)) {
4241                        ext4_msg(sb, KERN_ERR,
4242                               "unable to initialize "
4243                               "flex_bg meta info!");
4244                        goto failed_mount6;
4245                }
4246
4247        err = ext4_register_li_request(sb, first_not_zeroed);
4248        if (err)
4249                goto failed_mount6;
4250
4251        sbi->s_kobj.kset = ext4_kset;
4252        init_completion(&sbi->s_kobj_unregister);
4253        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
4254                                   "%s", sb->s_id);
4255        if (err)
4256                goto failed_mount7;
4257
4258#ifdef CONFIG_QUOTA
4259        /* Enable quota usage during mount. */
4260        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
4261            !(sb->s_flags & MS_RDONLY)) {
4262                err = ext4_enable_quotas(sb);
4263                if (err)
4264                        goto failed_mount8;
4265        }
4266#endif  /* CONFIG_QUOTA */
4267
4268        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
4269        ext4_orphan_cleanup(sb, es);
4270        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
4271        if (needs_recovery) {
4272                ext4_msg(sb, KERN_INFO, "recovery complete");
4273                ext4_mark_recovery_complete(sb, es);
4274        }
4275        if (EXT4_SB(sb)->s_journal) {
4276                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
4277                        descr = " journalled data mode";
4278                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
4279                        descr = " ordered data mode";
4280                else
4281                        descr = " writeback data mode";
4282        } else
4283                descr = "out journal";
4284
4285        if (test_opt(sb, DISCARD)) {
4286                struct request_queue *q = bdev_get_queue(sb->s_bdev);
4287                if (!blk_queue_discard(q))
4288                        ext4_msg(sb, KERN_WARNING,
4289                                 "mounting with \"discard\" option, but "
4290                                 "the device does not support discard");
4291        }
4292
4293        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
4294                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
4295                         "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
4296                         *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
4297
4298        if (es->s_error_count)
4299                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
4300
4301        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
4302        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
4303        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
4304        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
4305
4306        kfree(orig_data);
4307        return 0;
4308
4309cantfind_ext4:
4310        if (!silent)
4311                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4312        goto failed_mount;
4313
4314#ifdef CONFIG_QUOTA
4315failed_mount8:
4316        kobject_del(&sbi->s_kobj);
4317#endif
4318failed_mount7:
4319        ext4_unregister_li_request(sb);
4320failed_mount6:
4321        ext4_mb_release(sb);
4322        if (sbi->s_flex_groups)
4323                kvfree(sbi->s_flex_groups);
4324        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4325        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4326        percpu_counter_destroy(&sbi->s_dirs_counter);
4327        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4328failed_mount5:
4329        ext4_ext_release(sb);
4330        ext4_release_system_zone(sb);
4331failed_mount4a:
4332        dput(sb->s_root);
4333        sb->s_root = NULL;
4334failed_mount4:
4335        ext4_msg(sb, KERN_ERR, "mount failed");
4336        if (EXT4_SB(sb)->rsv_conversion_wq)
4337                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4338failed_mount_wq:
4339        if (sbi->s_journal) {
4340                jbd2_journal_destroy(sbi->s_journal);
4341                sbi->s_journal = NULL;
4342        }
4343failed_mount3a:
4344        ext4_es_unregister_shrinker(sbi);
4345failed_mount3:
4346        del_timer_sync(&sbi->s_err_report);
4347        if (sbi->s_mmp_tsk)
4348                kthread_stop(sbi->s_mmp_tsk);
4349failed_mount2:
4350        for (i = 0; i < db_count; i++)
4351                brelse(sbi->s_group_desc[i]);
4352        kvfree(sbi->s_group_desc);
4353failed_mount:
4354        if (sbi->s_chksum_driver)
4355                crypto_free_shash(sbi->s_chksum_driver);
4356        if (sbi->s_proc) {
4357                remove_proc_entry("options", sbi->s_proc);
4358                remove_proc_entry(sb->s_id, ext4_proc_root);
4359        }
4360#ifdef CONFIG_QUOTA
4361        for (i = 0; i < EXT4_MAXQUOTAS; i++)
4362                kfree(sbi->s_qf_names[i]);
4363#endif
4364        ext4_blkdev_remove(sbi);
4365        brelse(bh);
4366out_fail:
4367        sb->s_fs_info = NULL;
4368        kfree(sbi->s_blockgroup_lock);
4369        kfree(sbi);
4370out_free_orig:
4371        kfree(orig_data);
4372        return err ? err : ret;
4373}
4374
4375/*
4376 * Setup any per-fs journal parameters now.  We'll do this both on
4377 * initial mount, once the journal has been initialised but before we've
4378 * done any recovery; and again on any subsequent remount.
4379 */
4380static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4381{
4382        struct ext4_sb_info *sbi = EXT4_SB(sb);
4383
4384        journal->j_commit_interval = sbi->s_commit_interval;
4385        journal->j_min_batch_time = sbi->s_min_batch_time;
4386        journal->j_max_batch_time = sbi->s_max_batch_time;
4387
4388        write_lock(&journal->j_state_lock);
4389        if (test_opt(sb, BARRIER))
4390                journal->j_flags |= JBD2_BARRIER;
4391        else
4392                journal->j_flags &= ~JBD2_BARRIER;
4393        if (test_opt(sb, DATA_ERR_ABORT))
4394                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4395        else
4396                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4397        write_unlock(&journal->j_state_lock);
4398}
4399
4400static journal_t *ext4_get_journal(struct super_block *sb,
4401                                   unsigned int journal_inum)
4402{
4403        struct inode *journal_inode;
4404        journal_t *journal;
4405
4406        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4407
4408        /* First, test for the existence of a valid inode on disk.  Bad
4409         * things happen if we iget() an unused inode, as the subsequent
4410         * iput() will try to delete it. */
4411
4412        journal_inode = ext4_iget(sb, journal_inum);
4413        if (IS_ERR(journal_inode)) {
4414                ext4_msg(sb, KERN_ERR, "no journal found");
4415                return NULL;
4416        }
4417        if (!journal_inode->i_nlink) {
4418                make_bad_inode(journal_inode);
4419                iput(journal_inode);
4420                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4421                return NULL;
4422        }
4423
4424        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4425                  journal_inode, journal_inode->i_size);
4426        if (!S_ISREG(journal_inode->i_mode)) {
4427                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4428                iput(journal_inode);
4429                return NULL;
4430        }
4431
4432        journal = jbd2_journal_init_inode(journal_inode);
4433        if (!journal) {
4434                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4435                iput(journal_inode);
4436                return NULL;
4437        }
4438        journal->j_private = sb;
4439        ext4_init_journal_params(sb, journal);
4440        return journal;
4441}
4442
4443static journal_t *ext4_get_dev_journal(struct super_block *sb,
4444                                       dev_t j_dev)
4445{
4446        struct buffer_head *bh;
4447        journal_t *journal;
4448        ext4_fsblk_t start;
4449        ext4_fsblk_t len;
4450        int hblock, blocksize;
4451        ext4_fsblk_t sb_block;
4452        unsigned long offset;
4453        struct ext4_super_block *es;
4454        struct block_device *bdev;
4455
4456        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4457
4458        bdev = ext4_blkdev_get(j_dev, sb);
4459        if (bdev == NULL)
4460                return NULL;
4461
4462        blocksize = sb->s_blocksize;
4463        hblock = bdev_logical_block_size(bdev);
4464        if (blocksize < hblock) {
4465                ext4_msg(sb, KERN_ERR,
4466                        "blocksize too small for journal device");
4467                goto out_bdev;
4468        }
4469
4470        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4471        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4472        set_blocksize(bdev, blocksize);
4473        if (!(bh = __bread(bdev, sb_block, blocksize))) {
4474                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4475                       "external journal");
4476                goto out_bdev;
4477        }
4478
4479        es = (struct ext4_super_block *) (bh->b_data + offset);
4480        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4481            !(le32_to_cpu(es->s_feature_incompat) &
4482              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4483                ext4_msg(sb, KERN_ERR, "external journal has "
4484                                        "bad superblock");
4485                brelse(bh);
4486                goto out_bdev;
4487        }
4488
4489        if ((le32_to_cpu(es->s_feature_ro_compat) &
4490             EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4491            es->s_checksum != ext4_superblock_csum(sb, es)) {
4492                ext4_msg(sb, KERN_ERR, "external journal has "
4493                                       "corrupt superblock");
4494                brelse(bh);
4495                goto out_bdev;
4496        }
4497
4498        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4499                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4500                brelse(bh);
4501                goto out_bdev;
4502        }
4503
4504        len = ext4_blocks_count(es);
4505        start = sb_block + 1;
4506        brelse(bh);     /* we're done with the superblock */
4507
4508        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4509                                        start, len, blocksize);
4510        if (!journal) {
4511                ext4_msg(sb, KERN_ERR, "failed to create device journal");
4512                goto out_bdev;
4513        }
4514        journal->j_private = sb;
4515        ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
4516        wait_on_buffer(journal->j_sb_buffer);
4517        if (!buffer_uptodate(journal->j_sb_buffer)) {
4518                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4519                goto out_journal;
4520        }
4521        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4522                ext4_msg(sb, KERN_ERR, "External journal has more than one "
4523                                        "user (unsupported) - %d",
4524                        be32_to_cpu(journal->j_superblock->s_nr_users));
4525                goto out_journal;
4526        }
4527        EXT4_SB(sb)->journal_bdev = bdev;
4528        ext4_init_journal_params(sb, journal);
4529        return journal;
4530
4531out_journal:
4532        jbd2_journal_destroy(journal);
4533out_bdev:
4534        ext4_blkdev_put(bdev);
4535        return NULL;
4536}
4537
4538static int ext4_load_journal(struct super_block *sb,
4539                             struct ext4_super_block *es,
4540                             unsigned long journal_devnum)
4541{
4542        journal_t *journal;
4543        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4544        dev_t journal_dev;
4545        int err = 0;
4546        int really_read_only;
4547
4548        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4549
4550        if (journal_devnum &&
4551            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4552                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4553                        "numbers have changed");
4554                journal_dev = new_decode_dev(journal_devnum);
4555        } else
4556                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4557
4558        really_read_only = bdev_read_only(sb->s_bdev);
4559
4560        /*
4561         * Are we loading a blank journal or performing recovery after a
4562         * crash?  For recovery, we need to check in advance whether we
4563         * can get read-write access to the device.
4564         */
4565        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4566                if (sb->s_flags & MS_RDONLY) {
4567                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
4568                                        "required on readonly filesystem");
4569                        if (really_read_only) {
4570                                ext4_msg(sb, KERN_ERR, "write access "
4571                                        "unavailable, cannot proceed");
4572                                return -EROFS;
4573                        }
4574                        ext4_msg(sb, KERN_INFO, "write access will "
4575                               "be enabled during recovery");
4576                }
4577        }
4578
4579        if (journal_inum && journal_dev) {
4580                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4581                       "and inode journals!");
4582                return -EINVAL;
4583        }
4584
4585        if (journal_inum) {
4586                if (!(journal = ext4_get_journal(sb, journal_inum)))
4587                        return -EINVAL;
4588        } else {
4589                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4590                        return -EINVAL;
4591        }
4592
4593        if (!(journal->j_flags & JBD2_BARRIER))
4594                ext4_msg(sb, KERN_INFO, "barriers disabled");
4595
4596        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
4597                err = jbd2_journal_wipe(journal, !really_read_only);
4598        if (!err) {
4599                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4600                if (save)
4601                        memcpy(save, ((char *) es) +
4602                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4603                err = jbd2_journal_load(journal);
4604                if (save)
4605                        memcpy(((char *) es) + EXT4_S_ERR_START,
4606                               save, EXT4_S_ERR_LEN);
4607                kfree(save);
4608        }
4609
4610        if (err) {
4611                ext4_msg(sb, KERN_ERR, "error loading journal");
4612                jbd2_journal_destroy(journal);
4613                return err;
4614        }
4615
4616        EXT4_SB(sb)->s_journal = journal;
4617        ext4_clear_journal_err(sb, es);
4618
4619        if (!really_read_only && journal_devnum &&
4620            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4621                es->s_journal_dev = cpu_to_le32(journal_devnum);
4622
4623                /* Make sure we flush the recovery flag to disk. */
4624                ext4_commit_super(sb, 1);
4625        }
4626
4627        return 0;
4628}
4629
4630static int ext4_commit_super(struct super_block *sb, int sync)
4631{
4632        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4633        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4634        int error = 0;
4635
4636        if (!sbh || block_device_ejected(sb))
4637                return error;
4638        if (buffer_write_io_error(sbh)) {
4639                /*
4640                 * Oh, dear.  A previous attempt to write the
4641                 * superblock failed.  This could happen because the
4642                 * USB device was yanked out.  Or it could happen to
4643                 * be a transient write error and maybe the block will
4644                 * be remapped.  Nothing we can do but to retry the
4645                 * write and hope for the best.
4646                 */
4647                ext4_msg(sb, KERN_ERR, "previous I/O error to "
4648                       "superblock detected");
4649                clear_buffer_write_io_error(sbh);
4650                set_buffer_uptodate(sbh);
4651        }
4652        /*
4653         * If the file system is mounted read-only, don't update the
4654         * superblock write time.  This avoids updating the superblock
4655         * write time when we are mounting the root file system
4656         * read/only but we need to replay the journal; at that point,
4657         * for people who are east of GMT and who make their clock
4658         * tick in localtime for Windows bug-for-bug compatibility,
4659         * the clock is set in the future, and this will cause e2fsck
4660         * to complain and force a full file system check.
4661         */
4662        if (!(sb->s_flags & MS_RDONLY))
4663                es->s_wtime = cpu_to_le32(get_seconds());
4664        if (sb->s_bdev->bd_part)
4665                es->s_kbytes_written =
4666                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4667                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4668                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
4669        else
4670                es->s_kbytes_written =
4671                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4672        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4673                ext4_free_blocks_count_set(es,
4674                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4675                                &EXT4_SB(sb)->s_freeclusters_counter)));
4676        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4677                es->s_free_inodes_count =
4678                        cpu_to_le32(percpu_counter_sum_positive(
4679                                &EXT4_SB(sb)->s_freeinodes_counter));
4680        BUFFER_TRACE(sbh, "marking dirty");
4681        ext4_superblock_csum_set(sb);
4682        mark_buffer_dirty(sbh);
4683        if (sync) {
4684                error = __sync_dirty_buffer(sbh,
4685                        test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
4686                if (error)
4687                        return error;
4688
4689                error = buffer_write_io_error(sbh);
4690                if (error) {
4691                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
4692                               "superblock");
4693                        clear_buffer_write_io_error(sbh);
4694                        set_buffer_uptodate(sbh);
4695                }
4696        }
4697        return error;
4698}
4699
4700/*
4701 * Have we just finished recovery?  If so, and if we are mounting (or
4702 * remounting) the filesystem readonly, then we will end up with a
4703 * consistent fs on disk.  Record that fact.
4704 */
4705static void ext4_mark_recovery_complete(struct super_block *sb,
4706                                        struct ext4_super_block *es)
4707{
4708        journal_t *journal = EXT4_SB(sb)->s_journal;
4709
4710        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4711                BUG_ON(journal != NULL);
4712                return;
4713        }
4714        jbd2_journal_lock_updates(journal);
4715        if (jbd2_journal_flush(journal) < 0)
4716                goto out;
4717
4718        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
4719            sb->s_flags & MS_RDONLY) {
4720                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4721                ext4_commit_super(sb, 1);
4722        }
4723
4724out:
4725        jbd2_journal_unlock_updates(journal);
4726}
4727
4728/*
4729 * If we are mounting (or read-write remounting) a filesystem whose journal
4730 * has recorded an error from a previous lifetime, move that error to the
4731 * main filesystem now.
4732 */
4733static void ext4_clear_journal_err(struct super_block *sb,
4734                                   struct ext4_super_block *es)
4735{
4736        journal_t *journal;
4737        int j_errno;
4738        const char *errstr;
4739
4740        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4741
4742        journal = EXT4_SB(sb)->s_journal;
4743
4744        /*
4745         * Now check for any error status which may have been recorded in the
4746         * journal by a prior ext4_error() or ext4_abort()
4747         */
4748
4749        j_errno = jbd2_journal_errno(journal);
4750        if (j_errno) {
4751                char nbuf[16];
4752
4753                errstr = ext4_decode_error(sb, j_errno, nbuf);
4754                ext4_warning(sb, "Filesystem error recorded "
4755                             "from previous mount: %s", errstr);
4756                ext4_warning(sb, "Marking fs in need of filesystem check.");
4757
4758                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4759                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4760                ext4_commit_super(sb, 1);
4761
4762                jbd2_journal_clear_err(journal);
4763                jbd2_journal_update_sb_errno(journal);
4764        }
4765}
4766
4767/*
4768 * Force the running and committing transactions to commit,
4769 * and wait on the commit.
4770 */
4771int ext4_force_commit(struct super_block *sb)
4772{
4773        journal_t *journal;
4774
4775        if (sb->s_flags & MS_RDONLY)
4776                return 0;
4777
4778        journal = EXT4_SB(sb)->s_journal;
4779        return ext4_journal_force_commit(journal);
4780}
4781
4782static int ext4_sync_fs(struct super_block *sb, int wait)
4783{
4784        int ret = 0;
4785        tid_t target;
4786        bool needs_barrier = false;
4787        struct ext4_sb_info *sbi = EXT4_SB(sb);
4788
4789        trace_ext4_sync_fs(sb, wait);
4790        flush_workqueue(sbi->rsv_conversion_wq);
4791        /*
4792         * Writeback quota in non-journalled quota case - journalled quota has
4793         * no dirty dquots
4794         */
4795        dquot_writeback_dquots(sb, -1);
4796        /*
4797         * Data writeback is possible w/o journal transaction, so barrier must
4798         * being sent at the end of the function. But we can skip it if
4799         * transaction_commit will do it for us.
4800         */
4801        if (sbi->s_journal) {
4802                target = jbd2_get_latest_transaction(sbi->s_journal);
4803                if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4804                    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4805                        needs_barrier = true;
4806
4807                if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4808                        if (wait)
4809                                ret = jbd2_log_wait_commit(sbi->s_journal,
4810                                                           target);
4811                }
4812        } else if (wait && test_opt(sb, BARRIER))
4813                needs_barrier = true;
4814        if (needs_barrier) {
4815                int err;
4816                err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4817                if (!ret)
4818                        ret = err;
4819        }
4820
4821        return ret;
4822}
4823
4824/*
4825 * LVM calls this function before a (read-only) snapshot is created.  This
4826 * gives us a chance to flush the journal completely and mark the fs clean.
4827 *
4828 * Note that only this function cannot bring a filesystem to be in a clean
4829 * state independently. It relies on upper layer to stop all data & metadata
4830 * modifications.
4831 */
4832static int ext4_freeze(struct super_block *sb)
4833{
4834        int error = 0;
4835        journal_t *journal;
4836
4837        if (sb->s_flags & MS_RDONLY)
4838                return 0;
4839
4840        journal = EXT4_SB(sb)->s_journal;
4841
4842        if (journal) {
4843                /* Now we set up the journal barrier. */
4844                jbd2_journal_lock_updates(journal);
4845
4846                /*
4847                 * Don't clear the needs_recovery flag if we failed to
4848                 * flush the journal.
4849                 */
4850                error = jbd2_journal_flush(journal);
4851                if (error < 0)
4852                        goto out;
4853
4854                /* Journal blocked and flushed, clear needs_recovery flag. */
4855                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4856        }
4857
4858        error = ext4_commit_super(sb, 1);
4859out:
4860        if (journal)
4861                /* we rely on upper layer to stop further updates */
4862                jbd2_journal_unlock_updates(journal);
4863        return error;
4864}
4865
4866/*
4867 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
4868 * flag here, even though the filesystem is not technically dirty yet.
4869 */
4870static int ext4_unfreeze(struct super_block *sb)
4871{
4872        if (sb->s_flags & MS_RDONLY)
4873                return 0;
4874
4875        if (EXT4_SB(sb)->s_journal) {
4876                /* Reset the needs_recovery flag before the fs is unlocked. */
4877                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4878        }
4879
4880        ext4_commit_super(sb, 1);
4881        return 0;
4882}
4883
4884/*
4885 * Structure to save mount options for ext4_remount's benefit
4886 */
4887struct ext4_mount_options {
4888        unsigned long s_mount_opt;
4889        unsigned long s_mount_opt2;
4890        kuid_t s_resuid;
4891        kgid_t s_resgid;
4892        unsigned long s_commit_interval;
4893        u32 s_min_batch_time, s_max_batch_time;
4894#ifdef CONFIG_QUOTA
4895        int s_jquota_fmt;
4896        char *s_qf_names[EXT4_MAXQUOTAS];
4897#endif
4898};
4899
4900static int ext4_remount(struct super_block *sb, int *flags, char *data)
4901{
4902        struct ext4_super_block *es;
4903        struct ext4_sb_info *sbi = EXT4_SB(sb);
4904        unsigned long old_sb_flags;
4905        struct ext4_mount_options old_opts;
4906        int enable_quota = 0;
4907        ext4_group_t g;
4908        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4909        int err = 0;
4910#ifdef CONFIG_QUOTA
4911        int i, j;
4912#endif
4913        char *orig_data = kstrdup(data, GFP_KERNEL);
4914
4915        /* Store the original options */
4916        old_sb_flags = sb->s_flags;
4917        old_opts.s_mount_opt = sbi->s_mount_opt;
4918        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4919        old_opts.s_resuid = sbi->s_resuid;
4920        old_opts.s_resgid = sbi->s_resgid;
4921        old_opts.s_commit_interval = sbi->s_commit_interval;
4922        old_opts.s_min_batch_time = sbi->s_min_batch_time;
4923        old_opts.s_max_batch_time = sbi->s_max_batch_time;
4924#ifdef CONFIG_QUOTA
4925        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4926        for (i = 0; i < EXT4_MAXQUOTAS; i++)
4927                if (sbi->s_qf_names[i]) {
4928                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4929                                                         GFP_KERNEL);
4930                        if (!old_opts.s_qf_names[i]) {
4931                                for (j = 0; j < i; j++)
4932                                        kfree(old_opts.s_qf_names[j]);
4933                                kfree(orig_data);
4934                                return -ENOMEM;
4935                        }
4936                } else
4937                        old_opts.s_qf_names[i] = NULL;
4938#endif
4939        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4940                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4941
4942        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4943                err = -EINVAL;
4944                goto restore_opts;
4945        }
4946
4947        if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4948            test_opt(sb, JOURNAL_CHECKSUM)) {
4949                ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4950                         "during remount not supported; ignoring");
4951                sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
4952        }
4953
4954        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4955                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4956                        ext4_msg(sb, KERN_ERR, "can't mount with "
4957                                 "both data=journal and delalloc");
4958                        err = -EINVAL;
4959                        goto restore_opts;
4960                }
4961                if (test_opt(sb, DIOREAD_NOLOCK)) {
4962                        ext4_msg(sb, KERN_ERR, "can't mount with "
4963                                 "both data=journal and dioread_nolock");
4964                        err = -EINVAL;
4965                        goto restore_opts;
4966                }
4967                if (test_opt(sb, DAX)) {
4968                        ext4_msg(sb, KERN_ERR, "can't mount with "
4969                                 "both data=journal and dax");
4970                        err = -EINVAL;
4971                        goto restore_opts;
4972                }
4973        }
4974
4975        if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
4976                ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
4977                        "dax flag with busy inodes while remounting");
4978                sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
4979        }
4980
4981        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4982                ext4_abort(sb, "Abort forced by user");
4983
4984        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4985                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4986
4987        es = sbi->s_es;
4988
4989        if (sbi->s_journal) {
4990                ext4_init_journal_params(sb, sbi->s_journal);
4991                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4992        }
4993
4994        if (*flags & MS_LAZYTIME)
4995                sb->s_flags |= MS_LAZYTIME;
4996
4997        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4998                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4999                        err = -EROFS;
5000                        goto restore_opts;
5001                }
5002
5003                if (*flags & MS_RDONLY) {
5004                        err = sync_filesystem(sb);
5005                        if (err < 0)
5006                                goto restore_opts;
5007                        err = dquot_suspend(sb, -1);
5008                        if (err < 0)
5009                                goto restore_opts;
5010
5011                        /*
5012                         * First of all, the unconditional stuff we have to do
5013                         * to disable replay of the journal when we next remount
5014                         */
5015                        sb->s_flags |= MS_RDONLY;
5016
5017                        /*
5018                         * OK, test if we are remounting a valid rw partition
5019                         * readonly, and if so set the rdonly flag and then
5020                         * mark the partition as valid again.
5021                         */
5022                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
5023                            (sbi->s_mount_state & EXT4_VALID_FS))
5024                                es->s_state = cpu_to_le16(sbi->s_mount_state);
5025
5026                        if (sbi->s_journal)
5027                                ext4_mark_recovery_complete(sb, es);
5028                } else {
5029                        /* Make sure we can mount this feature set readwrite */
5030                        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5031                                        EXT4_FEATURE_RO_COMPAT_READONLY) ||
5032                            !ext4_feature_set_ok(sb, 0)) {
5033                                err = -EROFS;
5034                                goto restore_opts;
5035                        }
5036                        /*
5037                         * Make sure the group descriptor checksums
5038                         * are sane.  If they aren't, refuse to remount r/w.
5039                         */
5040                        for (g = 0; g < sbi->s_groups_count; g++) {
5041                                struct ext4_group_desc *gdp =
5042                                        ext4_get_group_desc(sb, g, NULL);
5043
5044                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
5045                                        ext4_msg(sb, KERN_ERR,
5046               "ext4_remount: Checksum for group %u failed (%u!=%u)",
5047                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
5048                                               le16_to_cpu(gdp->bg_checksum));
5049                                        err = -EINVAL;
5050                                        goto restore_opts;
5051                                }
5052                        }
5053
5054                        /*
5055                         * If we have an unprocessed orphan list hanging
5056                         * around from a previously readonly bdev mount,
5057                         * require a full umount/remount for now.
5058                         */
5059                        if (es->s_last_orphan) {
5060                                ext4_msg(sb, KERN_WARNING, "Couldn't "
5061                                       "remount RDWR because of unprocessed "
5062                                       "orphan inode list.  Please "
5063                                       "umount/remount instead");
5064                                err = -EINVAL;
5065                                goto restore_opts;
5066                        }
5067
5068                        /*
5069                         * Mounting a RDONLY partition read-write, so reread
5070                         * and store the current valid flag.  (It may have
5071                         * been changed by e2fsck since we originally mounted
5072                         * the partition.)
5073                         */
5074                        if (sbi->s_journal)
5075                                ext4_clear_journal_err(sb, es);
5076                        sbi->s_mount_state = le16_to_cpu(es->s_state);
5077                        if (!ext4_setup_super(sb, es, 0))
5078                                sb->s_flags &= ~MS_RDONLY;
5079                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
5080                                                     EXT4_FEATURE_INCOMPAT_MMP))
5081                                if (ext4_multi_mount_protect(sb,
5082                                                le64_to_cpu(es->s_mmp_block))) {
5083                                        err = -EROFS;
5084                                        goto restore_opts;
5085                                }
5086                        enable_quota = 1;
5087                }
5088        }
5089
5090        /*
5091         * Reinitialize lazy itable initialization thread based on
5092         * current settings
5093         */
5094        if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
5095                ext4_unregister_li_request(sb);
5096        else {
5097                ext4_group_t first_not_zeroed;
5098                first_not_zeroed = ext4_has_uninit_itable(sb);
5099                ext4_register_li_request(sb, first_not_zeroed);
5100        }
5101
5102        ext4_setup_system_zone(sb);
5103        if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
5104                ext4_commit_super(sb, 1);
5105
5106#ifdef CONFIG_QUOTA
5107        /* Release old quota file names */
5108        for (i = 0; i < EXT4_MAXQUOTAS; i++)
5109                kfree(old_opts.s_qf_names[i]);
5110        if (enable_quota) {
5111                if (sb_any_quota_suspended(sb))
5112                        dquot_resume(sb, -1);
5113                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5114                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
5115                        err = ext4_enable_quotas(sb);
5116                        if (err)
5117                                goto restore_opts;
5118                }
5119        }
5120#endif
5121
5122        *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
5123        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
5124        kfree(orig_data);
5125        return 0;
5126
5127restore_opts:
5128        sb->s_flags = old_sb_flags;
5129        sbi->s_mount_opt = old_opts.s_mount_opt;
5130        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
5131        sbi->s_resuid = old_opts.s_resuid;
5132        sbi->s_resgid = old_opts.s_resgid;
5133        sbi->s_commit_interval = old_opts.s_commit_interval;
5134        sbi->s_min_batch_time = old_opts.s_min_batch_time;
5135        sbi->s_max_batch_time = old_opts.s_max_batch_time;
5136#ifdef CONFIG_QUOTA
5137        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
5138        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
5139                kfree(sbi->s_qf_names[i]);
5140                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5141        }
5142#endif
5143        kfree(orig_data);
5144        return err;
5145}
5146
5147static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
5148{
5149        struct super_block *sb = dentry->d_sb;
5150        struct ext4_sb_info *sbi = EXT4_SB(sb);
5151        struct ext4_super_block *es = sbi->s_es;
5152        ext4_fsblk_t overhead = 0, resv_blocks;
5153        u64 fsid;
5154        s64 bfree;
5155        resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
5156
5157        if (!test_opt(sb, MINIX_DF))
5158                overhead = sbi->s_overhead;
5159
5160        buf->f_type = EXT4_SUPER_MAGIC;
5161        buf->f_bsize = sb->s_blocksize;
5162        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
5163        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
5164                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
5165        /* prevent underflow in case that few free space is available */
5166        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
5167        buf->f_bavail = buf->f_bfree -
5168                        (ext4_r_blocks_count(es) + resv_blocks);
5169        if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
5170                buf->f_bavail = 0;
5171        buf->f_files = le32_to_cpu(es->s_inodes_count);
5172        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
5173        buf->f_namelen = EXT4_NAME_LEN;
5174        fsid = le64_to_cpup((void *)es->s_uuid) ^
5175               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
5176        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
5177        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
5178
5179        return 0;
5180}
5181
5182/* Helper function for writing quotas on sync - we need to start transaction
5183 * before quota file is locked for write. Otherwise there are possible deadlocks:
5184 * Process 1                         Process 2
5185 * ext4_create()                     quota_sync()
5186 *   jbd2_journal_start()                  write_dquot()
5187 *   dquot_initialize()                         down(dqio_mutex)
5188 *     down(dqio_mutex)                    jbd2_journal_start()
5189 *
5190 */
5191
5192#ifdef CONFIG_QUOTA
5193
5194static inline struct inode *dquot_to_inode(struct dquot *dquot)
5195{
5196        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
5197}
5198
5199static int ext4_write_dquot(struct dquot *dquot)
5200{
5201        int ret, err;
5202        handle_t *handle;
5203        struct inode *inode;
5204
5205        inode = dquot_to_inode(dquot);
5206        handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
5207                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5208        if (IS_ERR(handle))
5209                return PTR_ERR(handle);
5210        ret = dquot_commit(dquot);
5211        err = ext4_journal_stop(handle);
5212        if (!ret)
5213                ret = err;
5214        return ret;
5215}
5216
5217static int ext4_acquire_dquot(struct dquot *dquot)
5218{
5219        int ret, err;
5220        handle_t *handle;
5221
5222        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5223                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5224        if (IS_ERR(handle))
5225                return PTR_ERR(handle);
5226        ret = dquot_acquire(dquot);
5227        err = ext4_journal_stop(handle);
5228        if (!ret)
5229                ret = err;
5230        return ret;
5231}
5232
5233static int ext4_release_dquot(struct dquot *dquot)
5234{
5235        int ret, err;
5236        handle_t *handle;
5237
5238        handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
5239                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5240        if (IS_ERR(handle)) {
5241                /* Release dquot anyway to avoid endless cycle in dqput() */
5242                dquot_release(dquot);
5243                return PTR_ERR(handle);
5244        }
5245        ret = dquot_release(dquot);
5246        err = ext4_journal_stop(handle);
5247        if (!ret)
5248                ret = err;
5249        return ret;
5250}
5251
5252static int ext4_mark_dquot_dirty(struct dquot *dquot)
5253{
5254        struct super_block *sb = dquot->dq_sb;
5255        struct ext4_sb_info *sbi = EXT4_SB(sb);
5256
5257        /* Are we journaling quotas? */
5258        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
5259            sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
5260                dquot_mark_dquot_dirty(dquot);
5261                return ext4_write_dquot(dquot);
5262        } else {
5263                return dquot_mark_dquot_dirty(dquot);
5264        }
5265}
5266
5267static int ext4_write_info(struct super_block *sb, int type)
5268{
5269        int ret, err;
5270        handle_t *handle;
5271
5272        /* Data block + inode block */
5273        handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
5274        if (IS_ERR(handle))
5275                return PTR_ERR(handle);
5276        ret = dquot_commit_info(sb, type);
5277        err = ext4_journal_stop(handle);
5278        if (!ret)
5279                ret = err;
5280        return ret;
5281}
5282
5283/*
5284 * Turn on quotas during mount time - we need to find
5285 * the quota file and such...
5286 */
5287static int ext4_quota_on_mount(struct super_block *sb, int type)
5288{
5289        return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
5290                                        EXT4_SB(sb)->s_jquota_fmt, type);
5291}
5292
5293/*
5294 * Standard function to be called on quota_on
5295 */
5296static int ext4_quota_on(struct super_block *sb, int type, int format_id,
5297                         struct path *path)
5298{
5299        int err;
5300
5301        if (!test_opt(sb, QUOTA))
5302                return -EINVAL;
5303
5304        /* Quotafile not on the same filesystem? */
5305        if (path->dentry->d_sb != sb)
5306                return -EXDEV;
5307        /* Journaling quota? */
5308        if (EXT4_SB(sb)->s_qf_names[type]) {
5309                /* Quotafile not in fs root? */
5310                if (path->dentry->d_parent != sb->s_root)
5311                        ext4_msg(sb, KERN_WARNING,
5312                                "Quota file not on filesystem root. "
5313                                "Journaled quota will not work");
5314        }
5315
5316        /*
5317         * When we journal data on quota file, we have to flush journal to see
5318         * all updates to the file when we bypass pagecache...
5319         */
5320        if (EXT4_SB(sb)->s_journal &&
5321            ext4_should_journal_data(d_inode(path->dentry))) {
5322                /*
5323                 * We don't need to lock updates but journal_flush() could
5324                 * otherwise be livelocked...
5325                 */
5326                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5327                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5328                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5329                if (err)
5330                        return err;
5331        }
5332
5333        return dquot_quota_on(sb, type, format_id, path);
5334}
5335
5336static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5337                             unsigned int flags)
5338{
5339        int err;
5340        struct inode *qf_inode;
5341        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5342                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5343                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5344        };
5345
5346        BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
5347
5348        if (!qf_inums[type])
5349                return -EPERM;
5350
5351        qf_inode = ext4_iget(sb, qf_inums[type]);
5352        if (IS_ERR(qf_inode)) {
5353                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
5354                return PTR_ERR(qf_inode);
5355        }
5356
5357        /* Don't account quota for quota files to avoid recursion */
5358        qf_inode->i_flags |= S_NOQUOTA;
5359        err = dquot_enable(qf_inode, type, format_id, flags);
5360        iput(qf_inode);
5361
5362        return err;
5363}
5364
5365/* Enable usage tracking for all quota types. */
5366static int ext4_enable_quotas(struct super_block *sb)
5367{
5368        int type, err = 0;
5369        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5370                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5371                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5372        };
5373
5374        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5375        for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5376                if (qf_inums[type]) {
5377                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5378                                                DQUOT_USAGE_ENABLED);
5379                        if (err) {
5380                                ext4_warning(sb,
5381                                        "Failed to enable quota tracking "
5382                                        "(type=%d, err=%d). Please run "
5383                                        "e2fsck to fix.", type, err);
5384                                return err;
5385                        }
5386                }
5387        }
5388        return 0;
5389}
5390
5391static int ext4_quota_off(struct super_block *sb, int type)
5392{
5393        struct inode *inode = sb_dqopt(sb)->files[type];
5394        handle_t *handle;
5395
5396        /* Force all delayed allocation blocks to be allocated.
5397         * Caller already holds s_umount sem */
5398        if (test_opt(sb, DELALLOC))
5399                sync_filesystem(sb);
5400
5401        if (!inode)
5402                goto out;
5403
5404        /* Update modification times of quota files when userspace can
5405         * start looking at them */
5406        handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5407        if (IS_ERR(handle))
5408                goto out;
5409        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5410        ext4_mark_inode_dirty(handle, inode);
5411        ext4_journal_stop(handle);
5412
5413out:
5414        return dquot_quota_off(sb, type);
5415}
5416
5417/* Read data from quotafile - avoid pagecache and such because we cannot afford
5418 * acquiring the locks... As quota files are never truncated and quota code
5419 * itself serializes the operations (and no one else should touch the files)
5420 * we don't have to be afraid of races */
5421static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5422                               size_t len, loff_t off)
5423{
5424        struct inode *inode = sb_dqopt(sb)->files[type];
5425        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5426        int offset = off & (sb->s_blocksize - 1);
5427        int tocopy;
5428        size_t toread;
5429        struct buffer_head *bh;
5430        loff_t i_size = i_size_read(inode);
5431
5432        if (off > i_size)
5433                return 0;
5434        if (off+len > i_size)
5435                len = i_size-off;
5436        toread = len;
5437        while (toread > 0) {
5438                tocopy = sb->s_blocksize - offset < toread ?
5439                                sb->s_blocksize - offset : toread;
5440                bh = ext4_bread(NULL, inode, blk, 0);
5441                if (IS_ERR(bh))
5442                        return PTR_ERR(bh);
5443                if (!bh)        /* A hole? */
5444                        memset(data, 0, tocopy);
5445                else
5446                        memcpy(data, bh->b_data+offset, tocopy);
5447                brelse(bh);
5448                offset = 0;
5449                toread -= tocopy;
5450                data += tocopy;
5451                blk++;
5452        }
5453        return len;
5454}
5455
5456/* Write to quotafile (we know the transaction is already started and has
5457 * enough credits) */
5458static ssize_t ext4_quota_write(struct super_block *sb, int type,
5459                                const char *data, size_t len, loff_t off)
5460{
5461        struct inode *inode = sb_dqopt(sb)->files[type];
5462        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5463        int err, offset = off & (sb->s_blocksize - 1);
5464        int retries = 0;
5465        struct buffer_head *bh;
5466        handle_t *handle = journal_current_handle();
5467
5468        if (EXT4_SB(sb)->s_journal && !handle) {
5469                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5470                        " cancelled because transaction is not started",
5471                        (unsigned long long)off, (unsigned long long)len);
5472                return -EIO;
5473        }
5474        /*
5475         * Since we account only one data block in transaction credits,
5476         * then it is impossible to cross a block boundary.
5477         */
5478        if (sb->s_blocksize - offset < len) {
5479                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5480                        " cancelled because not block aligned",
5481                        (unsigned long long)off, (unsigned long long)len);
5482                return -EIO;
5483        }
5484
5485        do {
5486                bh = ext4_bread(handle, inode, blk,
5487                                EXT4_GET_BLOCKS_CREATE |
5488                                EXT4_GET_BLOCKS_METADATA_NOFAIL);
5489        } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
5490                 ext4_should_retry_alloc(inode->i_sb, &retries));
5491        if (IS_ERR(bh))
5492                return PTR_ERR(bh);
5493        if (!bh)
5494                goto out;
5495        BUFFER_TRACE(bh, "get write access");
5496        err = ext4_journal_get_write_access(handle, bh);
5497        if (err) {
5498                brelse(bh);
5499                return err;
5500        }
5501        lock_buffer(bh);
5502        memcpy(bh->b_data+offset, data, len);
5503        flush_dcache_page(bh->b_page);
5504        unlock_buffer(bh);
5505        err = ext4_handle_dirty_metadata(handle, NULL, bh);
5506        brelse(bh);
5507out:
5508        if (inode->i_size < off + len) {
5509                i_size_write(inode, off + len);
5510                EXT4_I(inode)->i_disksize = inode->i_size;
5511                ext4_mark_inode_dirty(handle, inode);
5512        }
5513        return len;
5514}
5515
5516#endif
5517
5518static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5519                       const char *dev_name, void *data)
5520{
5521        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5522}
5523
5524#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
5525static inline void register_as_ext2(void)
5526{
5527        int err = register_filesystem(&ext2_fs_type);
5528        if (err)
5529                printk(KERN_WARNING
5530                       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
5531}
5532
5533static inline void unregister_as_ext2(void)
5534{
5535        unregister_filesystem(&ext2_fs_type);
5536}
5537
5538static inline int ext2_feature_set_ok(struct super_block *sb)
5539{
5540        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
5541                return 0;
5542        if (sb->s_flags & MS_RDONLY)
5543                return 1;
5544        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
5545                return 0;
5546        return 1;
5547}
5548#else
/*
 * No-op variants, compiled when the preprocessor condition above does not
 * hold: nothing is registered as ext2 and no feature set is ever accepted
 * as ext2.
 */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	return 0;
}
5552#endif
5553
5554static inline void register_as_ext3(void)
5555{
5556        int err = register_filesystem(&ext3_fs_type);
5557        if (err)
5558                printk(KERN_WARNING
5559                       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
5560}
5561
5562static inline void unregister_as_ext3(void)
5563{
5564        unregister_filesystem(&ext3_fs_type);
5565}
5566
5567static inline int ext3_feature_set_ok(struct super_block *sb)
5568{
5569        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
5570                return 0;
5571        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
5572                return 0;
5573        if (sb->s_flags & MS_RDONLY)
5574                return 1;
5575        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
5576                return 0;
5577        return 1;
5578}
5579
5580static struct file_system_type ext4_fs_type = {
5581        .owner          = THIS_MODULE,
5582        .name           = "ext4",
5583        .mount          = ext4_mount,
5584        .kill_sb        = kill_block_super,
5585        .fs_flags       = FS_REQUIRES_DEV,
5586};
5587MODULE_ALIAS_FS("ext4");
5588
5589static int __init ext4_init_feat_adverts(void)
5590{
5591        struct ext4_features *ef;
5592        int ret = -ENOMEM;
5593
5594        ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
5595        if (!ef)
5596                goto out;
5597
5598        ef->f_kobj.kset = ext4_kset;
5599        init_completion(&ef->f_kobj_unregister);
5600        ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
5601                                   "features");
5602        if (ret) {
5603                kfree(ef);
5604                goto out;
5605        }
5606
5607        ext4_feat = ef;
5608        ret = 0;
5609out:
5610        return ret;
5611}
5612
5613static void ext4_exit_feat_adverts(void)
5614{
5615        kobject_put(&ext4_feat->f_kobj);
5616        wait_for_completion(&ext4_feat->f_kobj_unregister);
5617        kfree(ext4_feat);
5618}
5619
5620/* Shared across all ext4 file systems */
5621wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
5622struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
5623
5624static int __init ext4_init_fs(void)
5625{
5626        int i, err;
5627
5628        ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
5629        ext4_li_info = NULL;
5630        mutex_init(&ext4_li_mtx);
5631
5632        /* Build-time check for flags consistency */
5633        ext4_check_flag_values();
5634
5635        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
5636                mutex_init(&ext4__aio_mutex[i]);
5637                init_waitqueue_head(&ext4__ioend_wq[i]);
5638        }
5639
5640        err = ext4_init_es();
5641        if (err)
5642                return err;
5643
5644        err = ext4_init_pageio();
5645        if (err)
5646                goto out7;
5647
5648        err = ext4_init_system_zone();
5649        if (err)
5650                goto out6;
5651        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5652        if (!ext4_kset) {
5653                err = -ENOMEM;
5654                goto out5;
5655        }
5656        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5657
5658        err = ext4_init_feat_adverts();
5659        if (err)
5660                goto out4;
5661
5662        err = ext4_init_mballoc();
5663        if (err)
5664                goto out2;
5665        else
5666                ext4_mballoc_ready = 1;
5667        err = init_inodecache();
5668        if (err)
5669                goto out1;
5670        register_as_ext3();
5671        register_as_ext2();
5672        err = register_filesystem(&ext4_fs_type);
5673        if (err)
5674                goto out;
5675
5676        return 0;
5677out:
5678        unregister_as_ext2();
5679        unregister_as_ext3();
5680        destroy_inodecache();
5681out1:
5682        ext4_mballoc_ready = 0;
5683        ext4_exit_mballoc();
5684out2:
5685        ext4_exit_feat_adverts();
5686out4:
5687        if (ext4_proc_root)
5688                remove_proc_entry("fs/ext4", NULL);
5689        kset_unregister(ext4_kset);
5690out5:
5691        ext4_exit_system_zone();
5692out6:
5693        ext4_exit_pageio();
5694out7:
5695        ext4_exit_es();
5696
5697        return err;
5698}
5699
5700static void __exit ext4_exit_fs(void)
5701{
5702        ext4_exit_crypto();
5703        ext4_destroy_lazyinit_thread();
5704        unregister_as_ext2();
5705        unregister_as_ext3();
5706        unregister_filesystem(&ext4_fs_type);
5707        destroy_inodecache();
5708        ext4_exit_mballoc();
5709        ext4_exit_feat_adverts();
5710        remove_proc_entry("fs/ext4", NULL);
5711        kset_unregister(ext4_kset);
5712        ext4_exit_system_zone();
5713        ext4_exit_pageio();
5714        ext4_exit_es();
5715}
5716
5717MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
5718MODULE_DESCRIPTION("Fourth Extended Filesystem");
5719MODULE_LICENSE("GPL");
5720module_init(ext4_init_fs)
5721module_exit(ext4_exit_fs)
5722