linux/fs/btrfs/super.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   16 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/blkdev.h>
  20#include <linux/module.h>
  21#include <linux/buffer_head.h>
  22#include <linux/fs.h>
  23#include <linux/pagemap.h>
  24#include <linux/highmem.h>
  25#include <linux/time.h>
  26#include <linux/init.h>
  27#include <linux/seq_file.h>
  28#include <linux/string.h>
  29#include <linux/backing-dev.h>
  30#include <linux/mount.h>
  31#include <linux/mpage.h>
  32#include <linux/swap.h>
  33#include <linux/writeback.h>
  34#include <linux/statfs.h>
  35#include <linux/compat.h>
  36#include <linux/parser.h>
  37#include <linux/ctype.h>
  38#include <linux/namei.h>
  39#include <linux/miscdevice.h>
  40#include <linux/magic.h>
  41#include <linux/slab.h>
  42#include <linux/cleancache.h>
  43#include <linux/ratelimit.h>
  44#include <linux/btrfs.h>
  45#include "delayed-inode.h"
  46#include "ctree.h"
  47#include "disk-io.h"
  48#include "transaction.h"
  49#include "btrfs_inode.h"
  50#include "print-tree.h"
  51#include "hash.h"
  52#include "props.h"
  53#include "xattr.h"
  54#include "volumes.h"
  55#include "export.h"
  56#include "compression.h"
  57#include "rcu-string.h"
  58#include "dev-replace.h"
  59#include "free-space-cache.h"
  60#include "backref.h"
  61#include "tests/btrfs-tests.h"
  62
  63#include "qgroup.h"
  64#include "backref.h"
  65#define CREATE_TRACE_POINTS
  66#include <trace/events/btrfs.h>
  67
  68static const struct super_operations btrfs_super_ops;
  69static struct file_system_type btrfs_fs_type;
  70
  71static int btrfs_remount(struct super_block *sb, int *flags, char *data);
  72
  73const char *btrfs_decode_error(int errno)
  74{
  75        char *errstr = "unknown";
  76
  77        switch (errno) {
  78        case -EIO:
  79                errstr = "IO failure";
  80                break;
  81        case -ENOMEM:
  82                errstr = "Out of memory";
  83                break;
  84        case -EROFS:
  85                errstr = "Readonly filesystem";
  86                break;
  87        case -EEXIST:
  88                errstr = "Object already exists";
  89                break;
  90        case -ENOSPC:
  91                errstr = "No space left";
  92                break;
  93        case -ENOENT:
  94                errstr = "No such entry";
  95                break;
  96        }
  97
  98        return errstr;
  99}
 100
 101/* btrfs handle error by forcing the filesystem readonly */
 102static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 103{
 104        struct super_block *sb = fs_info->sb;
 105
 106        if (sb_rdonly(sb))
 107                return;
 108
 109        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 110                sb->s_flags |= MS_RDONLY;
 111                btrfs_info(fs_info, "forced readonly");
 112                /*
 113                 * Note that a running device replace operation is not
 114                 * canceled here although there is no way to update
 115                 * the progress. It would add the risk of a deadlock,
 116                 * therefore the canceling is omitted. The only penalty
 117                 * is that some I/O remains active until the procedure
 118                 * completes. The next time when the filesystem is
 119                 * mounted writeable again, the device replace
 120                 * operation continues.
 121                 */
 122        }
 123}
 124
 125/*
 126 * __btrfs_handle_fs_error decodes expected errors from the caller and
 127 * invokes the approciate error response.
 128 */
 129__cold
 130void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
 131                       unsigned int line, int errno, const char *fmt, ...)
 132{
 133        struct super_block *sb = fs_info->sb;
 134#ifdef CONFIG_PRINTK
 135        const char *errstr;
 136#endif
 137
 138        /*
 139         * Special case: if the error is EROFS, and we're already
 140         * under MS_RDONLY, then it is safe here.
 141         */
 142        if (errno == -EROFS && sb_rdonly(sb))
 143                return;
 144
 145#ifdef CONFIG_PRINTK
 146        errstr = btrfs_decode_error(errno);
 147        if (fmt) {
 148                struct va_format vaf;
 149                va_list args;
 150
 151                va_start(args, fmt);
 152                vaf.fmt = fmt;
 153                vaf.va = &args;
 154
 155                pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
 156                        sb->s_id, function, line, errno, errstr, &vaf);
 157                va_end(args);
 158        } else {
 159                pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
 160                        sb->s_id, function, line, errno, errstr);
 161        }
 162#endif
 163
 164        /*
 165         * Today we only save the error info to memory.  Long term we'll
 166         * also send it down to the disk
 167         */
 168        set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 169
 170        /* Don't go through full error handling during mount */
 171        if (sb->s_flags & MS_BORN)
 172                btrfs_handle_error(fs_info);
 173}
 174
 175#ifdef CONFIG_PRINTK
 176static const char * const logtypes[] = {
 177        "emergency",
 178        "alert",
 179        "critical",
 180        "error",
 181        "warning",
 182        "notice",
 183        "info",
 184        "debug",
 185};
 186
 187
 188/*
 189 * Use one ratelimit state per log level so that a flood of less important
 190 * messages doesn't cause more important ones to be dropped.
 191 */
 192static struct ratelimit_state printk_limits[] = {
 193        RATELIMIT_STATE_INIT(printk_limits[0], DEFAULT_RATELIMIT_INTERVAL, 100),
 194        RATELIMIT_STATE_INIT(printk_limits[1], DEFAULT_RATELIMIT_INTERVAL, 100),
 195        RATELIMIT_STATE_INIT(printk_limits[2], DEFAULT_RATELIMIT_INTERVAL, 100),
 196        RATELIMIT_STATE_INIT(printk_limits[3], DEFAULT_RATELIMIT_INTERVAL, 100),
 197        RATELIMIT_STATE_INIT(printk_limits[4], DEFAULT_RATELIMIT_INTERVAL, 100),
 198        RATELIMIT_STATE_INIT(printk_limits[5], DEFAULT_RATELIMIT_INTERVAL, 100),
 199        RATELIMIT_STATE_INIT(printk_limits[6], DEFAULT_RATELIMIT_INTERVAL, 100),
 200        RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
 201};
 202
 203void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 204{
 205        struct super_block *sb = fs_info->sb;
 206        char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
 207        struct va_format vaf;
 208        va_list args;
 209        int kern_level;
 210        const char *type = logtypes[4];
 211        struct ratelimit_state *ratelimit = &printk_limits[4];
 212
 213        va_start(args, fmt);
 214
 215        while ((kern_level = printk_get_level(fmt)) != 0) {
 216                size_t size = printk_skip_level(fmt) - fmt;
 217
 218                if (kern_level >= '0' && kern_level <= '7') {
 219                        memcpy(lvl, fmt,  size);
 220                        lvl[size] = '\0';
 221                        type = logtypes[kern_level - '0'];
 222                        ratelimit = &printk_limits[kern_level - '0'];
 223                }
 224                fmt += size;
 225        }
 226
 227        vaf.fmt = fmt;
 228        vaf.va = &args;
 229
 230        if (__ratelimit(ratelimit))
 231                printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
 232
 233        va_end(args);
 234}
 235#endif
 236
 237/*
 238 * We only mark the transaction aborted and then set the file system read-only.
 239 * This will prevent new transactions from starting or trying to join this
 240 * one.
 241 *
 242 * This means that error recovery at the call site is limited to freeing
 243 * any local memory allocations and passing the error code up without
 244 * further cleanup. The transaction should complete as it normally would
 245 * in the call path but will return -EIO.
 246 *
 247 * We'll complete the cleanup in btrfs_end_transaction and
 248 * btrfs_commit_transaction.
 249 */
 250__cold
 251void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 252                               const char *function,
 253                               unsigned int line, int errno)
 254{
 255        struct btrfs_fs_info *fs_info = trans->fs_info;
 256
 257        trans->aborted = errno;
 258        /* Nothing used. The other threads that have joined this
 259         * transaction may be able to continue. */
 260        if (!trans->dirty && list_empty(&trans->new_bgs)) {
 261                const char *errstr;
 262
 263                errstr = btrfs_decode_error(errno);
 264                btrfs_warn(fs_info,
 265                           "%s:%d: Aborting unused transaction(%s).",
 266                           function, line, errstr);
 267                return;
 268        }
 269        WRITE_ONCE(trans->transaction->aborted, errno);
 270        /* Wake up anybody who may be waiting on this transaction */
 271        wake_up(&fs_info->transaction_wait);
 272        wake_up(&fs_info->transaction_blocked_wait);
 273        __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
 274}
 275/*
 276 * __btrfs_panic decodes unexpected, fatal errors from the caller,
 277 * issues an alert, and either panics or BUGs, depending on mount options.
 278 */
 279__cold
 280void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 281                   unsigned int line, int errno, const char *fmt, ...)
 282{
 283        char *s_id = "<unknown>";
 284        const char *errstr;
 285        struct va_format vaf = { .fmt = fmt };
 286        va_list args;
 287
 288        if (fs_info)
 289                s_id = fs_info->sb->s_id;
 290
 291        va_start(args, fmt);
 292        vaf.va = &args;
 293
 294        errstr = btrfs_decode_error(errno);
 295        if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
 296                panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
 297                        s_id, function, line, &vaf, errno, errstr);
 298
 299        btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
 300                   function, line, &vaf, errno, errstr);
 301        va_end(args);
 302        /* Caller calls BUG() */
 303}
 304
 305static void btrfs_put_super(struct super_block *sb)
 306{
 307        close_ctree(btrfs_sb(sb));
 308}
 309
 310enum {
 311        Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
 312        Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
 313        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
 314        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
 315        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
 316        Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
 317        Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
 318        Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
 319        Opt_skip_balance, Opt_check_integrity,
 320        Opt_check_integrity_including_extent_data,
 321        Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
 322        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
 323        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
 324        Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
 325        Opt_nologreplay, Opt_norecovery,
 326#ifdef CONFIG_BTRFS_DEBUG
 327        Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
 328#endif
 329        Opt_err,
 330};
 331
 332static const match_table_t tokens = {
 333        {Opt_degraded, "degraded"},
 334        {Opt_subvol, "subvol=%s"},
 335        {Opt_subvolid, "subvolid=%s"},
 336        {Opt_device, "device=%s"},
 337        {Opt_nodatasum, "nodatasum"},
 338        {Opt_datasum, "datasum"},
 339        {Opt_nodatacow, "nodatacow"},
 340        {Opt_datacow, "datacow"},
 341        {Opt_nobarrier, "nobarrier"},
 342        {Opt_barrier, "barrier"},
 343        {Opt_max_inline, "max_inline=%s"},
 344        {Opt_alloc_start, "alloc_start=%s"},
 345        {Opt_thread_pool, "thread_pool=%d"},
 346        {Opt_compress, "compress"},
 347        {Opt_compress_type, "compress=%s"},
 348        {Opt_compress_force, "compress-force"},
 349        {Opt_compress_force_type, "compress-force=%s"},
 350        {Opt_ssd, "ssd"},
 351        {Opt_ssd_spread, "ssd_spread"},
 352        {Opt_nossd, "nossd"},
 353        {Opt_acl, "acl"},
 354        {Opt_noacl, "noacl"},
 355        {Opt_notreelog, "notreelog"},
 356        {Opt_treelog, "treelog"},
 357        {Opt_nologreplay, "nologreplay"},
 358        {Opt_norecovery, "norecovery"},
 359        {Opt_flushoncommit, "flushoncommit"},
 360        {Opt_noflushoncommit, "noflushoncommit"},
 361        {Opt_ratio, "metadata_ratio=%d"},
 362        {Opt_discard, "discard"},
 363        {Opt_nodiscard, "nodiscard"},
 364        {Opt_space_cache, "space_cache"},
 365        {Opt_space_cache_version, "space_cache=%s"},
 366        {Opt_clear_cache, "clear_cache"},
 367        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
 368        {Opt_enospc_debug, "enospc_debug"},
 369        {Opt_noenospc_debug, "noenospc_debug"},
 370        {Opt_subvolrootid, "subvolrootid=%d"},
 371        {Opt_defrag, "autodefrag"},
 372        {Opt_nodefrag, "noautodefrag"},
 373        {Opt_inode_cache, "inode_cache"},
 374        {Opt_noinode_cache, "noinode_cache"},
 375        {Opt_no_space_cache, "nospace_cache"},
 376        {Opt_recovery, "recovery"}, /* deprecated */
 377        {Opt_usebackuproot, "usebackuproot"},
 378        {Opt_skip_balance, "skip_balance"},
 379        {Opt_check_integrity, "check_int"},
 380        {Opt_check_integrity_including_extent_data, "check_int_data"},
 381        {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
 382        {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
 383        {Opt_fatal_errors, "fatal_errors=%s"},
 384        {Opt_commit_interval, "commit=%d"},
 385#ifdef CONFIG_BTRFS_DEBUG
 386        {Opt_fragment_data, "fragment=data"},
 387        {Opt_fragment_metadata, "fragment=metadata"},
 388        {Opt_fragment_all, "fragment=all"},
 389#endif
 390        {Opt_err, NULL},
 391};
 392
 393/*
 394 * Regular mount options parser.  Everything that is needed only when
 395 * reading in a new superblock is parsed here.
 396 * XXX JDM: This needs to be cleaned up for remount.
 397 */
 398int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 399                        unsigned long new_flags)
 400{
 401        substring_t args[MAX_OPT_ARGS];
 402        char *p, *num, *orig = NULL;
 403        u64 cache_gen;
 404        int intarg;
 405        int ret = 0;
 406        char *compress_type;
 407        bool compress_force = false;
 408        enum btrfs_compression_type saved_compress_type;
 409        bool saved_compress_force;
 410        int no_compress = 0;
 411
 412        cache_gen = btrfs_super_cache_generation(info->super_copy);
 413        if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
 414                btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
 415        else if (cache_gen)
 416                btrfs_set_opt(info->mount_opt, SPACE_CACHE);
 417
 418        /*
 419         * Even the options are empty, we still need to do extra check
 420         * against new flags
 421         */
 422        if (!options)
 423                goto check;
 424
 425        /*
 426         * strsep changes the string, duplicate it because parse_options
 427         * gets called twice
 428         */
 429        options = kstrdup(options, GFP_KERNEL);
 430        if (!options)
 431                return -ENOMEM;
 432
 433        orig = options;
 434
 435        while ((p = strsep(&options, ",")) != NULL) {
 436                int token;
 437                if (!*p)
 438                        continue;
 439
 440                token = match_token(p, tokens, args);
 441                switch (token) {
 442                case Opt_degraded:
 443                        btrfs_info(info, "allowing degraded mounts");
 444                        btrfs_set_opt(info->mount_opt, DEGRADED);
 445                        break;
 446                case Opt_subvol:
 447                case Opt_subvolid:
 448                case Opt_subvolrootid:
 449                case Opt_device:
 450                        /*
 451                         * These are parsed by btrfs_parse_early_options
 452                         * and can be happily ignored here.
 453                         */
 454                        break;
 455                case Opt_nodatasum:
 456                        btrfs_set_and_info(info, NODATASUM,
 457                                           "setting nodatasum");
 458                        break;
 459                case Opt_datasum:
 460                        if (btrfs_test_opt(info, NODATASUM)) {
 461                                if (btrfs_test_opt(info, NODATACOW))
 462                                        btrfs_info(info,
 463                                                   "setting datasum, datacow enabled");
 464                                else
 465                                        btrfs_info(info, "setting datasum");
 466                        }
 467                        btrfs_clear_opt(info->mount_opt, NODATACOW);
 468                        btrfs_clear_opt(info->mount_opt, NODATASUM);
 469                        break;
 470                case Opt_nodatacow:
 471                        if (!btrfs_test_opt(info, NODATACOW)) {
 472                                if (!btrfs_test_opt(info, COMPRESS) ||
 473                                    !btrfs_test_opt(info, FORCE_COMPRESS)) {
 474                                        btrfs_info(info,
 475                                                   "setting nodatacow, compression disabled");
 476                                } else {
 477                                        btrfs_info(info, "setting nodatacow");
 478                                }
 479                        }
 480                        btrfs_clear_opt(info->mount_opt, COMPRESS);
 481                        btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 482                        btrfs_set_opt(info->mount_opt, NODATACOW);
 483                        btrfs_set_opt(info->mount_opt, NODATASUM);
 484                        break;
 485                case Opt_datacow:
 486                        btrfs_clear_and_info(info, NODATACOW,
 487                                             "setting datacow");
 488                        break;
 489                case Opt_compress_force:
 490                case Opt_compress_force_type:
 491                        compress_force = true;
 492                        /* Fallthrough */
 493                case Opt_compress:
 494                case Opt_compress_type:
 495                        saved_compress_type = btrfs_test_opt(info,
 496                                                             COMPRESS) ?
 497                                info->compress_type : BTRFS_COMPRESS_NONE;
 498                        saved_compress_force =
 499                                btrfs_test_opt(info, FORCE_COMPRESS);
 500                        if (token == Opt_compress ||
 501                            token == Opt_compress_force ||
 502                            strncmp(args[0].from, "zlib", 4) == 0) {
 503                                compress_type = "zlib";
 504                                info->compress_type = BTRFS_COMPRESS_ZLIB;
 505                                btrfs_set_opt(info->mount_opt, COMPRESS);
 506                                btrfs_clear_opt(info->mount_opt, NODATACOW);
 507                                btrfs_clear_opt(info->mount_opt, NODATASUM);
 508                                no_compress = 0;
 509                        } else if (strncmp(args[0].from, "lzo", 3) == 0) {
 510                                compress_type = "lzo";
 511                                info->compress_type = BTRFS_COMPRESS_LZO;
 512                                btrfs_set_opt(info->mount_opt, COMPRESS);
 513                                btrfs_clear_opt(info->mount_opt, NODATACOW);
 514                                btrfs_clear_opt(info->mount_opt, NODATASUM);
 515                                btrfs_set_fs_incompat(info, COMPRESS_LZO);
 516                                no_compress = 0;
 517                        } else if (strcmp(args[0].from, "zstd") == 0) {
 518                                compress_type = "zstd";
 519                                info->compress_type = BTRFS_COMPRESS_ZSTD;
 520                                btrfs_set_opt(info->mount_opt, COMPRESS);
 521                                btrfs_clear_opt(info->mount_opt, NODATACOW);
 522                                btrfs_clear_opt(info->mount_opt, NODATASUM);
 523                                btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
 524                                no_compress = 0;
 525                        } else if (strncmp(args[0].from, "no", 2) == 0) {
 526                                compress_type = "no";
 527                                btrfs_clear_opt(info->mount_opt, COMPRESS);
 528                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 529                                compress_force = false;
 530                                no_compress++;
 531                        } else {
 532                                ret = -EINVAL;
 533                                goto out;
 534                        }
 535
 536                        if (compress_force) {
 537                                btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
 538                        } else {
 539                                /*
 540                                 * If we remount from compress-force=xxx to
 541                                 * compress=xxx, we need clear FORCE_COMPRESS
 542                                 * flag, otherwise, there is no way for users
 543                                 * to disable forcible compression separately.
 544                                 */
 545                                btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 546                        }
 547                        if ((btrfs_test_opt(info, COMPRESS) &&
 548                             (info->compress_type != saved_compress_type ||
 549                              compress_force != saved_compress_force)) ||
 550                            (!btrfs_test_opt(info, COMPRESS) &&
 551                             no_compress == 1)) {
 552                                btrfs_info(info, "%s %s compression",
 553                                           (compress_force) ? "force" : "use",
 554                                           compress_type);
 555                        }
 556                        compress_force = false;
 557                        break;
 558                case Opt_ssd:
 559                        btrfs_set_and_info(info, SSD,
 560                                           "enabling ssd optimizations");
 561                        btrfs_clear_opt(info->mount_opt, NOSSD);
 562                        break;
 563                case Opt_ssd_spread:
 564                        btrfs_set_and_info(info, SSD,
 565                                           "enabling ssd optimizations");
 566                        btrfs_set_and_info(info, SSD_SPREAD,
 567                                           "using spread ssd allocation scheme");
 568                        btrfs_clear_opt(info->mount_opt, NOSSD);
 569                        break;
 570                case Opt_nossd:
 571                        btrfs_set_opt(info->mount_opt, NOSSD);
 572                        btrfs_clear_and_info(info, SSD,
 573                                             "not using ssd optimizations");
 574                        btrfs_clear_and_info(info, SSD_SPREAD,
 575                                             "not using spread ssd allocation scheme");
 576                        break;
 577                case Opt_barrier:
 578                        btrfs_clear_and_info(info, NOBARRIER,
 579                                             "turning on barriers");
 580                        break;
 581                case Opt_nobarrier:
 582                        btrfs_set_and_info(info, NOBARRIER,
 583                                           "turning off barriers");
 584                        break;
 585                case Opt_thread_pool:
 586                        ret = match_int(&args[0], &intarg);
 587                        if (ret) {
 588                                goto out;
 589                        } else if (intarg > 0) {
 590                                info->thread_pool_size = intarg;
 591                        } else {
 592                                ret = -EINVAL;
 593                                goto out;
 594                        }
 595                        break;
 596                case Opt_max_inline:
 597                        num = match_strdup(&args[0]);
 598                        if (num) {
 599                                info->max_inline = memparse(num, NULL);
 600                                kfree(num);
 601
 602                                if (info->max_inline) {
 603                                        info->max_inline = min_t(u64,
 604                                                info->max_inline,
 605                                                info->sectorsize);
 606                                }
 607                                btrfs_info(info, "max_inline at %llu",
 608                                           info->max_inline);
 609                        } else {
 610                                ret = -ENOMEM;
 611                                goto out;
 612                        }
 613                        break;
 614                case Opt_alloc_start:
 615                        btrfs_info(info,
 616                                "option alloc_start is obsolete, ignored");
 617                        break;
 618                case Opt_acl:
 619#ifdef CONFIG_BTRFS_FS_POSIX_ACL
 620                        info->sb->s_flags |= MS_POSIXACL;
 621                        break;
 622#else
 623                        btrfs_err(info, "support for ACL not compiled in!");
 624                        ret = -EINVAL;
 625                        goto out;
 626#endif
 627                case Opt_noacl:
 628                        info->sb->s_flags &= ~MS_POSIXACL;
 629                        break;
 630                case Opt_notreelog:
 631                        btrfs_set_and_info(info, NOTREELOG,
 632                                           "disabling tree log");
 633                        break;
 634                case Opt_treelog:
 635                        btrfs_clear_and_info(info, NOTREELOG,
 636                                             "enabling tree log");
 637                        break;
 638                case Opt_norecovery:
 639                case Opt_nologreplay:
 640                        btrfs_set_and_info(info, NOLOGREPLAY,
 641                                           "disabling log replay at mount time");
 642                        break;
 643                case Opt_flushoncommit:
 644                        btrfs_set_and_info(info, FLUSHONCOMMIT,
 645                                           "turning on flush-on-commit");
 646                        break;
 647                case Opt_noflushoncommit:
 648                        btrfs_clear_and_info(info, FLUSHONCOMMIT,
 649                                             "turning off flush-on-commit");
 650                        break;
 651                case Opt_ratio:
 652                        ret = match_int(&args[0], &intarg);
 653                        if (ret) {
 654                                goto out;
 655                        } else if (intarg >= 0) {
 656                                info->metadata_ratio = intarg;
 657                                btrfs_info(info, "metadata ratio %d",
 658                                           info->metadata_ratio);
 659                        } else {
 660                                ret = -EINVAL;
 661                                goto out;
 662                        }
 663                        break;
 664                case Opt_discard:
 665                        btrfs_set_and_info(info, DISCARD,
 666                                           "turning on discard");
 667                        break;
 668                case Opt_nodiscard:
 669                        btrfs_clear_and_info(info, DISCARD,
 670                                             "turning off discard");
 671                        break;
 672                case Opt_space_cache:
 673                case Opt_space_cache_version:
 674                        if (token == Opt_space_cache ||
 675                            strcmp(args[0].from, "v1") == 0) {
 676                                btrfs_clear_opt(info->mount_opt,
 677                                                FREE_SPACE_TREE);
 678                                btrfs_set_and_info(info, SPACE_CACHE,
 679                                           "enabling disk space caching");
 680                        } else if (strcmp(args[0].from, "v2") == 0) {
 681                                btrfs_clear_opt(info->mount_opt,
 682                                                SPACE_CACHE);
 683                                btrfs_set_and_info(info, FREE_SPACE_TREE,
 684                                                   "enabling free space tree");
 685                        } else {
 686                                ret = -EINVAL;
 687                                goto out;
 688                        }
 689                        break;
 690                case Opt_rescan_uuid_tree:
 691                        btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
 692                        break;
 693                case Opt_no_space_cache:
 694                        if (btrfs_test_opt(info, SPACE_CACHE)) {
 695                                btrfs_clear_and_info(info, SPACE_CACHE,
 696                                             "disabling disk space caching");
 697                        }
 698                        if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
 699                                btrfs_clear_and_info(info, FREE_SPACE_TREE,
 700                                             "disabling free space tree");
 701                        }
 702                        break;
 703                case Opt_inode_cache:
 704                        btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
 705                                           "enabling inode map caching");
 706                        break;
 707                case Opt_noinode_cache:
 708                        btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
 709                                             "disabling inode map caching");
 710                        break;
 711                case Opt_clear_cache:
 712                        btrfs_set_and_info(info, CLEAR_CACHE,
 713                                           "force clearing of disk cache");
 714                        break;
 715                case Opt_user_subvol_rm_allowed:
 716                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
 717                        break;
 718                case Opt_enospc_debug:
 719                        btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
 720                        break;
 721                case Opt_noenospc_debug:
 722                        btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
 723                        break;
 724                case Opt_defrag:
 725                        btrfs_set_and_info(info, AUTO_DEFRAG,
 726                                           "enabling auto defrag");
 727                        break;
 728                case Opt_nodefrag:
 729                        btrfs_clear_and_info(info, AUTO_DEFRAG,
 730                                             "disabling auto defrag");
 731                        break;
 732                case Opt_recovery:
 733                        btrfs_warn(info,
 734                                   "'recovery' is deprecated, use 'usebackuproot' instead");
 735                case Opt_usebackuproot:
 736                        btrfs_info(info,
 737                                   "trying to use backup root at mount time");
 738                        btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
 739                        break;
 740                case Opt_skip_balance:
 741                        btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
 742                        break;
 743#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 744                case Opt_check_integrity_including_extent_data:
 745                        btrfs_info(info,
 746                                   "enabling check integrity including extent data");
 747                        btrfs_set_opt(info->mount_opt,
 748                                      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
 749                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 750                        break;
 751                case Opt_check_integrity:
 752                        btrfs_info(info, "enabling check integrity");
 753                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 754                        break;
 755                case Opt_check_integrity_print_mask:
 756                        ret = match_int(&args[0], &intarg);
 757                        if (ret) {
 758                                goto out;
 759                        } else if (intarg >= 0) {
 760                                info->check_integrity_print_mask = intarg;
 761                                btrfs_info(info,
 762                                           "check_integrity_print_mask 0x%x",
 763                                           info->check_integrity_print_mask);
 764                        } else {
 765                                ret = -EINVAL;
 766                                goto out;
 767                        }
 768                        break;
 769#else
 770                case Opt_check_integrity_including_extent_data:
 771                case Opt_check_integrity:
 772                case Opt_check_integrity_print_mask:
 773                        btrfs_err(info,
 774                                  "support for check_integrity* not compiled in!");
 775                        ret = -EINVAL;
 776                        goto out;
 777#endif
 778                case Opt_fatal_errors:
 779                        if (strcmp(args[0].from, "panic") == 0)
 780                                btrfs_set_opt(info->mount_opt,
 781                                              PANIC_ON_FATAL_ERROR);
 782                        else if (strcmp(args[0].from, "bug") == 0)
 783                                btrfs_clear_opt(info->mount_opt,
 784                                              PANIC_ON_FATAL_ERROR);
 785                        else {
 786                                ret = -EINVAL;
 787                                goto out;
 788                        }
 789                        break;
 790                case Opt_commit_interval:
 791                        intarg = 0;
 792                        ret = match_int(&args[0], &intarg);
 793                        if (ret < 0) {
 794                                btrfs_err(info, "invalid commit interval");
 795                                ret = -EINVAL;
 796                                goto out;
 797                        }
 798                        if (intarg > 0) {
 799                                if (intarg > 300) {
 800                                        btrfs_warn(info,
 801                                                "excessive commit interval %d",
 802                                                intarg);
 803                                }
 804                                info->commit_interval = intarg;
 805                        } else {
 806                                btrfs_info(info,
 807                                           "using default commit interval %ds",
 808                                           BTRFS_DEFAULT_COMMIT_INTERVAL);
 809                                info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 810                        }
 811                        break;
 812#ifdef CONFIG_BTRFS_DEBUG
 813                case Opt_fragment_all:
 814                        btrfs_info(info, "fragmenting all space");
 815                        btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
 816                        btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
 817                        break;
 818                case Opt_fragment_metadata:
 819                        btrfs_info(info, "fragmenting metadata");
 820                        btrfs_set_opt(info->mount_opt,
 821                                      FRAGMENT_METADATA);
 822                        break;
 823                case Opt_fragment_data:
 824                        btrfs_info(info, "fragmenting data");
 825                        btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
 826                        break;
 827#endif
 828                case Opt_err:
 829                        btrfs_info(info, "unrecognized mount option '%s'", p);
 830                        ret = -EINVAL;
 831                        goto out;
 832                default:
 833                        break;
 834                }
 835        }
 836check:
 837        /*
 838         * Extra check for current option against current flag
 839         */
 840        if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
 841                btrfs_err(info,
 842                          "nologreplay must be used with ro mount option");
 843                ret = -EINVAL;
 844        }
 845out:
 846        if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
 847            !btrfs_test_opt(info, FREE_SPACE_TREE) &&
 848            !btrfs_test_opt(info, CLEAR_CACHE)) {
 849                btrfs_err(info, "cannot disable free space tree");
 850                ret = -EINVAL;
 851
 852        }
 853        if (!ret && btrfs_test_opt(info, SPACE_CACHE))
 854                btrfs_info(info, "disk space caching is enabled");
 855        if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
 856                btrfs_info(info, "using free space tree");
 857        kfree(orig);
 858        return ret;
 859}
 860
 861/*
 862 * Parse mount options that are required early in the mount process.
 863 *
 864 * All other options will be parsed on much later in the mount process and
 865 * only when we need to allocate a new super block.
 866 */
 867static int btrfs_parse_early_options(const char *options, fmode_t flags,
 868                void *holder, char **subvol_name, u64 *subvol_objectid,
 869                struct btrfs_fs_devices **fs_devices)
 870{
 871        substring_t args[MAX_OPT_ARGS];
 872        char *device_name, *opts, *orig, *p;
 873        char *num = NULL;
 874        int error = 0;
 875
 876        if (!options)
 877                return 0;
 878
 879        /*
 880         * strsep changes the string, duplicate it because parse_options
 881         * gets called twice
 882         */
 883        opts = kstrdup(options, GFP_KERNEL);
 884        if (!opts)
 885                return -ENOMEM;
 886        orig = opts;
 887
 888        while ((p = strsep(&opts, ",")) != NULL) {
 889                int token;
 890                if (!*p)
 891                        continue;
 892
 893                token = match_token(p, tokens, args);
 894                switch (token) {
 895                case Opt_subvol:
 896                        kfree(*subvol_name);
 897                        *subvol_name = match_strdup(&args[0]);
 898                        if (!*subvol_name) {
 899                                error = -ENOMEM;
 900                                goto out;
 901                        }
 902                        break;
 903                case Opt_subvolid:
 904                        num = match_strdup(&args[0]);
 905                        if (num) {
 906                                *subvol_objectid = memparse(num, NULL);
 907                                kfree(num);
 908                                /* we want the original fs_tree */
 909                                if (!*subvol_objectid)
 910                                        *subvol_objectid =
 911                                                BTRFS_FS_TREE_OBJECTID;
 912                        } else {
 913                                error = -EINVAL;
 914                                goto out;
 915                        }
 916                        break;
 917                case Opt_subvolrootid:
 918                        pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
 919                        break;
 920                case Opt_device:
 921                        device_name = match_strdup(&args[0]);
 922                        if (!device_name) {
 923                                error = -ENOMEM;
 924                                goto out;
 925                        }
 926                        error = btrfs_scan_one_device(device_name,
 927                                        flags, holder, fs_devices);
 928                        kfree(device_name);
 929                        if (error)
 930                                goto out;
 931                        break;
 932                default:
 933                        break;
 934                }
 935        }
 936
 937out:
 938        kfree(orig);
 939        return error;
 940}
 941
 942static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
 943                                           u64 subvol_objectid)
 944{
 945        struct btrfs_root *root = fs_info->tree_root;
 946        struct btrfs_root *fs_root;
 947        struct btrfs_root_ref *root_ref;
 948        struct btrfs_inode_ref *inode_ref;
 949        struct btrfs_key key;
 950        struct btrfs_path *path = NULL;
 951        char *name = NULL, *ptr;
 952        u64 dirid;
 953        int len;
 954        int ret;
 955
 956        path = btrfs_alloc_path();
 957        if (!path) {
 958                ret = -ENOMEM;
 959                goto err;
 960        }
 961        path->leave_spinning = 1;
 962
 963        name = kmalloc(PATH_MAX, GFP_KERNEL);
 964        if (!name) {
 965                ret = -ENOMEM;
 966                goto err;
 967        }
 968        ptr = name + PATH_MAX - 1;
 969        ptr[0] = '\0';
 970
 971        /*
 972         * Walk up the subvolume trees in the tree of tree roots by root
 973         * backrefs until we hit the top-level subvolume.
 974         */
 975        while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
 976                key.objectid = subvol_objectid;
 977                key.type = BTRFS_ROOT_BACKREF_KEY;
 978                key.offset = (u64)-1;
 979
 980                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 981                if (ret < 0) {
 982                        goto err;
 983                } else if (ret > 0) {
 984                        ret = btrfs_previous_item(root, path, subvol_objectid,
 985                                                  BTRFS_ROOT_BACKREF_KEY);
 986                        if (ret < 0) {
 987                                goto err;
 988                        } else if (ret > 0) {
 989                                ret = -ENOENT;
 990                                goto err;
 991                        }
 992                }
 993
 994                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 995                subvol_objectid = key.offset;
 996
 997                root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 998                                          struct btrfs_root_ref);
 999                len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
1000                ptr -= len + 1;
1001                if (ptr < name) {
1002                        ret = -ENAMETOOLONG;
1003                        goto err;
1004                }
1005                read_extent_buffer(path->nodes[0], ptr + 1,
1006                                   (unsigned long)(root_ref + 1), len);
1007                ptr[0] = '/';
1008                dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
1009                btrfs_release_path(path);
1010
1011                key.objectid = subvol_objectid;
1012                key.type = BTRFS_ROOT_ITEM_KEY;
1013                key.offset = (u64)-1;
1014                fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
1015                if (IS_ERR(fs_root)) {
1016                        ret = PTR_ERR(fs_root);
1017                        goto err;
1018                }
1019
1020                /*
1021                 * Walk up the filesystem tree by inode refs until we hit the
1022                 * root directory.
1023                 */
1024                while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
1025                        key.objectid = dirid;
1026                        key.type = BTRFS_INODE_REF_KEY;
1027                        key.offset = (u64)-1;
1028
1029                        ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1030                        if (ret < 0) {
1031                                goto err;
1032                        } else if (ret > 0) {
1033                                ret = btrfs_previous_item(fs_root, path, dirid,
1034                                                          BTRFS_INODE_REF_KEY);
1035                                if (ret < 0) {
1036                                        goto err;
1037                                } else if (ret > 0) {
1038                                        ret = -ENOENT;
1039                                        goto err;
1040                                }
1041                        }
1042
1043                        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1044                        dirid = key.offset;
1045
1046                        inode_ref = btrfs_item_ptr(path->nodes[0],
1047                                                   path->slots[0],
1048                                                   struct btrfs_inode_ref);
1049                        len = btrfs_inode_ref_name_len(path->nodes[0],
1050                                                       inode_ref);
1051                        ptr -= len + 1;
1052                        if (ptr < name) {
1053                                ret = -ENAMETOOLONG;
1054                                goto err;
1055                        }
1056                        read_extent_buffer(path->nodes[0], ptr + 1,
1057                                           (unsigned long)(inode_ref + 1), len);
1058                        ptr[0] = '/';
1059                        btrfs_release_path(path);
1060                }
1061        }
1062
1063        btrfs_free_path(path);
1064        if (ptr == name + PATH_MAX - 1) {
1065                name[0] = '/';
1066                name[1] = '\0';
1067        } else {
1068                memmove(name, ptr, name + PATH_MAX - ptr);
1069        }
1070        return name;
1071
1072err:
1073        btrfs_free_path(path);
1074        kfree(name);
1075        return ERR_PTR(ret);
1076}
1077
1078static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
1079{
1080        struct btrfs_root *root = fs_info->tree_root;
1081        struct btrfs_dir_item *di;
1082        struct btrfs_path *path;
1083        struct btrfs_key location;
1084        u64 dir_id;
1085
1086        path = btrfs_alloc_path();
1087        if (!path)
1088                return -ENOMEM;
1089        path->leave_spinning = 1;
1090
1091        /*
1092         * Find the "default" dir item which points to the root item that we
1093         * will mount by default if we haven't been given a specific subvolume
1094         * to mount.
1095         */
1096        dir_id = btrfs_super_root_dir(fs_info->super_copy);
1097        di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
1098        if (IS_ERR(di)) {
1099                btrfs_free_path(path);
1100                return PTR_ERR(di);
1101        }
1102        if (!di) {
1103                /*
1104                 * Ok the default dir item isn't there.  This is weird since
1105                 * it's always been there, but don't freak out, just try and
1106                 * mount the top-level subvolume.
1107                 */
1108                btrfs_free_path(path);
1109                *objectid = BTRFS_FS_TREE_OBJECTID;
1110                return 0;
1111        }
1112
1113        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
1114        btrfs_free_path(path);
1115        *objectid = location.objectid;
1116        return 0;
1117}
1118
1119static int btrfs_fill_super(struct super_block *sb,
1120                            struct btrfs_fs_devices *fs_devices,
1121                            void *data)
1122{
1123        struct inode *inode;
1124        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1125        struct btrfs_key key;
1126        int err;
1127
1128        sb->s_maxbytes = MAX_LFS_FILESIZE;
1129        sb->s_magic = BTRFS_SUPER_MAGIC;
1130        sb->s_op = &btrfs_super_ops;
1131        sb->s_d_op = &btrfs_dentry_operations;
1132        sb->s_export_op = &btrfs_export_ops;
1133        sb->s_xattr = btrfs_xattr_handlers;
1134        sb->s_time_gran = 1;
1135#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1136        sb->s_flags |= MS_POSIXACL;
1137#endif
1138        sb->s_flags |= SB_I_VERSION;
1139        sb->s_iflags |= SB_I_CGROUPWB;
1140
1141        err = super_setup_bdi(sb);
1142        if (err) {
1143                btrfs_err(fs_info, "super_setup_bdi failed");
1144                return err;
1145        }
1146
1147        err = open_ctree(sb, fs_devices, (char *)data);
1148        if (err) {
1149                btrfs_err(fs_info, "open_ctree failed");
1150                return err;
1151        }
1152
1153        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
1154        key.type = BTRFS_INODE_ITEM_KEY;
1155        key.offset = 0;
1156        inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
1157        if (IS_ERR(inode)) {
1158                err = PTR_ERR(inode);
1159                goto fail_close;
1160        }
1161
1162        sb->s_root = d_make_root(inode);
1163        if (!sb->s_root) {
1164                err = -ENOMEM;
1165                goto fail_close;
1166        }
1167
1168        cleancache_init_fs(sb);
1169        sb->s_flags |= MS_ACTIVE;
1170        return 0;
1171
1172fail_close:
1173        close_ctree(fs_info);
1174        return err;
1175}
1176
1177int btrfs_sync_fs(struct super_block *sb, int wait)
1178{
1179        struct btrfs_trans_handle *trans;
1180        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1181        struct btrfs_root *root = fs_info->tree_root;
1182
1183        trace_btrfs_sync_fs(fs_info, wait);
1184
1185        if (!wait) {
1186                filemap_flush(fs_info->btree_inode->i_mapping);
1187                return 0;
1188        }
1189
1190        btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1191
1192        trans = btrfs_attach_transaction_barrier(root);
1193        if (IS_ERR(trans)) {
1194                /* no transaction, don't bother */
1195                if (PTR_ERR(trans) == -ENOENT) {
1196                        /*
1197                         * Exit unless we have some pending changes
1198                         * that need to go through commit
1199                         */
1200                        if (fs_info->pending_changes == 0)
1201                                return 0;
1202                        /*
1203                         * A non-blocking test if the fs is frozen. We must not
1204                         * start a new transaction here otherwise a deadlock
1205                         * happens. The pending operations are delayed to the
1206                         * next commit after thawing.
1207                         */
1208                        if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
1209                                __sb_end_write(sb, SB_FREEZE_WRITE);
1210                        else
1211                                return 0;
1212                        trans = btrfs_start_transaction(root, 0);
1213                }
1214                if (IS_ERR(trans))
1215                        return PTR_ERR(trans);
1216        }
1217        return btrfs_commit_transaction(trans);
1218}
1219
1220static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1221{
1222        struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1223        char *compress_type;
1224
1225        if (btrfs_test_opt(info, DEGRADED))
1226                seq_puts(seq, ",degraded");
1227        if (btrfs_test_opt(info, NODATASUM))
1228                seq_puts(seq, ",nodatasum");
1229        if (btrfs_test_opt(info, NODATACOW))
1230                seq_puts(seq, ",nodatacow");
1231        if (btrfs_test_opt(info, NOBARRIER))
1232                seq_puts(seq, ",nobarrier");
1233        if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1234                seq_printf(seq, ",max_inline=%llu", info->max_inline);
1235        if (info->thread_pool_size !=  min_t(unsigned long,
1236                                             num_online_cpus() + 2, 8))
1237                seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
1238        if (btrfs_test_opt(info, COMPRESS)) {
1239                if (info->compress_type == BTRFS_COMPRESS_ZLIB)
1240                        compress_type = "zlib";
1241                else if (info->compress_type == BTRFS_COMPRESS_LZO)
1242                        compress_type = "lzo";
1243                else
1244                        compress_type = "zstd";
1245                if (btrfs_test_opt(info, FORCE_COMPRESS))
1246                        seq_printf(seq, ",compress-force=%s", compress_type);
1247                else
1248                        seq_printf(seq, ",compress=%s", compress_type);
1249        }
1250        if (btrfs_test_opt(info, NOSSD))
1251                seq_puts(seq, ",nossd");
1252        if (btrfs_test_opt(info, SSD_SPREAD))
1253                seq_puts(seq, ",ssd_spread");
1254        else if (btrfs_test_opt(info, SSD))
1255                seq_puts(seq, ",ssd");
1256        if (btrfs_test_opt(info, NOTREELOG))
1257                seq_puts(seq, ",notreelog");
1258        if (btrfs_test_opt(info, NOLOGREPLAY))
1259                seq_puts(seq, ",nologreplay");
1260        if (btrfs_test_opt(info, FLUSHONCOMMIT))
1261                seq_puts(seq, ",flushoncommit");
1262        if (btrfs_test_opt(info, DISCARD))
1263                seq_puts(seq, ",discard");
1264        if (!(info->sb->s_flags & MS_POSIXACL))
1265                seq_puts(seq, ",noacl");
1266        if (btrfs_test_opt(info, SPACE_CACHE))
1267                seq_puts(seq, ",space_cache");
1268        else if (btrfs_test_opt(info, FREE_SPACE_TREE))
1269                seq_puts(seq, ",space_cache=v2");
1270        else
1271                seq_puts(seq, ",nospace_cache");
1272        if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1273                seq_puts(seq, ",rescan_uuid_tree");
1274        if (btrfs_test_opt(info, CLEAR_CACHE))
1275                seq_puts(seq, ",clear_cache");
1276        if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
1277                seq_puts(seq, ",user_subvol_rm_allowed");
1278        if (btrfs_test_opt(info, ENOSPC_DEBUG))
1279                seq_puts(seq, ",enospc_debug");
1280        if (btrfs_test_opt(info, AUTO_DEFRAG))
1281                seq_puts(seq, ",autodefrag");
1282        if (btrfs_test_opt(info, INODE_MAP_CACHE))
1283                seq_puts(seq, ",inode_cache");
1284        if (btrfs_test_opt(info, SKIP_BALANCE))
1285                seq_puts(seq, ",skip_balance");
1286#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1287        if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1288                seq_puts(seq, ",check_int_data");
1289        else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1290                seq_puts(seq, ",check_int");
1291        if (info->check_integrity_print_mask)
1292                seq_printf(seq, ",check_int_print_mask=%d",
1293                                info->check_integrity_print_mask);
1294#endif
1295        if (info->metadata_ratio)
1296                seq_printf(seq, ",metadata_ratio=%d",
1297                                info->metadata_ratio);
1298        if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
1299                seq_puts(seq, ",fatal_errors=panic");
1300        if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1301                seq_printf(seq, ",commit=%d", info->commit_interval);
1302#ifdef CONFIG_BTRFS_DEBUG
1303        if (btrfs_test_opt(info, FRAGMENT_DATA))
1304                seq_puts(seq, ",fragment=data");
1305        if (btrfs_test_opt(info, FRAGMENT_METADATA))
1306                seq_puts(seq, ",fragment=metadata");
1307#endif
1308        seq_printf(seq, ",subvolid=%llu",
1309                  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1310        seq_puts(seq, ",subvol=");
1311        seq_dentry(seq, dentry, " \t\n\\");
1312        return 0;
1313}
1314
1315static int btrfs_test_super(struct super_block *s, void *data)
1316{
1317        struct btrfs_fs_info *p = data;
1318        struct btrfs_fs_info *fs_info = btrfs_sb(s);
1319
1320        return fs_info->fs_devices == p->fs_devices;
1321}
1322
1323static int btrfs_set_super(struct super_block *s, void *data)
1324{
1325        int err = set_anon_super(s, data);
1326        if (!err)
1327                s->s_fs_info = data;
1328        return err;
1329}
1330
1331/*
1332 * subvolumes are identified by ino 256
1333 */
1334static inline int is_subvolume_inode(struct inode *inode)
1335{
1336        if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
1337                return 1;
1338        return 0;
1339}
1340
1341/*
1342 * This will add subvolid=0 to the argument string while removing any subvol=
1343 * and subvolid= arguments to make sure we get the top-level root for path
1344 * walking to the subvol we want.
1345 */
1346static char *setup_root_args(char *args)
1347{
1348        char *buf, *dst, *sep;
1349
1350        if (!args)
1351                return kstrdup("subvolid=0", GFP_KERNEL);
1352
1353        /* The worst case is that we add ",subvolid=0" to the end. */
1354        buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
1355                        GFP_KERNEL);
1356        if (!buf)
1357                return NULL;
1358
1359        while (1) {
1360                sep = strchrnul(args, ',');
1361                if (!strstarts(args, "subvol=") &&
1362                    !strstarts(args, "subvolid=")) {
1363                        memcpy(dst, args, sep - args);
1364                        dst += sep - args;
1365                        *dst++ = ',';
1366                }
1367                if (*sep)
1368                        args = sep + 1;
1369                else
1370                        break;
1371        }
1372        strcpy(dst, "subvolid=0");
1373
1374        return buf;
1375}
1376
1377static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1378                                   int flags, const char *device_name,
1379                                   char *data)
1380{
1381        struct dentry *root;
1382        struct vfsmount *mnt = NULL;
1383        char *newargs;
1384        int ret;
1385
1386        newargs = setup_root_args(data);
1387        if (!newargs) {
1388                root = ERR_PTR(-ENOMEM);
1389                goto out;
1390        }
1391
1392        mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
1393        if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
1394                if (flags & MS_RDONLY) {
1395                        mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
1396                                             device_name, newargs);
1397                } else {
1398                        mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
1399                                             device_name, newargs);
1400                        if (IS_ERR(mnt)) {
1401                                root = ERR_CAST(mnt);
1402                                mnt = NULL;
1403                                goto out;
1404                        }
1405
1406                        down_write(&mnt->mnt_sb->s_umount);
1407                        ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
1408                        up_write(&mnt->mnt_sb->s_umount);
1409                        if (ret < 0) {
1410                                root = ERR_PTR(ret);
1411                                goto out;
1412                        }
1413                }
1414        }
1415        if (IS_ERR(mnt)) {
1416                root = ERR_CAST(mnt);
1417                mnt = NULL;
1418                goto out;
1419        }
1420
1421        if (!subvol_name) {
1422                if (!subvol_objectid) {
1423                        ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
1424                                                          &subvol_objectid);
1425                        if (ret) {
1426                                root = ERR_PTR(ret);
1427                                goto out;
1428                        }
1429                }
1430                subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
1431                                                            subvol_objectid);
1432                if (IS_ERR(subvol_name)) {
1433                        root = ERR_CAST(subvol_name);
1434                        subvol_name = NULL;
1435                        goto out;
1436                }
1437
1438        }
1439
1440        root = mount_subtree(mnt, subvol_name);
1441        /* mount_subtree() drops our reference on the vfsmount. */
1442        mnt = NULL;
1443
1444        if (!IS_ERR(root)) {
1445                struct super_block *s = root->d_sb;
1446                struct btrfs_fs_info *fs_info = btrfs_sb(s);
1447                struct inode *root_inode = d_inode(root);
1448                u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
1449
1450                ret = 0;
1451                if (!is_subvolume_inode(root_inode)) {
1452                        btrfs_err(fs_info, "'%s' is not a valid subvolume",
1453                               subvol_name);
1454                        ret = -EINVAL;
1455                }
1456                if (subvol_objectid && root_objectid != subvol_objectid) {
1457                        /*
1458                         * This will also catch a race condition where a
1459                         * subvolume which was passed by ID is renamed and
1460                         * another subvolume is renamed over the old location.
1461                         */
1462                        btrfs_err(fs_info,
1463                                  "subvol '%s' does not match subvolid %llu",
1464                                  subvol_name, subvol_objectid);
1465                        ret = -EINVAL;
1466                }
1467                if (ret) {
1468                        dput(root);
1469                        root = ERR_PTR(ret);
1470                        deactivate_locked_super(s);
1471                }
1472        }
1473
1474out:
1475        mntput(mnt);
1476        kfree(newargs);
1477        kfree(subvol_name);
1478        return root;
1479}
1480
1481static int parse_security_options(char *orig_opts,
1482                                  struct security_mnt_opts *sec_opts)
1483{
1484        char *secdata = NULL;
1485        int ret = 0;
1486
1487        secdata = alloc_secdata();
1488        if (!secdata)
1489                return -ENOMEM;
1490        ret = security_sb_copy_data(orig_opts, secdata);
1491        if (ret) {
1492                free_secdata(secdata);
1493                return ret;
1494        }
1495        ret = security_sb_parse_opts_str(secdata, sec_opts);
1496        free_secdata(secdata);
1497        return ret;
1498}
1499
1500static int setup_security_options(struct btrfs_fs_info *fs_info,
1501                                  struct super_block *sb,
1502                                  struct security_mnt_opts *sec_opts)
1503{
1504        int ret = 0;
1505
1506        /*
1507         * Call security_sb_set_mnt_opts() to check whether new sec_opts
1508         * is valid.
1509         */
1510        ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
1511        if (ret)
1512                return ret;
1513
1514#ifdef CONFIG_SECURITY
1515        if (!fs_info->security_opts.num_mnt_opts) {
1516                /* first time security setup, copy sec_opts to fs_info */
1517                memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
1518        } else {
1519                /*
1520                 * Since SELinux (the only one supporting security_mnt_opts)
1521                 * does NOT support changing context during remount/mount of
1522                 * the same sb, this must be the same or part of the same
1523                 * security options, just free it.
1524                 */
1525                security_free_mnt_opts(sec_opts);
1526        }
1527#endif
1528        return ret;
1529}
1530
1531/*
1532 * Find a superblock for the given device / mount point.
1533 *
1534 * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
1535 *        for multiple device setup.  Make sure to keep it in sync.
1536 */
1537static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1538                const char *device_name, void *data)
1539{
1540        struct block_device *bdev = NULL;
1541        struct super_block *s;
1542        struct btrfs_fs_devices *fs_devices = NULL;
1543        struct btrfs_fs_info *fs_info = NULL;
1544        struct security_mnt_opts new_sec_opts;
1545        fmode_t mode = FMODE_READ;
1546        char *subvol_name = NULL;
1547        u64 subvol_objectid = 0;
1548        int error = 0;
1549
1550        if (!(flags & MS_RDONLY))
1551                mode |= FMODE_WRITE;
1552
1553        error = btrfs_parse_early_options(data, mode, fs_type,
1554                                          &subvol_name, &subvol_objectid,
1555                                          &fs_devices);
1556        if (error) {
1557                kfree(subvol_name);
1558                return ERR_PTR(error);
1559        }
1560
1561        if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
1562                /* mount_subvol() will free subvol_name. */
1563                return mount_subvol(subvol_name, subvol_objectid, flags,
1564                                    device_name, data);
1565        }
1566
1567        security_init_mnt_opts(&new_sec_opts);
1568        if (data) {
1569                error = parse_security_options(data, &new_sec_opts);
1570                if (error)
1571                        return ERR_PTR(error);
1572        }
1573
1574        error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
1575        if (error)
1576                goto error_sec_opts;
1577
1578        /*
1579         * Setup a dummy root and fs_info for test/set super.  This is because
1580         * we don't actually fill this stuff out until open_ctree, but we need
1581         * it for searching for existing supers, so this lets us do that and
1582         * then open_ctree will properly initialize everything later.
1583         */
1584        fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1585        if (!fs_info) {
1586                error = -ENOMEM;
1587                goto error_sec_opts;
1588        }
1589
1590        fs_info->fs_devices = fs_devices;
1591
1592        fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1593        fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1594        security_init_mnt_opts(&fs_info->security_opts);
1595        if (!fs_info->super_copy || !fs_info->super_for_commit) {
1596                error = -ENOMEM;
1597                goto error_fs_info;
1598        }
1599
1600        error = btrfs_open_devices(fs_devices, mode, fs_type);
1601        if (error)
1602                goto error_fs_info;
1603
1604        if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
1605                error = -EACCES;
1606                goto error_close_devices;
1607        }
1608
1609        bdev = fs_devices->latest_bdev;
1610        s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
1611                 fs_info);
1612        if (IS_ERR(s)) {
1613                error = PTR_ERR(s);
1614                goto error_close_devices;
1615        }
1616
1617        if (s->s_root) {
1618                btrfs_close_devices(fs_devices);
1619                free_fs_info(fs_info);
1620                if ((flags ^ s->s_flags) & MS_RDONLY)
1621                        error = -EBUSY;
1622        } else {
1623                snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1624                btrfs_sb(s)->bdev_holder = fs_type;
1625                error = btrfs_fill_super(s, fs_devices, data);
1626        }
1627        if (error) {
1628                deactivate_locked_super(s);
1629                goto error_sec_opts;
1630        }
1631
1632        fs_info = btrfs_sb(s);
1633        error = setup_security_options(fs_info, s, &new_sec_opts);
1634        if (error) {
1635                deactivate_locked_super(s);
1636                goto error_sec_opts;
1637        }
1638
1639        return dget(s->s_root);
1640
1641error_close_devices:
1642        btrfs_close_devices(fs_devices);
1643error_fs_info:
1644        free_fs_info(fs_info);
1645error_sec_opts:
1646        security_free_mnt_opts(&new_sec_opts);
1647        return ERR_PTR(error);
1648}
1649
1650static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1651                                     int new_pool_size, int old_pool_size)
1652{
1653        if (new_pool_size == old_pool_size)
1654                return;
1655
1656        fs_info->thread_pool_size = new_pool_size;
1657
1658        btrfs_info(fs_info, "resize thread pool %d -> %d",
1659               old_pool_size, new_pool_size);
1660
1661        btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
1662        btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
1663        btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
1664        btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
1665        btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
1666        btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
1667        btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
1668                                new_pool_size);
1669        btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
1670        btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
1671        btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
1672        btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
1673        btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
1674                                new_pool_size);
1675}
1676
1677static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
1678{
1679        set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1680}
1681
1682static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1683                                       unsigned long old_opts, int flags)
1684{
1685        if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1686            (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1687             (flags & MS_RDONLY))) {
1688                /* wait for any defraggers to finish */
1689                wait_event(fs_info->transaction_wait,
1690                           (atomic_read(&fs_info->defrag_running) == 0));
1691                if (flags & MS_RDONLY)
1692                        sync_filesystem(fs_info->sb);
1693        }
1694}
1695
1696static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
1697                                         unsigned long old_opts)
1698{
1699        /*
1700         * We need to cleanup all defragable inodes if the autodefragment is
1701         * close or the filesystem is read only.
1702         */
1703        if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1704            (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
1705                btrfs_cleanup_defrag_inodes(fs_info);
1706        }
1707
1708        clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1709}
1710
1711static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1712{
1713        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1714        struct btrfs_root *root = fs_info->tree_root;
1715        unsigned old_flags = sb->s_flags;
1716        unsigned long old_opts = fs_info->mount_opt;
1717        unsigned long old_compress_type = fs_info->compress_type;
1718        u64 old_max_inline = fs_info->max_inline;
1719        int old_thread_pool_size = fs_info->thread_pool_size;
1720        unsigned int old_metadata_ratio = fs_info->metadata_ratio;
1721        int ret;
1722
1723        sync_filesystem(sb);
1724        btrfs_remount_prepare(fs_info);
1725
1726        if (data) {
1727                struct security_mnt_opts new_sec_opts;
1728
1729                security_init_mnt_opts(&new_sec_opts);
1730                ret = parse_security_options(data, &new_sec_opts);
1731                if (ret)
1732                        goto restore;
1733                ret = setup_security_options(fs_info, sb,
1734                                             &new_sec_opts);
1735                if (ret) {
1736                        security_free_mnt_opts(&new_sec_opts);
1737                        goto restore;
1738                }
1739        }
1740
1741        ret = btrfs_parse_options(fs_info, data, *flags);
1742        if (ret) {
1743                ret = -EINVAL;
1744                goto restore;
1745        }
1746
1747        btrfs_remount_begin(fs_info, old_opts, *flags);
1748        btrfs_resize_thread_pool(fs_info,
1749                fs_info->thread_pool_size, old_thread_pool_size);
1750
1751        if ((bool)(*flags & MS_RDONLY) == sb_rdonly(sb))
1752                goto out;
1753
1754        if (*flags & MS_RDONLY) {
1755                /*
1756                 * this also happens on 'umount -rf' or on shutdown, when
1757                 * the filesystem is busy.
1758                 */
1759                cancel_work_sync(&fs_info->async_reclaim_work);
1760
1761                /* wait for the uuid_scan task to finish */
1762                down(&fs_info->uuid_tree_rescan_sem);
1763                /* avoid complains from lockdep et al. */
1764                up(&fs_info->uuid_tree_rescan_sem);
1765
1766                sb->s_flags |= MS_RDONLY;
1767
1768                /*
1769                 * Setting MS_RDONLY will put the cleaner thread to
1770                 * sleep at the next loop if it's already active.
1771                 * If it's already asleep, we'll leave unused block
1772                 * groups on disk until we're mounted read-write again
1773                 * unless we clean them up here.
1774                 */
1775                btrfs_delete_unused_bgs(fs_info);
1776
1777                btrfs_dev_replace_suspend_for_unmount(fs_info);
1778                btrfs_scrub_cancel(fs_info);
1779                btrfs_pause_balance(fs_info);
1780
1781                ret = btrfs_commit_super(fs_info);
1782                if (ret)
1783                        goto restore;
1784        } else {
1785                if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
1786                        btrfs_err(fs_info,
1787                                "Remounting read-write after error is not allowed");
1788                        ret = -EINVAL;
1789                        goto restore;
1790                }
1791                if (fs_info->fs_devices->rw_devices == 0) {
1792                        ret = -EACCES;
1793                        goto restore;
1794                }
1795
1796                if (!btrfs_check_rw_degradable(fs_info)) {
1797                        btrfs_warn(fs_info,
1798                                "too many missing devices, writeable remount is not allowed");
1799                        ret = -EACCES;
1800                        goto restore;
1801                }
1802
1803                if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1804                        ret = -EINVAL;
1805                        goto restore;
1806                }
1807
1808                ret = btrfs_cleanup_fs_roots(fs_info);
1809                if (ret)
1810                        goto restore;
1811
1812                /* recover relocation */
1813                mutex_lock(&fs_info->cleaner_mutex);
1814                ret = btrfs_recover_relocation(root);
1815                mutex_unlock(&fs_info->cleaner_mutex);
1816                if (ret)
1817                        goto restore;
1818
1819                ret = btrfs_resume_balance_async(fs_info);
1820                if (ret)
1821                        goto restore;
1822
1823                ret = btrfs_resume_dev_replace_async(fs_info);
1824                if (ret) {
1825                        btrfs_warn(fs_info, "failed to resume dev_replace");
1826                        goto restore;
1827                }
1828
1829                btrfs_qgroup_rescan_resume(fs_info);
1830
1831                if (!fs_info->uuid_root) {
1832                        btrfs_info(fs_info, "creating UUID tree");
1833                        ret = btrfs_create_uuid_tree(fs_info);
1834                        if (ret) {
1835                                btrfs_warn(fs_info,
1836                                           "failed to create the UUID tree %d",
1837                                           ret);
1838                                goto restore;
1839                        }
1840                }
1841                sb->s_flags &= ~MS_RDONLY;
1842
1843                set_bit(BTRFS_FS_OPEN, &fs_info->flags);
1844        }
1845out:
1846        wake_up_process(fs_info->transaction_kthread);
1847        btrfs_remount_cleanup(fs_info, old_opts);
1848        return 0;
1849
1850restore:
1851        /* We've hit an error - don't reset MS_RDONLY */
1852        if (sb_rdonly(sb))
1853                old_flags |= MS_RDONLY;
1854        sb->s_flags = old_flags;
1855        fs_info->mount_opt = old_opts;
1856        fs_info->compress_type = old_compress_type;
1857        fs_info->max_inline = old_max_inline;
1858        btrfs_resize_thread_pool(fs_info,
1859                old_thread_pool_size, fs_info->thread_pool_size);
1860        fs_info->metadata_ratio = old_metadata_ratio;
1861        btrfs_remount_cleanup(fs_info, old_opts);
1862        return ret;
1863}
1864
1865/* Used to sort the devices by max_avail(descending sort) */
1866static int btrfs_cmp_device_free_bytes(const void *dev_info1,
1867                                       const void *dev_info2)
1868{
1869        if (((struct btrfs_device_info *)dev_info1)->max_avail >
1870            ((struct btrfs_device_info *)dev_info2)->max_avail)
1871                return -1;
1872        else if (((struct btrfs_device_info *)dev_info1)->max_avail <
1873                 ((struct btrfs_device_info *)dev_info2)->max_avail)
1874                return 1;
1875        else
1876        return 0;
1877}
1878
1879/*
1880 * sort the devices by max_avail, in which max free extent size of each device
1881 * is stored.(Descending Sort)
1882 */
1883static inline void btrfs_descending_sort_devices(
1884                                        struct btrfs_device_info *devices,
1885                                        size_t nr_devices)
1886{
1887        sort(devices, nr_devices, sizeof(struct btrfs_device_info),
1888             btrfs_cmp_device_free_bytes, NULL);
1889}
1890
1891/*
1892 * The helper to calc the free space on the devices that can be used to store
1893 * file data.
1894 */
1895static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1896                                       u64 *free_bytes)
1897{
1898        struct btrfs_device_info *devices_info;
1899        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
1900        struct btrfs_device *device;
1901        u64 skip_space;
1902        u64 type;
1903        u64 avail_space;
1904        u64 min_stripe_size;
1905        int min_stripes = 1, num_stripes = 1;
1906        int i = 0, nr_devices;
1907
1908        /*
1909         * We aren't under the device list lock, so this is racy-ish, but good
1910         * enough for our purposes.
1911         */
1912        nr_devices = fs_info->fs_devices->open_devices;
1913        if (!nr_devices) {
1914                smp_mb();
1915                nr_devices = fs_info->fs_devices->open_devices;
1916                ASSERT(nr_devices);
1917                if (!nr_devices) {
1918                        *free_bytes = 0;
1919                        return 0;
1920                }
1921        }
1922
1923        devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1924                               GFP_KERNEL);
1925        if (!devices_info)
1926                return -ENOMEM;
1927
1928        /* calc min stripe number for data space allocation */
1929        type = btrfs_data_alloc_profile(fs_info);
1930        if (type & BTRFS_BLOCK_GROUP_RAID0) {
1931                min_stripes = 2;
1932                num_stripes = nr_devices;
1933        } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1934                min_stripes = 2;
1935                num_stripes = 2;
1936        } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1937                min_stripes = 4;
1938                num_stripes = 4;
1939        }
1940
1941        if (type & BTRFS_BLOCK_GROUP_DUP)
1942                min_stripe_size = 2 * BTRFS_STRIPE_LEN;
1943        else
1944                min_stripe_size = BTRFS_STRIPE_LEN;
1945
1946        rcu_read_lock();
1947        list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1948                if (!device->in_fs_metadata || !device->bdev ||
1949                    device->is_tgtdev_for_dev_replace)
1950                        continue;
1951
1952                if (i >= nr_devices)
1953                        break;
1954
1955                avail_space = device->total_bytes - device->bytes_used;
1956
1957                /* align with stripe_len */
1958                avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
1959                avail_space *= BTRFS_STRIPE_LEN;
1960
1961                /*
1962                 * In order to avoid overwriting the superblock on the drive,
1963                 * btrfs starts at an offset of at least 1MB when doing chunk
1964                 * allocation.
1965                 */
1966                skip_space = SZ_1M;
1967
1968                /*
1969                 * we can use the free space in [0, skip_space - 1], subtract
1970                 * it from the total.
1971                 */
1972                if (avail_space && avail_space >= skip_space)
1973                        avail_space -= skip_space;
1974                else
1975                        avail_space = 0;
1976
1977                if (avail_space < min_stripe_size)
1978                        continue;
1979
1980                devices_info[i].dev = device;
1981                devices_info[i].max_avail = avail_space;
1982
1983                i++;
1984        }
1985        rcu_read_unlock();
1986
1987        nr_devices = i;
1988
1989        btrfs_descending_sort_devices(devices_info, nr_devices);
1990
1991        i = nr_devices - 1;
1992        avail_space = 0;
1993        while (nr_devices >= min_stripes) {
1994                if (num_stripes > nr_devices)
1995                        num_stripes = nr_devices;
1996
1997                if (devices_info[i].max_avail >= min_stripe_size) {
1998                        int j;
1999                        u64 alloc_size;
2000
2001                        avail_space += devices_info[i].max_avail * num_stripes;
2002                        alloc_size = devices_info[i].max_avail;
2003                        for (j = i + 1 - num_stripes; j <= i; j++)
2004                                devices_info[j].max_avail -= alloc_size;
2005                }
2006                i--;
2007                nr_devices--;
2008        }
2009
2010        kfree(devices_info);
2011        *free_bytes = avail_space;
2012        return 0;
2013}
2014
2015/*
2016 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
2017 *
2018 * If there's a redundant raid level at DATA block groups, use the respective
2019 * multiplier to scale the sizes.
2020 *
2021 * Unused device space usage is based on simulating the chunk allocator
2022 * algorithm that respects the device sizes and order of allocations.  This is
2023 * a close approximation of the actual use but there are other factors that may
2024 * change the result (like a new metadata chunk).
2025 *
2026 * If metadata is exhausted, f_bavail will be 0.
2027 */
2028static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2029{
2030        struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
2031        struct btrfs_super_block *disk_super = fs_info->super_copy;
2032        struct list_head *head = &fs_info->space_info;
2033        struct btrfs_space_info *found;
2034        u64 total_used = 0;
2035        u64 total_free_data = 0;
2036        u64 total_free_meta = 0;
2037        int bits = dentry->d_sb->s_blocksize_bits;
2038        __be32 *fsid = (__be32 *)fs_info->fsid;
2039        unsigned factor = 1;
2040        struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2041        int ret;
2042        u64 thresh = 0;
2043        int mixed = 0;
2044
2045        rcu_read_lock();
2046        list_for_each_entry_rcu(found, head, list) {
2047                if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2048                        int i;
2049
2050                        total_free_data += found->disk_total - found->disk_used;
2051                        total_free_data -=
2052                                btrfs_account_ro_block_groups_free_space(found);
2053
2054                        for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2055                                if (!list_empty(&found->block_groups[i])) {
2056                                        switch (i) {
2057                                        case BTRFS_RAID_DUP:
2058                                        case BTRFS_RAID_RAID1:
2059                                        case BTRFS_RAID_RAID10:
2060                                                factor = 2;
2061                                        }
2062                                }
2063                        }
2064                }
2065
2066                /*
2067                 * Metadata in mixed block goup profiles are accounted in data
2068                 */
2069                if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
2070                        if (found->flags & BTRFS_BLOCK_GROUP_DATA)
2071                                mixed = 1;
2072                        else
2073                                total_free_meta += found->disk_total -
2074                                        found->disk_used;
2075                }
2076
2077                total_used += found->disk_used;
2078        }
2079
2080        rcu_read_unlock();
2081
2082        buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
2083        buf->f_blocks >>= bits;
2084        buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
2085
2086        /* Account global block reserve as used, it's in logical size already */
2087        spin_lock(&block_rsv->lock);
2088        /* Mixed block groups accounting is not byte-accurate, avoid overflow */
2089        if (buf->f_bfree >= block_rsv->size >> bits)
2090                buf->f_bfree -= block_rsv->size >> bits;
2091        else
2092                buf->f_bfree = 0;
2093        spin_unlock(&block_rsv->lock);
2094
2095        buf->f_bavail = div_u64(total_free_data, factor);
2096        ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2097        if (ret)
2098                return ret;
2099        buf->f_bavail += div_u64(total_free_data, factor);
2100        buf->f_bavail = buf->f_bavail >> bits;
2101
2102        /*
2103         * We calculate the remaining metadata space minus global reserve. If
2104         * this is (supposedly) smaller than zero, there's no space. But this
2105         * does not hold in practice, the exhausted state happens where's still
2106         * some positive delta. So we apply some guesswork and compare the
2107         * delta to a 4M threshold.  (Practically observed delta was ~2M.)
2108         *
2109         * We probably cannot calculate the exact threshold value because this
2110         * depends on the internal reservations requested by various
2111         * operations, so some operations that consume a few metadata will
2112         * succeed even if the Avail is zero. But this is better than the other
2113         * way around.
2114         */
2115        thresh = 4 * 1024 * 1024;
2116
2117        if (!mixed && total_free_meta - thresh < block_rsv->size)
2118                buf->f_bavail = 0;
2119
2120        buf->f_type = BTRFS_SUPER_MAGIC;
2121        buf->f_bsize = dentry->d_sb->s_blocksize;
2122        buf->f_namelen = BTRFS_NAME_LEN;
2123
2124        /* We treat it as constant endianness (it doesn't matter _which_)
2125           because we want the fsid to come out the same whether mounted
2126           on a big-endian or little-endian host */
2127        buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
2128        buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2129        /* Mask in the root object ID too, to disambiguate subvols */
2130        buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
2131        buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
2132
2133        return 0;
2134}
2135
/*
 * ->kill_sb callback: tear down the superblock with kill_anon_super() and
 * then free its btrfs_fs_info.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Look up fs_info before kill_anon_super() runs; it is freed after. */
	struct btrfs_fs_info *doomed = btrfs_sb(sb);

	kill_anon_super(sb);
	free_fs_info(doomed);
}
2142
2143static struct file_system_type btrfs_fs_type = {
2144        .owner          = THIS_MODULE,
2145        .name           = "btrfs",
2146        .mount          = btrfs_mount,
2147        .kill_sb        = btrfs_kill_super,
2148        .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2149};
2150MODULE_ALIAS_FS("btrfs");
2151
2152static int btrfs_control_open(struct inode *inode, struct file *file)
2153{
2154        /*
2155         * The control file's private_data is used to hold the
2156         * transaction when it is started and is used to keep
2157         * track of whether a transaction is already in progress.
2158         */
2159        file->private_data = NULL;
2160        return 0;
2161}
2162
2163/*
2164 * used by btrfsctl to scan devices when no FS is mounted
2165 */
2166static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2167                                unsigned long arg)
2168{
2169        struct btrfs_ioctl_vol_args *vol;
2170        struct btrfs_fs_devices *fs_devices;
2171        int ret = -ENOTTY;
2172
2173        if (!capable(CAP_SYS_ADMIN))
2174                return -EPERM;
2175
2176        vol = memdup_user((void __user *)arg, sizeof(*vol));
2177        if (IS_ERR(vol))
2178                return PTR_ERR(vol);
2179
2180        switch (cmd) {
2181        case BTRFS_IOC_SCAN_DEV:
2182                ret = btrfs_scan_one_device(vol->name, FMODE_READ,
2183                                            &btrfs_fs_type, &fs_devices);
2184                break;
2185        case BTRFS_IOC_DEVICES_READY:
2186                ret = btrfs_scan_one_device(vol->name, FMODE_READ,
2187                                            &btrfs_fs_type, &fs_devices);
2188                if (ret)
2189                        break;
2190                ret = !(fs_devices->num_devices == fs_devices->total_devices);
2191                break;
2192        case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2193                ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2194                break;
2195        }
2196
2197        kfree(vol);
2198        return ret;
2199}
2200
2201static int btrfs_freeze(struct super_block *sb)
2202{
2203        struct btrfs_trans_handle *trans;
2204        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2205        struct btrfs_root *root = fs_info->tree_root;
2206
2207        set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2208        /*
2209         * We don't need a barrier here, we'll wait for any transaction that
2210         * could be in progress on other threads (and do delayed iputs that
2211         * we want to avoid on a frozen filesystem), or do the commit
2212         * ourselves.
2213         */
2214        trans = btrfs_attach_transaction_barrier(root);
2215        if (IS_ERR(trans)) {
2216                /* no transaction, don't bother */
2217                if (PTR_ERR(trans) == -ENOENT)
2218                        return 0;
2219                return PTR_ERR(trans);
2220        }
2221        return btrfs_commit_transaction(trans);
2222}
2223
2224static int btrfs_unfreeze(struct super_block *sb)
2225{
2226        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2227
2228        clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2229        return 0;
2230}
2231
2232static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2233{
2234        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
2235        struct btrfs_fs_devices *cur_devices;
2236        struct btrfs_device *dev, *first_dev = NULL;
2237        struct list_head *head;
2238        struct rcu_string *name;
2239
2240        mutex_lock(&fs_info->fs_devices->device_list_mutex);
2241        cur_devices = fs_info->fs_devices;
2242        while (cur_devices) {
2243                head = &cur_devices->devices;
2244                list_for_each_entry(dev, head, dev_list) {
2245                        if (dev->missing)
2246                                continue;
2247                        if (!dev->name)
2248                                continue;
2249                        if (!first_dev || dev->devid < first_dev->devid)
2250                                first_dev = dev;
2251                }
2252                cur_devices = cur_devices->seed;
2253        }
2254
2255        if (first_dev) {
2256                rcu_read_lock();
2257                name = rcu_dereference(first_dev->name);
2258                seq_escape(m, name->str, " \t\n\\");
2259                rcu_read_unlock();
2260        } else {
2261                WARN_ON(1);
2262        }
2263        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2264        return 0;
2265}
2266
2267static const struct super_operations btrfs_super_ops = {
2268        .drop_inode     = btrfs_drop_inode,
2269        .evict_inode    = btrfs_evict_inode,
2270        .put_super      = btrfs_put_super,
2271        .sync_fs        = btrfs_sync_fs,
2272        .show_options   = btrfs_show_options,
2273        .show_devname   = btrfs_show_devname,
2274        .write_inode    = btrfs_write_inode,
2275        .alloc_inode    = btrfs_alloc_inode,
2276        .destroy_inode  = btrfs_destroy_inode,
2277        .statfs         = btrfs_statfs,
2278        .remount_fs     = btrfs_remount,
2279        .freeze_fs      = btrfs_freeze,
2280        .unfreeze_fs    = btrfs_unfreeze,
2281};
2282
2283static const struct file_operations btrfs_ctl_fops = {
2284        .open = btrfs_control_open,
2285        .unlocked_ioctl  = btrfs_control_ioctl,
2286        .compat_ioctl = btrfs_control_ioctl,
2287        .owner   = THIS_MODULE,
2288        .llseek = noop_llseek,
2289};
2290
2291static struct miscdevice btrfs_misc = {
2292        .minor          = BTRFS_MINOR,
2293        .name           = "btrfs-control",
2294        .fops           = &btrfs_ctl_fops
2295};
2296
2297MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
2298MODULE_ALIAS("devname:btrfs-control");
2299
2300static int btrfs_interface_init(void)
2301{
2302        return misc_register(&btrfs_misc);
2303}
2304
2305static void btrfs_interface_exit(void)
2306{
2307        misc_deregister(&btrfs_misc);
2308}
2309
/*
 * Log a one-line banner naming the crc32c implementation in use, followed
 * by one ", <feature>=on" entry for each of the optional build-time
 * features (debug, assert, integrity checker) that is configured in.
 */
static void btrfs_print_mod_info(void)
{
	const char *crc_impl = btrfs_crc32c_impl();

	/* The format string is assembled by adjacent-literal concatenation. */
	pr_info("Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG
		", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
		", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		", integrity-checker=on"
#endif
		"\n",
		crc_impl);
}
2325
2326static int __init init_btrfs_fs(void)
2327{
2328        int err;
2329
2330        err = btrfs_hash_init();
2331        if (err)
2332                return err;
2333
2334        btrfs_props_init();
2335
2336        err = btrfs_init_sysfs();
2337        if (err)
2338                goto free_hash;
2339
2340        btrfs_init_compress();
2341
2342        err = btrfs_init_cachep();
2343        if (err)
2344                goto free_compress;
2345
2346        err = extent_io_init();
2347        if (err)
2348                goto free_cachep;
2349
2350        err = extent_map_init();
2351        if (err)
2352                goto free_extent_io;
2353
2354        err = ordered_data_init();
2355        if (err)
2356                goto free_extent_map;
2357
2358        err = btrfs_delayed_inode_init();
2359        if (err)
2360                goto free_ordered_data;
2361
2362        err = btrfs_auto_defrag_init();
2363        if (err)
2364                goto free_delayed_inode;
2365
2366        err = btrfs_delayed_ref_init();
2367        if (err)
2368                goto free_auto_defrag;
2369
2370        err = btrfs_prelim_ref_init();
2371        if (err)
2372                goto free_delayed_ref;
2373
2374        err = btrfs_end_io_wq_init();
2375        if (err)
2376                goto free_prelim_ref;
2377
2378        err = btrfs_interface_init();
2379        if (err)
2380                goto free_end_io_wq;
2381
2382        btrfs_init_lockdep();
2383
2384        btrfs_print_mod_info();
2385
2386        err = btrfs_run_sanity_tests();
2387        if (err)
2388                goto unregister_ioctl;
2389
2390        err = register_filesystem(&btrfs_fs_type);
2391        if (err)
2392                goto unregister_ioctl;
2393
2394        return 0;
2395
2396unregister_ioctl:
2397        btrfs_interface_exit();
2398free_end_io_wq:
2399        btrfs_end_io_wq_exit();
2400free_prelim_ref:
2401        btrfs_prelim_ref_exit();
2402free_delayed_ref:
2403        btrfs_delayed_ref_exit();
2404free_auto_defrag:
2405        btrfs_auto_defrag_exit();
2406free_delayed_inode:
2407        btrfs_delayed_inode_exit();
2408free_ordered_data:
2409        ordered_data_exit();
2410free_extent_map:
2411        extent_map_exit();
2412free_extent_io:
2413        extent_io_exit();
2414free_cachep:
2415        btrfs_destroy_cachep();
2416free_compress:
2417        btrfs_exit_compress();
2418        btrfs_exit_sysfs();
2419free_hash:
2420        btrfs_hash_exit();
2421        return err;
2422}
2423
2424static void __exit exit_btrfs_fs(void)
2425{
2426        btrfs_destroy_cachep();
2427        btrfs_delayed_ref_exit();
2428        btrfs_auto_defrag_exit();
2429        btrfs_delayed_inode_exit();
2430        btrfs_prelim_ref_exit();
2431        ordered_data_exit();
2432        extent_map_exit();
2433        extent_io_exit();
2434        btrfs_interface_exit();
2435        btrfs_end_io_wq_exit();
2436        unregister_filesystem(&btrfs_fs_type);
2437        btrfs_exit_sysfs();
2438        btrfs_cleanup_fs_uuids();
2439        btrfs_exit_compress();
2440        btrfs_hash_exit();
2441}
2442
2443late_initcall(init_btrfs_fs);
2444module_exit(exit_btrfs_fs)
2445
2446MODULE_LICENSE("GPL");
2447