/* linux/fs/xfs/xfs_super.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6
   7#include "xfs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_sb.h"
  13#include "xfs_mount.h"
  14#include "xfs_inode.h"
  15#include "xfs_btree.h"
  16#include "xfs_bmap.h"
  17#include "xfs_alloc.h"
  18#include "xfs_fsops.h"
  19#include "xfs_trans.h"
  20#include "xfs_buf_item.h"
  21#include "xfs_log.h"
  22#include "xfs_log_priv.h"
  23#include "xfs_dir2.h"
  24#include "xfs_extfree_item.h"
  25#include "xfs_mru_cache.h"
  26#include "xfs_inode_item.h"
  27#include "xfs_icache.h"
  28#include "xfs_trace.h"
  29#include "xfs_icreate_item.h"
  30#include "xfs_filestream.h"
  31#include "xfs_quota.h"
  32#include "xfs_sysfs.h"
  33#include "xfs_ondisk.h"
  34#include "xfs_rmap_item.h"
  35#include "xfs_refcount_item.h"
  36#include "xfs_bmap_item.h"
  37#include "xfs_reflink.h"
  38
  39#include <linux/magic.h>
  40#include <linux/parser.h>
  41
static const struct super_operations xfs_super_operations;	/* defined later in this file */
struct bio_set xfs_ioend_bioset;	/* bio pool for ioend completions */

static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
#endif
  49
  50/*
  51 * Table driven mount option parser.
  52 */
  53enum {
  54        Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
  55        Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
  56        Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
  57        Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
  58        Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
  59        Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
  60        Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
  61        Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
  62        Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
  63};
  64
/*
 * Pattern table consumed by match_token(); %u/%s captures land in the
 * substring_t args[] array handed to xfs_parseargs()'s switch.
 */
static const match_table_t tokens = {
	{Opt_logbufs,	"logbufs=%u"},	/* number of XFS log buffers */
	{Opt_logbsize,	"logbsize=%s"},	/* size of XFS log buffers */
	{Opt_logdev,	"logdev=%s"},	/* log device */
	{Opt_rtdev,	"rtdev=%s"},	/* realtime I/O device */
	{Opt_biosize,	"biosize=%u"},	/* log2 of preferred buffered io size */
	{Opt_wsync,	"wsync"},	/* safe-mode nfs compatible mount */
	{Opt_noalign,	"noalign"},	/* turn off stripe alignment */
	{Opt_swalloc,	"swalloc"},	/* turn on stripe width allocation */
	{Opt_sunit,	"sunit=%u"},	/* data volume stripe unit */
	{Opt_swidth,	"swidth=%u"},	/* data volume stripe width */
	{Opt_nouuid,	"nouuid"},	/* ignore filesystem UUID */
	{Opt_grpid,	"grpid"},	/* group-ID from parent directory */
	{Opt_nogrpid,	"nogrpid"},	/* group-ID from current process */
	{Opt_bsdgroups,	"bsdgroups"},	/* group-ID from parent directory */
	{Opt_sysvgroups,"sysvgroups"},	/* group-ID from current process */
	{Opt_allocsize,	"allocsize=%s"},/* preferred allocation size */
	{Opt_norecovery,"norecovery"},	/* don't run XFS recovery */
	{Opt_inode64,	"inode64"},	/* inodes can be allocated anywhere */
	{Opt_inode32,	"inode32"},	/* inode allocation limited to
					 * XFS_MAXINUMBER_32 */
	{Opt_ikeep,	"ikeep"},	/* do not free empty inode clusters */
	{Opt_noikeep,	"noikeep"},	/* free empty inode clusters */
	{Opt_largeio,	"largeio"},	/* report large I/O sizes in stat() */
	{Opt_nolargeio,	"nolargeio"},	/* do not report large I/O sizes
					 * in stat(). */
	{Opt_attr2,	"attr2"},	/* do use attr2 attribute format */
	{Opt_noattr2,	"noattr2"},	/* do not use attr2 attribute format */
	{Opt_filestreams,"filestreams"},/* use filestreams allocator */
	{Opt_quota,	"quota"},	/* disk quotas (user) */
	{Opt_noquota,	"noquota"},	/* no quotas */
	{Opt_usrquota,	"usrquota"},	/* user quota enabled */
	{Opt_grpquota,	"grpquota"},	/* group quota enabled */
	{Opt_prjquota,	"prjquota"},	/* project quota enabled */
	{Opt_uquota,	"uquota"},	/* user quota (IRIX variant) */
	{Opt_gquota,	"gquota"},	/* group quota (IRIX variant) */
	{Opt_pquota,	"pquota"},	/* project quota (IRIX variant) */
	{Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
	{Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
	{Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
	{Opt_qnoenforce, "qnoenforce"},	/* same as uqnoenforce */
	{Opt_discard,	"discard"},	/* Discard unused blocks */
	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
	{Opt_err,	NULL},		/* sentinel: must terminate the table */
};
 111
 112
 113STATIC int
 114suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
 115{
 116        int     last, shift_left_factor = 0, _res;
 117        char    *value;
 118        int     ret = 0;
 119
 120        value = match_strdup(s);
 121        if (!value)
 122                return -ENOMEM;
 123
 124        last = strlen(value) - 1;
 125        if (value[last] == 'K' || value[last] == 'k') {
 126                shift_left_factor = 10;
 127                value[last] = '\0';
 128        }
 129        if (value[last] == 'M' || value[last] == 'm') {
 130                shift_left_factor = 20;
 131                value[last] = '\0';
 132        }
 133        if (value[last] == 'G' || value[last] == 'g') {
 134                shift_left_factor = 30;
 135                value[last] = '\0';
 136        }
 137
 138        if (kstrtoint(value, base, &_res))
 139                ret = -EINVAL;
 140        kfree(value);
 141        *res = _res << shift_left_factor;
 142        return ret;
 143}
 144
 145/*
 146 * This function fills in xfs_mount_t fields based on mount args.
 147 * Note: the superblock has _not_ yet been read in.
 148 *
 149 * Note that this function leaks the various device name allocations on
 150 * failure.  The caller takes care of them.
 151 *
 152 * *sb is const because this is also used to test options on the remount
 153 * path, and we don't want this to have any side effects at remount time.
 154 * Today this function does not change *sb, but just to future-proof...
 155 */
 156STATIC int
 157xfs_parseargs(
 158        struct xfs_mount        *mp,
 159        char                    *options)
 160{
 161        const struct super_block *sb = mp->m_super;
 162        char                    *p;
 163        substring_t             args[MAX_OPT_ARGS];
 164        int                     dsunit = 0;
 165        int                     dswidth = 0;
 166        int                     iosize = 0;
 167        uint8_t                 iosizelog = 0;
 168
 169        /*
 170         * set up the mount name first so all the errors will refer to the
 171         * correct device.
 172         */
 173        mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
 174        if (!mp->m_fsname)
 175                return -ENOMEM;
 176        mp->m_fsname_len = strlen(mp->m_fsname) + 1;
 177
 178        /*
 179         * Copy binary VFS mount flags we are interested in.
 180         */
 181        if (sb_rdonly(sb))
 182                mp->m_flags |= XFS_MOUNT_RDONLY;
 183        if (sb->s_flags & SB_DIRSYNC)
 184                mp->m_flags |= XFS_MOUNT_DIRSYNC;
 185        if (sb->s_flags & SB_SYNCHRONOUS)
 186                mp->m_flags |= XFS_MOUNT_WSYNC;
 187
 188        /*
 189         * Set some default flags that could be cleared by the mount option
 190         * parsing.
 191         */
 192        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 193
 194        /*
 195         * These can be overridden by the mount option parsing.
 196         */
 197        mp->m_logbufs = -1;
 198        mp->m_logbsize = -1;
 199
 200        if (!options)
 201                goto done;
 202
 203        while ((p = strsep(&options, ",")) != NULL) {
 204                int             token;
 205
 206                if (!*p)
 207                        continue;
 208
 209                token = match_token(p, tokens, args);
 210                switch (token) {
 211                case Opt_logbufs:
 212                        if (match_int(args, &mp->m_logbufs))
 213                                return -EINVAL;
 214                        break;
 215                case Opt_logbsize:
 216                        if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
 217                                return -EINVAL;
 218                        break;
 219                case Opt_logdev:
 220                        kfree(mp->m_logname);
 221                        mp->m_logname = match_strdup(args);
 222                        if (!mp->m_logname)
 223                                return -ENOMEM;
 224                        break;
 225                case Opt_rtdev:
 226                        kfree(mp->m_rtname);
 227                        mp->m_rtname = match_strdup(args);
 228                        if (!mp->m_rtname)
 229                                return -ENOMEM;
 230                        break;
 231                case Opt_allocsize:
 232                case Opt_biosize:
 233                        if (suffix_kstrtoint(args, 10, &iosize))
 234                                return -EINVAL;
 235                        iosizelog = ffs(iosize) - 1;
 236                        break;
 237                case Opt_grpid:
 238                case Opt_bsdgroups:
 239                        mp->m_flags |= XFS_MOUNT_GRPID;
 240                        break;
 241                case Opt_nogrpid:
 242                case Opt_sysvgroups:
 243                        mp->m_flags &= ~XFS_MOUNT_GRPID;
 244                        break;
 245                case Opt_wsync:
 246                        mp->m_flags |= XFS_MOUNT_WSYNC;
 247                        break;
 248                case Opt_norecovery:
 249                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
 250                        break;
 251                case Opt_noalign:
 252                        mp->m_flags |= XFS_MOUNT_NOALIGN;
 253                        break;
 254                case Opt_swalloc:
 255                        mp->m_flags |= XFS_MOUNT_SWALLOC;
 256                        break;
 257                case Opt_sunit:
 258                        if (match_int(args, &dsunit))
 259                                return -EINVAL;
 260                        break;
 261                case Opt_swidth:
 262                        if (match_int(args, &dswidth))
 263                                return -EINVAL;
 264                        break;
 265                case Opt_inode32:
 266                        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 267                        break;
 268                case Opt_inode64:
 269                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 270                        break;
 271                case Opt_nouuid:
 272                        mp->m_flags |= XFS_MOUNT_NOUUID;
 273                        break;
 274                case Opt_ikeep:
 275                        mp->m_flags |= XFS_MOUNT_IKEEP;
 276                        break;
 277                case Opt_noikeep:
 278                        mp->m_flags &= ~XFS_MOUNT_IKEEP;
 279                        break;
 280                case Opt_largeio:
 281                        mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
 282                        break;
 283                case Opt_nolargeio:
 284                        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 285                        break;
 286                case Opt_attr2:
 287                        mp->m_flags |= XFS_MOUNT_ATTR2;
 288                        break;
 289                case Opt_noattr2:
 290                        mp->m_flags &= ~XFS_MOUNT_ATTR2;
 291                        mp->m_flags |= XFS_MOUNT_NOATTR2;
 292                        break;
 293                case Opt_filestreams:
 294                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
 295                        break;
 296                case Opt_noquota:
 297                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
 298                        mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
 299                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
 300                        break;
 301                case Opt_quota:
 302                case Opt_uquota:
 303                case Opt_usrquota:
 304                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
 305                                         XFS_UQUOTA_ENFD);
 306                        break;
 307                case Opt_qnoenforce:
 308                case Opt_uqnoenforce:
 309                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
 310                        mp->m_qflags &= ~XFS_UQUOTA_ENFD;
 311                        break;
 312                case Opt_pquota:
 313                case Opt_prjquota:
 314                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
 315                                         XFS_PQUOTA_ENFD);
 316                        break;
 317                case Opt_pqnoenforce:
 318                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
 319                        mp->m_qflags &= ~XFS_PQUOTA_ENFD;
 320                        break;
 321                case Opt_gquota:
 322                case Opt_grpquota:
 323                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
 324                                         XFS_GQUOTA_ENFD);
 325                        break;
 326                case Opt_gqnoenforce:
 327                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 328                        mp->m_qflags &= ~XFS_GQUOTA_ENFD;
 329                        break;
 330                case Opt_discard:
 331                        mp->m_flags |= XFS_MOUNT_DISCARD;
 332                        break;
 333                case Opt_nodiscard:
 334                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
 335                        break;
 336#ifdef CONFIG_FS_DAX
 337                case Opt_dax:
 338                        mp->m_flags |= XFS_MOUNT_DAX;
 339                        break;
 340#endif
 341                default:
 342                        xfs_warn(mp, "unknown mount option [%s].", p);
 343                        return -EINVAL;
 344                }
 345        }
 346
 347        /*
 348         * no recovery flag requires a read-only mount
 349         */
 350        if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
 351            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
 352                xfs_warn(mp, "no-recovery mounts must be read-only.");
 353                return -EINVAL;
 354        }
 355
 356        if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
 357                xfs_warn(mp,
 358        "sunit and swidth options incompatible with the noalign option");
 359                return -EINVAL;
 360        }
 361
 362#ifndef CONFIG_XFS_QUOTA
 363        if (XFS_IS_QUOTA_RUNNING(mp)) {
 364                xfs_warn(mp, "quota support not available in this kernel.");
 365                return -EINVAL;
 366        }
 367#endif
 368
 369        if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
 370                xfs_warn(mp, "sunit and swidth must be specified together");
 371                return -EINVAL;
 372        }
 373
 374        if (dsunit && (dswidth % dsunit != 0)) {
 375                xfs_warn(mp,
 376        "stripe width (%d) must be a multiple of the stripe unit (%d)",
 377                        dswidth, dsunit);
 378                return -EINVAL;
 379        }
 380
 381done:
 382        if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
 383                /*
 384                 * At this point the superblock has not been read
 385                 * in, therefore we do not know the block size.
 386                 * Before the mount call ends we will convert
 387                 * these to FSBs.
 388                 */
 389                mp->m_dalign = dsunit;
 390                mp->m_swidth = dswidth;
 391        }
 392
 393        if (mp->m_logbufs != -1 &&
 394            mp->m_logbufs != 0 &&
 395            (mp->m_logbufs < XLOG_MIN_ICLOGS ||
 396             mp->m_logbufs > XLOG_MAX_ICLOGS)) {
 397                xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
 398                        mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
 399                return -EINVAL;
 400        }
 401        if (mp->m_logbsize != -1 &&
 402            mp->m_logbsize !=  0 &&
 403            (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
 404             mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
 405             !is_power_of_2(mp->m_logbsize))) {
 406                xfs_warn(mp,
 407                        "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
 408                        mp->m_logbsize);
 409                return -EINVAL;
 410        }
 411
 412        if (iosizelog) {
 413                if (iosizelog > XFS_MAX_IO_LOG ||
 414                    iosizelog < XFS_MIN_IO_LOG) {
 415                        xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
 416                                iosizelog, XFS_MIN_IO_LOG,
 417                                XFS_MAX_IO_LOG);
 418                        return -EINVAL;
 419                }
 420
 421                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
 422                mp->m_readio_log = iosizelog;
 423                mp->m_writeio_log = iosizelog;
 424        }
 425
 426        return 0;
 427}
 428
/*
 * Maps an xfs_mount flag bit to the mount-option text shown for it in
 * /proc/mounts; used by the tables in xfs_showargs().
 */
struct proc_xfs_info {
	uint64_t	flag;	/* XFS_MOUNT_* bit to test */
	char		*str;	/* option string, including leading comma */
};
 433
 434STATIC void
 435xfs_showargs(
 436        struct xfs_mount        *mp,
 437        struct seq_file         *m)
 438{
 439        static struct proc_xfs_info xfs_info_set[] = {
 440                /* the few simple ones we can get from the mount struct */
 441                { XFS_MOUNT_IKEEP,              ",ikeep" },
 442                { XFS_MOUNT_WSYNC,              ",wsync" },
 443                { XFS_MOUNT_NOALIGN,            ",noalign" },
 444                { XFS_MOUNT_SWALLOC,            ",swalloc" },
 445                { XFS_MOUNT_NOUUID,             ",nouuid" },
 446                { XFS_MOUNT_NORECOVERY,         ",norecovery" },
 447                { XFS_MOUNT_ATTR2,              ",attr2" },
 448                { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
 449                { XFS_MOUNT_GRPID,              ",grpid" },
 450                { XFS_MOUNT_DISCARD,            ",discard" },
 451                { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
 452                { XFS_MOUNT_DAX,                ",dax" },
 453                { 0, NULL }
 454        };
 455        static struct proc_xfs_info xfs_info_unset[] = {
 456                /* the few simple ones we can get from the mount struct */
 457                { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
 458                { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
 459                { 0, NULL }
 460        };
 461        struct proc_xfs_info    *xfs_infop;
 462
 463        for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
 464                if (mp->m_flags & xfs_infop->flag)
 465                        seq_puts(m, xfs_infop->str);
 466        }
 467        for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
 468                if (!(mp->m_flags & xfs_infop->flag))
 469                        seq_puts(m, xfs_infop->str);
 470        }
 471
 472        if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
 473                seq_printf(m, ",allocsize=%dk",
 474                                (int)(1 << mp->m_writeio_log) >> 10);
 475
 476        if (mp->m_logbufs > 0)
 477                seq_printf(m, ",logbufs=%d", mp->m_logbufs);
 478        if (mp->m_logbsize > 0)
 479                seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
 480
 481        if (mp->m_logname)
 482                seq_show_option(m, "logdev", mp->m_logname);
 483        if (mp->m_rtname)
 484                seq_show_option(m, "rtdev", mp->m_rtname);
 485
 486        if (mp->m_dalign > 0)
 487                seq_printf(m, ",sunit=%d",
 488                                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
 489        if (mp->m_swidth > 0)
 490                seq_printf(m, ",swidth=%d",
 491                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 492
 493        if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
 494                seq_puts(m, ",usrquota");
 495        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 496                seq_puts(m, ",uqnoenforce");
 497
 498        if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 499                if (mp->m_qflags & XFS_PQUOTA_ENFD)
 500                        seq_puts(m, ",prjquota");
 501                else
 502                        seq_puts(m, ",pqnoenforce");
 503        }
 504        if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 505                if (mp->m_qflags & XFS_GQUOTA_ENFD)
 506                        seq_puts(m, ",grpquota");
 507                else
 508                        seq_puts(m, ",gqnoenforce");
 509        }
 510
 511        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 512                seq_puts(m, ",noquota");
 513}
 514
 515static uint64_t
 516xfs_max_file_offset(
 517        unsigned int            blockshift)
 518{
 519        unsigned int            pagefactor = 1;
 520        unsigned int            bitshift = BITS_PER_LONG - 1;
 521
 522        /* Figure out maximum filesize, on Linux this can depend on
 523         * the filesystem blocksize (on 32 bit platforms).
 524         * __block_write_begin does this in an [unsigned] long long...
 525         *      page->index << (PAGE_SHIFT - bbits)
 526         * So, for page sized blocks (4K on 32 bit platforms),
 527         * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
 528         *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 529         * but for smaller blocksizes it is less (bbits = log2 bsize).
 530         */
 531
 532#if BITS_PER_LONG == 32
 533        ASSERT(sizeof(sector_t) == 8);
 534        pagefactor = PAGE_SIZE;
 535        bitshift = BITS_PER_LONG;
 536#endif
 537
 538        return (((uint64_t)pagefactor) << bitshift) - 1;
 539}
 540
 541/*
 542 * Set parameters for inode allocation heuristics, taking into account
 543 * filesystem size and inode32/inode64 mount options; i.e. specifically
 544 * whether or not XFS_MOUNT_SMALL_INUMS is set.
 545 *
 546 * Inode allocation patterns are altered only if inode32 is requested
 547 * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 548 * If altered, XFS_MOUNT_32BITINODES is set as well.
 549 *
 550 * An agcount independent of that in the mount structure is provided
 551 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 552 * to the potentially higher ag count.
 553 *
 554 * Returns the maximum AG index which may contain inodes.
 555 */
 556xfs_agnumber_t
 557xfs_set_inode_alloc(
 558        struct xfs_mount *mp,
 559        xfs_agnumber_t  agcount)
 560{
 561        xfs_agnumber_t  index;
 562        xfs_agnumber_t  maxagi = 0;
 563        xfs_sb_t        *sbp = &mp->m_sb;
 564        xfs_agnumber_t  max_metadata;
 565        xfs_agino_t     agino;
 566        xfs_ino_t       ino;
 567
 568        /*
 569         * Calculate how much should be reserved for inodes to meet
 570         * the max inode percentage.  Used only for inode32.
 571         */
 572        if (M_IGEO(mp)->maxicount) {
 573                uint64_t        icount;
 574
 575                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 576                do_div(icount, 100);
 577                icount += sbp->sb_agblocks - 1;
 578                do_div(icount, sbp->sb_agblocks);
 579                max_metadata = icount;
 580        } else {
 581                max_metadata = agcount;
 582        }
 583
 584        /* Get the last possible inode in the filesystem */
 585        agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
 586        ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
 587
 588        /*
 589         * If user asked for no more than 32-bit inodes, and the fs is
 590         * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
 591         * the allocator to accommodate the request.
 592         */
 593        if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 594                mp->m_flags |= XFS_MOUNT_32BITINODES;
 595        else
 596                mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 597
 598        for (index = 0; index < agcount; index++) {
 599                struct xfs_perag        *pag;
 600
 601                ino = XFS_AGINO_TO_INO(mp, index, agino);
 602
 603                pag = xfs_perag_get(mp, index);
 604
 605                if (mp->m_flags & XFS_MOUNT_32BITINODES) {
 606                        if (ino > XFS_MAXINUMBER_32) {
 607                                pag->pagi_inodeok = 0;
 608                                pag->pagf_metadata = 0;
 609                        } else {
 610                                pag->pagi_inodeok = 1;
 611                                maxagi++;
 612                                if (index < max_metadata)
 613                                        pag->pagf_metadata = 1;
 614                                else
 615                                        pag->pagf_metadata = 0;
 616                        }
 617                } else {
 618                        pag->pagi_inodeok = 1;
 619                        pag->pagf_metadata = 0;
 620                }
 621
 622                xfs_perag_put(pag);
 623        }
 624
 625        return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 626}
 627
 628STATIC int
 629xfs_blkdev_get(
 630        xfs_mount_t             *mp,
 631        const char              *name,
 632        struct block_device     **bdevp)
 633{
 634        int                     error = 0;
 635
 636        *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 637                                    mp);
 638        if (IS_ERR(*bdevp)) {
 639                error = PTR_ERR(*bdevp);
 640                xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
 641        }
 642
 643        return error;
 644}
 645
 646STATIC void
 647xfs_blkdev_put(
 648        struct block_device     *bdev)
 649{
 650        if (bdev)
 651                blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 652}
 653
 654void
 655xfs_blkdev_issue_flush(
 656        xfs_buftarg_t           *buftarg)
 657{
 658        blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
 659}
 660
/*
 * Tear down the buffer targets and drop the block device and DAX
 * references taken by xfs_open_devices().  Order matters: each buftarg
 * is freed before its underlying bdev/dax references are released.
 */
STATIC void
xfs_close_devices(
	struct xfs_mount	*mp)
{
	/* grab the data device's dax ref before the buftarg is freed */
	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;

	/*
	 * A log target equal to the data target is an internal log and is
	 * torn down with the data device below.
	 */
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_logdev_targp);
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
	if (mp->m_rtdev_targp) {
		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_rtdev_targp);
		xfs_blkdev_put(rtdev);
		fs_put_dax(dax_rtdev);
	}
	/* the data bdev itself is put by the VFS (see xfs_open_devices) */
	xfs_free_buftarg(mp->m_ddev_targp);
	fs_put_dax(dax_ddev);
}
 686
 687/*
 688 * The file system configurations are:
 689 *      (1) device (partition) with data and internal log
 690 *      (2) logical volume with data and log subvolumes.
 691 *      (3) logical volume with data, log, and realtime subvolumes.
 692 *
 693 * We only have to handle opening the log and realtime volumes here if
 694 * they are present.  The data subvolume has already been opened by
 695 * get_sb_bdev() and is stored in sb->s_bdev.
 696 */
 697STATIC int
 698xfs_open_devices(
 699        struct xfs_mount        *mp)
 700{
 701        struct block_device     *ddev = mp->m_super->s_bdev;
 702        struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
 703        struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
 704        struct block_device     *logdev = NULL, *rtdev = NULL;
 705        int                     error;
 706
 707        /*
 708         * Open real time and log devices - order is important.
 709         */
 710        if (mp->m_logname) {
 711                error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 712                if (error)
 713                        goto out;
 714                dax_logdev = fs_dax_get_by_bdev(logdev);
 715        }
 716
 717        if (mp->m_rtname) {
 718                error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
 719                if (error)
 720                        goto out_close_logdev;
 721
 722                if (rtdev == ddev || rtdev == logdev) {
 723                        xfs_warn(mp,
 724        "Cannot mount filesystem with identical rtdev and ddev/logdev.");
 725                        error = -EINVAL;
 726                        goto out_close_rtdev;
 727                }
 728                dax_rtdev = fs_dax_get_by_bdev(rtdev);
 729        }
 730
 731        /*
 732         * Setup xfs_mount buffer target pointers
 733         */
 734        error = -ENOMEM;
 735        mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
 736        if (!mp->m_ddev_targp)
 737                goto out_close_rtdev;
 738
 739        if (rtdev) {
 740                mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
 741                if (!mp->m_rtdev_targp)
 742                        goto out_free_ddev_targ;
 743        }
 744
 745        if (logdev && logdev != ddev) {
 746                mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
 747                if (!mp->m_logdev_targp)
 748                        goto out_free_rtdev_targ;
 749        } else {
 750                mp->m_logdev_targp = mp->m_ddev_targp;
 751        }
 752
 753        return 0;
 754
 755 out_free_rtdev_targ:
 756        if (mp->m_rtdev_targp)
 757                xfs_free_buftarg(mp->m_rtdev_targp);
 758 out_free_ddev_targ:
 759        xfs_free_buftarg(mp->m_ddev_targp);
 760 out_close_rtdev:
 761        xfs_blkdev_put(rtdev);
 762        fs_put_dax(dax_rtdev);
 763 out_close_logdev:
 764        if (logdev && logdev != ddev) {
 765                xfs_blkdev_put(logdev);
 766                fs_put_dax(dax_logdev);
 767        }
 768 out:
 769        fs_put_dax(dax_ddev);
 770        return error;
 771}
 772
 773/*
 774 * Setup xfs_mount buffer target pointers based on superblock
 775 */
 776STATIC int
 777xfs_setup_devices(
 778        struct xfs_mount        *mp)
 779{
 780        int                     error;
 781
 782        error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 783        if (error)
 784                return error;
 785
 786        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 787                unsigned int    log_sector_size = BBSIZE;
 788
 789                if (xfs_sb_version_hassector(&mp->m_sb))
 790                        log_sector_size = mp->m_sb.sb_logsectsize;
 791                error = xfs_setsize_buftarg(mp->m_logdev_targp,
 792                                            log_sector_size);
 793                if (error)
 794                        return error;
 795        }
 796        if (mp->m_rtdev_targp) {
 797                error = xfs_setsize_buftarg(mp->m_rtdev_targp,
 798                                            mp->m_sb.sb_sectsize);
 799                if (error)
 800                        return error;
 801        }
 802
 803        return 0;
 804}
 805
/*
 * Create the per-mount workqueues: buffer completion, unwritten extent
 * conversion, CIL pushes, inode reclaim, EOF-block trimming and background
 * sync. All are freezable; all but the sync queue are also tagged
 * WQ_MEM_RECLAIM. On any allocation failure, every previously created
 * queue is destroyed and -ENOMEM is returned.
 */
STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	/* max_active = 1 serialises work items on the buffer queue. */
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_cil_workqueue)
		goto out_destroy_unwritten;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_cil;

	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_eofblocks_workqueue)
		goto out_destroy_reclaim;

	/* Background sync need not make forward progress under reclaim. */
	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
					       mp->m_fsname);
	if (!mp->m_sync_workqueue)
		goto out_destroy_eofb;

	return 0;

	/* Unwind in reverse order of creation. */
out_destroy_eofb:
	destroy_workqueue(mp->m_eofblocks_workqueue);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
	destroy_workqueue(mp->m_cil_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}
 855
/*
 * Tear down the per-mount workqueues, in reverse order of their creation
 * in xfs_init_mount_workqueues().
 */
STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_eofblocks_workqueue);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_cil_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}
 867
 868/*
 869 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 870 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 871 * for IO to complete so that we effectively throttle multiple callers to the
 872 * rate at which IO is completing.
 873 */
 874void
 875xfs_flush_inodes(
 876        struct xfs_mount        *mp)
 877{
 878        struct super_block      *sb = mp->m_super;
 879
 880        if (down_read_trylock(&sb->s_umount)) {
 881                sync_inodes_sb(sb);
 882                up_read(&sb->s_umount);
 883        }
 884}
 885
/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	/* XFS never allocates inodes via this hook; reaching it is a bug. */
	BUG();
	return NULL;
}
 894
#ifdef DEBUG
/*
 * DEBUG-only sanity check: walk the extent list of @whichfork and warn
 * about any delayed-allocation extents still present. Used at inode
 * destruction time, when all delalloc space should already be gone.
 */
static void
xfs_check_delalloc(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;

	/* Nothing to do if the fork doesn't exist or has no extents. */
	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
		return;
	do {
		if (isnullstartblock(got.br_startblock)) {
			xfs_warn(ip->i_mount,
	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
				ip->i_ino,
				whichfork == XFS_DATA_FORK ? "data" : "cow",
				got.br_startoff, got.br_blockcount);
		}
	} while (xfs_iext_next_extent(ifp, &icur, &got));
}
#else
#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
#endif
 920
 921/*
 922 * Now that the generic code is guaranteed not to be accessing
 923 * the linux inode, we can inactivate and reclaim the inode.
 924 */
 925STATIC void
 926xfs_fs_destroy_inode(
 927        struct inode            *inode)
 928{
 929        struct xfs_inode        *ip = XFS_I(inode);
 930
 931        trace_xfs_destroy_inode(ip);
 932
 933        ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 934        XFS_STATS_INC(ip->i_mount, vn_rele);
 935        XFS_STATS_INC(ip->i_mount, vn_remove);
 936
 937        xfs_inactive(ip);
 938
 939        if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
 940                xfs_check_delalloc(ip, XFS_DATA_FORK);
 941                xfs_check_delalloc(ip, XFS_COW_FORK);
 942                ASSERT(0);
 943        }
 944
 945        XFS_STATS_INC(ip->i_mount, vn_reclaim);
 946
 947        /*
 948         * We should never get here with one of the reclaim flags already set.
 949         */
 950        ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 951        ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 952
 953        /*
 954         * We always use background reclaim here because even if the
 955         * inode is clean, it still may be under IO and hence we have
 956         * to take the flush lock. The background reclaim path handles
 957         * this more efficiently than we can here, so simply let background
 958         * reclaim tear down all inodes.
 959         */
 960        xfs_inode_set_reclaim_tag(ip);
 961}
 962
/*
 * VFS ->dirty_inode hook. Only acts on lazytime mounts: when a
 * timestamp-dirty inode (I_DIRTY_TIME) is promoted to properly dirty
 * (I_DIRTY_SYNC), log the inode timestamps so the deferred update becomes
 * persistent. If the transaction cannot be allocated, the update is
 * silently skipped.
 */
static void
xfs_fs_dirty_inode(
	struct inode			*inode,
	int				flag)
{
	struct xfs_inode		*ip = XFS_I(inode);
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_trans		*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;
	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* Transaction commit drops the ilock taken above. */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}
 984
 985/*
 986 * Slab object creation initialisation for the XFS inode.
 987 * This covers only the idempotent fields in the XFS inode;
 988 * all other fields need to be initialised on allocation
 989 * from the slab. This avoids the need to repeatedly initialise
 990 * fields in the xfs inode that left in the initialise state
 991 * when freeing the inode.
 992 */
 993STATIC void
 994xfs_fs_inode_init_once(
 995        void                    *inode)
 996{
 997        struct xfs_inode        *ip = inode;
 998
 999        memset(ip, 0, sizeof(struct xfs_inode));
1000
1001        /* vfs inode */
1002        inode_init_once(VFS_I(ip));
1003
1004        /* xfs inode */
1005        atomic_set(&ip->i_pincount, 0);
1006        spin_lock_init(&ip->i_flags_lock);
1007
1008        mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1009                     "xfsino", ip->i_ino);
1010        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1011                     "xfsino", ip->i_ino);
1012}
1013
1014/*
1015 * We do an unlocked check for XFS_IDONTCACHE here because we are already
1016 * serialised against cache hits here via the inode->i_lock and igrab() in
1017 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
1018 * racing with us, and it avoids needing to grab a spinlock here for every inode
1019 * we drop the final reference on.
1020 */
1021STATIC int
1022xfs_fs_drop_inode(
1023        struct inode            *inode)
1024{
1025        struct xfs_inode        *ip = XFS_I(inode);
1026
1027        /*
1028         * If this unlinked inode is in the middle of recovery, don't
1029         * drop the inode just yet; log recovery will take care of
1030         * that.  See the comment for this inode flag.
1031         */
1032        if (ip->i_flags & XFS_IRECOVERY) {
1033                ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
1034                return 0;
1035        }
1036
1037        return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
1038}
1039
/*
 * Free the device name strings attached to the mount. kfree(NULL) is a
 * no-op, so names that were never allocated are handled transparently.
 */
STATIC void
xfs_free_fsname(
	struct xfs_mount	*mp)
{
	kfree(mp->m_fsname);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
}
1048
/*
 * VFS ->sync_fs hook. The non-wait pass does nothing; the wait pass forces
 * the log to disk, and in laptop mode additionally flushes the delayed log
 * work now, while the disk is known to be active.
 */
STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	xfs_log_force(mp, XFS_LOG_SYNC);
	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	return 0;
}
1074
/*
 * Fill in filesystem statistics for statfs(2). Free space is summed from
 * the percpu counters; superblock geometry fields are sampled under
 * m_sb_lock. If the dentry refers to a realtime or rtinherit inode, block
 * counts are reported for the realtime device instead.
 */
STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	/* Encode the data device number as the fsid. */
	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid.val[0] = (u32)id;
	statp->f_fsid.val[1] = (u32)(id >> 32);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	/* An internal log (sb_logstart != 0) is excluded from f_blocks. */
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
	statp->f_bavail = statp->f_bfree;

	/* Inodes that could still be created out of the free space. */
	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);


	/* Apply project quota limits when accounting and enforcement are on. */
	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		statp->f_blocks = sbp->sb_rblocks;
		statp->f_bavail = statp->f_bfree =
			sbp->sb_frextents * sbp->sb_rextsize;
	}

	return 0;
}
1141
1142STATIC void
1143xfs_save_resvblks(struct xfs_mount *mp)
1144{
1145        uint64_t resblks = 0;
1146
1147        mp->m_resblks_save = mp->m_resblks;
1148        xfs_reserve_blocks(mp, &resblks, NULL);
1149}
1150
1151STATIC void
1152xfs_restore_resvblks(struct xfs_mount *mp)
1153{
1154        uint64_t resblks;
1155
1156        if (mp->m_resblks_save) {
1157                resblks = mp->m_resblks_save;
1158                mp->m_resblks_save = 0;
1159        } else
1160                resblks = xfs_default_resblks(mp);
1161
1162        xfs_reserve_blocks(mp, &resblks, NULL);
1163}
1164
1165/*
1166 * Trigger writeback of all the dirty metadata in the file system.
1167 *
1168 * This ensures that the metadata is written to their location on disk rather
1169 * than just existing in transactions in the log. This means after a quiesce
1170 * there is no log replay required to write the inodes to disk - this is the
1171 * primary difference between a sync and a quiesce.
1172 *
1173 * Note: xfs_log_quiesce() stops background log work - the callers must ensure
1174 * it is started again when appropriate.
1175 */
1176void
1177xfs_quiesce_attr(
1178        struct xfs_mount        *mp)
1179{
1180        int     error = 0;
1181
1182        /* wait for all modifications to complete */
1183        while (atomic_read(&mp->m_active_trans) > 0)
1184                delay(100);
1185
1186        /* force the log to unpin objects from the now complete transactions */
1187        xfs_log_force(mp, XFS_LOG_SYNC);
1188
1189        /* reclaim inodes to do any IO before the freeze completes */
1190        xfs_reclaim_inodes(mp, 0);
1191        xfs_reclaim_inodes(mp, SYNC_WAIT);
1192
1193        /* Push the superblock and write an unmount record */
1194        error = xfs_log_sbcount(mp);
1195        if (error)
1196                xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
1197                                "Frozen image may not be consistent.");
1198        /*
1199         * Just warn here till VFS can correctly support
1200         * read-only remount without racing.
1201         */
1202        WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1203
1204        xfs_log_quiesce(mp);
1205}
1206
1207STATIC int
1208xfs_test_remount_options(
1209        struct super_block      *sb,
1210        char                    *options)
1211{
1212        int                     error = 0;
1213        struct xfs_mount        *tmp_mp;
1214
1215        tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
1216        if (!tmp_mp)
1217                return -ENOMEM;
1218
1219        tmp_mp->m_super = sb;
1220        error = xfs_parseargs(tmp_mp, options);
1221        xfs_free_fsname(tmp_mp);
1222        kmem_free(tmp_mp);
1223
1224        return error;
1225}
1226
/*
 * Handle a remount request. Only the inode32/inode64 allocation policy can
 * actually be changed here; all other recognised options are silently
 * accepted (see the comment in the option loop). The bulk of the work is
 * the ro->rw and rw->ro state transitions.
 */
STATIC int
xfs_fs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	struct xfs_mount	*mp = XFS_M(sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	substring_t		args[MAX_OPT_ARGS];
	char			*p;
	int			error;

	/* First, check for complete junk; i.e. invalid options */
	error = xfs_test_remount_options(sb, options);
	if (error)
		return error;

	sync_filesystem(sb);
	while ((p = strsep(&options, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_inode64:
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		case Opt_inode32:
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		default:
			/*
			 * Logically we would return an error here to prevent
			 * users from believing they might have changed
			 * mount options using remount which can't be changed.
			 *
			 * But unfortunately mount(8) adds all options from
			 * mtab and fstab to the mount arguments in some cases
			 * so we can't blindly reject options, but have to
			 * check for each specified option if it actually
			 * differs from the currently set option and only
			 * reject it if that's the case.
			 *
			 * Until that is implemented we return success for
			 * every remount request, and silently ignore all
			 * options that we can't actually change.
			 */
#if 0
			xfs_info(mp,
		"mount option \"%s\" not supported for remount", p);
			return -EINVAL;
#else
			break;
#endif
		}
	}

	/* ro -> rw */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
			xfs_warn(mp,
		"ro->rw transition prohibited on norecovery mount");
			return -EINVAL;
		}

		/* Unknown ro-compat features make rw access unsafe. */
		if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
		    xfs_sb_has_ro_compat_feature(sbp,
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			xfs_warn(mp,
"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
				(sbp->sb_features_ro_compat &
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			return -EINVAL;
		}

		mp->m_flags &= ~XFS_MOUNT_RDONLY;

		/*
		 * If this is the first remount to writeable state we
		 * might have some superblock changes to update.
		 */
		if (mp->m_update_sb) {
			error = xfs_sync_sb(mp, false);
			if (error) {
				xfs_warn(mp, "failed to write sb changes");
				return error;
			}
			mp->m_update_sb = false;
		}

		/*
		 * Fill out the reserve pool if it is empty. Use the stashed
		 * value if it is non-zero, otherwise go with the default.
		 */
		xfs_restore_resvblks(mp);
		xfs_log_work_queue(mp);

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
		xfs_start_block_reaping(mp);

		/* Create the per-AG metadata reservation pool .*/
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			return error;
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
		/*
		 * Cancel background eofb scanning so it cannot race with the
		 * final log force+buftarg wait and deadlock the remount.
		 */
		xfs_stop_block_reaping(mp);

		/* Get rid of any leftover CoW reservations... */
		error = xfs_icache_free_cowblocks(mp, NULL);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/* Free the per-AG metadata reservation pool. */
		error = xfs_fs_unreserve_ag_blocks(mp);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/*
		 * Before we sync the metadata, we need to free up the reserve
		 * block pool so that the used block count in the superblock on
		 * disk is correct at the end of the remount. Stash the current
		 * reserve pool size so that if we get remounted rw, we can
		 * return it to the same size.
		 */
		xfs_save_resvblks(mp);

		xfs_quiesce_attr(mp);
		mp->m_flags |= XFS_MOUNT_RDONLY;
	}

	return 0;
}
1381
1382/*
1383 * Second stage of a freeze. The data is already frozen so we only
1384 * need to take care of the metadata. Once that's done sync the superblock
1385 * to the log to dirty it in case of a crash while frozen. This ensures that we
1386 * will recover the unlinked inode lists on the next mount.
1387 */
1388STATIC int
1389xfs_fs_freeze(
1390        struct super_block      *sb)
1391{
1392        struct xfs_mount        *mp = XFS_M(sb);
1393
1394        xfs_stop_block_reaping(mp);
1395        xfs_save_resvblks(mp);
1396        xfs_quiesce_attr(mp);
1397        return xfs_sync_sb(mp, true);
1398}
1399
/*
 * Undo xfs_fs_freeze(): refill the reserve pool and restart the background
 * log work and block reaping workers.
 */
STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_start_block_reaping(mp);
	return 0;
}
1411
/*
 * Emit the active mount options for /proc/mounts; the formatting is
 * delegated to xfs_showargs().
 */
STATIC int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	xfs_showargs(XFS_M(root->d_sb), m);
	return 0;
}
1420
1421/*
1422 * This function fills in xfs_mount_t fields based on mount args.
1423 * Note: the superblock _has_ now been read in.
1424 */
1425STATIC int
1426xfs_finish_flags(
1427        struct xfs_mount        *mp)
1428{
1429        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1430
1431        /* Fail a mount where the logbuf is smaller than the log stripe */
1432        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1433                if (mp->m_logbsize <= 0 &&
1434                    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1435                        mp->m_logbsize = mp->m_sb.sb_logsunit;
1436                } else if (mp->m_logbsize > 0 &&
1437                           mp->m_logbsize < mp->m_sb.sb_logsunit) {
1438                        xfs_warn(mp,
1439                "logbuf size must be greater than or equal to log stripe size");
1440                        return -EINVAL;
1441                }
1442        } else {
1443                /* Fail a mount if the logbuf is larger than 32K */
1444                if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1445                        xfs_warn(mp,
1446                "logbuf size for version 1 logs must be 16K or 32K");
1447                        return -EINVAL;
1448                }
1449        }
1450
1451        /*
1452         * V5 filesystems always use attr2 format for attributes.
1453         */
1454        if (xfs_sb_version_hascrc(&mp->m_sb) &&
1455            (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1456                xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1457                             "attr2 is always enabled for V5 filesystems.");
1458                return -EINVAL;
1459        }
1460
1461        /*
1462         * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1463         * told by noattr2 to turn it off
1464         */
1465        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1466            !(mp->m_flags & XFS_MOUNT_NOATTR2))
1467                mp->m_flags |= XFS_MOUNT_ATTR2;
1468
1469        /*
1470         * prohibit r/w mounts of read-only filesystems
1471         */
1472        if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1473                xfs_warn(mp,
1474                        "cannot mount a read-only filesystem as read-write");
1475                return -EROFS;
1476        }
1477
1478        if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
1479            (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
1480            !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1481                xfs_warn(mp,
1482                  "Super block does not support project and group quota together");
1483                return -EINVAL;
1484        }
1485
1486        return 0;
1487}
1488
/*
 * Initialise the percpu counters tracking inode counts, free inodes, free
 * data blocks and delalloc blocks. Any failure unwinds the counters created
 * so far and is reported uniformly as -ENOMEM.
 */
static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int		error;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_fdblocks;

	return 0;

	/* Unwind in reverse order of initialisation. */
free_fdblocks:
	percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}
1521
/*
 * Reset the in-memory percpu counters to the values currently recorded in
 * the on-disk superblock.
 */
void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}
1530
/*
 * Tear down the percpu counters. Unless the filesystem was shut down, no
 * delalloc blocks may still be accounted at this point.
 */
static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
}
1542
/*
 * Allocate and initialise a new xfs_mount: locks, the per-AG radix tree,
 * delayed-work items and sysfs linkage. Returns NULL on allocation failure.
 * No resources other than the zeroed allocation are acquired, so the
 * structure can simply be kfree'd on later setup failure.
 */
static struct xfs_mount *
xfs_mount_alloc(
	struct super_block	*sb)
{
	struct xfs_mount	*mp;

	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
	if (!mp)
		return NULL;

	mp->m_super = sb;
	spin_lock_init(&mp->m_sb_lock);
	spin_lock_init(&mp->m_agirotor_lock);
	/* GFP_ATOMIC: the perag tree is populated under spinlocks. */
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	spin_lock_init(&mp->m_perag_lock);
	mutex_init(&mp->m_growlock);
	atomic_set(&mp->m_active_trans, 0);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
	INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;
	return mp;
}
1573
1574
1575STATIC int
1576xfs_fs_fill_super(
1577        struct super_block      *sb,
1578        void                    *data,
1579        int                     silent)
1580{
1581        struct inode            *root;
1582        struct xfs_mount        *mp = NULL;
1583        int                     flags = 0, error = -ENOMEM;
1584
1585        /*
1586         * allocate mp and do all low-level struct initializations before we
1587         * attach it to the super
1588         */
1589        mp = xfs_mount_alloc(sb);
1590        if (!mp)
1591                goto out;
1592        sb->s_fs_info = mp;
1593
1594        error = xfs_parseargs(mp, (char *)data);
1595        if (error)
1596                goto out_free_fsname;
1597
1598        sb_min_blocksize(sb, BBSIZE);
1599        sb->s_xattr = xfs_xattr_handlers;
1600        sb->s_export_op = &xfs_export_operations;
1601#ifdef CONFIG_XFS_QUOTA
1602        sb->s_qcop = &xfs_quotactl_operations;
1603        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1604#endif
1605        sb->s_op = &xfs_super_operations;
1606
1607        /*
1608         * Delay mount work if the debug hook is set. This is debug
1609         * instrumention to coordinate simulation of xfs mount failures with
1610         * VFS superblock operations
1611         */
1612        if (xfs_globals.mount_delay) {
1613                xfs_notice(mp, "Delaying mount for %d seconds.",
1614                        xfs_globals.mount_delay);
1615                msleep(xfs_globals.mount_delay * 1000);
1616        }
1617
1618        if (silent)
1619                flags |= XFS_MFSI_QUIET;
1620
1621        error = xfs_open_devices(mp);
1622        if (error)
1623                goto out_free_fsname;
1624
1625        error = xfs_init_mount_workqueues(mp);
1626        if (error)
1627                goto out_close_devices;
1628
1629        error = xfs_init_percpu_counters(mp);
1630        if (error)
1631                goto out_destroy_workqueues;
1632
1633        /* Allocate stats memory before we do operations that might use it */
1634        mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1635        if (!mp->m_stats.xs_stats) {
1636                error = -ENOMEM;
1637                goto out_destroy_counters;
1638        }
1639
1640        error = xfs_readsb(mp, flags);
1641        if (error)
1642                goto out_free_stats;
1643
1644        error = xfs_finish_flags(mp);
1645        if (error)
1646                goto out_free_sb;
1647
1648        error = xfs_setup_devices(mp);
1649        if (error)
1650                goto out_free_sb;
1651
1652        error = xfs_filestream_mount(mp);
1653        if (error)
1654                goto out_free_sb;
1655
1656        /*
1657         * we must configure the block size in the superblock before we run the
1658         * full mount process as the mount process can lookup and cache inodes.
1659         */
1660        sb->s_magic = XFS_SUPER_MAGIC;
1661        sb->s_blocksize = mp->m_sb.sb_blocksize;
1662        sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1663        sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1664        sb->s_max_links = XFS_MAXLINK;
1665        sb->s_time_gran = 1;
1666        sb->s_iflags |= SB_I_CGROUPWB;
1667
1668        set_posix_acl_flag(sb);
1669
1670        /* version 5 superblocks support inode version counters. */
1671        if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1672                sb->s_flags |= SB_I_VERSION;
1673
1674        if (mp->m_flags & XFS_MOUNT_DAX) {
1675                bool rtdev_is_dax = false, datadev_is_dax;
1676
1677                xfs_warn(mp,
1678                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1679
1680                datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1681                        sb->s_blocksize);
1682                if (mp->m_rtdev_targp)
1683                        rtdev_is_dax = bdev_dax_supported(
1684                                mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
1685                if (!rtdev_is_dax && !datadev_is_dax) {
1686                        xfs_alert(mp,
1687                        "DAX unsupported by block device. Turning off DAX.");
1688                        mp->m_flags &= ~XFS_MOUNT_DAX;
1689                }
1690                if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1691                        xfs_alert(mp,
1692                "DAX and reflink cannot be used together!");
1693                        error = -EINVAL;
1694                        goto out_filestream_unmount;
1695                }
1696        }
1697
1698        if (mp->m_flags & XFS_MOUNT_DISCARD) {
1699                struct request_queue *q = bdev_get_queue(sb->s_bdev);
1700
1701                if (!blk_queue_discard(q)) {
1702                        xfs_warn(mp, "mounting with \"discard\" option, but "
1703                                        "the device does not support discard");
1704                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
1705                }
1706        }
1707
1708        if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1709                if (mp->m_sb.sb_rblocks) {
1710                        xfs_alert(mp,
1711        "reflink not compatible with realtime device!");
1712                        error = -EINVAL;
1713                        goto out_filestream_unmount;
1714                }
1715
1716                if (xfs_globals.always_cow) {
1717                        xfs_info(mp, "using DEBUG-only always_cow mode.");
1718                        mp->m_always_cow = true;
1719                }
1720        }
1721
1722        if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1723                xfs_alert(mp,
1724        "reverse mapping btree not compatible with realtime device!");
1725                error = -EINVAL;
1726                goto out_filestream_unmount;
1727        }
1728
1729        error = xfs_mountfs(mp);
1730        if (error)
1731                goto out_filestream_unmount;
1732
1733        root = igrab(VFS_I(mp->m_rootip));
1734        if (!root) {
1735                error = -ENOENT;
1736                goto out_unmount;
1737        }
1738        sb->s_root = d_make_root(root);
1739        if (!sb->s_root) {
1740                error = -ENOMEM;
1741                goto out_unmount;
1742        }
1743
1744        return 0;
1745
1746 out_filestream_unmount:
1747        xfs_filestream_unmount(mp);
1748 out_free_sb:
1749        xfs_freesb(mp);
1750 out_free_stats:
1751        free_percpu(mp->m_stats.xs_stats);
1752 out_destroy_counters:
1753        xfs_destroy_percpu_counters(mp);
1754 out_destroy_workqueues:
1755        xfs_destroy_mount_workqueues(mp);
1756 out_close_devices:
1757        xfs_close_devices(mp);
1758 out_free_fsname:
1759        sb->s_fs_info = NULL;
1760        xfs_free_fsname(mp);
1761        kfree(mp);
1762 out:
1763        return error;
1764
1765 out_unmount:
1766        xfs_filestream_unmount(mp);
1767        xfs_unmountfs(mp);
1768        goto out_free_sb;
1769}
1770
/*
 * Tear down the mount built by ->fill_super.
 *
 * Teardown runs in the reverse order of setup: unmount the filesystem
 * proper first (filestreams, then the full unmount), then release the
 * in-core superblock buffer, per-cpu stats and counters, workqueues
 * and block devices, and finally free the xfs_mount itself.
 */
STATIC void
xfs_fs_put_super(
        struct super_block      *sb)
{
        struct xfs_mount        *mp = XFS_M(sb);

        /* if ->fill_super failed, we have no mount to tear down */
        if (!sb->s_fs_info)
                return;

        xfs_notice(mp, "Unmounting Filesystem");
        xfs_filestream_unmount(mp);
        xfs_unmountfs(mp);

        xfs_freesb(mp);
        free_percpu(mp->m_stats.xs_stats);
        xfs_destroy_percpu_counters(mp);
        xfs_destroy_mount_workqueues(mp);
        xfs_close_devices(mp);

        /*
         * Clear s_fs_info before freeing so late VFS callbacks (e.g. the
         * shrinker hooks) can detect that the mount is gone.
         */
        sb->s_fs_info = NULL;
        xfs_free_fsname(mp);
        kfree(mp);
}
1795
/*
 * ->mount entry point: standard block-device mount, with
 * xfs_fs_fill_super() doing all of the filesystem-specific setup.
 */
STATIC struct dentry *
xfs_fs_mount(
        struct file_system_type *fs_type,
        int                     flags,
        const char              *dev_name,
        void                    *data)
{
        return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
1805
1806static long
1807xfs_fs_nr_cached_objects(
1808        struct super_block      *sb,
1809        struct shrink_control   *sc)
1810{
1811        /* Paranoia: catch incorrect calls during mount setup or teardown */
1812        if (WARN_ON_ONCE(!sb->s_fs_info))
1813                return 0;
1814        return xfs_reclaim_inodes_count(XFS_M(sb));
1815}
1816
1817static long
1818xfs_fs_free_cached_objects(
1819        struct super_block      *sb,
1820        struct shrink_control   *sc)
1821{
1822        return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1823}
1824
/*
 * VFS superblock operations for XFS: inode allocation/lifetime hooks,
 * superblock teardown, sync/freeze, statfs, remount, mount-option
 * display, and the per-superblock inode-cache shrinker callbacks.
 */
static const struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
        .dirty_inode            = xfs_fs_dirty_inode,
        .drop_inode             = xfs_fs_drop_inode,
        .put_super              = xfs_fs_put_super,
        .sync_fs                = xfs_fs_sync_fs,
        .freeze_fs              = xfs_fs_freeze,
        .unfreeze_fs            = xfs_fs_unfreeze,
        .statfs                 = xfs_fs_statfs,
        .remount_fs             = xfs_fs_remount,
        .show_options           = xfs_fs_show_options,
        .nr_cached_objects      = xfs_fs_nr_cached_objects,
        .free_cached_objects    = xfs_fs_free_cached_objects,
};
1840
/*
 * Filesystem type registration for "xfs".  FS_REQUIRES_DEV marks it as
 * block-device backed; kill_block_super() handles generic unmount
 * teardown (which invokes our ->put_super) and device release.
 */
static struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
        .mount                  = xfs_fs_mount,
        .kill_sb                = kill_block_super,
        .fs_flags               = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("xfs");
1849
/*
 * Create all of the kmem zones (slab caches) used for XFS in-core
 * structures and log items, plus the bio_set used for ioend
 * completions.  On any failure, everything created so far is torn
 * down in reverse order via the unwind labels at the bottom and
 * -ENOMEM is returned.
 */
STATIC int __init
xfs_init_zones(void)
{
        if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
                        offsetof(struct xfs_ioend, io_inline_bio),
                        BIOSET_NEED_BVECS))
                goto out;

        xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
                                                "xfs_log_ticket");
        if (!xfs_log_ticket_zone)
                goto out_free_ioend_bioset;

        xfs_bmap_free_item_zone = kmem_zone_init(
                        sizeof(struct xfs_extent_free_item),
                        "xfs_bmap_free_item");
        if (!xfs_bmap_free_item_zone)
                goto out_destroy_log_ticket_zone;

        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
                                                "xfs_btree_cur");
        if (!xfs_btree_cur_zone)
                goto out_destroy_bmap_free_item_zone;

        xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
                                                "xfs_da_state");
        if (!xfs_da_state_zone)
                goto out_destroy_btree_cur_zone;

        xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
        if (!xfs_ifork_zone)
                goto out_destroy_da_state_zone;

        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
        if (!xfs_trans_zone)
                goto out_destroy_ifork_zone;


        /*
         * The size of the zone allocated buf log item is the maximum
         * size possible under XFS.  This wastes a little bit of memory,
         * but it is much faster.
         */
        xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
                                           "xfs_buf_item");
        if (!xfs_buf_item_zone)
                goto out_destroy_trans_zone;

        /*
         * EFD/EFI (and the RUI/CUI/BUI items below) reserve inline space
         * for a small number of "fast" extents to avoid a second
         * allocation in the common case.
         */
        xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
                        ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
                                 sizeof(xfs_extent_t))), "xfs_efd_item");
        if (!xfs_efd_zone)
                goto out_destroy_buf_item_zone;

        xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
                        ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
                                sizeof(xfs_extent_t))), "xfs_efi_item");
        if (!xfs_efi_zone)
                goto out_destroy_efd_zone;

        xfs_inode_zone =
                kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
                        KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
        if (!xfs_inode_zone)
                goto out_destroy_efi_zone;

        xfs_ili_zone =
                kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
                                        KM_ZONE_SPREAD, NULL);
        if (!xfs_ili_zone)
                goto out_destroy_inode_zone;
        xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
                                        "xfs_icr");
        if (!xfs_icreate_zone)
                goto out_destroy_ili_zone;

        xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
                        "xfs_rud_item");
        if (!xfs_rud_zone)
                goto out_destroy_icreate_zone;

        xfs_rui_zone = kmem_zone_init(
                        xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
                        "xfs_rui_item");
        if (!xfs_rui_zone)
                goto out_destroy_rud_zone;

        xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
                        "xfs_cud_item");
        if (!xfs_cud_zone)
                goto out_destroy_rui_zone;

        xfs_cui_zone = kmem_zone_init(
                        xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
                        "xfs_cui_item");
        if (!xfs_cui_zone)
                goto out_destroy_cud_zone;

        xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
                        "xfs_bud_item");
        if (!xfs_bud_zone)
                goto out_destroy_cui_zone;

        xfs_bui_zone = kmem_zone_init(
                        xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
                        "xfs_bui_item");
        if (!xfs_bui_zone)
                goto out_destroy_bud_zone;

        return 0;

        /* Error unwind: destroy zones in reverse order of creation. */
 out_destroy_bud_zone:
        kmem_zone_destroy(xfs_bud_zone);
 out_destroy_cui_zone:
        kmem_zone_destroy(xfs_cui_zone);
 out_destroy_cud_zone:
        kmem_zone_destroy(xfs_cud_zone);
 out_destroy_rui_zone:
        kmem_zone_destroy(xfs_rui_zone);
 out_destroy_rud_zone:
        kmem_zone_destroy(xfs_rud_zone);
 out_destroy_icreate_zone:
        kmem_zone_destroy(xfs_icreate_zone);
 out_destroy_ili_zone:
        kmem_zone_destroy(xfs_ili_zone);
 out_destroy_inode_zone:
        kmem_zone_destroy(xfs_inode_zone);
 out_destroy_efi_zone:
        kmem_zone_destroy(xfs_efi_zone);
 out_destroy_efd_zone:
        kmem_zone_destroy(xfs_efd_zone);
 out_destroy_buf_item_zone:
        kmem_zone_destroy(xfs_buf_item_zone);
 out_destroy_trans_zone:
        kmem_zone_destroy(xfs_trans_zone);
 out_destroy_ifork_zone:
        kmem_zone_destroy(xfs_ifork_zone);
 out_destroy_da_state_zone:
        kmem_zone_destroy(xfs_da_state_zone);
 out_destroy_btree_cur_zone:
        kmem_zone_destroy(xfs_btree_cur_zone);
 out_destroy_bmap_free_item_zone:
        kmem_zone_destroy(xfs_bmap_free_item_zone);
 out_destroy_log_ticket_zone:
        kmem_zone_destroy(xfs_log_ticket_zone);
 out_free_ioend_bioset:
        bioset_exit(&xfs_ioend_bioset);
 out:
        return -ENOMEM;
}
2001
/*
 * Tear down everything created by xfs_init_zones(), in reverse order
 * of creation.
 */
STATIC void
xfs_destroy_zones(void)
{
        /*
         * Make sure all delayed rcu free are flushed before we
         * destroy caches.
         */
        rcu_barrier();
        kmem_zone_destroy(xfs_bui_zone);
        kmem_zone_destroy(xfs_bud_zone);
        kmem_zone_destroy(xfs_cui_zone);
        kmem_zone_destroy(xfs_cud_zone);
        kmem_zone_destroy(xfs_rui_zone);
        kmem_zone_destroy(xfs_rud_zone);
        kmem_zone_destroy(xfs_icreate_zone);
        kmem_zone_destroy(xfs_ili_zone);
        kmem_zone_destroy(xfs_inode_zone);
        kmem_zone_destroy(xfs_efi_zone);
        kmem_zone_destroy(xfs_efd_zone);
        kmem_zone_destroy(xfs_buf_item_zone);
        kmem_zone_destroy(xfs_trans_zone);
        kmem_zone_destroy(xfs_ifork_zone);
        kmem_zone_destroy(xfs_da_state_zone);
        kmem_zone_destroy(xfs_btree_cur_zone);
        kmem_zone_destroy(xfs_bmap_free_item_zone);
        kmem_zone_destroy(xfs_log_ticket_zone);
        bioset_exit(&xfs_ioend_bioset);
}
2030
2031STATIC int __init
2032xfs_init_workqueues(void)
2033{
2034        /*
2035         * The allocation workqueue can be used in memory reclaim situations
2036         * (writepage path), and parallelism is only limited by the number of
2037         * AGs in all the filesystems mounted. Hence use the default large
2038         * max_active value for this workqueue.
2039         */
2040        xfs_alloc_wq = alloc_workqueue("xfsalloc",
2041                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2042        if (!xfs_alloc_wq)
2043                return -ENOMEM;
2044
2045        xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2046        if (!xfs_discard_wq)
2047                goto out_free_alloc_wq;
2048
2049        return 0;
2050out_free_alloc_wq:
2051        destroy_workqueue(xfs_alloc_wq);
2052        return -ENOMEM;
2053}
2054
/*
 * Destroy the global workqueues created by xfs_init_workqueues(), in
 * reverse order of creation.
 */
STATIC void
xfs_destroy_workqueues(void)
{
        destroy_workqueue(xfs_discard_wq);
        destroy_workqueue(xfs_alloc_wq);
}
2061
/*
 * Module (or built-in) initialisation: create all global XFS state --
 * slab zones, workqueues, the filestreams MRU cache, the buffer cache,
 * procfs/sysctl/sysfs entries and quota support -- then register the
 * "xfs" filesystem type last, so no mount can race with a half-set-up
 * module.  On failure, everything already initialised is unwound in
 * reverse order via the labels at the bottom.
 */
STATIC int __init
init_xfs_fs(void)
{
        int                     error;

        xfs_check_ondisk_structs();

        printk(KERN_INFO XFS_VERSION_STRING " with "
                         XFS_BUILD_OPTIONS " enabled\n");

        xfs_dir_startup();

        error = xfs_init_zones();
        if (error)
                goto out;

        error = xfs_init_workqueues();
        if (error)
                goto out_destroy_zones;

        error = xfs_mru_cache_init();
        if (error)
                goto out_destroy_wq;

        error = xfs_buf_init();
        if (error)
                goto out_mru_cache_uninit;

        error = xfs_init_procfs();
        if (error)
                goto out_buf_terminate;

        error = xfs_sysctl_register();
        if (error)
                goto out_cleanup_procfs;

        xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
        if (!xfs_kset) {
                error = -ENOMEM;
                goto out_sysctl_unregister;
        }

        xfsstats.xs_kobj.kobject.kset = xfs_kset;

        /* Global (not per-mount) statistics counters. */
        xfsstats.xs_stats = alloc_percpu(struct xfsstats);
        if (!xfsstats.xs_stats) {
                error = -ENOMEM;
                goto out_kset_unregister;
        }

        error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
                               "stats");
        if (error)
                goto out_free_stats;

#ifdef DEBUG
        xfs_dbg_kobj.kobject.kset = xfs_kset;
        error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
        if (error)
                goto out_remove_stats_kobj;
#endif

        error = xfs_qm_init();
        if (error)
                goto out_remove_dbg_kobj;

        error = register_filesystem(&xfs_fs_type);
        if (error)
                goto out_qm_exit;
        return 0;

        /* Error unwind: tear down in reverse order of the setup above. */
 out_qm_exit:
        xfs_qm_exit();
 out_remove_dbg_kobj:
#ifdef DEBUG
        xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_stats_kobj:
#endif
        xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
        free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
        kset_unregister(xfs_kset);
 out_sysctl_unregister:
        xfs_sysctl_unregister();
 out_cleanup_procfs:
        xfs_cleanup_procfs();
 out_buf_terminate:
        xfs_buf_terminate();
 out_mru_cache_uninit:
        xfs_mru_cache_uninit();
 out_destroy_wq:
        xfs_destroy_workqueues();
 out_destroy_zones:
        xfs_destroy_zones();
 out:
        return error;
}
2160
/*
 * Module exit: undo init_xfs_fs() in reverse order of initialisation,
 * then free the global filesystem UUID table.
 */
STATIC void __exit
exit_xfs_fs(void)
{
        xfs_qm_exit();
        unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
        xfs_sysfs_del(&xfs_dbg_kobj);
#endif
        xfs_sysfs_del(&xfsstats.xs_kobj);
        free_percpu(xfsstats.xs_stats);
        kset_unregister(xfs_kset);
        xfs_sysctl_unregister();
        xfs_cleanup_procfs();
        xfs_buf_terminate();
        xfs_mru_cache_uninit();
        xfs_destroy_workqueues();
        xfs_destroy_zones();
        xfs_uuid_table_free();
}
2180
2181module_init(init_xfs_fs);
2182module_exit(exit_xfs_fs);
2183
2184MODULE_AUTHOR("Silicon Graphics, Inc.");
2185MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2186MODULE_LICENSE("GPL");
2187