linux/fs/xfs/xfs_super.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6
   7#include "xfs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_sb.h"
  13#include "xfs_mount.h"
  14#include "xfs_da_format.h"
  15#include "xfs_inode.h"
  16#include "xfs_btree.h"
  17#include "xfs_bmap.h"
  18#include "xfs_alloc.h"
  19#include "xfs_error.h"
  20#include "xfs_fsops.h"
  21#include "xfs_trans.h"
  22#include "xfs_buf_item.h"
  23#include "xfs_log.h"
  24#include "xfs_log_priv.h"
  25#include "xfs_da_btree.h"
  26#include "xfs_dir2.h"
  27#include "xfs_extfree_item.h"
  28#include "xfs_mru_cache.h"
  29#include "xfs_inode_item.h"
  30#include "xfs_icache.h"
  31#include "xfs_trace.h"
  32#include "xfs_icreate_item.h"
  33#include "xfs_filestream.h"
  34#include "xfs_quota.h"
  35#include "xfs_sysfs.h"
  36#include "xfs_ondisk.h"
  37#include "xfs_rmap_item.h"
  38#include "xfs_refcount_item.h"
  39#include "xfs_bmap_item.h"
  40#include "xfs_reflink.h"
  41
  42#include <linux/namei.h>
  43#include <linux/dax.h>
  44#include <linux/init.h>
  45#include <linux/slab.h>
  46#include <linux/mount.h>
  47#include <linux/mempool.h>
  48#include <linux/writeback.h>
  49#include <linux/kthread.h>
  50#include <linux/freezer.h>
  51#include <linux/parser.h>
  52
  53static const struct super_operations xfs_super_operations;
  54struct bio_set xfs_ioend_bioset;
  55
  56static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
  57#ifdef DEBUG
  58static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
  59#endif
  60
  61/*
  62 * Table driven mount option parser.
  63 */
  64enum {
  65        Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
  66        Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
  67        Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
  68        Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
  69        Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
  70        Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
  71        Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
  72        Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
  73        Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
  74};
  75
  76static const match_table_t tokens = {
  77        {Opt_logbufs,   "logbufs=%u"},  /* number of XFS log buffers */
  78        {Opt_logbsize,  "logbsize=%s"}, /* size of XFS log buffers */
  79        {Opt_logdev,    "logdev=%s"},   /* log device */
  80        {Opt_rtdev,     "rtdev=%s"},    /* realtime I/O device */
  81        {Opt_biosize,   "biosize=%u"},  /* log2 of preferred buffered io size */
  82        {Opt_wsync,     "wsync"},       /* safe-mode nfs compatible mount */
  83        {Opt_noalign,   "noalign"},     /* turn off stripe alignment */
  84        {Opt_swalloc,   "swalloc"},     /* turn on stripe width allocation */
  85        {Opt_sunit,     "sunit=%u"},    /* data volume stripe unit */
  86        {Opt_swidth,    "swidth=%u"},   /* data volume stripe width */
  87        {Opt_nouuid,    "nouuid"},      /* ignore filesystem UUID */
  88        {Opt_mtpt,      "mtpt"},        /* filesystem mount point */
  89        {Opt_grpid,     "grpid"},       /* group-ID from parent directory */
  90        {Opt_nogrpid,   "nogrpid"},     /* group-ID from current process */
  91        {Opt_bsdgroups, "bsdgroups"},   /* group-ID from parent directory */
  92        {Opt_sysvgroups,"sysvgroups"},  /* group-ID from current process */
  93        {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
  94        {Opt_norecovery,"norecovery"},  /* don't run XFS recovery */
  95        {Opt_inode64,   "inode64"},     /* inodes can be allocated anywhere */
  96        {Opt_inode32,   "inode32"},     /* inode allocation limited to
  97                                         * XFS_MAXINUMBER_32 */
  98        {Opt_ikeep,     "ikeep"},       /* do not free empty inode clusters */
  99        {Opt_noikeep,   "noikeep"},     /* free empty inode clusters */
 100        {Opt_largeio,   "largeio"},     /* report large I/O sizes in stat() */
 101        {Opt_nolargeio, "nolargeio"},   /* do not report large I/O sizes
 102                                         * in stat(). */
 103        {Opt_attr2,     "attr2"},       /* do use attr2 attribute format */
 104        {Opt_noattr2,   "noattr2"},     /* do not use attr2 attribute format */
 105        {Opt_filestreams,"filestreams"},/* use filestreams allocator */
 106        {Opt_quota,     "quota"},       /* disk quotas (user) */
 107        {Opt_noquota,   "noquota"},     /* no quotas */
 108        {Opt_usrquota,  "usrquota"},    /* user quota enabled */
 109        {Opt_grpquota,  "grpquota"},    /* group quota enabled */
 110        {Opt_prjquota,  "prjquota"},    /* project quota enabled */
 111        {Opt_uquota,    "uquota"},      /* user quota (IRIX variant) */
 112        {Opt_gquota,    "gquota"},      /* group quota (IRIX variant) */
 113        {Opt_pquota,    "pquota"},      /* project quota (IRIX variant) */
 114        {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
 115        {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
 116        {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
 117        {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
 118        {Opt_discard,   "discard"},     /* Discard unused blocks */
 119        {Opt_nodiscard, "nodiscard"},   /* Do not discard unused blocks */
 120        {Opt_dax,       "dax"},         /* Enable direct access to bdev pages */
 121        {Opt_err,       NULL},
 122};
 123
 124
 125STATIC int
 126suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
 127{
 128        int     last, shift_left_factor = 0, _res;
 129        char    *value;
 130        int     ret = 0;
 131
 132        value = match_strdup(s);
 133        if (!value)
 134                return -ENOMEM;
 135
 136        last = strlen(value) - 1;
 137        if (value[last] == 'K' || value[last] == 'k') {
 138                shift_left_factor = 10;
 139                value[last] = '\0';
 140        }
 141        if (value[last] == 'M' || value[last] == 'm') {
 142                shift_left_factor = 20;
 143                value[last] = '\0';
 144        }
 145        if (value[last] == 'G' || value[last] == 'g') {
 146                shift_left_factor = 30;
 147                value[last] = '\0';
 148        }
 149
 150        if (kstrtoint(value, base, &_res))
 151                ret = -EINVAL;
 152        kfree(value);
 153        *res = _res << shift_left_factor;
 154        return ret;
 155}
 156
 157/*
 158 * This function fills in xfs_mount_t fields based on mount args.
 159 * Note: the superblock has _not_ yet been read in.
 160 *
 161 * Note that this function leaks the various device name allocations on
 162 * failure.  The caller takes care of them.
 163 *
 164 * *sb is const because this is also used to test options on the remount
 165 * path, and we don't want this to have any side effects at remount time.
 166 * Today this function does not change *sb, but just to future-proof...
 167 */
 168STATIC int
 169xfs_parseargs(
 170        struct xfs_mount        *mp,
 171        char                    *options)
 172{
 173        const struct super_block *sb = mp->m_super;
 174        char                    *p;
 175        substring_t             args[MAX_OPT_ARGS];
 176        int                     dsunit = 0;
 177        int                     dswidth = 0;
 178        int                     iosize = 0;
 179        uint8_t                 iosizelog = 0;
 180
 181        /*
 182         * set up the mount name first so all the errors will refer to the
 183         * correct device.
 184         */
 185        mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
 186        if (!mp->m_fsname)
 187                return -ENOMEM;
 188        mp->m_fsname_len = strlen(mp->m_fsname) + 1;
 189
 190        /*
 191         * Copy binary VFS mount flags we are interested in.
 192         */
 193        if (sb_rdonly(sb))
 194                mp->m_flags |= XFS_MOUNT_RDONLY;
 195        if (sb->s_flags & SB_DIRSYNC)
 196                mp->m_flags |= XFS_MOUNT_DIRSYNC;
 197        if (sb->s_flags & SB_SYNCHRONOUS)
 198                mp->m_flags |= XFS_MOUNT_WSYNC;
 199
 200        /*
 201         * Set some default flags that could be cleared by the mount option
 202         * parsing.
 203         */
 204        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 205
 206        /*
 207         * These can be overridden by the mount option parsing.
 208         */
 209        mp->m_logbufs = -1;
 210        mp->m_logbsize = -1;
 211
 212        if (!options)
 213                goto done;
 214
 215        while ((p = strsep(&options, ",")) != NULL) {
 216                int             token;
 217
 218                if (!*p)
 219                        continue;
 220
 221                token = match_token(p, tokens, args);
 222                switch (token) {
 223                case Opt_logbufs:
 224                        if (match_int(args, &mp->m_logbufs))
 225                                return -EINVAL;
 226                        break;
 227                case Opt_logbsize:
 228                        if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
 229                                return -EINVAL;
 230                        break;
 231                case Opt_logdev:
 232                        kfree(mp->m_logname);
 233                        mp->m_logname = match_strdup(args);
 234                        if (!mp->m_logname)
 235                                return -ENOMEM;
 236                        break;
 237                case Opt_mtpt:
 238                        xfs_warn(mp, "%s option not allowed on this system", p);
 239                        return -EINVAL;
 240                case Opt_rtdev:
 241                        kfree(mp->m_rtname);
 242                        mp->m_rtname = match_strdup(args);
 243                        if (!mp->m_rtname)
 244                                return -ENOMEM;
 245                        break;
 246                case Opt_allocsize:
 247                case Opt_biosize:
 248                        if (suffix_kstrtoint(args, 10, &iosize))
 249                                return -EINVAL;
 250                        iosizelog = ffs(iosize) - 1;
 251                        break;
 252                case Opt_grpid:
 253                case Opt_bsdgroups:
 254                        mp->m_flags |= XFS_MOUNT_GRPID;
 255                        break;
 256                case Opt_nogrpid:
 257                case Opt_sysvgroups:
 258                        mp->m_flags &= ~XFS_MOUNT_GRPID;
 259                        break;
 260                case Opt_wsync:
 261                        mp->m_flags |= XFS_MOUNT_WSYNC;
 262                        break;
 263                case Opt_norecovery:
 264                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
 265                        break;
 266                case Opt_noalign:
 267                        mp->m_flags |= XFS_MOUNT_NOALIGN;
 268                        break;
 269                case Opt_swalloc:
 270                        mp->m_flags |= XFS_MOUNT_SWALLOC;
 271                        break;
 272                case Opt_sunit:
 273                        if (match_int(args, &dsunit))
 274                                return -EINVAL;
 275                        break;
 276                case Opt_swidth:
 277                        if (match_int(args, &dswidth))
 278                                return -EINVAL;
 279                        break;
 280                case Opt_inode32:
 281                        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 282                        break;
 283                case Opt_inode64:
 284                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 285                        break;
 286                case Opt_nouuid:
 287                        mp->m_flags |= XFS_MOUNT_NOUUID;
 288                        break;
 289                case Opt_ikeep:
 290                        mp->m_flags |= XFS_MOUNT_IKEEP;
 291                        break;
 292                case Opt_noikeep:
 293                        mp->m_flags &= ~XFS_MOUNT_IKEEP;
 294                        break;
 295                case Opt_largeio:
 296                        mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
 297                        break;
 298                case Opt_nolargeio:
 299                        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 300                        break;
 301                case Opt_attr2:
 302                        mp->m_flags |= XFS_MOUNT_ATTR2;
 303                        break;
 304                case Opt_noattr2:
 305                        mp->m_flags &= ~XFS_MOUNT_ATTR2;
 306                        mp->m_flags |= XFS_MOUNT_NOATTR2;
 307                        break;
 308                case Opt_filestreams:
 309                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
 310                        break;
 311                case Opt_noquota:
 312                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
 313                        mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
 314                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
 315                        break;
 316                case Opt_quota:
 317                case Opt_uquota:
 318                case Opt_usrquota:
 319                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
 320                                         XFS_UQUOTA_ENFD);
 321                        break;
 322                case Opt_qnoenforce:
 323                case Opt_uqnoenforce:
 324                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
 325                        mp->m_qflags &= ~XFS_UQUOTA_ENFD;
 326                        break;
 327                case Opt_pquota:
 328                case Opt_prjquota:
 329                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
 330                                         XFS_PQUOTA_ENFD);
 331                        break;
 332                case Opt_pqnoenforce:
 333                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
 334                        mp->m_qflags &= ~XFS_PQUOTA_ENFD;
 335                        break;
 336                case Opt_gquota:
 337                case Opt_grpquota:
 338                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
 339                                         XFS_GQUOTA_ENFD);
 340                        break;
 341                case Opt_gqnoenforce:
 342                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 343                        mp->m_qflags &= ~XFS_GQUOTA_ENFD;
 344                        break;
 345                case Opt_discard:
 346                        mp->m_flags |= XFS_MOUNT_DISCARD;
 347                        break;
 348                case Opt_nodiscard:
 349                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
 350                        break;
 351#ifdef CONFIG_FS_DAX
 352                case Opt_dax:
 353                        mp->m_flags |= XFS_MOUNT_DAX;
 354                        break;
 355#endif
 356                default:
 357                        xfs_warn(mp, "unknown mount option [%s].", p);
 358                        return -EINVAL;
 359                }
 360        }
 361
 362        /*
 363         * no recovery flag requires a read-only mount
 364         */
 365        if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
 366            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
 367                xfs_warn(mp, "no-recovery mounts must be read-only.");
 368                return -EINVAL;
 369        }
 370
 371        if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
 372                xfs_warn(mp,
 373        "sunit and swidth options incompatible with the noalign option");
 374                return -EINVAL;
 375        }
 376
 377#ifndef CONFIG_XFS_QUOTA
 378        if (XFS_IS_QUOTA_RUNNING(mp)) {
 379                xfs_warn(mp, "quota support not available in this kernel.");
 380                return -EINVAL;
 381        }
 382#endif
 383
 384        if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
 385                xfs_warn(mp, "sunit and swidth must be specified together");
 386                return -EINVAL;
 387        }
 388
 389        if (dsunit && (dswidth % dsunit != 0)) {
 390                xfs_warn(mp,
 391        "stripe width (%d) must be a multiple of the stripe unit (%d)",
 392                        dswidth, dsunit);
 393                return -EINVAL;
 394        }
 395
 396done:
 397        if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
 398                /*
 399                 * At this point the superblock has not been read
 400                 * in, therefore we do not know the block size.
 401                 * Before the mount call ends we will convert
 402                 * these to FSBs.
 403                 */
 404                mp->m_dalign = dsunit;
 405                mp->m_swidth = dswidth;
 406        }
 407
 408        if (mp->m_logbufs != -1 &&
 409            mp->m_logbufs != 0 &&
 410            (mp->m_logbufs < XLOG_MIN_ICLOGS ||
 411             mp->m_logbufs > XLOG_MAX_ICLOGS)) {
 412                xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
 413                        mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
 414                return -EINVAL;
 415        }
 416        if (mp->m_logbsize != -1 &&
 417            mp->m_logbsize !=  0 &&
 418            (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
 419             mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
 420             !is_power_of_2(mp->m_logbsize))) {
 421                xfs_warn(mp,
 422                        "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
 423                        mp->m_logbsize);
 424                return -EINVAL;
 425        }
 426
 427        if (iosizelog) {
 428                if (iosizelog > XFS_MAX_IO_LOG ||
 429                    iosizelog < XFS_MIN_IO_LOG) {
 430                        xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
 431                                iosizelog, XFS_MIN_IO_LOG,
 432                                XFS_MAX_IO_LOG);
 433                        return -EINVAL;
 434                }
 435
 436                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
 437                mp->m_readio_log = iosizelog;
 438                mp->m_writeio_log = iosizelog;
 439        }
 440
 441        return 0;
 442}
 443
 444struct proc_xfs_info {
 445        uint64_t        flag;
 446        char            *str;
 447};
 448
 449STATIC int
 450xfs_showargs(
 451        struct xfs_mount        *mp,
 452        struct seq_file         *m)
 453{
 454        static struct proc_xfs_info xfs_info_set[] = {
 455                /* the few simple ones we can get from the mount struct */
 456                { XFS_MOUNT_IKEEP,              ",ikeep" },
 457                { XFS_MOUNT_WSYNC,              ",wsync" },
 458                { XFS_MOUNT_NOALIGN,            ",noalign" },
 459                { XFS_MOUNT_SWALLOC,            ",swalloc" },
 460                { XFS_MOUNT_NOUUID,             ",nouuid" },
 461                { XFS_MOUNT_NORECOVERY,         ",norecovery" },
 462                { XFS_MOUNT_ATTR2,              ",attr2" },
 463                { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
 464                { XFS_MOUNT_GRPID,              ",grpid" },
 465                { XFS_MOUNT_DISCARD,            ",discard" },
 466                { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
 467                { XFS_MOUNT_DAX,                ",dax" },
 468                { 0, NULL }
 469        };
 470        static struct proc_xfs_info xfs_info_unset[] = {
 471                /* the few simple ones we can get from the mount struct */
 472                { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
 473                { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
 474                { 0, NULL }
 475        };
 476        struct proc_xfs_info    *xfs_infop;
 477
 478        for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
 479                if (mp->m_flags & xfs_infop->flag)
 480                        seq_puts(m, xfs_infop->str);
 481        }
 482        for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
 483                if (!(mp->m_flags & xfs_infop->flag))
 484                        seq_puts(m, xfs_infop->str);
 485        }
 486
 487        if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
 488                seq_printf(m, ",allocsize=%dk",
 489                                (int)(1 << mp->m_writeio_log) >> 10);
 490
 491        if (mp->m_logbufs > 0)
 492                seq_printf(m, ",logbufs=%d", mp->m_logbufs);
 493        if (mp->m_logbsize > 0)
 494                seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
 495
 496        if (mp->m_logname)
 497                seq_show_option(m, "logdev", mp->m_logname);
 498        if (mp->m_rtname)
 499                seq_show_option(m, "rtdev", mp->m_rtname);
 500
 501        if (mp->m_dalign > 0)
 502                seq_printf(m, ",sunit=%d",
 503                                (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
 504        if (mp->m_swidth > 0)
 505                seq_printf(m, ",swidth=%d",
 506                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 507
 508        if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
 509                seq_puts(m, ",usrquota");
 510        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 511                seq_puts(m, ",uqnoenforce");
 512
 513        if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 514                if (mp->m_qflags & XFS_PQUOTA_ENFD)
 515                        seq_puts(m, ",prjquota");
 516                else
 517                        seq_puts(m, ",pqnoenforce");
 518        }
 519        if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 520                if (mp->m_qflags & XFS_GQUOTA_ENFD)
 521                        seq_puts(m, ",grpquota");
 522                else
 523                        seq_puts(m, ",gqnoenforce");
 524        }
 525
 526        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 527                seq_puts(m, ",noquota");
 528
 529        return 0;
 530}
 531static uint64_t
 532xfs_max_file_offset(
 533        unsigned int            blockshift)
 534{
 535        unsigned int            pagefactor = 1;
 536        unsigned int            bitshift = BITS_PER_LONG - 1;
 537
 538        /* Figure out maximum filesize, on Linux this can depend on
 539         * the filesystem blocksize (on 32 bit platforms).
 540         * __block_write_begin does this in an [unsigned] long...
 541         *      page->index << (PAGE_SHIFT - bbits)
 542         * So, for page sized blocks (4K on 32 bit platforms),
 543         * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
 544         *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 545         * but for smaller blocksizes it is less (bbits = log2 bsize).
 546         * Note1: get_block_t takes a long (implicit cast from above)
 547         * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
 548         * can optionally convert the [unsigned] long from above into
 549         * an [unsigned] long long.
 550         */
 551
 552#if BITS_PER_LONG == 32
 553# if defined(CONFIG_LBDAF)
 554        ASSERT(sizeof(sector_t) == 8);
 555        pagefactor = PAGE_SIZE;
 556        bitshift = BITS_PER_LONG;
 557# else
 558        pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
 559# endif
 560#endif
 561
 562        return (((uint64_t)pagefactor) << bitshift) - 1;
 563}
 564
 565/*
 566 * Set parameters for inode allocation heuristics, taking into account
 567 * filesystem size and inode32/inode64 mount options; i.e. specifically
 568 * whether or not XFS_MOUNT_SMALL_INUMS is set.
 569 *
 570 * Inode allocation patterns are altered only if inode32 is requested
 571 * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 572 * If altered, XFS_MOUNT_32BITINODES is set as well.
 573 *
 574 * An agcount independent of that in the mount structure is provided
 575 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 576 * to the potentially higher ag count.
 577 *
 578 * Returns the maximum AG index which may contain inodes.
 579 */
 580xfs_agnumber_t
 581xfs_set_inode_alloc(
 582        struct xfs_mount *mp,
 583        xfs_agnumber_t  agcount)
 584{
 585        xfs_agnumber_t  index;
 586        xfs_agnumber_t  maxagi = 0;
 587        xfs_sb_t        *sbp = &mp->m_sb;
 588        xfs_agnumber_t  max_metadata;
 589        xfs_agino_t     agino;
 590        xfs_ino_t       ino;
 591
 592        /*
 593         * Calculate how much should be reserved for inodes to meet
 594         * the max inode percentage.  Used only for inode32.
 595         */
 596        if (mp->m_maxicount) {
 597                uint64_t        icount;
 598
 599                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 600                do_div(icount, 100);
 601                icount += sbp->sb_agblocks - 1;
 602                do_div(icount, sbp->sb_agblocks);
 603                max_metadata = icount;
 604        } else {
 605                max_metadata = agcount;
 606        }
 607
 608        /* Get the last possible inode in the filesystem */
 609        agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
 610        ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
 611
 612        /*
 613         * If user asked for no more than 32-bit inodes, and the fs is
 614         * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
 615         * the allocator to accommodate the request.
 616         */
 617        if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 618                mp->m_flags |= XFS_MOUNT_32BITINODES;
 619        else
 620                mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 621
 622        for (index = 0; index < agcount; index++) {
 623                struct xfs_perag        *pag;
 624
 625                ino = XFS_AGINO_TO_INO(mp, index, agino);
 626
 627                pag = xfs_perag_get(mp, index);
 628
 629                if (mp->m_flags & XFS_MOUNT_32BITINODES) {
 630                        if (ino > XFS_MAXINUMBER_32) {
 631                                pag->pagi_inodeok = 0;
 632                                pag->pagf_metadata = 0;
 633                        } else {
 634                                pag->pagi_inodeok = 1;
 635                                maxagi++;
 636                                if (index < max_metadata)
 637                                        pag->pagf_metadata = 1;
 638                                else
 639                                        pag->pagf_metadata = 0;
 640                        }
 641                } else {
 642                        pag->pagi_inodeok = 1;
 643                        pag->pagf_metadata = 0;
 644                }
 645
 646                xfs_perag_put(pag);
 647        }
 648
 649        return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 650}
 651
 652STATIC int
 653xfs_blkdev_get(
 654        xfs_mount_t             *mp,
 655        const char              *name,
 656        struct block_device     **bdevp)
 657{
 658        int                     error = 0;
 659
 660        *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 661                                    mp);
 662        if (IS_ERR(*bdevp)) {
 663                error = PTR_ERR(*bdevp);
 664                xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
 665        }
 666
 667        return error;
 668}
 669
 670STATIC void
 671xfs_blkdev_put(
 672        struct block_device     *bdev)
 673{
 674        if (bdev)
 675                blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 676}
 677
 678void
 679xfs_blkdev_issue_flush(
 680        xfs_buftarg_t           *buftarg)
 681{
 682        blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
 683}
 684
 685STATIC void
 686xfs_close_devices(
 687        struct xfs_mount        *mp)
 688{
 689        struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
 690
 691        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 692                struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 693                struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 694
 695                xfs_free_buftarg(mp->m_logdev_targp);
 696                xfs_blkdev_put(logdev);
 697                fs_put_dax(dax_logdev);
 698        }
 699        if (mp->m_rtdev_targp) {
 700                struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 701                struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 702
 703                xfs_free_buftarg(mp->m_rtdev_targp);
 704                xfs_blkdev_put(rtdev);
 705                fs_put_dax(dax_rtdev);
 706        }
 707        xfs_free_buftarg(mp->m_ddev_targp);
 708        fs_put_dax(dax_ddev);
 709}
 710
 711/*
 712 * The file system configurations are:
 713 *      (1) device (partition) with data and internal log
 714 *      (2) logical volume with data and log subvolumes.
 715 *      (3) logical volume with data, log, and realtime subvolumes.
 716 *
 717 * We only have to handle opening the log and realtime volumes here if
 718 * they are present.  The data subvolume has already been opened by
 719 * get_sb_bdev() and is stored in sb->s_bdev.
 720 */
 721STATIC int
 722xfs_open_devices(
 723        struct xfs_mount        *mp)
 724{
 725        struct block_device     *ddev = mp->m_super->s_bdev;
 726        struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
 727        struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
 728        struct block_device     *logdev = NULL, *rtdev = NULL;
 729        int                     error;
 730
 731        /*
 732         * Open real time and log devices - order is important.
 733         */
 734        if (mp->m_logname) {
 735                error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 736                if (error)
 737                        goto out;
 738                dax_logdev = fs_dax_get_by_bdev(logdev);
 739        }
 740
 741        if (mp->m_rtname) {
 742                error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
 743                if (error)
 744                        goto out_close_logdev;
 745
 746                if (rtdev == ddev || rtdev == logdev) {
 747                        xfs_warn(mp,
 748        "Cannot mount filesystem with identical rtdev and ddev/logdev.");
 749                        error = -EINVAL;
 750                        goto out_close_rtdev;
 751                }
 752                dax_rtdev = fs_dax_get_by_bdev(rtdev);
 753        }
 754
 755        /*
 756         * Setup xfs_mount buffer target pointers
 757         */
 758        error = -ENOMEM;
 759        mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
 760        if (!mp->m_ddev_targp)
 761                goto out_close_rtdev;
 762
 763        if (rtdev) {
 764                mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
 765                if (!mp->m_rtdev_targp)
 766                        goto out_free_ddev_targ;
 767        }
 768
 769        if (logdev && logdev != ddev) {
 770                mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
 771                if (!mp->m_logdev_targp)
 772                        goto out_free_rtdev_targ;
 773        } else {
 774                mp->m_logdev_targp = mp->m_ddev_targp;
 775        }
 776
 777        return 0;
 778
 779 out_free_rtdev_targ:
 780        if (mp->m_rtdev_targp)
 781                xfs_free_buftarg(mp->m_rtdev_targp);
 782 out_free_ddev_targ:
 783        xfs_free_buftarg(mp->m_ddev_targp);
 784 out_close_rtdev:
 785        xfs_blkdev_put(rtdev);
 786        fs_put_dax(dax_rtdev);
 787 out_close_logdev:
 788        if (logdev && logdev != ddev) {
 789                xfs_blkdev_put(logdev);
 790                fs_put_dax(dax_logdev);
 791        }
 792 out:
 793        fs_put_dax(dax_ddev);
 794        return error;
 795}
 796
 797/*
 798 * Setup xfs_mount buffer target pointers based on superblock
 799 */
 800STATIC int
 801xfs_setup_devices(
 802        struct xfs_mount        *mp)
 803{
 804        int                     error;
 805
 806        error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 807        if (error)
 808                return error;
 809
 810        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 811                unsigned int    log_sector_size = BBSIZE;
 812
 813                if (xfs_sb_version_hassector(&mp->m_sb))
 814                        log_sector_size = mp->m_sb.sb_logsectsize;
 815                error = xfs_setsize_buftarg(mp->m_logdev_targp,
 816                                            log_sector_size);
 817                if (error)
 818                        return error;
 819        }
 820        if (mp->m_rtdev_targp) {
 821                error = xfs_setsize_buftarg(mp->m_rtdev_targp,
 822                                            mp->m_sb.sb_sectsize);
 823                if (error)
 824                        return error;
 825        }
 826
 827        return 0;
 828}
 829
 830STATIC int
 831xfs_init_mount_workqueues(
 832        struct xfs_mount        *mp)
 833{
 834        mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
 835                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
 836        if (!mp->m_buf_workqueue)
 837                goto out;
 838
 839        mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
 840                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 841        if (!mp->m_data_workqueue)
 842                goto out_destroy_buf;
 843
 844        mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
 845                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 846        if (!mp->m_unwritten_workqueue)
 847                goto out_destroy_data_iodone_queue;
 848
 849        mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
 850                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 851        if (!mp->m_cil_workqueue)
 852                goto out_destroy_unwritten;
 853
 854        mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
 855                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 856        if (!mp->m_reclaim_workqueue)
 857                goto out_destroy_cil;
 858
 859        mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
 860                        WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
 861                        mp->m_fsname);
 862        if (!mp->m_log_workqueue)
 863                goto out_destroy_reclaim;
 864
 865        mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
 866                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 867        if (!mp->m_eofblocks_workqueue)
 868                goto out_destroy_log;
 869
 870        mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
 871                                               mp->m_fsname);
 872        if (!mp->m_sync_workqueue)
 873                goto out_destroy_eofb;
 874
 875        return 0;
 876
 877out_destroy_eofb:
 878        destroy_workqueue(mp->m_eofblocks_workqueue);
 879out_destroy_log:
 880        destroy_workqueue(mp->m_log_workqueue);
 881out_destroy_reclaim:
 882        destroy_workqueue(mp->m_reclaim_workqueue);
 883out_destroy_cil:
 884        destroy_workqueue(mp->m_cil_workqueue);
 885out_destroy_unwritten:
 886        destroy_workqueue(mp->m_unwritten_workqueue);
 887out_destroy_data_iodone_queue:
 888        destroy_workqueue(mp->m_data_workqueue);
 889out_destroy_buf:
 890        destroy_workqueue(mp->m_buf_workqueue);
 891out:
 892        return -ENOMEM;
 893}
 894
 895STATIC void
 896xfs_destroy_mount_workqueues(
 897        struct xfs_mount        *mp)
 898{
 899        destroy_workqueue(mp->m_sync_workqueue);
 900        destroy_workqueue(mp->m_eofblocks_workqueue);
 901        destroy_workqueue(mp->m_log_workqueue);
 902        destroy_workqueue(mp->m_reclaim_workqueue);
 903        destroy_workqueue(mp->m_cil_workqueue);
 904        destroy_workqueue(mp->m_data_workqueue);
 905        destroy_workqueue(mp->m_unwritten_workqueue);
 906        destroy_workqueue(mp->m_buf_workqueue);
 907}
 908
 909/*
 910 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 911 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 912 * for IO to complete so that we effectively throttle multiple callers to the
 913 * rate at which IO is completing.
 914 */
 915void
 916xfs_flush_inodes(
 917        struct xfs_mount        *mp)
 918{
 919        struct super_block      *sb = mp->m_super;
 920
 921        if (down_read_trylock(&sb->s_umount)) {
 922                sync_inodes_sb(sb);
 923                up_read(&sb->s_umount);
 924        }
 925}
 926
 927/* Catch misguided souls that try to use this interface on XFS */
 928STATIC struct inode *
 929xfs_fs_alloc_inode(
 930        struct super_block      *sb)
 931{
 932        BUG();
 933        return NULL;
 934}
 935
 936/*
 937 * Now that the generic code is guaranteed not to be accessing
 938 * the linux inode, we can inactivate and reclaim the inode.
 939 */
 940STATIC void
 941xfs_fs_destroy_inode(
 942        struct inode            *inode)
 943{
 944        struct xfs_inode        *ip = XFS_I(inode);
 945
 946        trace_xfs_destroy_inode(ip);
 947
 948        ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 949        XFS_STATS_INC(ip->i_mount, vn_rele);
 950        XFS_STATS_INC(ip->i_mount, vn_remove);
 951
 952        xfs_inactive(ip);
 953
 954        ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
 955        XFS_STATS_INC(ip->i_mount, vn_reclaim);
 956
 957        /*
 958         * We should never get here with one of the reclaim flags already set.
 959         */
 960        ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 961        ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 962
 963        /*
 964         * We always use background reclaim here because even if the
 965         * inode is clean, it still may be under IO and hence we have
 966         * to take the flush lock. The background reclaim path handles
 967         * this more efficiently than we can here, so simply let background
 968         * reclaim tear down all inodes.
 969         */
 970        xfs_inode_set_reclaim_tag(ip);
 971}
 972
 973static void
 974xfs_fs_dirty_inode(
 975        struct inode                    *inode,
 976        int                             flag)
 977{
 978        struct xfs_inode                *ip = XFS_I(inode);
 979        struct xfs_mount                *mp = ip->i_mount;
 980        struct xfs_trans                *tp;
 981
 982        if (!(inode->i_sb->s_flags & SB_LAZYTIME))
 983                return;
 984        if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
 985                return;
 986
 987        if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
 988                return;
 989        xfs_ilock(ip, XFS_ILOCK_EXCL);
 990        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 991        xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
 992        xfs_trans_commit(tp);
 993}
 994
 995/*
 996 * Slab object creation initialisation for the XFS inode.
 997 * This covers only the idempotent fields in the XFS inode;
 998 * all other fields need to be initialised on allocation
 999 * from the slab. This avoids the need to repeatedly initialise
1000 * fields in the xfs inode that left in the initialise state
1001 * when freeing the inode.
1002 */
1003STATIC void
1004xfs_fs_inode_init_once(
1005        void                    *inode)
1006{
1007        struct xfs_inode        *ip = inode;
1008
1009        memset(ip, 0, sizeof(struct xfs_inode));
1010
1011        /* vfs inode */
1012        inode_init_once(VFS_I(ip));
1013
1014        /* xfs inode */
1015        atomic_set(&ip->i_pincount, 0);
1016        spin_lock_init(&ip->i_flags_lock);
1017
1018        mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1019                     "xfsino", ip->i_ino);
1020        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1021                     "xfsino", ip->i_ino);
1022}
1023
1024/*
1025 * We do an unlocked check for XFS_IDONTCACHE here because we are already
1026 * serialised against cache hits here via the inode->i_lock and igrab() in
1027 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
1028 * racing with us, and it avoids needing to grab a spinlock here for every inode
1029 * we drop the final reference on.
1030 */
1031STATIC int
1032xfs_fs_drop_inode(
1033        struct inode            *inode)
1034{
1035        struct xfs_inode        *ip = XFS_I(inode);
1036
1037        /*
1038         * If this unlinked inode is in the middle of recovery, don't
1039         * drop the inode just yet; log recovery will take care of
1040         * that.  See the comment for this inode flag.
1041         */
1042        if (ip->i_flags & XFS_IRECOVERY) {
1043                ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
1044                return 0;
1045        }
1046
1047        return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
1048}
1049
1050STATIC void
1051xfs_free_fsname(
1052        struct xfs_mount        *mp)
1053{
1054        kfree(mp->m_fsname);
1055        kfree(mp->m_rtname);
1056        kfree(mp->m_logname);
1057}
1058
1059STATIC int
1060xfs_fs_sync_fs(
1061        struct super_block      *sb,
1062        int                     wait)
1063{
1064        struct xfs_mount        *mp = XFS_M(sb);
1065
1066        /*
1067         * Doing anything during the async pass would be counterproductive.
1068         */
1069        if (!wait)
1070                return 0;
1071
1072        xfs_log_force(mp, XFS_LOG_SYNC);
1073        if (laptop_mode) {
1074                /*
1075                 * The disk must be active because we're syncing.
1076                 * We schedule log work now (now that the disk is
1077                 * active) instead of later (when it might not be).
1078                 */
1079                flush_delayed_work(&mp->m_log->l_work);
1080        }
1081
1082        return 0;
1083}
1084
1085STATIC int
1086xfs_fs_statfs(
1087        struct dentry           *dentry,
1088        struct kstatfs          *statp)
1089{
1090        struct xfs_mount        *mp = XFS_M(dentry->d_sb);
1091        xfs_sb_t                *sbp = &mp->m_sb;
1092        struct xfs_inode        *ip = XFS_I(d_inode(dentry));
1093        uint64_t                fakeinos, id;
1094        uint64_t                icount;
1095        uint64_t                ifree;
1096        uint64_t                fdblocks;
1097        xfs_extlen_t            lsize;
1098        int64_t                 ffree;
1099
1100        statp->f_type = XFS_SB_MAGIC;
1101        statp->f_namelen = MAXNAMELEN - 1;
1102
1103        id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
1104        statp->f_fsid.val[0] = (u32)id;
1105        statp->f_fsid.val[1] = (u32)(id >> 32);
1106
1107        icount = percpu_counter_sum(&mp->m_icount);
1108        ifree = percpu_counter_sum(&mp->m_ifree);
1109        fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1110
1111        spin_lock(&mp->m_sb_lock);
1112        statp->f_bsize = sbp->sb_blocksize;
1113        lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1114        statp->f_blocks = sbp->sb_dblocks - lsize;
1115        spin_unlock(&mp->m_sb_lock);
1116
1117        statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1118        statp->f_bavail = statp->f_bfree;
1119
1120        fakeinos = statp->f_bfree << sbp->sb_inopblog;
1121        statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
1122        if (mp->m_maxicount)
1123                statp->f_files = min_t(typeof(statp->f_files),
1124                                        statp->f_files,
1125                                        mp->m_maxicount);
1126
1127        /* If sb_icount overshot maxicount, report actual allocation */
1128        statp->f_files = max_t(typeof(statp->f_files),
1129                                        statp->f_files,
1130                                        sbp->sb_icount);
1131
1132        /* make sure statp->f_ffree does not underflow */
1133        ffree = statp->f_files - (icount - ifree);
1134        statp->f_ffree = max_t(int64_t, ffree, 0);
1135
1136
1137        if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1138            ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
1139                              (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
1140                xfs_qm_statvfs(ip, statp);
1141
1142        if (XFS_IS_REALTIME_MOUNT(mp) &&
1143            (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
1144                statp->f_blocks = sbp->sb_rblocks;
1145                statp->f_bavail = statp->f_bfree =
1146                        sbp->sb_frextents * sbp->sb_rextsize;
1147        }
1148
1149        return 0;
1150}
1151
1152STATIC void
1153xfs_save_resvblks(struct xfs_mount *mp)
1154{
1155        uint64_t resblks = 0;
1156
1157        mp->m_resblks_save = mp->m_resblks;
1158        xfs_reserve_blocks(mp, &resblks, NULL);
1159}
1160
1161STATIC void
1162xfs_restore_resvblks(struct xfs_mount *mp)
1163{
1164        uint64_t resblks;
1165
1166        if (mp->m_resblks_save) {
1167                resblks = mp->m_resblks_save;
1168                mp->m_resblks_save = 0;
1169        } else
1170                resblks = xfs_default_resblks(mp);
1171
1172        xfs_reserve_blocks(mp, &resblks, NULL);
1173}
1174
1175/*
1176 * Trigger writeback of all the dirty metadata in the file system.
1177 *
1178 * This ensures that the metadata is written to their location on disk rather
1179 * than just existing in transactions in the log. This means after a quiesce
1180 * there is no log replay required to write the inodes to disk - this is the
1181 * primary difference between a sync and a quiesce.
1182 *
1183 * Note: xfs_log_quiesce() stops background log work - the callers must ensure
1184 * it is started again when appropriate.
1185 */
1186void
1187xfs_quiesce_attr(
1188        struct xfs_mount        *mp)
1189{
1190        int     error = 0;
1191
1192        /* wait for all modifications to complete */
1193        while (atomic_read(&mp->m_active_trans) > 0)
1194                delay(100);
1195
1196        /* force the log to unpin objects from the now complete transactions */
1197        xfs_log_force(mp, XFS_LOG_SYNC);
1198
1199        /* reclaim inodes to do any IO before the freeze completes */
1200        xfs_reclaim_inodes(mp, 0);
1201        xfs_reclaim_inodes(mp, SYNC_WAIT);
1202
1203        /* Push the superblock and write an unmount record */
1204        error = xfs_log_sbcount(mp);
1205        if (error)
1206                xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
1207                                "Frozen image may not be consistent.");
1208        /*
1209         * Just warn here till VFS can correctly support
1210         * read-only remount without racing.
1211         */
1212        WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1213
1214        xfs_log_quiesce(mp);
1215}
1216
1217STATIC int
1218xfs_test_remount_options(
1219        struct super_block      *sb,
1220        char                    *options)
1221{
1222        int                     error = 0;
1223        struct xfs_mount        *tmp_mp;
1224
1225        tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
1226        if (!tmp_mp)
1227                return -ENOMEM;
1228
1229        tmp_mp->m_super = sb;
1230        error = xfs_parseargs(tmp_mp, options);
1231        xfs_free_fsname(tmp_mp);
1232        kmem_free(tmp_mp);
1233
1234        return error;
1235}
1236
1237STATIC int
1238xfs_fs_remount(
1239        struct super_block      *sb,
1240        int                     *flags,
1241        char                    *options)
1242{
1243        struct xfs_mount        *mp = XFS_M(sb);
1244        xfs_sb_t                *sbp = &mp->m_sb;
1245        substring_t             args[MAX_OPT_ARGS];
1246        char                    *p;
1247        int                     error;
1248
1249        /* First, check for complete junk; i.e. invalid options */
1250        error = xfs_test_remount_options(sb, options);
1251        if (error)
1252                return error;
1253
1254        sync_filesystem(sb);
1255        while ((p = strsep(&options, ",")) != NULL) {
1256                int token;
1257
1258                if (!*p)
1259                        continue;
1260
1261                token = match_token(p, tokens, args);
1262                switch (token) {
1263                case Opt_inode64:
1264                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1265                        mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1266                        break;
1267                case Opt_inode32:
1268                        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1269                        mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1270                        break;
1271                default:
1272                        /*
1273                         * Logically we would return an error here to prevent
1274                         * users from believing they might have changed
1275                         * mount options using remount which can't be changed.
1276                         *
1277                         * But unfortunately mount(8) adds all options from
1278                         * mtab and fstab to the mount arguments in some cases
1279                         * so we can't blindly reject options, but have to
1280                         * check for each specified option if it actually
1281                         * differs from the currently set option and only
1282                         * reject it if that's the case.
1283                         *
1284                         * Until that is implemented we return success for
1285                         * every remount request, and silently ignore all
1286                         * options that we can't actually change.
1287                         */
1288#if 0
1289                        xfs_info(mp,
1290                "mount option \"%s\" not supported for remount", p);
1291                        return -EINVAL;
1292#else
1293                        break;
1294#endif
1295                }
1296        }
1297
1298        /* ro -> rw */
1299        if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
1300                if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
1301                        xfs_warn(mp,
1302                "ro->rw transition prohibited on norecovery mount");
1303                        return -EINVAL;
1304                }
1305
1306                if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
1307                    xfs_sb_has_ro_compat_feature(sbp,
1308                                        XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1309                        xfs_warn(mp,
1310"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1311                                (sbp->sb_features_ro_compat &
1312                                        XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1313                        return -EINVAL;
1314                }
1315
1316                mp->m_flags &= ~XFS_MOUNT_RDONLY;
1317
1318                /*
1319                 * If this is the first remount to writeable state we
1320                 * might have some superblock changes to update.
1321                 */
1322                if (mp->m_update_sb) {
1323                        error = xfs_sync_sb(mp, false);
1324                        if (error) {
1325                                xfs_warn(mp, "failed to write sb changes");
1326                                return error;
1327                        }
1328                        mp->m_update_sb = false;
1329                }
1330
1331                /*
1332                 * Fill out the reserve pool if it is empty. Use the stashed
1333                 * value if it is non-zero, otherwise go with the default.
1334                 */
1335                xfs_restore_resvblks(mp);
1336                xfs_log_work_queue(mp);
1337
1338                /* Recover any CoW blocks that never got remapped. */
1339                error = xfs_reflink_recover_cow(mp);
1340                if (error) {
1341                        xfs_err(mp,
1342        "Error %d recovering leftover CoW allocations.", error);
1343                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1344                        return error;
1345                }
1346                xfs_icache_enable_reclaim(mp);
1347
1348                /* Create the per-AG metadata reservation pool .*/
1349                error = xfs_fs_reserve_ag_blocks(mp);
1350                if (error && error != -ENOSPC)
1351                        return error;
1352        }
1353
1354        /* rw -> ro */
1355        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1356                /*
1357                 * Cancel background eofb scanning so it cannot race with the
1358                 * final log force+buftarg wait and deadlock the remount.
1359                 */
1360                xfs_icache_disable_reclaim(mp);
1361
1362                /* Get rid of any leftover CoW reservations... */
1363                error = xfs_icache_free_cowblocks(mp, NULL);
1364                if (error) {
1365                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1366                        return error;
1367                }
1368
1369                /* Free the per-AG metadata reservation pool. */
1370                error = xfs_fs_unreserve_ag_blocks(mp);
1371                if (error) {
1372                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1373                        return error;
1374                }
1375
1376                /*
1377                 * Before we sync the metadata, we need to free up the reserve
1378                 * block pool so that the used block count in the superblock on
1379                 * disk is correct at the end of the remount. Stash the current
1380                 * reserve pool size so that if we get remounted rw, we can
1381                 * return it to the same size.
1382                 */
1383                xfs_save_resvblks(mp);
1384
1385                xfs_quiesce_attr(mp);
1386                mp->m_flags |= XFS_MOUNT_RDONLY;
1387        }
1388
1389        return 0;
1390}
1391
1392/*
1393 * Second stage of a freeze. The data is already frozen so we only
1394 * need to take care of the metadata. Once that's done sync the superblock
1395 * to the log to dirty it in case of a crash while frozen. This ensures that we
1396 * will recover the unlinked inode lists on the next mount.
1397 */
1398STATIC int
1399xfs_fs_freeze(
1400        struct super_block      *sb)
1401{
1402        struct xfs_mount        *mp = XFS_M(sb);
1403
1404        xfs_icache_disable_reclaim(mp);
1405        xfs_save_resvblks(mp);
1406        xfs_quiesce_attr(mp);
1407        return xfs_sync_sb(mp, true);
1408}
1409
1410STATIC int
1411xfs_fs_unfreeze(
1412        struct super_block      *sb)
1413{
1414        struct xfs_mount        *mp = XFS_M(sb);
1415
1416        xfs_restore_resvblks(mp);
1417        xfs_log_work_queue(mp);
1418        xfs_icache_enable_reclaim(mp);
1419        return 0;
1420}
1421
1422STATIC int
1423xfs_fs_show_options(
1424        struct seq_file         *m,
1425        struct dentry           *root)
1426{
1427        return xfs_showargs(XFS_M(root->d_sb), m);
1428}
1429
1430/*
1431 * This function fills in xfs_mount_t fields based on mount args.
1432 * Note: the superblock _has_ now been read in.
1433 */
1434STATIC int
1435xfs_finish_flags(
1436        struct xfs_mount        *mp)
1437{
1438        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1439
1440        /* Fail a mount where the logbuf is smaller than the log stripe */
1441        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1442                if (mp->m_logbsize <= 0 &&
1443                    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1444                        mp->m_logbsize = mp->m_sb.sb_logsunit;
1445                } else if (mp->m_logbsize > 0 &&
1446                           mp->m_logbsize < mp->m_sb.sb_logsunit) {
1447                        xfs_warn(mp,
1448                "logbuf size must be greater than or equal to log stripe size");
1449                        return -EINVAL;
1450                }
1451        } else {
1452                /* Fail a mount if the logbuf is larger than 32K */
1453                if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1454                        xfs_warn(mp,
1455                "logbuf size for version 1 logs must be 16K or 32K");
1456                        return -EINVAL;
1457                }
1458        }
1459
1460        /*
1461         * V5 filesystems always use attr2 format for attributes.
1462         */
1463        if (xfs_sb_version_hascrc(&mp->m_sb) &&
1464            (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1465                xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1466                             "attr2 is always enabled for V5 filesystems.");
1467                return -EINVAL;
1468        }
1469
1470        /*
1471         * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1472         * told by noattr2 to turn it off
1473         */
1474        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1475            !(mp->m_flags & XFS_MOUNT_NOATTR2))
1476                mp->m_flags |= XFS_MOUNT_ATTR2;
1477
1478        /*
1479         * prohibit r/w mounts of read-only filesystems
1480         */
1481        if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1482                xfs_warn(mp,
1483                        "cannot mount a read-only filesystem as read-write");
1484                return -EROFS;
1485        }
1486
1487        if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
1488            (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
1489            !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1490                xfs_warn(mp,
1491                  "Super block does not support project and group quota together");
1492                return -EINVAL;
1493        }
1494
1495        return 0;
1496}
1497
1498static int
1499xfs_init_percpu_counters(
1500        struct xfs_mount        *mp)
1501{
1502        int             error;
1503
1504        error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1505        if (error)
1506                return -ENOMEM;
1507
1508        error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1509        if (error)
1510                goto free_icount;
1511
1512        error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1513        if (error)
1514                goto free_ifree;
1515
1516        return 0;
1517
1518free_ifree:
1519        percpu_counter_destroy(&mp->m_ifree);
1520free_icount:
1521        percpu_counter_destroy(&mp->m_icount);
1522        return -ENOMEM;
1523}
1524
1525void
1526xfs_reinit_percpu_counters(
1527        struct xfs_mount        *mp)
1528{
1529        percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1530        percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1531        percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1532}
1533
1534static void
1535xfs_destroy_percpu_counters(
1536        struct xfs_mount        *mp)
1537{
1538        percpu_counter_destroy(&mp->m_icount);
1539        percpu_counter_destroy(&mp->m_ifree);
1540        percpu_counter_destroy(&mp->m_fdblocks);
1541}
1542
1543static struct xfs_mount *
1544xfs_mount_alloc(
1545        struct super_block      *sb)
1546{
1547        struct xfs_mount        *mp;
1548
1549        mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1550        if (!mp)
1551                return NULL;
1552
1553        mp->m_super = sb;
1554        spin_lock_init(&mp->m_sb_lock);
1555        spin_lock_init(&mp->m_agirotor_lock);
1556        INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1557        spin_lock_init(&mp->m_perag_lock);
1558        mutex_init(&mp->m_growlock);
1559        atomic_set(&mp->m_active_trans, 0);
1560        INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1561        INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1562        INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1563        mp->m_kobj.kobject.kset = xfs_kset;
1564        return mp;
1565}
1566
1567
1568STATIC int
1569xfs_fs_fill_super(
1570        struct super_block      *sb,
1571        void                    *data,
1572        int                     silent)
1573{
1574        struct inode            *root;
1575        struct xfs_mount        *mp = NULL;
1576        int                     flags = 0, error = -ENOMEM;
1577
1578        /*
1579         * allocate mp and do all low-level struct initializations before we
1580         * attach it to the super
1581         */
1582        mp = xfs_mount_alloc(sb);
1583        if (!mp)
1584                goto out;
1585        sb->s_fs_info = mp;
1586
1587        error = xfs_parseargs(mp, (char *)data);
1588        if (error)
1589                goto out_free_fsname;
1590
1591        sb_min_blocksize(sb, BBSIZE);
1592        sb->s_xattr = xfs_xattr_handlers;
1593        sb->s_export_op = &xfs_export_operations;
1594#ifdef CONFIG_XFS_QUOTA
1595        sb->s_qcop = &xfs_quotactl_operations;
1596        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1597#endif
1598        sb->s_op = &xfs_super_operations;
1599
1600        /*
1601         * Delay mount work if the debug hook is set. This is debug
1602         * instrumention to coordinate simulation of xfs mount failures with
1603         * VFS superblock operations
1604         */
1605        if (xfs_globals.mount_delay) {
1606                xfs_notice(mp, "Delaying mount for %d seconds.",
1607                        xfs_globals.mount_delay);
1608                msleep(xfs_globals.mount_delay * 1000);
1609        }
1610
1611        if (silent)
1612                flags |= XFS_MFSI_QUIET;
1613
1614        error = xfs_open_devices(mp);
1615        if (error)
1616                goto out_free_fsname;
1617
1618        error = xfs_init_mount_workqueues(mp);
1619        if (error)
1620                goto out_close_devices;
1621
1622        error = xfs_init_percpu_counters(mp);
1623        if (error)
1624                goto out_destroy_workqueues;
1625
1626        /* Allocate stats memory before we do operations that might use it */
1627        mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1628        if (!mp->m_stats.xs_stats) {
1629                error = -ENOMEM;
1630                goto out_destroy_counters;
1631        }
1632
1633        error = xfs_readsb(mp, flags);
1634        if (error)
1635                goto out_free_stats;
1636
1637        error = xfs_finish_flags(mp);
1638        if (error)
1639                goto out_free_sb;
1640
1641        error = xfs_setup_devices(mp);
1642        if (error)
1643                goto out_free_sb;
1644
1645        error = xfs_filestream_mount(mp);
1646        if (error)
1647                goto out_free_sb;
1648
1649        /*
1650         * we must configure the block size in the superblock before we run the
1651         * full mount process as the mount process can lookup and cache inodes.
1652         */
1653        sb->s_magic = XFS_SB_MAGIC;
1654        sb->s_blocksize = mp->m_sb.sb_blocksize;
1655        sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1656        sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1657        sb->s_max_links = XFS_MAXLINK;
1658        sb->s_time_gran = 1;
1659        set_posix_acl_flag(sb);
1660
1661        /* version 5 superblocks support inode version counters. */
1662        if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1663                sb->s_flags |= SB_I_VERSION;
1664
1665        if (mp->m_flags & XFS_MOUNT_DAX) {
1666                bool rtdev_is_dax = false, datadev_is_dax;
1667
1668                xfs_warn(mp,
1669                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1670
1671                datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1672                        sb->s_blocksize);
1673                if (mp->m_rtdev_targp)
1674                        rtdev_is_dax = bdev_dax_supported(
1675                                mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
1676                if (!rtdev_is_dax && !datadev_is_dax) {
1677                        xfs_alert(mp,
1678                        "DAX unsupported by block device. Turning off DAX.");
1679                        mp->m_flags &= ~XFS_MOUNT_DAX;
1680                }
1681                if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1682                        xfs_alert(mp,
1683                "DAX and reflink cannot be used together!");
1684                        error = -EINVAL;
1685                        goto out_filestream_unmount;
1686                }
1687        }
1688
1689        if (mp->m_flags & XFS_MOUNT_DISCARD) {
1690                struct request_queue *q = bdev_get_queue(sb->s_bdev);
1691
1692                if (!blk_queue_discard(q)) {
1693                        xfs_warn(mp, "mounting with \"discard\" option, but "
1694                                        "the device does not support discard");
1695                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
1696                }
1697        }
1698
1699        if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1700                xfs_alert(mp,
1701        "reflink not compatible with realtime device!");
1702                error = -EINVAL;
1703                goto out_filestream_unmount;
1704        }
1705
1706        if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1707                xfs_alert(mp,
1708        "reverse mapping btree not compatible with realtime device!");
1709                error = -EINVAL;
1710                goto out_filestream_unmount;
1711        }
1712
1713        error = xfs_mountfs(mp);
1714        if (error)
1715                goto out_filestream_unmount;
1716
1717        root = igrab(VFS_I(mp->m_rootip));
1718        if (!root) {
1719                error = -ENOENT;
1720                goto out_unmount;
1721        }
1722        sb->s_root = d_make_root(root);
1723        if (!sb->s_root) {
1724                error = -ENOMEM;
1725                goto out_unmount;
1726        }
1727
1728        return 0;
1729
1730 out_filestream_unmount:
1731        xfs_filestream_unmount(mp);
1732 out_free_sb:
1733        xfs_freesb(mp);
1734 out_free_stats:
1735        free_percpu(mp->m_stats.xs_stats);
1736 out_destroy_counters:
1737        xfs_destroy_percpu_counters(mp);
1738 out_destroy_workqueues:
1739        xfs_destroy_mount_workqueues(mp);
1740 out_close_devices:
1741        xfs_close_devices(mp);
1742 out_free_fsname:
1743        sb->s_fs_info = NULL;
1744        xfs_free_fsname(mp);
1745        kfree(mp);
1746 out:
1747        return error;
1748
1749 out_unmount:
1750        xfs_filestream_unmount(mp);
1751        xfs_unmountfs(mp);
1752        goto out_free_sb;
1753}
1754
1755STATIC void
1756xfs_fs_put_super(
1757        struct super_block      *sb)
1758{
1759        struct xfs_mount        *mp = XFS_M(sb);
1760
1761        /* if ->fill_super failed, we have no mount to tear down */
1762        if (!sb->s_fs_info)
1763                return;
1764
1765        xfs_notice(mp, "Unmounting Filesystem");
1766        xfs_filestream_unmount(mp);
1767        xfs_unmountfs(mp);
1768
1769        xfs_freesb(mp);
1770        free_percpu(mp->m_stats.xs_stats);
1771        xfs_destroy_percpu_counters(mp);
1772        xfs_destroy_mount_workqueues(mp);
1773        xfs_close_devices(mp);
1774
1775        sb->s_fs_info = NULL;
1776        xfs_free_fsname(mp);
1777        kfree(mp);
1778}
1779
1780STATIC struct dentry *
1781xfs_fs_mount(
1782        struct file_system_type *fs_type,
1783        int                     flags,
1784        const char              *dev_name,
1785        void                    *data)
1786{
1787        return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1788}
1789
1790static long
1791xfs_fs_nr_cached_objects(
1792        struct super_block      *sb,
1793        struct shrink_control   *sc)
1794{
1795        /* Paranoia: catch incorrect calls during mount setup or teardown */
1796        if (WARN_ON_ONCE(!sb->s_fs_info))
1797                return 0;
1798        return xfs_reclaim_inodes_count(XFS_M(sb));
1799}
1800
1801static long
1802xfs_fs_free_cached_objects(
1803        struct super_block      *sb,
1804        struct shrink_control   *sc)
1805{
1806        return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1807}
1808
1809static const struct super_operations xfs_super_operations = {
1810        .alloc_inode            = xfs_fs_alloc_inode,
1811        .destroy_inode          = xfs_fs_destroy_inode,
1812        .dirty_inode            = xfs_fs_dirty_inode,
1813        .drop_inode             = xfs_fs_drop_inode,
1814        .put_super              = xfs_fs_put_super,
1815        .sync_fs                = xfs_fs_sync_fs,
1816        .freeze_fs              = xfs_fs_freeze,
1817        .unfreeze_fs            = xfs_fs_unfreeze,
1818        .statfs                 = xfs_fs_statfs,
1819        .remount_fs             = xfs_fs_remount,
1820        .show_options           = xfs_fs_show_options,
1821        .nr_cached_objects      = xfs_fs_nr_cached_objects,
1822        .free_cached_objects    = xfs_fs_free_cached_objects,
1823};
1824
1825static struct file_system_type xfs_fs_type = {
1826        .owner                  = THIS_MODULE,
1827        .name                   = "xfs",
1828        .mount                  = xfs_fs_mount,
1829        .kill_sb                = kill_block_super,
1830        .fs_flags               = FS_REQUIRES_DEV,
1831};
1832MODULE_ALIAS_FS("xfs");
1833
1834STATIC int __init
1835xfs_init_zones(void)
1836{
1837        if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
1838                        offsetof(struct xfs_ioend, io_inline_bio),
1839                        BIOSET_NEED_BVECS))
1840                goto out;
1841
1842        xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1843                                                "xfs_log_ticket");
1844        if (!xfs_log_ticket_zone)
1845                goto out_free_ioend_bioset;
1846
1847        xfs_bmap_free_item_zone = kmem_zone_init(
1848                        sizeof(struct xfs_extent_free_item),
1849                        "xfs_bmap_free_item");
1850        if (!xfs_bmap_free_item_zone)
1851                goto out_destroy_log_ticket_zone;
1852
1853        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1854                                                "xfs_btree_cur");
1855        if (!xfs_btree_cur_zone)
1856                goto out_destroy_bmap_free_item_zone;
1857
1858        xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
1859                                                "xfs_da_state");
1860        if (!xfs_da_state_zone)
1861                goto out_destroy_btree_cur_zone;
1862
1863        xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
1864        if (!xfs_ifork_zone)
1865                goto out_destroy_da_state_zone;
1866
1867        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
1868        if (!xfs_trans_zone)
1869                goto out_destroy_ifork_zone;
1870
1871
1872        /*
1873         * The size of the zone allocated buf log item is the maximum
1874         * size possible under XFS.  This wastes a little bit of memory,
1875         * but it is much faster.
1876         */
1877        xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
1878                                           "xfs_buf_item");
1879        if (!xfs_buf_item_zone)
1880                goto out_destroy_trans_zone;
1881
1882        xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1883                        ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
1884                                 sizeof(xfs_extent_t))), "xfs_efd_item");
1885        if (!xfs_efd_zone)
1886                goto out_destroy_buf_item_zone;
1887
1888        xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
1889                        ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
1890                                sizeof(xfs_extent_t))), "xfs_efi_item");
1891        if (!xfs_efi_zone)
1892                goto out_destroy_efd_zone;
1893
1894        xfs_inode_zone =
1895                kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
1896                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
1897                        KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
1898        if (!xfs_inode_zone)
1899                goto out_destroy_efi_zone;
1900
1901        xfs_ili_zone =
1902                kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
1903                                        KM_ZONE_SPREAD, NULL);
1904        if (!xfs_ili_zone)
1905                goto out_destroy_inode_zone;
1906        xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
1907                                        "xfs_icr");
1908        if (!xfs_icreate_zone)
1909                goto out_destroy_ili_zone;
1910
1911        xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
1912                        "xfs_rud_item");
1913        if (!xfs_rud_zone)
1914                goto out_destroy_icreate_zone;
1915
1916        xfs_rui_zone = kmem_zone_init(
1917                        xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1918                        "xfs_rui_item");
1919        if (!xfs_rui_zone)
1920                goto out_destroy_rud_zone;
1921
1922        xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
1923                        "xfs_cud_item");
1924        if (!xfs_cud_zone)
1925                goto out_destroy_rui_zone;
1926
1927        xfs_cui_zone = kmem_zone_init(
1928                        xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
1929                        "xfs_cui_item");
1930        if (!xfs_cui_zone)
1931                goto out_destroy_cud_zone;
1932
1933        xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
1934                        "xfs_bud_item");
1935        if (!xfs_bud_zone)
1936                goto out_destroy_cui_zone;
1937
1938        xfs_bui_zone = kmem_zone_init(
1939                        xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
1940                        "xfs_bui_item");
1941        if (!xfs_bui_zone)
1942                goto out_destroy_bud_zone;
1943
1944        return 0;
1945
1946 out_destroy_bud_zone:
1947        kmem_zone_destroy(xfs_bud_zone);
1948 out_destroy_cui_zone:
1949        kmem_zone_destroy(xfs_cui_zone);
1950 out_destroy_cud_zone:
1951        kmem_zone_destroy(xfs_cud_zone);
1952 out_destroy_rui_zone:
1953        kmem_zone_destroy(xfs_rui_zone);
1954 out_destroy_rud_zone:
1955        kmem_zone_destroy(xfs_rud_zone);
1956 out_destroy_icreate_zone:
1957        kmem_zone_destroy(xfs_icreate_zone);
1958 out_destroy_ili_zone:
1959        kmem_zone_destroy(xfs_ili_zone);
1960 out_destroy_inode_zone:
1961        kmem_zone_destroy(xfs_inode_zone);
1962 out_destroy_efi_zone:
1963        kmem_zone_destroy(xfs_efi_zone);
1964 out_destroy_efd_zone:
1965        kmem_zone_destroy(xfs_efd_zone);
1966 out_destroy_buf_item_zone:
1967        kmem_zone_destroy(xfs_buf_item_zone);
1968 out_destroy_trans_zone:
1969        kmem_zone_destroy(xfs_trans_zone);
1970 out_destroy_ifork_zone:
1971        kmem_zone_destroy(xfs_ifork_zone);
1972 out_destroy_da_state_zone:
1973        kmem_zone_destroy(xfs_da_state_zone);
1974 out_destroy_btree_cur_zone:
1975        kmem_zone_destroy(xfs_btree_cur_zone);
1976 out_destroy_bmap_free_item_zone:
1977        kmem_zone_destroy(xfs_bmap_free_item_zone);
1978 out_destroy_log_ticket_zone:
1979        kmem_zone_destroy(xfs_log_ticket_zone);
1980 out_free_ioend_bioset:
1981        bioset_exit(&xfs_ioend_bioset);
1982 out:
1983        return -ENOMEM;
1984}
1985
1986STATIC void
1987xfs_destroy_zones(void)
1988{
1989        /*
1990         * Make sure all delayed rcu free are flushed before we
1991         * destroy caches.
1992         */
1993        rcu_barrier();
1994        kmem_zone_destroy(xfs_bui_zone);
1995        kmem_zone_destroy(xfs_bud_zone);
1996        kmem_zone_destroy(xfs_cui_zone);
1997        kmem_zone_destroy(xfs_cud_zone);
1998        kmem_zone_destroy(xfs_rui_zone);
1999        kmem_zone_destroy(xfs_rud_zone);
2000        kmem_zone_destroy(xfs_icreate_zone);
2001        kmem_zone_destroy(xfs_ili_zone);
2002        kmem_zone_destroy(xfs_inode_zone);
2003        kmem_zone_destroy(xfs_efi_zone);
2004        kmem_zone_destroy(xfs_efd_zone);
2005        kmem_zone_destroy(xfs_buf_item_zone);
2006        kmem_zone_destroy(xfs_trans_zone);
2007        kmem_zone_destroy(xfs_ifork_zone);
2008        kmem_zone_destroy(xfs_da_state_zone);
2009        kmem_zone_destroy(xfs_btree_cur_zone);
2010        kmem_zone_destroy(xfs_bmap_free_item_zone);
2011        kmem_zone_destroy(xfs_log_ticket_zone);
2012        bioset_exit(&xfs_ioend_bioset);
2013}
2014
2015STATIC int __init
2016xfs_init_workqueues(void)
2017{
2018        /*
2019         * The allocation workqueue can be used in memory reclaim situations
2020         * (writepage path), and parallelism is only limited by the number of
2021         * AGs in all the filesystems mounted. Hence use the default large
2022         * max_active value for this workqueue.
2023         */
2024        xfs_alloc_wq = alloc_workqueue("xfsalloc",
2025                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2026        if (!xfs_alloc_wq)
2027                return -ENOMEM;
2028
2029        xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2030        if (!xfs_discard_wq)
2031                goto out_free_alloc_wq;
2032
2033        return 0;
2034out_free_alloc_wq:
2035        destroy_workqueue(xfs_alloc_wq);
2036        return -ENOMEM;
2037}
2038
2039STATIC void
2040xfs_destroy_workqueues(void)
2041{
2042        destroy_workqueue(xfs_discard_wq);
2043        destroy_workqueue(xfs_alloc_wq);
2044}
2045
2046STATIC int __init
2047init_xfs_fs(void)
2048{
2049        int                     error;
2050
2051        xfs_check_ondisk_structs();
2052
2053        printk(KERN_INFO XFS_VERSION_STRING " with "
2054                         XFS_BUILD_OPTIONS " enabled\n");
2055
2056        xfs_extent_free_init_defer_op();
2057        xfs_rmap_update_init_defer_op();
2058        xfs_refcount_update_init_defer_op();
2059        xfs_bmap_update_init_defer_op();
2060
2061        xfs_dir_startup();
2062
2063        error = xfs_init_zones();
2064        if (error)
2065                goto out;
2066
2067        error = xfs_init_workqueues();
2068        if (error)
2069                goto out_destroy_zones;
2070
2071        error = xfs_mru_cache_init();
2072        if (error)
2073                goto out_destroy_wq;
2074
2075        error = xfs_buf_init();
2076        if (error)
2077                goto out_mru_cache_uninit;
2078
2079        error = xfs_init_procfs();
2080        if (error)
2081                goto out_buf_terminate;
2082
2083        error = xfs_sysctl_register();
2084        if (error)
2085                goto out_cleanup_procfs;
2086
2087        xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2088        if (!xfs_kset) {
2089                error = -ENOMEM;
2090                goto out_sysctl_unregister;
2091        }
2092
2093        xfsstats.xs_kobj.kobject.kset = xfs_kset;
2094
2095        xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2096        if (!xfsstats.xs_stats) {
2097                error = -ENOMEM;
2098                goto out_kset_unregister;
2099        }
2100
2101        error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2102                               "stats");
2103        if (error)
2104                goto out_free_stats;
2105
2106#ifdef DEBUG
2107        xfs_dbg_kobj.kobject.kset = xfs_kset;
2108        error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2109        if (error)
2110                goto out_remove_stats_kobj;
2111#endif
2112
2113        error = xfs_qm_init();
2114        if (error)
2115                goto out_remove_dbg_kobj;
2116
2117        error = register_filesystem(&xfs_fs_type);
2118        if (error)
2119                goto out_qm_exit;
2120        return 0;
2121
2122 out_qm_exit:
2123        xfs_qm_exit();
2124 out_remove_dbg_kobj:
2125#ifdef DEBUG
2126        xfs_sysfs_del(&xfs_dbg_kobj);
2127 out_remove_stats_kobj:
2128#endif
2129        xfs_sysfs_del(&xfsstats.xs_kobj);
2130 out_free_stats:
2131        free_percpu(xfsstats.xs_stats);
2132 out_kset_unregister:
2133        kset_unregister(xfs_kset);
2134 out_sysctl_unregister:
2135        xfs_sysctl_unregister();
2136 out_cleanup_procfs:
2137        xfs_cleanup_procfs();
2138 out_buf_terminate:
2139        xfs_buf_terminate();
2140 out_mru_cache_uninit:
2141        xfs_mru_cache_uninit();
2142 out_destroy_wq:
2143        xfs_destroy_workqueues();
2144 out_destroy_zones:
2145        xfs_destroy_zones();
2146 out:
2147        return error;
2148}
2149
2150STATIC void __exit
2151exit_xfs_fs(void)
2152{
2153        xfs_qm_exit();
2154        unregister_filesystem(&xfs_fs_type);
2155#ifdef DEBUG
2156        xfs_sysfs_del(&xfs_dbg_kobj);
2157#endif
2158        xfs_sysfs_del(&xfsstats.xs_kobj);
2159        free_percpu(xfsstats.xs_stats);
2160        kset_unregister(xfs_kset);
2161        xfs_sysctl_unregister();
2162        xfs_cleanup_procfs();
2163        xfs_buf_terminate();
2164        xfs_mru_cache_uninit();
2165        xfs_destroy_workqueues();
2166        xfs_destroy_zones();
2167        xfs_uuid_table_free();
2168}
2169
2170module_init(init_xfs_fs);
2171module_exit(exit_xfs_fs);
2172
2173MODULE_AUTHOR("Silicon Graphics, Inc.");
2174MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2175MODULE_LICENSE("GPL");
2176