linux/fs/xfs/xfs_mount.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"


static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

void
xfs_uuid_table_free(void)
{
        if (xfs_uuid_table_size == 0)
                return;
        kmem_free(xfs_uuid_table);
        xfs_uuid_table = NULL;
        xfs_uuid_table_size = 0;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
        struct xfs_mount        *mp)
{
        uuid_t                  *uuid = &mp->m_sb.sb_uuid;
        int                     hole, i;

        /* Publish UUID in struct super_block */
        uuid_copy(&mp->m_super->s_uuid, uuid);

        if (mp->m_flags & XFS_MOUNT_NOUUID)
                return 0;

        if (uuid_is_null(uuid)) {
                xfs_warn(mp, "Filesystem has null UUID - can't mount");
                return -EINVAL;
        }

        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
                if (uuid_is_null(&xfs_uuid_table[i])) {
                        hole = i;
                        continue;
                }
                if (uuid_equal(uuid, &xfs_uuid_table[i]))
                        goto out_duplicate;
        }

        if (hole < 0) {
                xfs_uuid_table = kmem_realloc(xfs_uuid_table,
                        (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
                        KM_SLEEP);
                hole = xfs_uuid_table_size++;
        }
        xfs_uuid_table[hole] = *uuid;
        mutex_unlock(&xfs_uuid_table_mutex);

        return 0;

 out_duplicate:
        mutex_unlock(&xfs_uuid_table_mutex);
        xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
        return -EINVAL;
}
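
/*
 * Illustrative sketch (not in the original source): the UUID table above
 * grows by one slot per new filesystem and reuses slots that
 * xfs_uuid_unmount() has nilled out.  For example, after mounting A, B and
 * C and then unmounting B, the table holds { A, nil, C }; mounting D fills
 * the hole in place, giving { A, D, C }, with no reallocation.
 */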

STATIC void
xfs_uuid_unmount(
        struct xfs_mount        *mp)
{
        uuid_t                  *uuid = &mp->m_sb.sb_uuid;
        int                     i;

        if (mp->m_flags & XFS_MOUNT_NOUUID)
                return;

        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0; i < xfs_uuid_table_size; i++) {
                if (uuid_is_null(&xfs_uuid_table[i]))
                        continue;
                if (!uuid_equal(uuid, &xfs_uuid_table[i]))
                        continue;
                memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
                break;
        }
        ASSERT(i < xfs_uuid_table_size);
        mutex_unlock(&xfs_uuid_table_mutex);
}


STATIC void
__xfs_free_perag(
        struct rcu_head *head)
{
        struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

        ASSERT(atomic_read(&pag->pag_ref) == 0);
        kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
STATIC void
xfs_free_perag(
        xfs_mount_t     *mp)
{
        xfs_agnumber_t  agno;
        struct xfs_perag *pag;

        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                spin_lock(&mp->m_perag_lock);
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
                spin_unlock(&mp->m_perag_lock);
                ASSERT(pag);
                ASSERT(atomic_read(&pag->pag_ref) == 0);
                xfs_buf_hash_destroy(pag);
                mutex_destroy(&pag->pag_ici_reclaim_lock);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
        xfs_sb_t        *sbp,
        uint64_t        nblocks)
{
        ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
        ASSERT(sbp->sb_blocklog >= BBSHIFT);

        /* Limited by ULONG_MAX of page cache index */
        if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
                return -EFBIG;
        return 0;
}
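
/*
 * Worked example (illustrative): with 4 KiB pages (PAGE_SHIFT == 12) and
 * 512-byte filesystem blocks (sb_blocklog == 9), eight blocks share a page,
 * so the check above rejects block counts whose page count exceeds
 * ULONG_MAX.  On a 32-bit kernel (ULONG_MAX == 2^32 - 1) that caps the
 * device at about 2^32 pages, i.e. 16 TiB of page cache address space.
 */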

int
xfs_initialize_perag(
        xfs_mount_t     *mp,
        xfs_agnumber_t  agcount,
        xfs_agnumber_t  *maxagi)
{
        xfs_agnumber_t  index;
        xfs_agnumber_t  first_initialised = NULLAGNUMBER;
        xfs_perag_t     *pag;
        int             error = -ENOMEM;

        /*
         * Walk the current per-ag tree so we don't try to initialise AGs
         * that already exist (growfs case). Allocate and insert all the
         * AGs we don't find ready for initialisation.
         */
        for (index = 0; index < agcount; index++) {
                pag = xfs_perag_get(mp, index);
                if (pag) {
                        xfs_perag_put(pag);
                        continue;
                }

                pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
                if (!pag)
                        goto out_unwind_new_pags;
                pag->pag_agno = index;
                pag->pag_mount = mp;
                spin_lock_init(&pag->pag_ici_lock);
                mutex_init(&pag->pag_ici_reclaim_lock);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
                if (xfs_buf_hash_init(pag))
                        goto out_free_pag;
                init_waitqueue_head(&pag->pagb_wait);
                spin_lock_init(&pag->pagb_lock);
                pag->pagb_count = 0;
                pag->pagb_tree = RB_ROOT;

                if (radix_tree_preload(GFP_NOFS))
                        goto out_hash_destroy;

                spin_lock(&mp->m_perag_lock);
                if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
                        BUG();
                        spin_unlock(&mp->m_perag_lock);
                        radix_tree_preload_end();
                        error = -EEXIST;
                        goto out_hash_destroy;
                }
                spin_unlock(&mp->m_perag_lock);
                radix_tree_preload_end();
                /* first new pag is fully initialized */
                if (first_initialised == NULLAGNUMBER)
                        first_initialised = index;
        }

        index = xfs_set_inode_alloc(mp, agcount);

        if (maxagi)
                *maxagi = index;

        mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
        return 0;

out_hash_destroy:
        xfs_buf_hash_destroy(pag);
out_free_pag:
        mutex_destroy(&pag->pag_ici_reclaim_lock);
        kmem_free(pag);
out_unwind_new_pags:
        /* unwind any prior newly initialized pags */
        for (index = first_initialised; index < agcount; index++) {
                pag = radix_tree_delete(&mp->m_perag_tree, index);
                if (!pag)
                        break;
                xfs_buf_hash_destroy(pag);
                mutex_destroy(&pag->pag_ici_reclaim_lock);
                kmem_free(pag);
        }
        return error;
}
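
/*
 * Minimal usage sketch (illustrative, not from the original file): the
 * per-ag structures initialised above are looked up under RCU with a held
 * reference, so a typical caller pairs xfs_perag_get() with
 * xfs_perag_put():
 *
 *        struct xfs_perag        *pag = xfs_perag_get(mp, agno);
 *
 *        ... use pag->pag_ici_root, pag->pagb_tree, etc. ...
 *        xfs_perag_put(pag);
 *
 * This is also why xfs_free_perag() asserts pag_ref is zero and frees each
 * structure via call_rcu().
 */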

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
        struct xfs_mount *mp,
        int             flags)
{
        unsigned int    sector_size;
        struct xfs_buf  *bp;
        struct xfs_sb   *sbp = &mp->m_sb;
        int             error;
        int             loud = !(flags & XFS_MFSI_QUIET);
        const struct xfs_buf_ops *buf_ops;

        ASSERT(mp->m_sb_bp == NULL);
        ASSERT(mp->m_ddev_targp != NULL);

        /*
         * For the initial read, we must guess at the sector
         * size based on the block device.  It's enough to
         * get the sb_sectsize out of the superblock and
         * then reread with the proper length.
         * We don't verify it yet, because it may not be complete.
         */
        sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
        buf_ops = NULL;

        /*
         * Allocate a (locked) buffer to hold the superblock. This will be kept
         * around at all times to optimize access to the superblock. Therefore,
         * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
         * elevated.
         */
reread:
        error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
                                      BTOBB(sector_size), XBF_NO_IOACCT, &bp,
                                      buf_ops);
        if (error) {
                if (loud)
                        xfs_warn(mp, "SB validate failed with error %d.", error);
                /* bad CRC means corrupted metadata */
                if (error == -EFSBADCRC)
                        error = -EFSCORRUPTED;
                return error;
        }

        /*
         * Initialize the mount structure from the superblock.
         */
        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));

        /*
         * If we haven't validated the superblock, do so now before we try
         * to check the sector size and reread the superblock appropriately.
         */
        if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                if (loud)
                        xfs_warn(mp, "Invalid superblock magic number");
                error = -EINVAL;
                goto release_buf;
        }

        /*
         * We must be able to do sector-sized and sector-aligned IO.
         */
        if (sector_size > sbp->sb_sectsize) {
                if (loud)
                        xfs_warn(mp, "device supports %u byte sectors (not %u)",
                                sector_size, sbp->sb_sectsize);
                error = -ENOSYS;
                goto release_buf;
        }

        if (buf_ops == NULL) {
                /*
                 * Re-read the superblock so the buffer is correctly sized,
                 * and properly verified.
                 */
                xfs_buf_relse(bp);
                sector_size = sbp->sb_sectsize;
                buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
                goto reread;
        }

        xfs_reinit_percpu_counters(mp);

        /* no need to be quiet anymore, so reset the buf ops */
        bp->b_ops = &xfs_sb_buf_ops;

        mp->m_sb_bp = bp;
        xfs_buf_unlock(bp);
        return 0;

release_buf:
        xfs_buf_relse(bp);
        return error;
}

/*
 * Update alignment values based on mount options and sb values
 */
STATIC int
xfs_update_alignment(xfs_mount_t *mp)
{
        xfs_sb_t        *sbp = &(mp->m_sb);

        if (mp->m_dalign) {
                /*
                 * If stripe unit and stripe width are not multiples
                 * of the fs blocksize turn off alignment.
                 */
                if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
                    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
                        xfs_warn(mp,
                "alignment check failed: sunit/swidth vs. blocksize(%d)",
                                sbp->sb_blocksize);
                        return -EINVAL;
                } else {
                        /*
                         * Convert the stripe unit and width to FSBs.
                         */
                        mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
                        if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
                                xfs_warn(mp,
                        "alignment check failed: sunit/swidth vs. agsize(%d)",
                                         sbp->sb_agblocks);
                                return -EINVAL;
                        } else if (mp->m_dalign) {
                                mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
                        } else {
                                xfs_warn(mp,
                        "alignment check failed: sunit(%d) less than bsize(%d)",
                                         mp->m_dalign, sbp->sb_blocksize);
                                return -EINVAL;
                        }
                }

                /*
                 * Update superblock with new values
                 * and log changes
                 */
                if (xfs_sb_version_hasdalign(sbp)) {
                        if (sbp->sb_unit != mp->m_dalign) {
                                sbp->sb_unit = mp->m_dalign;
                                mp->m_update_sb = true;
                        }
                        if (sbp->sb_width != mp->m_swidth) {
                                sbp->sb_width = mp->m_swidth;
                                mp->m_update_sb = true;
                        }
                } else {
                        xfs_warn(mp,
        "cannot change alignment: superblock does not support data alignment");
                        return -EINVAL;
                }
        } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
                    xfs_sb_version_hasdalign(&mp->m_sb)) {
                        mp->m_dalign = sbp->sb_unit;
                        mp->m_swidth = sbp->sb_width;
        }

        return 0;
}
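
/*
 * Worked example (illustrative): mount options sunit=128,swidth=512 arrive
 * in m_dalign/m_swidth as counts of 512-byte basic blocks, i.e. 64 KiB and
 * 256 KiB.  On a 4 KiB block filesystem XFS_BB_TO_FSBT() converts them to
 * 16 and 64 filesystem blocks, and the checks above then require both to
 * be fs-block multiples and sb_agblocks to be a multiple of the 16-block
 * stripe unit.
 */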

/*
 * Set the maximum inode count for this filesystem
 */
STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
        xfs_sb_t        *sbp = &(mp->m_sb);
        uint64_t        icount;

        if (sbp->sb_imax_pct) {
                /*
                 * Make sure the maximum inode count is a multiple
                 * of the units we allocate inodes in.
                 */
                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
                do_div(icount, 100);
                do_div(icount, mp->m_ialloc_blks);
                mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
                                   sbp->sb_inopblog;
        } else {
                mp->m_maxicount = 0;
        }
}
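
/*
 * Worked example (illustrative): with sb_dblocks = 1000000,
 * sb_imax_pct = 25, m_ialloc_blks = 4 and 4 KiB blocks holding eight
 * 512-byte inodes (sb_inopblog == 3): icount = 1000000 * 25 / 100 = 250000
 * blocks, rounded down to a multiple of 4, so m_maxicount =
 * 250000 << 3 = 2000000 inodes.
 */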

/*
 * Set the default minimum read and write sizes unless
 * already specified in a mount option.
 * We use smaller I/O sizes when the file system
 * is being used for NFS service (wsync mount option).
 */
STATIC void
xfs_set_rw_sizes(xfs_mount_t *mp)
{
        xfs_sb_t        *sbp = &(mp->m_sb);
        int             readio_log, writeio_log;

        if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
                if (mp->m_flags & XFS_MOUNT_WSYNC) {
                        readio_log = XFS_WSYNC_READIO_LOG;
                        writeio_log = XFS_WSYNC_WRITEIO_LOG;
                } else {
                        readio_log = XFS_READIO_LOG_LARGE;
                        writeio_log = XFS_WRITEIO_LOG_LARGE;
                }
        } else {
                readio_log = mp->m_readio_log;
                writeio_log = mp->m_writeio_log;
        }

        if (sbp->sb_blocklog > readio_log) {
                mp->m_readio_log = sbp->sb_blocklog;
        } else {
                mp->m_readio_log = readio_log;
        }
        mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
        if (sbp->sb_blocklog > writeio_log) {
                mp->m_writeio_log = sbp->sb_blocklog;
        } else {
                mp->m_writeio_log = writeio_log;
        }
        mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
}
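
/*
 * Worked example (illustrative; assumes the historical default of
 * XFS_READIO_LOG_LARGE == 16): on a 4 KiB block filesystem
 * (sb_blocklog == 12) mounted without wsync or an explicit I/O size,
 * m_readio_blocks = 1 << (16 - 12) = 16 blocks, i.e. a 64 KiB preferred
 * read size, and the write size works out the same way.
 */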

/*
 * precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
        struct xfs_mount        *mp)
{
        int i;

        for (i = 0; i < XFS_LOWSP_MAX; i++) {
                uint64_t space = mp->m_sb.sb_dblocks;

                do_div(space, 100);
                mp->m_low_space[i] = space * (i + 1);
        }
}
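
/*
 * Worked example (illustrative): XFS_LOWSP_MAX is 5, so for
 * sb_dblocks = 1000000 the loop above yields
 * m_low_space[] = { 10000, 20000, 30000, 40000, 50000 }, i.e. thresholds
 * at 1% through 5% of the data device.
 */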


/*
 * Set whether we're using inode alignment.
 */
STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
        if (xfs_sb_version_hasalign(&mp->m_sb) &&
                mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
                mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
        else
                mp->m_inoalign_mask = 0;
        /*
         * If we are using stripe alignment, check whether
         * the stripe unit is a multiple of the inode alignment
         */
        if (mp->m_dalign && mp->m_inoalign_mask &&
            !(mp->m_dalign & mp->m_inoalign_mask))
                mp->m_sinoalign = mp->m_dalign;
        else
                mp->m_sinoalign = 0;
}
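
/*
 * Worked example (illustrative): with sb_inoalignmt = 4 fsbs and an inode
 * cluster no larger than 4 fsbs, m_inoalign_mask = 3.  A stripe unit of
 * m_dalign = 16 fsbs then satisfies (16 & 3) == 0, so m_sinoalign = 16 and
 * inode chunk allocations can be stripe aligned as well.
 */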

/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
        struct xfs_mount *mp)
{
        struct xfs_buf  *bp;
        xfs_daddr_t     d;
        int             error;

        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
                xfs_warn(mp, "filesystem size mismatch detected");
                return -EFBIG;
        }
        error = xfs_buf_read_uncached(mp->m_ddev_targp,
                                        d - XFS_FSS_TO_BB(mp, 1),
                                        XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
        if (error) {
                xfs_warn(mp, "last sector read failed");
                return error;
        }
        xfs_buf_relse(bp);

        if (mp->m_logdev_targp == mp->m_ddev_targp)
                return 0;

        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
                xfs_warn(mp, "log size mismatch detected");
                return -EFBIG;
        }
        error = xfs_buf_read_uncached(mp->m_logdev_targp,
                                        d - XFS_FSB_TO_BB(mp, 1),
                                        XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
        if (error) {
                xfs_warn(mp, "log device read failed");
                return error;
        }
        xfs_buf_relse(bp);
        return 0;
}
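
/*
 * Illustrative numbers for the probe above: with sb_dblocks = 1000000 and
 * 4 KiB blocks, d = XFS_FSB_TO_BB(mp, 1000000) = 8000000 basic blocks, so
 * the uncached read covers the one sector ending at daddr 8000000.  If the
 * device is shorter than the superblock claims, that read fails and we
 * return the "last sector read failed" error before going any further.
 */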

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
        struct xfs_mount        *mp)
{
        mp->m_qflags = 0;

        /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
        if (mp->m_sb.sb_qflags == 0)
                return 0;
        spin_lock(&mp->m_sb_lock);
        mp->m_sb.sb_qflags = 0;
        spin_unlock(&mp->m_sb_lock);

        if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
                return 0;

        return xfs_sync_sb(mp, false);
}

uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
        uint64_t resblks;

        /*
         * We default to 5% or 8192 fsbs of space reserved, whichever is
         * smaller.  This is intended to cover concurrent allocation
         * transactions when we initially hit enospc. These each require a 4
         * block reservation. Hence by default we cover roughly 2000 concurrent
         * allocation reservations.
         */
        resblks = mp->m_sb.sb_dblocks;
        do_div(resblks, 20);
        resblks = min_t(uint64_t, resblks, 8192);
        return resblks;
}
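
/*
 * Worked example (illustrative): a 1 TiB filesystem with 4 KiB blocks has
 * sb_dblocks = 268435456; 5% of that (13421772 blocks) far exceeds the
 * 8192-block cap, so resblks = 8192.  Only filesystems with fewer than
 * 8192 * 20 = 163840 blocks get the 5% figure instead.
 */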

/* Ensure the summary counts are correct. */
STATIC int
xfs_check_summary_counts(
        struct xfs_mount        *mp)
{
        /*
         * The AG0 superblock verifier rejects in-progress filesystems,
         * so we should never see the flag set this far into mounting.
         */
        if (mp->m_sb.sb_inprogress) {
                xfs_err(mp, "sb_inprogress set after log recovery??");
                WARN_ON(1);
                return -EFSCORRUPTED;
        }

        /*
         * Now the log is mounted, we know if it was an unclean shutdown or
         * not. If it was, then the first phase of recovery has completed and
         * we have consistent AG blocks on disk. We have not recovered EFIs
         * yet, but they are recovered transactionally in the second recovery
         * phase later.
         *
         * If the log was clean when we mounted, we can check the summary
         * counters.  If any of them are obviously incorrect, we can recompute
         * them from the AGF headers in the next step.
         */
        if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
            (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
             !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
             mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
                mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;

        /*
         * We can safely re-initialise incore superblock counters from the
         * per-ag data. These may not be correct if the filesystem was not
         * cleanly unmounted, so we waited for recovery to finish before doing
         * this.
         *
         * If the filesystem was cleanly unmounted or the previous check did
         * not flag anything weird, then we can trust the values in the
         * superblock to be correct and we don't need to do anything here.
         * Otherwise, recalculate the summary counters.
         */
        if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
             XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
            !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
                return 0;

        return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
}

/*
 * This function does the following on an initial mount of a file system:
 *      - reads the superblock from disk and init the mount struct
 *      - if we're a 32-bit kernel, do a size check on the superblock
 *              so we don't mount terabyte filesystems
 *      - init mount struct realtime fields
 *      - allocate inode hash table for fs
 *      - init directory manager
 *      - perform recovery and init the log manager
 */
int
xfs_mountfs(
        struct xfs_mount        *mp)
{
        struct xfs_sb           *sbp = &(mp->m_sb);
        struct xfs_inode        *rip;
        uint64_t                resblks;
        uint                    quotamount = 0;
        uint                    quotaflags = 0;
        int                     error = 0;

        xfs_sb_mount_common(mp, sbp);

        /*
         * Check for mismatched features2 values.  Older kernels read & wrote
         * into the wrong sb offset for sb_features2 on some platforms due to
         * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
         * which made older superblock reading/writing routines swap it as a
         * 64-bit value.
         *
         * For backwards compatibility, we make both slots equal.
         *
         * If we detect a mismatched field, we OR the set bits into the existing
         * features2 field in case it has already been modified; we don't want
         * to lose any features.  We then update the bad location with the ORed
         * value so that older kernels will see any features2 flags. The
         * superblock writeback code ensures the new sb_features2 is copied to
         * sb_bad_features2 before it is logged or written to disk.
         */
        if (xfs_sb_has_mismatched_features2(sbp)) {
                xfs_warn(mp, "correcting sb_features alignment problem");
                sbp->sb_features2 |= sbp->sb_bad_features2;
                mp->m_update_sb = true;

                /*
                 * Re-check for ATTR2 in case it was found in bad_features2
                 * slot.
                 */
                if (xfs_sb_version_hasattr2(&mp->m_sb) &&
                   !(mp->m_flags & XFS_MOUNT_NOATTR2))
                        mp->m_flags |= XFS_MOUNT_ATTR2;
        }

        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
           (mp->m_flags & XFS_MOUNT_NOATTR2)) {
                xfs_sb_version_removeattr2(&mp->m_sb);
                mp->m_update_sb = true;

                /* update sb_versionnum for the clearing of the morebits */
                if (!sbp->sb_features2)
                        mp->m_update_sb = true;
        }

        /* always use v2 inodes by default now */
        if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
                mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
                mp->m_update_sb = true;
        }

        /*
         * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
         * is NOT aligned turn off m_dalign, since allocator alignment is
         * within an ag and therefore the ag has to be aligned at the stripe
         * boundary.
         */
        error = xfs_update_alignment(mp);
        if (error)
                goto out;

        xfs_alloc_compute_maxlevels(mp);
        xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
        xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
        xfs_ialloc_compute_maxlevels(mp);
        xfs_rmapbt_compute_maxlevels(mp);
        xfs_refcountbt_compute_maxlevels(mp);

        xfs_set_maxicount(mp);

        /* enable fail_at_unmount as default */
        mp->m_fail_unmount = true;

        error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
        if (error)
                goto out;

        error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
                               &mp->m_kobj, "stats");
        if (error)
                goto out_remove_sysfs;

        error = xfs_error_sysfs_init(mp);
        if (error)
                goto out_del_stats;

        error = xfs_errortag_init(mp);
        if (error)
                goto out_remove_error_sysfs;

        error = xfs_uuid_mount(mp);
        if (error)
                goto out_remove_errortag;

        /*
         * Set the minimum read and write sizes
         */
        xfs_set_rw_sizes(mp);

        /* set the low space thresholds for dynamic preallocation */
        xfs_set_low_space_thresholds(mp);

        /*
         * Set the inode cluster size.
         * This may still be overridden by the file system
         * block size if it is larger than the chosen cluster size.
         *
         * For v5 filesystems, scale the cluster size with the inode size to
         * keep a constant ratio of inode per cluster buffer, but only if mkfs
         * has set the inode alignment value appropriately for larger cluster
         * sizes.
         */
        mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                int     new_size = mp->m_inode_cluster_size;

                new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
                if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
                        mp->m_inode_cluster_size = new_size;
        }
        mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
        mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
        mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
        mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
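
        /*
         * Worked example (illustrative): a v5 filesystem with 512-byte
         * inodes doubles the 8 KiB XFS_INODE_BIG_CLUSTER_SIZE to 16 KiB,
         * provided mkfs set sb_inoalignmt to at least 4 fsbs (on 4 KiB
         * blocks).  The derived geometry is then m_blocks_per_cluster = 4
         * and m_inodes_per_cluster = 32.
         */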

        /*
         * If enabled, sparse inode chunk alignment is expected to match the
         * cluster size. Full inode chunk alignment must match the chunk size,
         * but that is checked on sb read verification...
         */
        if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
            mp->m_sb.sb_spino_align !=
                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
                xfs_warn(mp,
        "Sparse inode block alignment (%u) must match cluster size (%llu).",
                         mp->m_sb.sb_spino_align,
                         XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
                error = -EINVAL;
                goto out_remove_uuid;
        }

        /*
         * Set inode alignment fields
         */
        xfs_set_inoalignment(mp);

        /*
         * Check that the data (and log if separate) is an ok size.
         */
        error = xfs_check_sizes(mp);
        if (error)
                goto out_remove_uuid;

        /*
         * Initialize realtime fields in the mount structure
         */
        error = xfs_rtmount_init(mp);
        if (error) {
                xfs_warn(mp, "RT mount failed");
                goto out_remove_uuid;
        }

        /*
         *  Copies the low order bits of the timestamp and the randomly
         *  set "sequence" number out of a UUID.
         */
        mp->m_fixedfsid[0] =
                (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
                 get_unaligned_be16(&sbp->sb_uuid.b[4]);
        mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
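
        /*
         * Illustrative byte map (not in the original source): with the UUID
         * stored as bytes b[0..15], the code above packs b[8..9] (the
         * "sequence") into the high half and b[4..5] (timestamp bits) into
         * the low half of m_fixedfsid[0], and b[0..3] into m_fixedfsid[1],
         * yielding a compact 64-bit filesystem identifier.
         */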

        error = xfs_da_mount(mp);
        if (error) {
                xfs_warn(mp, "Failed dir/attr init: %d", error);
                goto out_remove_uuid;
        }

        /*
         * Initialize the precomputed transaction reservations values.
         */
        xfs_trans_init(mp);

        /*
         * Allocate and initialize the per-ag data.
         */
        error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
        if (error) {
                xfs_warn(mp, "Failed per-ag init: %d", error);
                goto out_free_dir;
        }

        if (!sbp->sb_logblocks) {
                xfs_warn(mp, "no log defined");
                XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
                error = -EFSCORRUPTED;
                goto out_free_perag;
        }

        /*
         * Log's mount-time initialization. The first part of recovery can place
         * some items on the AIL, to be handled when recovery is finished or
         * cancelled.
         */
        error = xfs_log_mount(mp, mp->m_logdev_targp,
                              XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
                              XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
        if (error) {
                xfs_warn(mp, "log mount failed");
                goto out_fail_wait;
        }

        /* Make sure the summary counts are ok. */
        error = xfs_check_summary_counts(mp);
        if (error)
                goto out_log_dealloc;

        /*
         * Get and sanity-check the root inode.
         * Save the pointer to it in the mount structure.
         */
        error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
                         XFS_ILOCK_EXCL, &rip);
        if (error) {
                xfs_warn(mp,
                        "Failed to read root inode 0x%llx, error %d",
                        sbp->sb_rootino, -error);
                goto out_log_dealloc;
        }

        ASSERT(rip != NULL);

        if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
                xfs_warn(mp, "corrupted root inode %llu: not a directory",
                        (unsigned long long)rip->i_ino);
                xfs_iunlock(rip, XFS_ILOCK_EXCL);
                XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
                                 mp);
                error = -EFSCORRUPTED;
                goto out_rele_rip;
        }
        mp->m_rootip = rip;     /* save it */

        xfs_iunlock(rip, XFS_ILOCK_EXCL);

        /*
         * Initialize realtime inode pointers in the mount structure
         */
        error = xfs_rtmount_inodes(mp);
        if (error) {
                /*
                 * Free up the root inode.
                 */
                xfs_warn(mp, "failed to read RT inodes");
                goto out_rele_rip;
        }

        /*
         * If this is a read-only mount defer the superblock updates until
         * the next remount into writeable mode.  Otherwise we would never
         * perform the update e.g. for the root filesystem.
         */
        if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
                error = xfs_sync_sb(mp, false);
                if (error) {
                        xfs_warn(mp, "failed to write sb changes");
                        goto out_rtunmount;
                }
        }

        /*
         * Initialise the XFS quota management subsystem for this mount
         */
        if (XFS_IS_QUOTA_RUNNING(mp)) {
                error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
                if (error)
                        goto out_rtunmount;
        } else {
                ASSERT(!XFS_IS_QUOTA_ON(mp));

                /*
                 * If a file system had quotas running earlier, but decided to
                 * mount without -o uquota/pquota/gquota options, revoke the
                 * quotachecked license.
                 */
                if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
                        xfs_notice(mp, "resetting quota flags");
                        error = xfs_mount_reset_sbqflags(mp);
                        if (error)
                                goto out_rtunmount;
                }
        }

        /*
         * Finish recovering the file system.  This part needed to be delayed
         * until after the root and real-time bitmap inodes were consistently
         * read in.
         */
        error = xfs_log_mount_finish(mp);
        if (error) {
                xfs_warn(mp, "log mount finish failed");
                goto out_rtunmount;
        }

        /*
         * Now the log is fully replayed, we can transition to full read-only
         * mode for read-only mounts. This will sync all the metadata and clean
         * the log so that the recovery we just performed does not have to be
         * replayed again on the next mount.
         *
         * We use the same quiesce mechanism as the rw->ro remount, as they are
         * semantically identical operations.
         */
        if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) ==
                                                        XFS_MOUNT_RDONLY) {
                xfs_quiesce_attr(mp);
        }

        /*
         * Complete the quota initialisation, post-log-replay component.
         */
        if (quotamount) {
                ASSERT(mp->m_qflags == 0);
                mp->m_qflags = quotaflags;

                xfs_qm_mount_quotas(mp);
        }

        /*
         * Now we are mounted, reserve a small amount of unused space for
         * privileged transactions. This is needed so that transaction
         * space required for critical operations can dip into this pool
         * when at ENOSPC. This is needed for operations like create with
         * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
         * are not allowed to use this reserved space.
         *
         * This may drive us straight to ENOSPC on mount, but that implies
         * we were already there on the last unmount. Warn if this occurs.
         */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
                resblks = xfs_default_resblks(mp);
                error = xfs_reserve_blocks(mp, &resblks, NULL);
                if (error)
                        xfs_warn(mp,
        "Unable to allocate reserve blocks. Continuing without reserve pool.");

                /* Recover any CoW blocks that never got remapped. */
                error = xfs_reflink_recover_cow(mp);
                if (error) {
                        xfs_err(mp,
        "Error %d recovering leftover CoW allocations.", error);
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        goto out_quota;
                }

                /* Reserve AG blocks for future btree expansion. */
                error = xfs_fs_reserve_ag_blocks(mp);
                if (error && error != -ENOSPC)
                        goto out_agresv;
        }

        return 0;

 out_agresv:
        xfs_fs_unreserve_ag_blocks(mp);
 out_quota:
        xfs_qm_unmount_quotas(mp);
 out_rtunmount:
        xfs_rtunmount_inodes(mp);
 out_rele_rip:
        xfs_irele(rip);
        /* Clean out dquots that might be in memory after quotacheck. */
        xfs_qm_unmount(mp);
        /*
         * Cancel all delayed reclaim work and reclaim the inodes directly.
         * We have to do this /after/ rtunmount and qm_unmount because those
         * two will have scheduled delayed reclaim for the rt/quota inodes.
         *
         * This is slightly different from the unmountfs call sequence
         * because we could be tearing down a partially set up mount.  In
         * particular, if log_mount_finish fails we bail out without calling
         * qm_unmount_quotas and therefore rely on qm_unmount to release the
         * quota inodes.
         */
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp, SYNC_WAIT);
 out_log_dealloc:
        mp->m_flags |= XFS_MOUNT_UNMOUNTING;
        xfs_log_mount_cancel(mp);
 out_fail_wait:
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
                xfs_wait_buftarg(mp->m_logdev_targp);
        xfs_wait_buftarg(mp->m_ddev_targp);
 out_free_perag:
        xfs_free_perag(mp);
 out_free_dir:
        xfs_da_unmount(mp);
 out_remove_uuid:
        xfs_uuid_unmount(mp);
 out_remove_errortag:
        xfs_errortag_del(mp);
 out_remove_error_sysfs:
        xfs_error_sysfs_del(mp);
 out_del_stats:
        xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
        xfs_sysfs_del(&mp->m_kobj);
 out:
        return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
        struct xfs_mount        *mp)
{
        uint64_t                resblks;
        int                     error;

        xfs_icache_disable_reclaim(mp);
        xfs_fs_unreserve_ag_blocks(mp);
        xfs_qm_unmount_quotas(mp);
        xfs_rtunmount_inodes(mp);
        xfs_irele(mp->m_rootip);

        /*
         * We can potentially deadlock here if we have an inode cluster
         * that has been freed but still has its buffer pinned in memory
         * because the transaction is still sitting in an iclog. The stale
         * inodes on that buffer will have their flush locks held until the
         * transaction hits the disk and the callbacks run. The inode
         * flush takes the flush lock unconditionally and with nothing to
         * push out the iclog we will never get that unlocked. Hence we
         * need to force the log first.
         */
        xfs_log_force(mp, XFS_LOG_SYNC);

        /*
         * Wait for all busy extents to be freed, including completion of
         * any discard operation.
         */
        xfs_extent_busy_wait_all(mp);
        flush_workqueue(xfs_discard_wq);

        /*
         * We now need to tell the world we are unmounting. This will allow
         * us to detect that the filesystem is going away and we should error
         * out anything that we have been retrying in the background. This will
         * prevent neverending retries in AIL pushing from hanging the unmount.
         */
        mp->m_flags |= XFS_MOUNT_UNMOUNTING;

        /*
         * Flush all pending changes from the AIL.
         */
        xfs_ail_push_all_sync(mp->m_ail);

        /*
         * And reclaim all inodes.  At this point there should be no dirty
         * inodes and none should be pinned or locked, but use synchronous
         * reclaim just to be sure. We can stop background inode reclaim
         * here as well if it is still running.
         */
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp, SYNC_WAIT);

        xfs_qm_unmount(mp);

        /*
         * Unreserve any blocks we have so that when we unmount we don't account
         * the reserved free space as used. This is really only necessary for
         * lazy superblock counting because it trusts the incore superblock
         * counters to be absolutely correct on clean unmount.
         *
         * We don't bother correcting this elsewhere for lazy superblock
         * counting because on mount of an unclean filesystem we reconstruct the
         * correct counter value and this is irrelevant.
         *
         * For non-lazy counter filesystems, this doesn't matter at all because
         * we only ever apply deltas to the superblock and hence the incore
         * value does not matter....
         */
        resblks = 0;
        error = xfs_reserve_blocks(mp, &resblks, NULL);
        if (error)
                xfs_warn(mp, "Unable to free reserved block pool. "
                                "Freespace may not be correct on next mount.");

        error = xfs_log_sbcount(mp);
        if (error)
                xfs_warn(mp, "Unable to update superblock counters. "
                                "Freespace may not be correct on next mount.");


        xfs_log_unmount(mp);
        xfs_da_unmount(mp);
        xfs_uuid_unmount(mp);

#if defined(DEBUG)
        xfs_errortag_clearall(mp);
#endif
        xfs_free_perag(mp);

        xfs_errortag_del(mp);
        xfs_error_sysfs_del(mp);
        xfs_sysfs_del(&mp->m_stats.xs_kobj);
        xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed. The caller specifies the minimum
 * freeze level for which modifications should not be allowed. This allows
 * certain operations to proceed while the freeze sequence is in progress, if
 * necessary.
 */
bool
xfs_fs_writable(
        struct xfs_mount        *mp,
        int                     level)
{
        ASSERT(level > SB_UNFROZEN);
        if ((mp->m_super->s_writers.frozen >= level) ||
            XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
                return false;

        return true;
}

/*
 * xfs_log_sbcount
 *
 * Sync the superblock counters to disk.
 *
 * Note this code can be called during the process of freezing, so we use the
 * transaction allocator that does not block when the transaction subsystem is
 * in its frozen state.
 */
int
xfs_log_sbcount(xfs_mount_t *mp)
{
        /* allow this to proceed during the freeze sequence... */
        if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                return 0;

        /*
         * we don't need to do this if we are updating the superblock
         * counters on every modification.
         */
        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                return 0;

        return xfs_sync_sb(mp, true);
}

/*
 * Deltas for the inode count are +/-64, hence we use a large batch size
 * of 128 so we don't need to take the counter lock on every update.
 */
#define XFS_ICOUNT_BATCH        128
int
xfs_mod_icount(
        struct xfs_mount        *mp,
        int64_t                 delta)
{
        percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
        if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
        }
        return 0;
}
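
/*
 * Illustrative sketch: inode counts change in whole chunks of 64 inodes,
 * so the batch of 128 lets each CPU absorb a chunk allocation or free
 * locally, e.g. xfs_mod_icount(mp, 64) after allocating a chunk and
 * xfs_mod_icount(mp, -64) after freeing one; the summed comparison against
 * zero only exists to catch underflow bugs.
 */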

int
xfs_mod_ifree(
        struct xfs_mount        *mp,
        int64_t                 delta)
{
        percpu_counter_add(&mp->m_ifree, delta);
        if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_ifree, -delta);
                return -EINVAL;
        }
        return 0;
}

/*
 * Deltas for the block count can vary from 1 to very large, but lock contention
 * only occurs on frequent small block count updates such as in the delayed
 * allocation path for buffered writes (a page at a time). Hence we set
 * a large batch count (1024) to minimise global counter updates except when
 * we get near to ENOSPC and we have to be very accurate with our updates.
 */
#define XFS_FDBLOCKS_BATCH      1024
int
xfs_mod_fdblocks(
        struct xfs_mount        *mp,
        int64_t                 delta,
        bool                    rsvd)
{
        int64_t                 lcounter;
        long long               res_used;
        s32                     batch;

        if (delta > 0) {
                /*
                 * If the reserve pool is depleted, put blocks back into it
                 * first. Most of the time the pool is full.
                 */
                if (likely(mp->m_resblks == mp->m_resblks_avail)) {
                        percpu_counter_add(&mp->m_fdblocks, delta);
                        return 0;
                }

                spin_lock(&mp->m_sb_lock);
                res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

                if (res_used > delta) {
                        mp->m_resblks_avail += delta;
                } else {
                        delta -= res_used;
                        mp->m_resblks_avail = mp->m_resblks;
                        percpu_counter_add(&mp->m_fdblocks, delta);
                }
                spin_unlock(&mp->m_sb_lock);
                return 0;
        }

        /*
         * Taking blocks away, need to be more accurate the closer we
         * are to zero.
         *
         * If the counter has a value of less than 2 * max batch size,
         * then make everything serialise as we are real close to
         * ENOSPC.
         */
        if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
                                     XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
                batch = XFS_FDBLOCKS_BATCH;

        percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
        if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
                                     XFS_FDBLOCKS_BATCH) >= 0) {
                /* we had space! */
                return 0;
        }

        /*
         * lock up the sb for dipping into reserves before releasing the space
         * that took us to ENOSPC.
         */
        spin_lock(&mp->m_sb_lock);
        percpu_counter_add(&mp->m_fdblocks, -delta);
        if (!rsvd)
                goto fdblocks_enospc;

        lcounter = (long long)mp->m_resblks_avail + delta;
        if (lcounter >= 0) {
                mp->m_resblks_avail = lcounter;
                spin_unlock(&mp->m_sb_lock);
                return 0;
        }
        printk_once(KERN_WARNING
                "Filesystem \"%s\": reserve blocks depleted! "
                "Consider increasing reserve pool size.",
                mp->m_fsname);
fdblocks_enospc:
        spin_unlock(&mp->m_sb_lock);
        return -ENOSPC;
}
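
/*
 * Worked example (illustrative): with m_resblks = 8192 and
 * m_resblks_avail = 8000, freeing 300 blocks via
 * xfs_mod_fdblocks(mp, 300, false) first refills the reserve pool to 8192
 * (consuming 192 blocks) and adds only the remaining 108 to m_fdblocks.
 * Conversely, an allocation that would drive m_fdblocks below
 * m_alloc_set_aside is backed out, and only rsvd == true callers may then
 * dip into m_resblks_avail.
 */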

int
xfs_mod_frextents(
        struct xfs_mount        *mp,
        int64_t                 delta)
{
        int64_t                 lcounter;
        int                     ret = 0;

        spin_lock(&mp->m_sb_lock);
        lcounter = mp->m_sb.sb_frextents + delta;
        if (lcounter < 0)
                ret = -ENOSPC;
        else
                mp->m_sb.sb_frextents = lcounter;
        spin_unlock(&mp->m_sb_lock);
        return ret;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_buf_relse().
 *
 * If the flags parameter is XBF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
struct xfs_buf *
xfs_getsb(
        struct xfs_mount        *mp,
        int                     flags)
{
        struct xfs_buf          *bp = mp->m_sb_bp;

        if (!xfs_buf_trylock(bp)) {
                if (flags & XBF_TRYLOCK)
                        return NULL;
                xfs_buf_lock(bp);
        }

        xfs_buf_hold(bp);
        ASSERT(bp->b_flags & XBF_DONE);
        return bp;
}
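
/*
 * Minimal usage sketch (illustrative): callers that must not sleep pass
 * XBF_TRYLOCK and handle failure, e.g.:
 *
 *        struct xfs_buf        *bp = xfs_getsb(mp, XBF_TRYLOCK);
 *
 *        if (!bp)
 *                return -EAGAIN;        ... hypothetical retry path ...
 *        ... use the locked superblock buffer ...
 *        xfs_buf_relse(bp);             ... unlock and drop the hold ...
 */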

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
        struct xfs_mount        *mp)
{
        struct xfs_buf          *bp = mp->m_sb_bp;

        xfs_buf_lock(bp);
        mp->m_sb_bp = NULL;
        xfs_buf_relse(bp);
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
        struct xfs_mount        *mp,
        char                    *message)
{
        if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
            xfs_readonly_buftarg(mp->m_logdev_targp) ||
            (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
                xfs_notice(mp, "%s required on read-only device.", message);
                xfs_notice(mp, "write access unavailable, cannot proceed.");
                return -EROFS;
        }
        return 0;
}

/* Force the summary counters to be recalculated at next mount. */
void
xfs_force_summary_recalc(
        struct xfs_mount        *mp)
{
        if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
                return;

        spin_lock(&mp->m_sb_lock);
        mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
        spin_unlock(&mp->m_sb_lock);
}