linux/fs/xfs/libxfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_alloc_btree.h"
  31#include "xfs_alloc.h"
  32#include "xfs_extent_busy.h"
  33#include "xfs_errortag.h"
  34#include "xfs_error.h"
  35#include "xfs_cksum.h"
  36#include "xfs_trace.h"
  37#include "xfs_trans.h"
  38#include "xfs_buf_item.h"
  39#include "xfs_log.h"
  40
/* Workqueue used by the allocator; consumers are elsewhere in this file. */
struct workqueue_struct *xfs_alloc_wq;

/*
 * Absolute difference of two unsigned values without underflow.
 * NOTE: evaluates both arguments twice — do not pass expressions with
 * side effects.
 */
#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

/*
 * Flags for xfs_alloc_fixup_trees(): which cursor is already positioned
 * at the relevant free space record.
 */
#define XFSA_FIXUP_BNO_OK       1
#define XFSA_FIXUP_CNT_OK       2

/* Forward declarations of the per-AG extent allocation strategies. */
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  53
  54/*
  55 * Lookup the record equal to [bno, len] in the btree given by cur.
  56 */
  57STATIC int                              /* error */
  58xfs_alloc_lookup_eq(
  59        struct xfs_btree_cur    *cur,   /* btree cursor */
  60        xfs_agblock_t           bno,    /* starting block of extent */
  61        xfs_extlen_t            len,    /* length of extent */
  62        int                     *stat)  /* success/failure */
  63{
  64        cur->bc_rec.a.ar_startblock = bno;
  65        cur->bc_rec.a.ar_blockcount = len;
  66        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  67}
  68
  69/*
  70 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  71 * AGF buffer (PV 947395), we place constraints on the relationship among
  72 * actual allocations for data blocks, freelist blocks, and potential file data
  73 * bmap btree blocks. However, these restrictions may result in no actual space
  74 * allocated for a delayed extent, for example, a data block in a certain AG is
  75 * allocated but there is no additional block for the additional bmap btree
  76 * block due to a split of the bmap btree of the file. The result of this may
  77 * lead to an infinite loop when the file gets flushed to disk and all delayed
  78 * extents need to be actually allocated. To get around this, we explicitly set
  79 * aside a few blocks which will not be reserved in delayed allocation.
  80 *
  81 * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
  82 * and 4 more to handle a potential split of the file's bmap btree.
  83 *
  84 * When rmap is enabled, we must also be able to handle two rmap btree inserts
  85 * to record both the file data extent and a new bmbt block.  The bmbt block
  86 * might not be in the same AG as the file data extent.  In the worst case
  87 * the bmap btree splits multiple levels and all the new blocks come from
  88 * different AGs, so set aside enough to handle rmap btree splits in all AGs.
  89 */
  90unsigned int
  91xfs_alloc_set_aside(
  92        struct xfs_mount        *mp)
  93{
  94        return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
  95}
  96
  97/*
  98 * When deciding how much space to allocate out of an AG, we limit the
  99 * allocation maximum size to the size the AG. However, we cannot use all the
 100 * blocks in the AG - some are permanently used by metadata. These
 101 * blocks are generally:
 102 *      - the AG superblock, AGF, AGI and AGFL
 103 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 104 *        the AGI free inode and rmap btree root blocks.
 105 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 106 *      - the rmapbt root block
 107 *
 108 * The AG headers are sector sized, so the amount of space they take up is
 109 * dependent on filesystem geometry. The others are all single blocks.
 110 */
 111unsigned int
 112xfs_alloc_ag_max_usable(
 113        struct xfs_mount        *mp)
 114{
 115        unsigned int            blocks;
 116
 117        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 118        blocks += XFS_ALLOC_AGFL_RESERVE;
 119        blocks += 3;                    /* AGF, AGI btree root blocks */
 120        if (xfs_sb_version_hasfinobt(&mp->m_sb))
 121                blocks++;               /* finobt root block */
 122
 123        return mp->m_sb.sb_agblocks - blocks;
 124}
 125
 126/*
 127 * Lookup the first record greater than or equal to [bno, len]
 128 * in the btree given by cur.
 129 */
 130int                             /* error */
 131xfs_alloc_lookup_ge(
 132        struct xfs_btree_cur    *cur,   /* btree cursor */
 133        xfs_agblock_t           bno,    /* starting block of extent */
 134        xfs_extlen_t            len,    /* length of extent */
 135        int                     *stat)  /* success/failure */
 136{
 137        cur->bc_rec.a.ar_startblock = bno;
 138        cur->bc_rec.a.ar_blockcount = len;
 139        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 140}
 141
 142/*
 143 * Lookup the first record less than or equal to [bno, len]
 144 * in the btree given by cur.
 145 */
 146static int                              /* error */
 147xfs_alloc_lookup_le(
 148        struct xfs_btree_cur    *cur,   /* btree cursor */
 149        xfs_agblock_t           bno,    /* starting block of extent */
 150        xfs_extlen_t            len,    /* length of extent */
 151        int                     *stat)  /* success/failure */
 152{
 153        cur->bc_rec.a.ar_startblock = bno;
 154        cur->bc_rec.a.ar_blockcount = len;
 155        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 156}
 157
 158/*
 159 * Update the record referred to by cur to the value given
 160 * by [bno, len].
 161 * This either works (return 0) or gets an EFSCORRUPTED error.
 162 */
 163STATIC int                              /* error */
 164xfs_alloc_update(
 165        struct xfs_btree_cur    *cur,   /* btree cursor */
 166        xfs_agblock_t           bno,    /* starting block of extent */
 167        xfs_extlen_t            len)    /* length of extent */
 168{
 169        union xfs_btree_rec     rec;
 170
 171        rec.alloc.ar_startblock = cpu_to_be32(bno);
 172        rec.alloc.ar_blockcount = cpu_to_be32(len);
 173        return xfs_btree_update(cur, &rec);
 174}
 175
 176/*
 177 * Get the data from the pointed-to record.
 178 */
 179int                                     /* error */
 180xfs_alloc_get_rec(
 181        struct xfs_btree_cur    *cur,   /* btree cursor */
 182        xfs_agblock_t           *bno,   /* output: starting block of extent */
 183        xfs_extlen_t            *len,   /* output: length of extent */
 184        int                     *stat)  /* output: success/failure */
 185{
 186        struct xfs_mount        *mp = cur->bc_mp;
 187        xfs_agnumber_t          agno = cur->bc_private.a.agno;
 188        union xfs_btree_rec     *rec;
 189        int                     error;
 190
 191        error = xfs_btree_get_rec(cur, &rec, stat);
 192        if (error || !(*stat))
 193                return error;
 194
 195        *bno = be32_to_cpu(rec->alloc.ar_startblock);
 196        *len = be32_to_cpu(rec->alloc.ar_blockcount);
 197
 198        if (*len == 0)
 199                goto out_bad_rec;
 200
 201        /* check for valid extent range, including overflow */
 202        if (!xfs_verify_agbno(mp, agno, *bno))
 203                goto out_bad_rec;
 204        if (*bno > *bno + *len)
 205                goto out_bad_rec;
 206        if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
 207                goto out_bad_rec;
 208
 209        return 0;
 210
 211out_bad_rec:
 212        xfs_warn(mp,
 213                "%s Freespace BTree record corruption in AG %d detected!",
 214                cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
 215        xfs_warn(mp,
 216                "start block 0x%x block count 0x%x", *bno, *len);
 217        return -EFSCORRUPTED;
 218}
 219
/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 *
 * The candidate extent is first trimmed against busy extents, may be
 * shifted forward to honour args->min_agbno, and finally has its start
 * rounded up to args->alignment.  The usable extent is returned through
 * *resbno/*reslen; *reslen can end up 0 if alignment eats the whole
 * extent.  The return value is whatever xfs_extent_busy_trim() reported
 * (nonzero when busy extents intersected the range).
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)	/* out: busy extent generation number */
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		/* Blocks skipped for alignment come off the usable length. */
		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		/* No alignment needed (or extent already below minlen). */
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
 267
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Given the desired extent [wantbno, wantlen) and a candidate free extent
 * [freebno, freelen), choose the best aligned start block inside the free
 * extent.  *newbnop receives that block (NULLAGBLOCK if the free extent
 * cannot satisfy the request after alignment) and the return value is the
 * absolute distance of the chosen start from wantbno (0 when unusable).
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = xfs_alloc_is_userdata(datatype);

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The free extent straddles wantbno and alignment matters:
		 * try the aligned blocks on either side of wantbno and keep
		 * whichever yields the longer extent (ties broken by
		 * closeness to wantbno).
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* Free extent covers the target and no alignment needed. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* Free extent ends early: take the aligned tail that fits. */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		/* Unaligned: take the last wantlen blocks of the extent. */
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
 336
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 *
 * On return args->len either satisfies len % prod == mod or is left at
 * its original value when rounding would drop it below minlen.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/*
	 * Nothing to do when rounding is disabled (prod <= 1), when the
	 * length is pinned at maxlen, or when it is too short to round.
	 */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	/* Current remainder; shrink rlen until rlen % prod == mod. */
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
 373
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * Both the by-block (bno) and by-size (cnt) freespace btrees must stay in
 * sync: the original [fbno, flen) record is replaced by zero, one or two
 * remainder records depending on where [rbno, rlen) sits inside it.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		/* Cursor claims to be positioned: verify it really is. */
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		/* Cursor claims to be positioned: verify it really is. */
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	/*
	 * When both cursors are at the single leaf level, the two trees
	 * must contain the same number of records.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		/* The lookup must NOT find an existing record (i == 0). */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}
 521
/*
 * Structural verifier for an AGFL (AG free list) buffer.  Returns the
 * address of the failing check, or NULL when the buffer looks sane.
 */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	int		i;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return NULL;

	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	/* Every slot must be either NULLAGBLOCK or a block inside the AG. */
	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	/* The recorded LSN must not be ahead of the current log. */
	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}
 562
 563static void
 564xfs_agfl_read_verify(
 565        struct xfs_buf  *bp)
 566{
 567        struct xfs_mount *mp = bp->b_target->bt_mount;
 568        xfs_failaddr_t  fa;
 569
 570        /*
 571         * There is no verification of non-crc AGFLs because mkfs does not
 572         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 573         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 574         * can't verify just those entries are valid.
 575         */
 576        if (!xfs_sb_version_hascrc(&mp->m_sb))
 577                return;
 578
 579        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 580                xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 581        else {
 582                fa = xfs_agfl_verify(bp);
 583                if (fa)
 584                        xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 585        }
 586}
 587
 588static void
 589xfs_agfl_write_verify(
 590        struct xfs_buf  *bp)
 591{
 592        struct xfs_mount *mp = bp->b_target->bt_mount;
 593        struct xfs_buf_log_item *bip = bp->b_fspriv;
 594        xfs_failaddr_t          fa;
 595
 596        /* no verification of non-crc AGFLs */
 597        if (!xfs_sb_version_hascrc(&mp->m_sb))
 598                return;
 599
 600        fa = xfs_agfl_verify(bp);
 601        if (fa) {
 602                xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 603                return;
 604        }
 605
 606        if (bip)
 607                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 608
 609        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 610}
 611
/* Buffer operations wiring the AGFL verifiers into the buffer cache. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
 618
 619/*
 620 * Read in the allocation group free block array.
 621 */
 622int                                     /* error */
 623xfs_alloc_read_agfl(
 624        xfs_mount_t     *mp,            /* mount point structure */
 625        xfs_trans_t     *tp,            /* transaction pointer */
 626        xfs_agnumber_t  agno,           /* allocation group number */
 627        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 628{
 629        xfs_buf_t       *bp;            /* return value */
 630        int             error;
 631
 632        ASSERT(agno != NULLAGNUMBER);
 633        error = xfs_trans_read_buf(
 634                        mp, tp, mp->m_ddev_targp,
 635                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 636                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 637        if (error)
 638                return error;
 639        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 640        *bpp = bp;
 641        return 0;
 642}
 643
 644STATIC int
 645xfs_alloc_update_counters(
 646        struct xfs_trans        *tp,
 647        struct xfs_perag        *pag,
 648        struct xfs_buf          *agbp,
 649        long                    len)
 650{
 651        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 652
 653        pag->pagf_freeblks += len;
 654        be32_add_cpu(&agf->agf_freeblks, len);
 655
 656        xfs_trans_agblocks_delta(tp, len);
 657        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 658                     be32_to_cpu(agf->agf_length)))
 659                return -EFSCORRUPTED;
 660
 661        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 662        return 0;
 663}
 664
 665/*
 666 * Allocation group level functions.
 667 */
 668
/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Dispatches to the strategy selected by args->type, then performs the
 * common post-allocation accounting (AG free block counters and
 * superblock fdblocks).
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);
	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	/* Either a hard error or no space found: nothing to account for. */
	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || !args->isfl);
	ASSERT(args->agbno % args->alignment == 0);

	/*
	 * Blocks taken from the free space btrees (not the AGFL) come off
	 * the AGF free block count; the extent must not still be busy.
	 */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					      args->agbno, args->len));
	}

	/*
	 * Non-freelist allocations also reduce the global free block
	 * count, against the delalloc reservation when wasdel is set.
	 */
	if (!args->isfl) {
		xfs_trans_mod_sb(args->tp, args->wasdel ?
				 XFS_TRANS_SB_RES_FDBLOCKS :
				 XFS_TRANS_SB_FDBLOCKS,
				 -((long)(args->len)));
	}

	XFS_STATS_INC(args->mp, xs_allocx);
	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
	return error;
}
 737
 738/*
 739 * Allocate a variable extent at exactly agno/bno.
 740 * Extent's length (returned in *len) will be between minlen and maxlen,
 741 * and of the form k * prod + mod unless there's nothing that large.
 742 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 743 */
 744STATIC int                      /* error */
 745xfs_alloc_ag_vextent_exact(
 746        xfs_alloc_arg_t *args)  /* allocation argument structure */
 747{
 748        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 749        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 750        int             error;
 751        xfs_agblock_t   fbno;   /* start block of found extent */
 752        xfs_extlen_t    flen;   /* length of found extent */
 753        xfs_agblock_t   tbno;   /* start block of busy extent */
 754        xfs_extlen_t    tlen;   /* length of busy extent */
 755        xfs_agblock_t   tend;   /* end block of busy extent */
 756        int             i;      /* success/failure of operation */
 757        unsigned        busy_gen;
 758
 759        ASSERT(args->alignment == 1);
 760
 761        /*
 762         * Allocate/initialize a cursor for the by-number freespace btree.
 763         */
 764        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 765                                          args->agno, XFS_BTNUM_BNO);
 766
 767        /*
 768         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 769         * Look for the closest free block <= bno, it must contain bno
 770         * if any free block does.
 771         */
 772        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 773        if (error)
 774                goto error0;
 775        if (!i)
 776                goto not_found;
 777
 778        /*
 779         * Grab the freespace record.
 780         */
 781        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 782        if (error)
 783                goto error0;
 784        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 785        ASSERT(fbno <= args->agbno);
 786
 787        /*
 788         * Check for overlapping busy extents.
 789         */
 790        tbno = fbno;
 791        tlen = flen;
 792        xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 793
 794        /*
 795         * Give up if the start of the extent is busy, or the freespace isn't
 796         * long enough for the minimum request.
 797         */
 798        if (tbno > args->agbno)
 799                goto not_found;
 800        if (tlen < args->minlen)
 801                goto not_found;
 802        tend = tbno + tlen;
 803        if (tend < args->agbno + args->minlen)
 804                goto not_found;
 805
 806        /*
 807         * End of extent will be smaller of the freespace end and the
 808         * maximal requested end.
 809         *
 810         * Fix the length according to mod and prod if given.
 811         */
 812        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 813                                                - args->agbno;
 814        xfs_alloc_fix_len(args);
 815        ASSERT(args->agbno + args->len <= tend);
 816
 817        /*
 818         * We are allocating agbno for args->len
 819         * Allocate/initialize a cursor for the by-size btree.
 820         */
 821        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 822                args->agno, XFS_BTNUM_CNT);
 823        ASSERT(args->agbno + args->len <=
 824                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 825        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 826                                      args->len, XFSA_FIXUP_BNO_OK);
 827        if (error) {
 828                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 829                goto error0;
 830        }
 831
 832        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 833        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 834
 835        args->wasfromfl = 0;
 836        trace_xfs_alloc_exact_done(args);
 837        return 0;
 838
 839not_found:
 840        /* Didn't find it, return null. */
 841        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 842        args->agbno = NULLAGBLOCK;
 843        trace_xfs_alloc_exact_notfound(args);
 844        return 0;
 845
 846error0:
 847        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 848        trace_xfs_alloc_exact_error(args);
 849        return error;
 850}
 851
 852/*
 853 * Search the btree in a given direction via the search cursor and compare
 854 * the records found against the good extent we've already found.
 855 */
 856STATIC int
 857xfs_alloc_find_best_extent(
 858        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 859        struct xfs_btree_cur    **gcur, /* good cursor */
 860        struct xfs_btree_cur    **scur, /* searching cursor */
 861        xfs_agblock_t           gdiff,  /* difference for search comparison */
 862        xfs_agblock_t           *sbno,  /* extent found by search */
 863        xfs_extlen_t            *slen,  /* extent length */
 864        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 865        xfs_extlen_t            *slena, /* aligned extent length */
 866        int                     dir)    /* 0 = search right, 1 = search left */
 867{
 868        xfs_agblock_t           new;
 869        xfs_agblock_t           sdiff;
 870        int                     error;
 871        int                     i;
 872        unsigned                busy_gen;
 873
 874        /* The good extent is perfect, no need to  search. */
 875        if (!gdiff)
 876                goto out_use_good;
 877
 878        /*
 879         * Look until we find a better one, run out of space or run off the end.
 880         */
 881        do {
 882                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 883                if (error)
 884                        goto error0;
 885                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 886                xfs_alloc_compute_aligned(args, *sbno, *slen,
 887                                sbnoa, slena, &busy_gen);
 888
 889                /*
 890                 * The good extent is closer than this one.
 891                 */
 892                if (!dir) {
 893                        if (*sbnoa > args->max_agbno)
 894                                goto out_use_good;
 895                        if (*sbnoa >= args->agbno + gdiff)
 896                                goto out_use_good;
 897                } else {
 898                        if (*sbnoa < args->min_agbno)
 899                                goto out_use_good;
 900                        if (*sbnoa <= args->agbno - gdiff)
 901                                goto out_use_good;
 902                }
 903
 904                /*
 905                 * Same distance, compare length and pick the best.
 906                 */
 907                if (*slena >= args->minlen) {
 908                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 909                        xfs_alloc_fix_len(args);
 910
 911                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 912                                                       args->alignment,
 913                                                       args->datatype, *sbnoa,
 914                                                       *slena, &new);
 915
 916                        /*
 917                         * Choose closer size and invalidate other cursor.
 918                         */
 919                        if (sdiff < gdiff)
 920                                goto out_use_search;
 921                        goto out_use_good;
 922                }
 923
 924                if (!dir)
 925                        error = xfs_btree_increment(*scur, 0, &i);
 926                else
 927                        error = xfs_btree_decrement(*scur, 0, &i);
 928                if (error)
 929                        goto error0;
 930        } while (i);
 931
 932out_use_good:
 933        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 934        *scur = NULL;
 935        return 0;
 936
 937out_use_search:
 938        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 939        *gcur = NULL;
 940        return 0;
 941
 942error0:
 943        /* caller invalidates cursors */
 944        return error;
 945}
 946
 947/*
 948 * Allocate a variable extent near bno in the allocation group agno.
 949 * Extent's length (returned in len) will be between minlen and maxlen,
 950 * and of the form k * prod + mod unless there's nothing that large.
 951 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 952 */
 953STATIC int                              /* error */
 954xfs_alloc_ag_vextent_near(
 955        xfs_alloc_arg_t *args)          /* allocation argument structure */
 956{
 957        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 958        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 959        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 960        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 961        xfs_agblock_t   gtbnoa;         /* aligned ... */
 962        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 963        xfs_extlen_t    gtlen;          /* length of right side entry */
 964        xfs_extlen_t    gtlena;         /* aligned ... */
 965        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 966        int             error;          /* error code */
 967        int             i;              /* result code, temporary */
 968        int             j;              /* result code, temporary */
 969        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 970        xfs_agblock_t   ltbnoa;         /* aligned ... */
 971        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 972        xfs_extlen_t    ltlen;          /* length of left side entry */
 973        xfs_extlen_t    ltlena;         /* aligned ... */
 974        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 975        xfs_extlen_t    rlen;           /* length of returned extent */
 976        bool            busy;
 977        unsigned        busy_gen;
 978#ifdef DEBUG
 979        /*
 980         * Randomly don't execute the first algorithm.
 981         */
 982        int             dofirst;        /* set to do first algorithm */
 983
 984        dofirst = prandom_u32() & 1;
 985#endif
 986
 987        /* handle unitialized agbno range so caller doesn't have to */
 988        if (!args->min_agbno && !args->max_agbno)
 989                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 990        ASSERT(args->min_agbno <= args->max_agbno);
 991
 992        /* clamp agbno to the range if it's outside */
 993        if (args->agbno < args->min_agbno)
 994                args->agbno = args->min_agbno;
 995        if (args->agbno > args->max_agbno)
 996                args->agbno = args->max_agbno;
 997
 998restart:
 999        bno_cur_lt = NULL;
1000        bno_cur_gt = NULL;
1001        ltlen = 0;
1002        gtlena = 0;
1003        ltlena = 0;
1004        busy = false;
1005
1006        /*
1007         * Get a cursor for the by-size btree.
1008         */
1009        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1010                args->agno, XFS_BTNUM_CNT);
1011
1012        /*
1013         * See if there are any free extents as big as maxlen.
1014         */
1015        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1016                goto error0;
1017        /*
1018         * If none, then pick up the last entry in the tree unless the
1019         * tree is empty.
1020         */
1021        if (!i) {
1022                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1023                                &ltlen, &i)))
1024                        goto error0;
1025                if (i == 0 || ltlen == 0) {
1026                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1027                        trace_xfs_alloc_near_noentry(args);
1028                        return 0;
1029                }
1030                ASSERT(i == 1);
1031        }
1032        args->wasfromfl = 0;
1033
1034        /*
1035         * First algorithm.
1036         * If the requested extent is large wrt the freespaces available
1037         * in this a.g., then the cursor will be pointing to a btree entry
1038         * near the right edge of the tree.  If it's in the last btree leaf
1039         * block, then we just examine all the entries in that block
1040         * that are big enough, and pick the best one.
1041         * This is written as a while loop so we can break out of it,
1042         * but we never loop back to the top.
1043         */
1044        while (xfs_btree_islastblock(cnt_cur, 0)) {
1045                xfs_extlen_t    bdiff;
1046                int             besti=0;
1047                xfs_extlen_t    blen=0;
1048                xfs_agblock_t   bnew=0;
1049
1050#ifdef DEBUG
1051                if (dofirst)
1052                        break;
1053#endif
1054                /*
1055                 * Start from the entry that lookup found, sequence through
1056                 * all larger free blocks.  If we're actually pointing at a
1057                 * record smaller than maxlen, go to the start of this block,
1058                 * and skip all those smaller than minlen.
1059                 */
1060                if (ltlen || args->alignment > 1) {
1061                        cnt_cur->bc_ptrs[0] = 1;
1062                        do {
1063                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1064                                                &ltlen, &i)))
1065                                        goto error0;
1066                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1067                                if (ltlen >= args->minlen)
1068                                        break;
1069                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1070                                        goto error0;
1071                        } while (i);
1072                        ASSERT(ltlen >= args->minlen);
1073                        if (!i)
1074                                break;
1075                }
1076                i = cnt_cur->bc_ptrs[0];
1077                for (j = 1, blen = 0, bdiff = 0;
1078                     !error && j && (blen < args->maxlen || bdiff > 0);
1079                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1080                        /*
1081                         * For each entry, decide if it's better than
1082                         * the previous best entry.
1083                         */
1084                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1085                                goto error0;
1086                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1087                        busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1088                                        &ltbnoa, &ltlena, &busy_gen);
1089                        if (ltlena < args->minlen)
1090                                continue;
1091                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1092                                continue;
1093                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1094                        xfs_alloc_fix_len(args);
1095                        ASSERT(args->len >= args->minlen);
1096                        if (args->len < blen)
1097                                continue;
1098                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1099                                args->alignment, args->datatype, ltbnoa,
1100                                ltlena, &ltnew);
1101                        if (ltnew != NULLAGBLOCK &&
1102                            (args->len > blen || ltdiff < bdiff)) {
1103                                bdiff = ltdiff;
1104                                bnew = ltnew;
1105                                blen = args->len;
1106                                besti = cnt_cur->bc_ptrs[0];
1107                        }
1108                }
1109                /*
1110                 * It didn't work.  We COULD be in a case where
1111                 * there's a good record somewhere, so try again.
1112                 */
1113                if (blen == 0)
1114                        break;
1115                /*
1116                 * Point at the best entry, and retrieve it again.
1117                 */
1118                cnt_cur->bc_ptrs[0] = besti;
1119                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1120                        goto error0;
1121                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1122                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1123                args->len = blen;
1124
1125                /*
1126                 * We are allocating starting at bnew for blen blocks.
1127                 */
1128                args->agbno = bnew;
1129                ASSERT(bnew >= ltbno);
1130                ASSERT(bnew + blen <= ltbno + ltlen);
1131                /*
1132                 * Set up a cursor for the by-bno tree.
1133                 */
1134                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1135                        args->agbp, args->agno, XFS_BTNUM_BNO);
1136                /*
1137                 * Fix up the btree entries.
1138                 */
1139                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1140                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1141                        goto error0;
1142                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1143                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1144
1145                trace_xfs_alloc_near_first(args);
1146                return 0;
1147        }
1148        /*
1149         * Second algorithm.
1150         * Search in the by-bno tree to the left and to the right
1151         * simultaneously, until in each case we find a space big enough,
1152         * or run into the edge of the tree.  When we run into the edge,
1153         * we deallocate that cursor.
1154         * If both searches succeed, we compare the two spaces and pick
1155         * the better one.
1156         * With alignment, it's possible for both to fail; the upper
1157         * level algorithm that picks allocation groups for allocations
1158         * is not supposed to do this.
1159         */
1160        /*
1161         * Allocate and initialize the cursor for the leftward search.
1162         */
1163        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1164                args->agno, XFS_BTNUM_BNO);
1165        /*
1166         * Lookup <= bno to find the leftward search's starting point.
1167         */
1168        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1169                goto error0;
1170        if (!i) {
1171                /*
1172                 * Didn't find anything; use this cursor for the rightward
1173                 * search.
1174                 */
1175                bno_cur_gt = bno_cur_lt;
1176                bno_cur_lt = NULL;
1177        }
1178        /*
1179         * Found something.  Duplicate the cursor for the rightward search.
1180         */
1181        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1182                goto error0;
1183        /*
1184         * Increment the cursor, so we will point at the entry just right
1185         * of the leftward entry if any, or to the leftmost entry.
1186         */
1187        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1188                goto error0;
1189        if (!i) {
1190                /*
1191                 * It failed, there are no rightward entries.
1192                 */
1193                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1194                bno_cur_gt = NULL;
1195        }
1196        /*
1197         * Loop going left with the leftward cursor, right with the
1198         * rightward cursor, until either both directions give up or
1199         * we find an entry at least as big as minlen.
1200         */
1201        do {
1202                if (bno_cur_lt) {
1203                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1204                                goto error0;
1205                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1206                        busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1207                                        &ltbnoa, &ltlena, &busy_gen);
1208                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1209                                break;
1210                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1211                                goto error0;
1212                        if (!i || ltbnoa < args->min_agbno) {
1213                                xfs_btree_del_cursor(bno_cur_lt,
1214                                                     XFS_BTREE_NOERROR);
1215                                bno_cur_lt = NULL;
1216                        }
1217                }
1218                if (bno_cur_gt) {
1219                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1220                                goto error0;
1221                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1222                        busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1223                                        &gtbnoa, &gtlena, &busy_gen);
1224                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1225                                break;
1226                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1227                                goto error0;
1228                        if (!i || gtbnoa > args->max_agbno) {
1229                                xfs_btree_del_cursor(bno_cur_gt,
1230                                                     XFS_BTREE_NOERROR);
1231                                bno_cur_gt = NULL;
1232                        }
1233                }
1234        } while (bno_cur_lt || bno_cur_gt);
1235
1236        /*
1237         * Got both cursors still active, need to find better entry.
1238         */
1239        if (bno_cur_lt && bno_cur_gt) {
1240                if (ltlena >= args->minlen) {
1241                        /*
1242                         * Left side is good, look for a right side entry.
1243                         */
1244                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1245                        xfs_alloc_fix_len(args);
1246                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1247                                args->alignment, args->datatype, ltbnoa,
1248                                ltlena, &ltnew);
1249
1250                        error = xfs_alloc_find_best_extent(args,
1251                                                &bno_cur_lt, &bno_cur_gt,
1252                                                ltdiff, &gtbno, &gtlen,
1253                                                &gtbnoa, &gtlena,
1254                                                0 /* search right */);
1255                } else {
1256                        ASSERT(gtlena >= args->minlen);
1257
1258                        /*
1259                         * Right side is good, look for a left side entry.
1260                         */
1261                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1262                        xfs_alloc_fix_len(args);
1263                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1264                                args->alignment, args->datatype, gtbnoa,
1265                                gtlena, &gtnew);
1266
1267                        error = xfs_alloc_find_best_extent(args,
1268                                                &bno_cur_gt, &bno_cur_lt,
1269                                                gtdiff, &ltbno, &ltlen,
1270                                                &ltbnoa, &ltlena,
1271                                                1 /* search left */);
1272                }
1273
1274                if (error)
1275                        goto error0;
1276        }
1277
1278        /*
1279         * If we couldn't get anything, give up.
1280         */
1281        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1282                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1283
1284                if (busy) {
1285                        trace_xfs_alloc_near_busy(args);
1286                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1287                        goto restart;
1288                }
1289                trace_xfs_alloc_size_neither(args);
1290                args->agbno = NULLAGBLOCK;
1291                return 0;
1292        }
1293
1294        /*
1295         * At this point we have selected a freespace entry, either to the
1296         * left or to the right.  If it's on the right, copy all the
1297         * useful variables to the "left" set so we only have one
1298         * copy of this code.
1299         */
1300        if (bno_cur_gt) {
1301                bno_cur_lt = bno_cur_gt;
1302                bno_cur_gt = NULL;
1303                ltbno = gtbno;
1304                ltbnoa = gtbnoa;
1305                ltlen = gtlen;
1306                ltlena = gtlena;
1307                j = 1;
1308        } else
1309                j = 0;
1310
1311        /*
1312         * Fix up the length and compute the useful address.
1313         */
1314        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1315        xfs_alloc_fix_len(args);
1316        rlen = args->len;
1317        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1318                                     args->datatype, ltbnoa, ltlena, &ltnew);
1319        ASSERT(ltnew >= ltbno);
1320        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1321        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1322        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1323        args->agbno = ltnew;
1324
1325        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1326                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1327                goto error0;
1328
1329        if (j)
1330                trace_xfs_alloc_near_greater(args);
1331        else
1332                trace_xfs_alloc_near_lesser(args);
1333
1334        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1335        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1336        return 0;
1337
1338 error0:
1339        trace_xfs_alloc_near_error(args);
1340        if (cnt_cur != NULL)
1341                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1342        if (bno_cur_lt != NULL)
1343                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1344        if (bno_cur_gt != NULL)
1345                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1346        return error;
1347}
1348
1349/*
1350 * Allocate a variable extent anywhere in the allocation group agno.
1351 * Extent's length (returned in len) will be between minlen and maxlen,
1352 * and of the form k * prod + mod unless there's nothing that large.
1353 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1354 */
1355STATIC int                              /* error */
1356xfs_alloc_ag_vextent_size(
1357        xfs_alloc_arg_t *args)          /* allocation argument structure */
1358{
1359        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1360        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1361        int             error;          /* error result */
1362        xfs_agblock_t   fbno;           /* start of found freespace */
1363        xfs_extlen_t    flen;           /* length of found freespace */
1364        int             i;              /* temp status variable */
1365        xfs_agblock_t   rbno;           /* returned block number */
1366        xfs_extlen_t    rlen;           /* length of returned extent */
1367        bool            busy;
1368        unsigned        busy_gen;
1369
1370restart:
1371        /*
1372         * Allocate and initialize a cursor for the by-size btree.
1373         */
1374        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1375                args->agno, XFS_BTNUM_CNT);
1376        bno_cur = NULL;
1377        busy = false;
1378
1379        /*
1380         * Look for an entry >= maxlen+alignment-1 blocks.
1381         */
1382        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1383                        args->maxlen + args->alignment - 1, &i)))
1384                goto error0;
1385
1386        /*
1387         * If none then we have to settle for a smaller extent. In the case that
1388         * there are no large extents, this will return the last entry in the
1389         * tree unless the tree is empty. In the case that there are only busy
1390         * large extents, this will return the largest small extent unless there
1391         * are no smaller extents available.
1392         */
1393        if (!i) {
1394                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1395                                                   &fbno, &flen, &i);
1396                if (error)
1397                        goto error0;
1398                if (i == 0 || flen == 0) {
1399                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1400                        trace_xfs_alloc_size_noentry(args);
1401                        return 0;
1402                }
1403                ASSERT(i == 1);
1404                busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1405                                &rlen, &busy_gen);
1406        } else {
1407                /*
1408                 * Search for a non-busy extent that is large enough.
1409                 */
1410                for (;;) {
1411                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1412                        if (error)
1413                                goto error0;
1414                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1415
1416                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1417                                        &rbno, &rlen, &busy_gen);
1418
1419                        if (rlen >= args->maxlen)
1420                                break;
1421
1422                        error = xfs_btree_increment(cnt_cur, 0, &i);
1423                        if (error)
1424                                goto error0;
1425                        if (i == 0) {
1426                                /*
1427                                 * Our only valid extents must have been busy.
1428                                 * Make it unbusy by forcing the log out and
1429                                 * retrying.
1430                                 */
1431                                xfs_btree_del_cursor(cnt_cur,
1432                                                     XFS_BTREE_NOERROR);
1433                                trace_xfs_alloc_size_busy(args);
1434                                xfs_extent_busy_flush(args->mp,
1435                                                        args->pag, busy_gen);
1436                                goto restart;
1437                        }
1438                }
1439        }
1440
1441        /*
1442         * In the first case above, we got the last entry in the
1443         * by-size btree.  Now we check to see if the space hits maxlen
1444         * once aligned; if not, we search left for something better.
1445         * This can't happen in the second case above.
1446         */
1447        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1448        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1449                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1450        if (rlen < args->maxlen) {
1451                xfs_agblock_t   bestfbno;
1452                xfs_extlen_t    bestflen;
1453                xfs_agblock_t   bestrbno;
1454                xfs_extlen_t    bestrlen;
1455
1456                bestrlen = rlen;
1457                bestrbno = rbno;
1458                bestflen = flen;
1459                bestfbno = fbno;
1460                for (;;) {
1461                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1462                                goto error0;
1463                        if (i == 0)
1464                                break;
1465                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1466                                        &i)))
1467                                goto error0;
1468                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1469                        if (flen < bestrlen)
1470                                break;
1471                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1472                                        &rbno, &rlen, &busy_gen);
1473                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1474                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1475                                (rlen <= flen && rbno + rlen <= fbno + flen),
1476                                error0);
1477                        if (rlen > bestrlen) {
1478                                bestrlen = rlen;
1479                                bestrbno = rbno;
1480                                bestflen = flen;
1481                                bestfbno = fbno;
1482                                if (rlen == args->maxlen)
1483                                        break;
1484                        }
1485                }
1486                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1487                                &i)))
1488                        goto error0;
1489                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1490                rlen = bestrlen;
1491                rbno = bestrbno;
1492                flen = bestflen;
1493                fbno = bestfbno;
1494        }
1495        args->wasfromfl = 0;
1496        /*
1497         * Fix up the length.
1498         */
1499        args->len = rlen;
1500        if (rlen < args->minlen) {
1501                if (busy) {
1502                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1503                        trace_xfs_alloc_size_busy(args);
1504                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1505                        goto restart;
1506                }
1507                goto out_nominleft;
1508        }
1509        xfs_alloc_fix_len(args);
1510
1511        rlen = args->len;
1512        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1513        /*
1514         * Allocate and initialize a cursor for the by-block tree.
1515         */
1516        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1517                args->agno, XFS_BTNUM_BNO);
1518        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1519                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1520                goto error0;
1521        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1522        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1523        cnt_cur = bno_cur = NULL;
1524        args->len = rlen;
1525        args->agbno = rbno;
1526        XFS_WANT_CORRUPTED_GOTO(args->mp,
1527                args->agbno + args->len <=
1528                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1529                error0);
1530        trace_xfs_alloc_size_done(args);
1531        return 0;
1532
1533error0:
1534        trace_xfs_alloc_size_error(args);
1535        if (cnt_cur)
1536                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1537        if (bno_cur)
1538                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1539        return error;
1540
1541out_nominleft:
1542        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1543        trace_xfs_alloc_size_nominleft(args);
1544        args->agbno = NULLAGBLOCK;
1545        return 0;
1546}
1547
/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 *
 * Returns 0 on success, with *stat describing the outcome:
 *   *stat == 1: a record was returned in *fbnop/*flenp; *fbnop may be
 *               NULLAGBLOCK (with *flenp == 0) if nothing usable was found.
 *   *stat == 0: a single block was taken straight off the AG freelist and
 *               args->agbno/args->len have already been filled in.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	/* Step back to the last (largest) record in the by-size btree. */
	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.  Only a plain single-block request (no alignment,
	 * not itself a freelist fill) may be satisfied this way.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			/*
			 * NOTE(review): presumably resolves any pending busy
			 * state on this block before it is reused — confirm
			 * against xfs_extent_busy.c.
			 */
			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
			      xfs_alloc_allow_busy_reuse(args->datatype));

			if (xfs_alloc_is_userdata(args->datatype)) {
				xfs_buf_t	*bp;

				/*
				 * Freelist blocks may carry stale metadata
				 * content; invalidate the buffer before the
				 * block is handed out as user data.
				 */
				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				if (!bp) {
					error = -EFSCORRUPTED;
					goto error0;
				}
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			/* the returned block must lie within the AG */
			XFS_WANT_CORRUPTED_GOTO(args->mp,
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}
1641
/*
 * Free the extent starting at agno/bno for length.
 *
 * Coalesces the freed range with any physically contiguous free extents
 * on either side, keeping the by-block (bnobt) and by-size (cntbt)
 * freespace btrees consistent, then updates the AG free space counters
 * and — unless this is freelist-internal space (isfl) — the superblock
 * free block count.
 */
STATIC int			/* error */
xfs_free_ag_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_agnumber_t	agno,	/* allocation group number */
	xfs_agblock_t	bno,	/* starting block number */
	xfs_extlen_t	len,	/* length of extent */
	int		isfl)	/* set if is freelist blocks - no sb acctg */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
	int		error;		/* error return value */
	xfs_agblock_t	gtbno;		/* start of right neighbor block */
	xfs_extlen_t	gtlen;		/* length of right neighbor block */
	int		haveleft;	/* have a left neighbor block */
	int		haveright;	/* have a right neighbor block */
	int		i;		/* temp, result code */
	xfs_agblock_t	ltbno;		/* start of left neighbor block */
	xfs_extlen_t	ltlen;		/* length of left neighbor block */
	xfs_mount_t	*mp;		/* mount point struct for filesystem */
	xfs_agblock_t	nbno;		/* new starting block of freespace */
	xfs_extlen_t	nlen;		/* new length of freespace */
	xfs_perag_t	*pag;		/* per allocation group data */

	mp = tp->t_mountp;
	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	cnt_cur = NULL;
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp,
						ltbno + ltlen <= bno, error0);
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-block entry for the right block.
		 * (bno_cur is still positioned on the right neighbor.)
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(mp,
				i == 1 && xxbno == ltbno && xxlen == ltlen,
				error0);
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 * The lookup must NOT find an existing record (i == 0), otherwise the
	 * two trees are already inconsistent.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	pag = xfs_perag_get(mp, agno);
	error = xfs_alloc_update_counters(tp, pag, agbp, len);
	xfs_perag_put(pag);
	if (error)
		goto error0;

	if (!isfl)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
	XFS_STATS_INC(mp, xs_freex);
	XFS_STATS_ADD(mp, xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);

	return 0;

 error0:
	/* -1/-1 marks the error path in the trace output */
	trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}
1894
1895/*
1896 * Visible (exported) allocation/free functions.
1897 * Some of these are used just by xfs_alloc_btree.c and this file.
1898 */
1899
1900/*
1901 * Compute and fill in value of m_ag_maxlevels.
1902 */
1903void
1904xfs_alloc_compute_maxlevels(
1905        xfs_mount_t     *mp)    /* file system mount structure */
1906{
1907        mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1908                        (mp->m_sb.sb_agblocks + 1) / 2);
1909}
1910
1911/*
1912 * Find the length of the longest extent in an AG.
1913 */
1914xfs_extlen_t
1915xfs_alloc_longest_free_extent(
1916        struct xfs_mount        *mp,
1917        struct xfs_perag        *pag,
1918        xfs_extlen_t            need)
1919{
1920        xfs_extlen_t            delta = 0;
1921
1922        if (need > pag->pagf_flcount)
1923                delta = need - pag->pagf_flcount;
1924
1925        if (pag->pagf_longest > delta)
1926                return pag->pagf_longest - delta;
1927        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1928}
1929
1930unsigned int
1931xfs_alloc_min_freelist(
1932        struct xfs_mount        *mp,
1933        struct xfs_perag        *pag)
1934{
1935        unsigned int            min_free;
1936
1937        /* space needed by-bno freespace btree */
1938        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1939                                       mp->m_ag_maxlevels);
1940        /* space needed by-size freespace btree */
1941        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1942                                       mp->m_ag_maxlevels);
1943
1944        return min_free;
1945}
1946
1947/*
1948 * Check if the operation we are fixing up the freelist for should go ahead or
1949 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1950 * is dependent on whether the size and shape of free space available will
1951 * permit the requested allocation to take place.
1952 */
1953static bool
1954xfs_alloc_space_available(
1955        struct xfs_alloc_arg    *args,
1956        xfs_extlen_t            min_free,
1957        int                     flags)
1958{
1959        struct xfs_perag        *pag = args->pag;
1960        xfs_extlen_t            alloc_len, longest;
1961        int                     available;
1962
1963        if (flags & XFS_ALLOC_FLAG_FREEING)
1964                return true;
1965
1966        /* do we have enough contiguous free space for the allocation? */
1967        alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
1968        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1969        if (longest < alloc_len)
1970                return false;
1971
1972        /* do have enough free space remaining for the allocation? */
1973        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
1974                          min_free - args->minleft);
1975        if (available < (int)max(args->total, alloc_len))
1976                return false;
1977
1978        /*
1979         * Clamp maxlen to the amount of free space available for the actual
1980         * extent allocation.
1981         */
1982        if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
1983                args->maxlen = available;
1984                ASSERT(args->maxlen > 0);
1985                ASSERT(args->maxlen >= args->minlen);
1986        }
1987
1988        return true;
1989}
1990
1991/*
1992 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
1993 * is to detect an agfl header padding mismatch between current and early v5
1994 * kernels. This problem manifests as a 1-slot size difference between the
1995 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
1996 * may also catch variants of agfl count corruption unrelated to padding. Either
1997 * way, we'll reset the agfl and warn the user.
1998 *
1999 * Return true if a reset is required before the agfl can be used, false
2000 * otherwise.
2001 */
2002static bool
2003xfs_agfl_needs_reset(
2004        struct xfs_mount        *mp,
2005        struct xfs_agf          *agf)
2006{
2007        uint32_t                f = be32_to_cpu(agf->agf_flfirst);
2008        uint32_t                l = be32_to_cpu(agf->agf_fllast);
2009        uint32_t                c = be32_to_cpu(agf->agf_flcount);
2010        int                     agfl_size = XFS_AGFL_SIZE(mp);
2011        int                     active;
2012
2013        /* no agfl header on v4 supers */
2014        if (!xfs_sb_version_hascrc(&mp->m_sb))
2015                return false;
2016
2017        /*
2018         * The agf read verifier catches severe corruption of these fields.
2019         * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2020         * the verifier allows it.
2021         */
2022        if (f >= agfl_size || l >= agfl_size)
2023                return true;
2024        if (c > agfl_size)
2025                return true;
2026
2027        /*
2028         * Check consistency between the on-disk count and the active range. An
2029         * agfl padding mismatch manifests as an inconsistent flcount.
2030         */
2031        if (c && l >= f)
2032                active = l - f + 1;
2033        else if (c)
2034                active = agfl_size - f + l + 1;
2035        else
2036                active = 0;
2037
2038        return active != c;
2039}
2040
2041/*
2042 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2043 * agfl content cannot be trusted. Warn the user that a repair is required to
2044 * recover leaked blocks.
2045 *
2046 * The purpose of this mechanism is to handle filesystems affected by the agfl
2047 * header padding mismatch problem. A reset keeps the filesystem online with a
2048 * relatively minor free space accounting inconsistency rather than suffer the
2049 * inevitable crash from use of an invalid agfl block.
2050 */
2051static void
2052xfs_agfl_reset(
2053        struct xfs_trans        *tp,
2054        struct xfs_buf          *agbp,
2055        struct xfs_perag        *pag)
2056{
2057        struct xfs_mount        *mp = tp->t_mountp;
2058        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
2059
2060        ASSERT(pag->pagf_agflreset);
2061        trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2062
2063        xfs_warn(mp,
2064               "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2065               "Please unmount and run xfs_repair.",
2066                 pag->pag_agno, pag->pagf_flcount);
2067
2068        agf->agf_flfirst = 0;
2069        agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
2070        agf->agf_flcount = 0;
2071        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2072                                    XFS_AGF_FLCOUNT);
2073
2074        pag->pagf_flcount = 0;
2075        pag->pagf_agflreset = false;
2076}
2077
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 *
 * On success returns 0 with args->agbp set to the locked AGF buffer.
 * If the AG is rejected (or cannot be trylocked), returns 0 with
 * args->agbp == NULL; real failures return a negative error with
 * args->agbp == NULL.
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* Read the AGF to initialize the per-ag data, if not done already. */
	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!pag->pagf_init) {
			/* still uninitialized: the trylocked read gave up */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/* Cheap first-pass space check using only the in-core per-ag data. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 */
	while (pag->pagf_flcount > need) {
		struct xfs_buf	*bp;

		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;
		/* free one block back to the AG; isfl==1 skips sb acctg */
		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
		if (error)
			goto out_agbp_relse;
		/* invalidate any stale buffer contents for the freed block */
		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
		if (!bp) {
			error = -EFSCORRUPTED;
			goto out_agbp_relse;
		}
		xfs_trans_binval(tp, bp);
	}

	/* Set up an internal allocation request to refill the freelist. */
	memset(&targs, 0, sizeof(targs));
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2236
2237/*
2238 * Get a block from the freelist.
2239 * Returns with the buffer for the block gotten.
2240 */
2241int                             /* error */
2242xfs_alloc_get_freelist(
2243        xfs_trans_t     *tp,    /* transaction pointer */
2244        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2245        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2246        int             btreeblk) /* destination is a AGF btree */
2247{
2248        xfs_agf_t       *agf;   /* a.g. freespace structure */
2249        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2250        xfs_agblock_t   bno;    /* block number returned */
2251        __be32          *agfl_bno;
2252        int             error;
2253        int             logflags;
2254        xfs_mount_t     *mp = tp->t_mountp;
2255        xfs_perag_t     *pag;   /* per allocation group data */
2256
2257        /*
2258         * Freelist is empty, give up.
2259         */
2260        agf = XFS_BUF_TO_AGF(agbp);
2261        if (!agf->agf_flcount) {
2262                *bnop = NULLAGBLOCK;
2263                return 0;
2264        }
2265        /*
2266         * Read the array of free blocks.
2267         */
2268        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2269                                    &agflbp);
2270        if (error)
2271                return error;
2272
2273
2274        /*
2275         * Get the block number and update the data structures.
2276         */
2277        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2278        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2279        be32_add_cpu(&agf->agf_flfirst, 1);
2280        xfs_trans_brelse(tp, agflbp);
2281        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2282                agf->agf_flfirst = 0;
2283
2284        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2285        ASSERT(!pag->pagf_agflreset);
2286        be32_add_cpu(&agf->agf_flcount, -1);
2287        xfs_trans_agflist_delta(tp, -1);
2288        pag->pagf_flcount--;
2289        xfs_perag_put(pag);
2290
2291        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2292        if (btreeblk) {
2293                be32_add_cpu(&agf->agf_btreeblks, 1);
2294                pag->pagf_btreeblks++;
2295                logflags |= XFS_AGF_BTREEBLKS;
2296        }
2297
2298        xfs_alloc_log_agf(tp, agbp, logflags);
2299        *bnop = bno;
2300
2301        return 0;
2302}
2303
2304/*
2305 * Log the given fields from the agf structure.
2306 */
2307void
2308xfs_alloc_log_agf(
2309        xfs_trans_t     *tp,    /* transaction pointer */
2310        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2311        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2312{
2313        int     first;          /* first byte offset */
2314        int     last;           /* last byte offset */
2315        static const short      offsets[] = {
2316                offsetof(xfs_agf_t, agf_magicnum),
2317                offsetof(xfs_agf_t, agf_versionnum),
2318                offsetof(xfs_agf_t, agf_seqno),
2319                offsetof(xfs_agf_t, agf_length),
2320                offsetof(xfs_agf_t, agf_roots[0]),
2321                offsetof(xfs_agf_t, agf_levels[0]),
2322                offsetof(xfs_agf_t, agf_flfirst),
2323                offsetof(xfs_agf_t, agf_fllast),
2324                offsetof(xfs_agf_t, agf_flcount),
2325                offsetof(xfs_agf_t, agf_freeblks),
2326                offsetof(xfs_agf_t, agf_longest),
2327                offsetof(xfs_agf_t, agf_btreeblks),
2328                offsetof(xfs_agf_t, agf_uuid),
2329                /* needed so that we don't log the whole rest of the structure: */
2330                offsetof(xfs_agf_t, agf_spare64),
2331                sizeof(xfs_agf_t)
2332        };
2333
2334        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2335
2336        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2337
2338        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2339        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2340}
2341
2342/*
2343 * Interface for inode allocation to force the pag data to be initialized.
2344 */
2345int                                     /* error */
2346xfs_alloc_pagf_init(
2347        xfs_mount_t             *mp,    /* file system mount structure */
2348        xfs_trans_t             *tp,    /* transaction pointer */
2349        xfs_agnumber_t          agno,   /* allocation group number */
2350        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2351{
2352        xfs_buf_t               *bp;
2353        int                     error;
2354
2355        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2356                return error;
2357        if (bp)
2358                xfs_trans_brelse(tp, bp);
2359        return 0;
2360}
2361
2362/*
2363 * Put the block on the freelist for the allocation group.
2364 */
2365int                                     /* error */
2366xfs_alloc_put_freelist(
2367        xfs_trans_t             *tp,    /* transaction pointer */
2368        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2369        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2370        xfs_agblock_t           bno,    /* block being freed */
2371        int                     btreeblk) /* block came from a AGF btree */
2372{
2373        xfs_agf_t               *agf;   /* a.g. freespace structure */
2374        __be32                  *blockp;/* pointer to array entry */
2375        int                     error;
2376        int                     logflags;
2377        xfs_mount_t             *mp;    /* mount structure */
2378        xfs_perag_t             *pag;   /* per allocation group data */
2379        __be32                  *agfl_bno;
2380        int                     startoff;
2381
2382        agf = XFS_BUF_TO_AGF(agbp);
2383        mp = tp->t_mountp;
2384
2385        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2386                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2387                return error;
2388        be32_add_cpu(&agf->agf_fllast, 1);
2389        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2390                agf->agf_fllast = 0;
2391
2392        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2393        ASSERT(!pag->pagf_agflreset);
2394        be32_add_cpu(&agf->agf_flcount, 1);
2395        xfs_trans_agflist_delta(tp, 1);
2396        pag->pagf_flcount++;
2397
2398        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2399        if (btreeblk) {
2400                be32_add_cpu(&agf->agf_btreeblks, -1);
2401                pag->pagf_btreeblks--;
2402                logflags |= XFS_AGF_BTREEBLKS;
2403        }
2404        xfs_perag_put(pag);
2405
2406        xfs_alloc_log_agf(tp, agbp, logflags);
2407
2408        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2409
2410        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2411        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2412        *blockp = cpu_to_be32(bno);
2413        startoff = (char *)blockp - (char *)agflbp->b_addr;
2414
2415        xfs_alloc_log_agf(tp, agbp, logflags);
2416
2417        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2418        xfs_trans_log_buf(tp, agflbp, startoff,
2419                          startoff + sizeof(xfs_agblock_t) - 1);
2420        return 0;
2421}
2422
2423static xfs_failaddr_t
2424xfs_agf_verify(
2425        struct xfs_buf          *bp)
2426 {
2427        struct xfs_mount        *mp = bp->b_target->bt_mount;
2428        struct xfs_agf          *agf = XFS_BUF_TO_AGF(bp);
2429        unsigned int            xfs_agfl_size;
2430
2431        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2432                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2433                        return __this_address;
2434                if (!xfs_log_check_lsn(mp,
2435                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2436                        return __this_address;
2437        }
2438
2439        /*
2440         * RHEL7: The RHEL7 v5 AGFL is one slot smaller than upstream due to the
2441         * header padding problem. Allow the on-disk AGFL fields to exceed the
2442         * expected size by one so the verifier doesn't generate corruption
2443         * errors for valid upstream (or RHEL8) filesystems. The AGF read
2444         * handler detects this incompatibility and triggers an AGFL reset at
2445         * block allocation time such that the filesystem remains online. See
2446         * xfs_agfl_needs_reset() for further details.
2447         */
2448        xfs_agfl_size = XFS_AGFL_SIZE(mp);
2449        if (xfs_sb_version_hascrc(&mp->m_sb))
2450                xfs_agfl_size++;
2451
2452        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2453              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2454              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2455              be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size &&
2456              be32_to_cpu(agf->agf_fllast) < xfs_agfl_size &&
2457              be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size))
2458                return __this_address;
2459
2460        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2461            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2462            be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2463            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2464                return __this_address;
2465
2466        /*
2467         * during growfs operations, the perag is not fully initialised,
2468         * so we can't use it for any useful checking. growfs ensures we can't
2469         * use it by using uncached buffers that don't have the perag attached
2470         * so we can detect and avoid this problem.
2471         */
2472        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2473                return __this_address;
2474
2475        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2476            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2477                return __this_address;
2478
2479        return NULL;
2480
2481}
2482
2483static void
2484xfs_agf_read_verify(
2485        struct xfs_buf  *bp)
2486{
2487        struct xfs_mount *mp = bp->b_target->bt_mount;
2488        xfs_failaddr_t  fa;
2489
2490        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2491            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2492                xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2493        else {
2494                fa = xfs_agf_verify(bp);
2495                if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2496                        xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2497        }
2498}
2499
2500static void
2501xfs_agf_write_verify(
2502        struct xfs_buf  *bp)
2503{
2504        struct xfs_mount *mp = bp->b_target->bt_mount;
2505        struct xfs_buf_log_item *bip = bp->b_fspriv;
2506        xfs_failaddr_t          fa;
2507
2508        fa = xfs_agf_verify(bp);
2509        if (fa) {
2510                xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2511                return;
2512        }
2513
2514        if (!xfs_sb_version_hascrc(&mp->m_sb))
2515                return;
2516
2517        if (bip)
2518                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2519
2520        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2521}
2522
/* Buffer operations attached to every AGF buffer read or written. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
2529
2530/*
2531 * Read in the allocation group header (free/alloc section).
2532 */
2533int                                     /* error */
2534xfs_read_agf(
2535        struct xfs_mount        *mp,    /* mount point structure */
2536        struct xfs_trans        *tp,    /* transaction pointer */
2537        xfs_agnumber_t          agno,   /* allocation group number */
2538        int                     flags,  /* XFS_BUF_ */
2539        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2540{
2541        int             error;
2542
2543        trace_xfs_read_agf(mp, agno);
2544
2545        ASSERT(agno != NULLAGNUMBER);
2546        error = xfs_trans_read_buf(
2547                        mp, tp, mp->m_ddev_targp,
2548                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2549                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2550        if (error)
2551                return error;
2552        if (!*bpp)
2553                return 0;
2554
2555        ASSERT(!(*bpp)->b_error);
2556        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2557        return 0;
2558}
2559
2560/*
2561 * Read in the allocation group header (free/alloc section).
2562 */
2563int                                     /* error */
2564xfs_alloc_read_agf(
2565        struct xfs_mount        *mp,    /* mount point structure */
2566        struct xfs_trans        *tp,    /* transaction pointer */
2567        xfs_agnumber_t          agno,   /* allocation group number */
2568        int                     flags,  /* XFS_ALLOC_FLAG_... */
2569        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2570{
2571        struct xfs_agf          *agf;           /* ag freelist header */
2572        struct xfs_perag        *pag;           /* per allocation group data */
2573        int                     error;
2574
2575        trace_xfs_alloc_read_agf(mp, agno);
2576
2577        ASSERT(agno != NULLAGNUMBER);
2578        error = xfs_read_agf(mp, tp, agno,
2579                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2580                        bpp);
2581        if (error)
2582                return error;
2583        if (!*bpp)
2584                return 0;
2585        ASSERT(!(*bpp)->b_error);
2586
2587        agf = XFS_BUF_TO_AGF(*bpp);
2588        pag = xfs_perag_get(mp, agno);
2589        if (!pag->pagf_init) {
2590                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2591                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2592                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2593                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2594                pag->pagf_levels[XFS_BTNUM_BNOi] =
2595                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2596                pag->pagf_levels[XFS_BTNUM_CNTi] =
2597                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2598                spin_lock_init(&pag->pagb_lock);
2599                pag->pagb_count = 0;
2600                pag->pagb_tree = RB_ROOT;
2601                pag->pagf_init = 1;
2602                pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2603        }
2604#ifdef DEBUG
2605        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2606                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2607                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2608                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2609                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2610                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2611                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2612                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2613                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2614        }
2615#endif
2616        xfs_perag_put(pag);
2617        return 0;
2618}
2619
2620/*
2621 * Allocate an extent (variable-size).
2622 * Depending on the allocation type, we either look in a single allocation
2623 * group or loop over the allocation groups to find the result.
2624 */
2625int                             /* error */
2626xfs_alloc_vextent(
2627        xfs_alloc_arg_t *args)  /* allocation argument structure */
2628{
2629        xfs_agblock_t   agsize; /* allocation group size */
2630        int             error;
2631        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2632        xfs_mount_t     *mp;    /* mount structure pointer */
2633        xfs_agnumber_t  sagno;  /* starting allocation group number */
2634        xfs_alloctype_t type;   /* input allocation type */
2635        int             bump_rotor = 0;
2636        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2637
2638        mp = args->mp;
2639        type = args->otype = args->type;
2640        args->agbno = NULLAGBLOCK;
2641        /*
2642         * Just fix this up, for the case where the last a.g. is shorter
2643         * (or there's only one a.g.) and the caller couldn't easily figure
2644         * that out (xfs_bmap_alloc).
2645         */
2646        agsize = mp->m_sb.sb_agblocks;
2647        if (args->maxlen > agsize)
2648                args->maxlen = agsize;
2649        if (args->alignment == 0)
2650                args->alignment = 1;
2651        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2652        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2653        ASSERT(args->minlen <= args->maxlen);
2654        ASSERT(args->minlen <= agsize);
2655        ASSERT(args->mod < args->prod);
2656        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2657            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2658            args->minlen > args->maxlen || args->minlen > agsize ||
2659            args->mod >= args->prod) {
2660                args->fsbno = NULLFSBLOCK;
2661                trace_xfs_alloc_vextent_badargs(args);
2662                return 0;
2663        }
2664
2665        switch (type) {
2666        case XFS_ALLOCTYPE_THIS_AG:
2667        case XFS_ALLOCTYPE_NEAR_BNO:
2668        case XFS_ALLOCTYPE_THIS_BNO:
2669                /*
2670                 * These three force us into a single a.g.
2671                 */
2672                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2673                args->pag = xfs_perag_get(mp, args->agno);
2674                error = xfs_alloc_fix_freelist(args, 0);
2675                if (error) {
2676                        trace_xfs_alloc_vextent_nofix(args);
2677                        goto error0;
2678                }
2679                if (!args->agbp) {
2680                        trace_xfs_alloc_vextent_noagbp(args);
2681                        break;
2682                }
2683                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2684                if ((error = xfs_alloc_ag_vextent(args)))
2685                        goto error0;
2686                break;
2687        case XFS_ALLOCTYPE_START_BNO:
2688                /*
2689                 * Try near allocation first, then anywhere-in-ag after
2690                 * the first a.g. fails.
2691                 */
2692                if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
2693                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2694                        args->fsbno = XFS_AGB_TO_FSB(mp,
2695                                        ((mp->m_agfrotor / rotorstep) %
2696                                        mp->m_sb.sb_agcount), 0);
2697                        bump_rotor = 1;
2698                }
2699                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2700                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2701                /* FALLTHROUGH */
2702        case XFS_ALLOCTYPE_FIRST_AG:
2703                /*
2704                 * Rotate through the allocation groups looking for a winner.
2705                 */
2706                if (type == XFS_ALLOCTYPE_FIRST_AG) {
2707                        /*
2708                         * Start with allocation group given by bno.
2709                         */
2710                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2711                        args->type = XFS_ALLOCTYPE_THIS_AG;
2712                        sagno = 0;
2713                        flags = 0;
2714                } else {
2715                        /*
2716                         * Start with the given allocation group.
2717                         */
2718                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2719                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2720                }
2721                /*
2722                 * Loop over allocation groups twice; first time with
2723                 * trylock set, second time without.
2724                 */
2725                for (;;) {
2726                        args->pag = xfs_perag_get(mp, args->agno);
2727                        error = xfs_alloc_fix_freelist(args, flags);
2728                        if (error) {
2729                                trace_xfs_alloc_vextent_nofix(args);
2730                                goto error0;
2731                        }
2732                        /*
2733                         * If we get a buffer back then the allocation will fly.
2734                         */
2735                        if (args->agbp) {
2736                                if ((error = xfs_alloc_ag_vextent(args)))
2737                                        goto error0;
2738                                break;
2739                        }
2740
2741                        trace_xfs_alloc_vextent_loopfailed(args);
2742
2743                        /*
2744                         * Didn't work, figure out the next iteration.
2745                         */
2746                        if (args->agno == sagno &&
2747                            type == XFS_ALLOCTYPE_START_BNO)
2748                                args->type = XFS_ALLOCTYPE_THIS_AG;
2749                        /*
2750                        * For the first allocation, we can try any AG to get
2751                        * space.  However, if we already have allocated a
2752                        * block, we don't want to try AGs whose number is below
2753                        * sagno. Otherwise, we may end up with out-of-order
2754                        * locking of AGF, which might cause deadlock.
2755                        */
2756                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2757                                if (args->firstblock != NULLFSBLOCK)
2758                                        args->agno = sagno;
2759                                else
2760                                        args->agno = 0;
2761                        }
2762                        /*
2763                         * Reached the starting a.g., must either be done
2764                         * or switch to non-trylock mode.
2765                         */
2766                        if (args->agno == sagno) {
2767                                if (flags == 0) {
2768                                        args->agbno = NULLAGBLOCK;
2769                                        trace_xfs_alloc_vextent_allfailed(args);
2770                                        break;
2771                                }
2772
2773                                flags = 0;
2774                                if (type == XFS_ALLOCTYPE_START_BNO) {
2775                                        args->agbno = XFS_FSB_TO_AGBNO(mp,
2776                                                args->fsbno);
2777                                        args->type = XFS_ALLOCTYPE_NEAR_BNO;
2778                                }
2779                        }
2780                        xfs_perag_put(args->pag);
2781                }
2782                if (bump_rotor) {
2783                        if (args->agno == sagno)
2784                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2785                                        (mp->m_sb.sb_agcount * rotorstep);
2786                        else
2787                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2788                                        (mp->m_sb.sb_agcount * rotorstep);
2789                }
2790                break;
2791        default:
2792                ASSERT(0);
2793                /* NOTREACHED */
2794        }
2795        if (args->agbno == NULLAGBLOCK)
2796                args->fsbno = NULLFSBLOCK;
2797        else {
2798                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2799#ifdef DEBUG
2800                ASSERT(args->len >= args->minlen);
2801                ASSERT(args->len <= args->maxlen);
2802                ASSERT(args->agbno % args->alignment == 0);
2803                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2804                        args->len);
2805#endif
2806
2807                /* Zero the extent if we were asked to do so */
2808                if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
2809                        error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2810                        if (error)
2811                                goto error0;
2812                }
2813
2814        }
2815        xfs_perag_put(args->pag);
2816        return 0;
2817error0:
2818        xfs_perag_put(args->pag);
2819        return error;
2820}
2821
2822/* Ensure that the freelist is at full capacity. */
2823int
2824xfs_free_extent_fix_freelist(
2825        struct xfs_trans        *tp,
2826        xfs_agnumber_t          agno,
2827        struct xfs_buf          **agbp)
2828{
2829        struct xfs_alloc_arg    args;
2830        int                     error;
2831
2832        memset(&args, 0, sizeof(struct xfs_alloc_arg));
2833        args.tp = tp;
2834        args.mp = tp->t_mountp;
2835        args.agno = agno;
2836
2837        /*
2838         * validate that the block number is legal - the enables us to detect
2839         * and handle a silent filesystem corruption rather than crashing.
2840         */
2841        if (args.agno >= args.mp->m_sb.sb_agcount)
2842                return -EFSCORRUPTED;
2843
2844        args.pag = xfs_perag_get(args.mp, args.agno);
2845        ASSERT(args.pag);
2846
2847        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2848        if (error)
2849                goto out;
2850
2851        *agbp = args.agbp;
2852out:
2853        xfs_perag_put(args.pag);
2854        return error;
2855}
2856
2857/*
2858 * Free an extent.
2859 * Just break up the extent address and hand off to xfs_free_ag_extent
2860 * after fixing up the freelist.
2861 */
2862int                             /* error */
2863xfs_free_extent(
2864        struct xfs_trans        *tp,    /* transaction pointer */
2865        xfs_fsblock_t           bno,    /* starting block number of extent */
2866        xfs_extlen_t            len)    /* length of extent */
2867{
2868        struct xfs_mount        *mp = tp->t_mountp;
2869        struct xfs_buf          *agbp;
2870        xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, bno);
2871        xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(mp, bno);
2872        int                     error;
2873
2874        ASSERT(len != 0);
2875
2876        if (XFS_TEST_ERROR(false, mp,
2877                        XFS_ERRTAG_FREE_EXTENT))
2878                return -EIO;
2879
2880        error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2881        if (error)
2882                return error;
2883
2884        XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
2885
2886        /* validate the extent size is legal now we have the agf locked */
2887        XFS_WANT_CORRUPTED_GOTO(mp,
2888                agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2889                                err);
2890
2891        error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0);
2892        if (error)
2893                goto err;
2894
2895        xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2896        return 0;
2897
2898err:
2899        xfs_trans_brelse(tp, agbp);
2900        return error;
2901}
2902
2903/* Find the size of the AG, in blocks. */
2904xfs_agblock_t
2905xfs_ag_block_count(
2906        struct xfs_mount        *mp,
2907        xfs_agnumber_t          agno)
2908{
2909        ASSERT(agno < mp->m_sb.sb_agcount);
2910
2911        if (agno < mp->m_sb.sb_agcount - 1)
2912                return mp->m_sb.sb_agblocks;
2913        return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
2914}
2915
2916/*
2917 * Verify that an AG block number pointer neither points outside the AG
2918 * nor points at static metadata.
2919 */
2920bool
2921xfs_verify_agbno(
2922        struct xfs_mount        *mp,
2923        xfs_agnumber_t          agno,
2924        xfs_agblock_t           agbno)
2925{
2926        xfs_agblock_t           eoag;
2927
2928        eoag = xfs_ag_block_count(mp, agno);
2929        if (agbno >= eoag)
2930                return false;
2931        if (agbno <= XFS_AGFL_BLOCK(mp))
2932                return false;
2933        return true;
2934}
2935
2936/*
2937 * Verify that an FS block number pointer neither points outside the
2938 * filesystem nor points at static AG metadata.
2939 */
2940bool
2941xfs_verify_fsbno(
2942        struct xfs_mount        *mp,
2943        xfs_fsblock_t           fsbno)
2944{
2945        xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, fsbno);
2946
2947        if (agno >= mp->m_sb.sb_agcount)
2948                return false;
2949        return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
2950}
2951