linux/fs/xfs/libxfs/xfs_alloc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_format.h"
   9#include "xfs_log_format.h"
  10#include "xfs_shared.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_btree.h"
  17#include "xfs_rmap.h"
  18#include "xfs_alloc_btree.h"
  19#include "xfs_alloc.h"
  20#include "xfs_extent_busy.h"
  21#include "xfs_errortag.h"
  22#include "xfs_error.h"
  23#include "xfs_trace.h"
  24#include "xfs_trans.h"
  25#include "xfs_buf_item.h"
  26#include "xfs_log.h"
  27#include "xfs_ag_resv.h"
  28#include "xfs_bmap.h"
  29
  30extern kmem_zone_t      *xfs_bmap_free_item_zone;
  31
  32struct workqueue_struct *xfs_alloc_wq;
  33
  34#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  35
  36#define XFSA_FIXUP_BNO_OK       1
  37#define XFSA_FIXUP_CNT_OK       2
  38
  39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  42
  43/*
  44 * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
  45 * the beginning of the block for a proper header with the location information
  46 * and CRC.
  47 */
  48unsigned int
  49xfs_agfl_size(
  50        struct xfs_mount        *mp)
  51{
  52        unsigned int            size = mp->m_sb.sb_sectsize;
  53
  54        if (xfs_sb_version_hascrc(&mp->m_sb))
  55                size -= sizeof(struct xfs_agfl);
  56
  57        return size / sizeof(xfs_agblock_t);
  58}
  59
  60unsigned int
  61xfs_refc_block(
  62        struct xfs_mount        *mp)
  63{
  64        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  65                return XFS_RMAP_BLOCK(mp) + 1;
  66        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  67                return XFS_FIBT_BLOCK(mp) + 1;
  68        return XFS_IBT_BLOCK(mp) + 1;
  69}
  70
  71xfs_extlen_t
  72xfs_prealloc_blocks(
  73        struct xfs_mount        *mp)
  74{
  75        if (xfs_sb_version_hasreflink(&mp->m_sb))
  76                return xfs_refc_block(mp) + 1;
  77        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  78                return XFS_RMAP_BLOCK(mp) + 1;
  79        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  80                return XFS_FIBT_BLOCK(mp) + 1;
  81        return XFS_IBT_BLOCK(mp) + 1;
  82}
  83
  84/*
  85 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  86 * AGF buffer (PV 947395), we place constraints on the relationship among
  87 * actual allocations for data blocks, freelist blocks, and potential file data
  88 * bmap btree blocks. However, these restrictions may result in no actual space
  89 * allocated for a delayed extent, for example, a data block in a certain AG is
  90 * allocated but there is no additional block for the additional bmap btree
  91 * block due to a split of the bmap btree of the file. The result of this may
  92 * lead to an infinite loop when the file gets flushed to disk and all delayed
  93 * extents need to be actually allocated. To get around this, we explicitly set
  94 * aside a few blocks which will not be reserved in delayed allocation.
  95 *
  96 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
  97 * potential split of the file's bmap btree.
  98 */
  99unsigned int
 100xfs_alloc_set_aside(
 101        struct xfs_mount        *mp)
 102{
 103        return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
 104}
 105
 106/*
 107 * When deciding how much space to allocate out of an AG, we limit the
 108 * allocation maximum size to the size the AG. However, we cannot use all the
 109 * blocks in the AG - some are permanently used by metadata. These
 110 * blocks are generally:
 111 *      - the AG superblock, AGF, AGI and AGFL
 112 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 113 *        the AGI free inode and rmap btree root blocks.
 114 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 115 *      - the rmapbt root block
 116 *
 117 * The AG headers are sector sized, so the amount of space they take up is
 118 * dependent on filesystem geometry. The others are all single blocks.
 119 */
 120unsigned int
 121xfs_alloc_ag_max_usable(
 122        struct xfs_mount        *mp)
 123{
 124        unsigned int            blocks;
 125
 126        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 127        blocks += XFS_ALLOC_AGFL_RESERVE;
 128        blocks += 3;                    /* AGF, AGI btree root blocks */
 129        if (xfs_sb_version_hasfinobt(&mp->m_sb))
 130                blocks++;               /* finobt root block */
 131        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 132                blocks++;               /* rmap root block */
 133        if (xfs_sb_version_hasreflink(&mp->m_sb))
 134                blocks++;               /* refcount root block */
 135
 136        return mp->m_sb.sb_agblocks - blocks;
 137}
 138
 139/*
 140 * Lookup the record equal to [bno, len] in the btree given by cur.
 141 */
 142STATIC int                              /* error */
 143xfs_alloc_lookup_eq(
 144        struct xfs_btree_cur    *cur,   /* btree cursor */
 145        xfs_agblock_t           bno,    /* starting block of extent */
 146        xfs_extlen_t            len,    /* length of extent */
 147        int                     *stat)  /* success/failure */
 148{
 149        cur->bc_rec.a.ar_startblock = bno;
 150        cur->bc_rec.a.ar_blockcount = len;
 151        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 152}
 153
 154/*
 155 * Lookup the first record greater than or equal to [bno, len]
 156 * in the btree given by cur.
 157 */
 158int                             /* error */
 159xfs_alloc_lookup_ge(
 160        struct xfs_btree_cur    *cur,   /* btree cursor */
 161        xfs_agblock_t           bno,    /* starting block of extent */
 162        xfs_extlen_t            len,    /* length of extent */
 163        int                     *stat)  /* success/failure */
 164{
 165        cur->bc_rec.a.ar_startblock = bno;
 166        cur->bc_rec.a.ar_blockcount = len;
 167        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 168}
 169
 170/*
 171 * Lookup the first record less than or equal to [bno, len]
 172 * in the btree given by cur.
 173 */
 174int                                     /* error */
 175xfs_alloc_lookup_le(
 176        struct xfs_btree_cur    *cur,   /* btree cursor */
 177        xfs_agblock_t           bno,    /* starting block of extent */
 178        xfs_extlen_t            len,    /* length of extent */
 179        int                     *stat)  /* success/failure */
 180{
 181        cur->bc_rec.a.ar_startblock = bno;
 182        cur->bc_rec.a.ar_blockcount = len;
 183        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 184}
 185
 186/*
 187 * Update the record referred to by cur to the value given
 188 * by [bno, len].
 189 * This either works (return 0) or gets an EFSCORRUPTED error.
 190 */
 191STATIC int                              /* error */
 192xfs_alloc_update(
 193        struct xfs_btree_cur    *cur,   /* btree cursor */
 194        xfs_agblock_t           bno,    /* starting block of extent */
 195        xfs_extlen_t            len)    /* length of extent */
 196{
 197        union xfs_btree_rec     rec;
 198
 199        rec.alloc.ar_startblock = cpu_to_be32(bno);
 200        rec.alloc.ar_blockcount = cpu_to_be32(len);
 201        return xfs_btree_update(cur, &rec);
 202}
 203
 204/*
 205 * Get the data from the pointed-to record.
 206 */
 207int                                     /* error */
 208xfs_alloc_get_rec(
 209        struct xfs_btree_cur    *cur,   /* btree cursor */
 210        xfs_agblock_t           *bno,   /* output: starting block of extent */
 211        xfs_extlen_t            *len,   /* output: length of extent */
 212        int                     *stat)  /* output: success/failure */
 213{
 214        struct xfs_mount        *mp = cur->bc_mp;
 215        xfs_agnumber_t          agno = cur->bc_private.a.agno;
 216        union xfs_btree_rec     *rec;
 217        int                     error;
 218
 219        error = xfs_btree_get_rec(cur, &rec, stat);
 220        if (error || !(*stat))
 221                return error;
 222
 223        *bno = be32_to_cpu(rec->alloc.ar_startblock);
 224        *len = be32_to_cpu(rec->alloc.ar_blockcount);
 225
 226        if (*len == 0)
 227                goto out_bad_rec;
 228
 229        /* check for valid extent range, including overflow */
 230        if (!xfs_verify_agbno(mp, agno, *bno))
 231                goto out_bad_rec;
 232        if (*bno > *bno + *len)
 233                goto out_bad_rec;
 234        if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
 235                goto out_bad_rec;
 236
 237        return 0;
 238
 239out_bad_rec:
 240        xfs_warn(mp,
 241                "%s Freespace BTree record corruption in AG %d detected!",
 242                cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
 243        xfs_warn(mp,
 244                "start block 0x%x block count 0x%x", *bno, *len);
 245        return -EFSCORRUPTED;
 246}
 247
 248/*
 249 * Compute aligned version of the found extent.
 250 * Takes alignment and min length into account.
 251 */
 252STATIC bool
 253xfs_alloc_compute_aligned(
 254        xfs_alloc_arg_t *args,          /* allocation argument structure */
 255        xfs_agblock_t   foundbno,       /* starting block in found extent */
 256        xfs_extlen_t    foundlen,       /* length in found extent */
 257        xfs_agblock_t   *resbno,        /* result block number */
 258        xfs_extlen_t    *reslen,        /* result length */
 259        unsigned        *busy_gen)
 260{
 261        xfs_agblock_t   bno = foundbno;
 262        xfs_extlen_t    len = foundlen;
 263        xfs_extlen_t    diff;
 264        bool            busy;
 265
 266        /* Trim busy sections out of found extent */
 267        busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
 268
 269        /*
 270         * If we have a largish extent that happens to start before min_agbno,
 271         * see if we can shift it into range...
 272         */
 273        if (bno < args->min_agbno && bno + len > args->min_agbno) {
 274                diff = args->min_agbno - bno;
 275                if (len > diff) {
 276                        bno += diff;
 277                        len -= diff;
 278                }
 279        }
 280
 281        if (args->alignment > 1 && len >= args->minlen) {
 282                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 283
 284                diff = aligned_bno - bno;
 285
 286                *resbno = aligned_bno;
 287                *reslen = diff >= len ? 0 : len - diff;
 288        } else {
 289                *resbno = bno;
 290                *reslen = len;
 291        }
 292
 293        return busy;
 294}
 295
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Given a desired extent [wantbno, wantlen] and a candidate free extent
 * [freebno, freelen], choose the best start block inside the free extent
 * (stored in *newbnop, or NULLAGBLOCK if no aligned start fits) and return
 * the absolute distance between that choice and wantbno.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = xfs_alloc_is_userdata(datatype);

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The free extent straddles the target and alignment matters:
		 * consider the aligned candidate at/after wantbno (newbno1)
		 * and the one immediately before it (newbno2).  Prefer the
		 * candidate yielding the longer usable length; break ties by
		 * smaller distance from wantbno.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* unaligned and the free extent covers the target: take it */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/*
		 * Free extent ends before the target end; take the aligned
		 * start closest to the end of the free extent that still
		 * leaves room for wantlen blocks.
		 */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
 364
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/*
	 * Nothing to do if stripe rounding is disabled, the length is already
	 * too small to round, or it is pinned at maxlen.
	 */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	/* Round rlen down to the largest value with rlen % prod == mod. */
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
 401
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * Both the by-block (bnobt) and by-size (cntbt) trees must always describe
 * the same set of free extents, so every change here is applied to both.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 * If the caller says the cursor is already positioned, only verify
	 * that claim on DEBUG builds.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	/*
	 * If both trees are single-level, cross-check that they hold the
	 * same number of records - a cheap consistency check.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).  Each lookup must fail (i == 0)
	 * since the record does not exist yet.
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}
 549
 550static xfs_failaddr_t
 551xfs_agfl_verify(
 552        struct xfs_buf  *bp)
 553{
 554        struct xfs_mount *mp = bp->b_mount;
 555        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 556        int             i;
 557
 558        /*
 559         * There is no verification of non-crc AGFLs because mkfs does not
 560         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 561         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 562         * can't verify just those entries are valid.
 563         */
 564        if (!xfs_sb_version_hascrc(&mp->m_sb))
 565                return NULL;
 566
 567        if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
 568                return __this_address;
 569        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
 570                return __this_address;
 571        /*
 572         * during growfs operations, the perag is not fully initialised,
 573         * so we can't use it for any useful checking. growfs ensures we can't
 574         * use it by using uncached buffers that don't have the perag attached
 575         * so we can detect and avoid this problem.
 576         */
 577        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 578                return __this_address;
 579
 580        for (i = 0; i < xfs_agfl_size(mp); i++) {
 581                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 582                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 583                        return __this_address;
 584        }
 585
 586        if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
 587                return __this_address;
 588        return NULL;
 589}
 590
 591static void
 592xfs_agfl_read_verify(
 593        struct xfs_buf  *bp)
 594{
 595        struct xfs_mount *mp = bp->b_mount;
 596        xfs_failaddr_t  fa;
 597
 598        /*
 599         * There is no verification of non-crc AGFLs because mkfs does not
 600         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 601         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 602         * can't verify just those entries are valid.
 603         */
 604        if (!xfs_sb_version_hascrc(&mp->m_sb))
 605                return;
 606
 607        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 608                xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 609        else {
 610                fa = xfs_agfl_verify(bp);
 611                if (fa)
 612                        xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 613        }
 614}
 615
 616static void
 617xfs_agfl_write_verify(
 618        struct xfs_buf  *bp)
 619{
 620        struct xfs_mount        *mp = bp->b_mount;
 621        struct xfs_buf_log_item *bip = bp->b_log_item;
 622        xfs_failaddr_t          fa;
 623
 624        /* no verification of non-crc AGFLs */
 625        if (!xfs_sb_version_hascrc(&mp->m_sb))
 626                return;
 627
 628        fa = xfs_agfl_verify(bp);
 629        if (fa) {
 630                xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 631                return;
 632        }
 633
 634        if (bip)
 635                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 636
 637        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 638}
 639
/* Buffer ops vector for AGFL blocks: CRC + structure checks on read/write. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
 647
 648/*
 649 * Read in the allocation group free block array.
 650 */
 651int                                     /* error */
 652xfs_alloc_read_agfl(
 653        xfs_mount_t     *mp,            /* mount point structure */
 654        xfs_trans_t     *tp,            /* transaction pointer */
 655        xfs_agnumber_t  agno,           /* allocation group number */
 656        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 657{
 658        xfs_buf_t       *bp;            /* return value */
 659        int             error;
 660
 661        ASSERT(agno != NULLAGNUMBER);
 662        error = xfs_trans_read_buf(
 663                        mp, tp, mp->m_ddev_targp,
 664                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 665                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 666        if (error)
 667                return error;
 668        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 669        *bpp = bp;
 670        return 0;
 671}
 672
 673STATIC int
 674xfs_alloc_update_counters(
 675        struct xfs_trans        *tp,
 676        struct xfs_perag        *pag,
 677        struct xfs_buf          *agbp,
 678        long                    len)
 679{
 680        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 681
 682        pag->pagf_freeblks += len;
 683        be32_add_cpu(&agf->agf_freeblks, len);
 684
 685        xfs_trans_agblocks_delta(tp, len);
 686        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 687                     be32_to_cpu(agf->agf_length)))
 688                return -EFSCORRUPTED;
 689
 690        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 691        return 0;
 692}
 693
/*
 * Allocation group level functions.
 */

/*
 * Deal with the case where only small freespaces remain. Either return the
 * contents of the last freespace record, or allocate space from the freelist if
 * there is nothing in the tree.
 *
 * *stat is 0 when a single block came from the AGFL, 1 when the result came
 * from (or there was nothing in) the cntbt.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
	xfs_agblock_t		*fbnop,	/* result block number */
	xfs_extlen_t		*flenp,	/* result length */
	int			*stat)	/* status: 0-freelist, 1-normal/none */
{
	int			error = 0;
	xfs_agblock_t		fbno = NULLAGBLOCK;
	xfs_extlen_t		flen = 0;
	int			i = 0;

	/*
	 * If a cntbt cursor is provided, try to allocate the largest record in
	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
	 * allocation. Make sure to respect minleft even when pulling from the
	 * freelist.
	 */
	if (ccur)
		error = xfs_btree_decrement(ccur, 0, &i);
	if (error)
		goto error;
	if (i) {
		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
		if (error)
			goto error;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
		goto out;
	}

	/*
	 * Only a single-block, unaligned, non-AGFL-reservation allocation may
	 * steal from the freelist, and only if that leaves at least minleft
	 * entries on it.
	 */
	if (args->minlen != 1 || args->alignment != 1 ||
	    args->resv == XFS_AG_RESV_AGFL ||
	    (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
	     args->minleft))
		goto out;

	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
	if (error)
		goto error;
	if (fbno == NULLAGBLOCK)
		goto out;

	xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
			      xfs_alloc_allow_busy_reuse(args->datatype));

	/*
	 * AGFL blocks may carry stale metadata buffer state; invalidate it
	 * before handing the block out as user data.
	 */
	if (xfs_alloc_is_userdata(args->datatype)) {
		struct xfs_buf	*bp;

		bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
		if (!bp) {
			error = -EFSCORRUPTED;
			goto error;
		}
		xfs_trans_binval(args->tp, bp);
	}
	*fbnop = args->agbno = fbno;
	*flenp = args->len = 1;
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error);
	args->wasfromfl = 1;
	trace_xfs_alloc_small_freelist(args);

	/*
	 * If we're feeding an AGFL block to something that doesn't live in the
	 * free space, we need to clear out the OWN_AG rmap.
	 */
	error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
			      &XFS_RMAP_OINFO_AG);
	if (error)
		goto error;

	*stat = 0;
	return 0;

out:
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error:
	trace_xfs_alloc_small_error(args);
	return error;
}
 798
 799/*
 800 * Allocate a variable extent in the allocation group agno.
 801 * Type and bno are used to determine where in the allocation group the
 802 * extent will start.
 803 * Extent's length (returned in *len) will be between minlen and maxlen,
 804 * and of the form k * prod + mod unless there's nothing that large.
 805 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 806 */
 807STATIC int                      /* error */
 808xfs_alloc_ag_vextent(
 809        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 810{
 811        int             error=0;
 812
 813        ASSERT(args->minlen > 0);
 814        ASSERT(args->maxlen > 0);
 815        ASSERT(args->minlen <= args->maxlen);
 816        ASSERT(args->mod < args->prod);
 817        ASSERT(args->alignment > 0);
 818
 819        /*
 820         * Branch to correct routine based on the type.
 821         */
 822        args->wasfromfl = 0;
 823        switch (args->type) {
 824        case XFS_ALLOCTYPE_THIS_AG:
 825                error = xfs_alloc_ag_vextent_size(args);
 826                break;
 827        case XFS_ALLOCTYPE_NEAR_BNO:
 828                error = xfs_alloc_ag_vextent_near(args);
 829                break;
 830        case XFS_ALLOCTYPE_THIS_BNO:
 831                error = xfs_alloc_ag_vextent_exact(args);
 832                break;
 833        default:
 834                ASSERT(0);
 835                /* NOTREACHED */
 836        }
 837
 838        if (error || args->agbno == NULLAGBLOCK)
 839                return error;
 840
 841        ASSERT(args->len >= args->minlen);
 842        ASSERT(args->len <= args->maxlen);
 843        ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
 844        ASSERT(args->agbno % args->alignment == 0);
 845
 846        /* if not file data, insert new block into the reverse map btree */
 847        if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
 848                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 849                                       args->agbno, args->len, &args->oinfo);
 850                if (error)
 851                        return error;
 852        }
 853
 854        if (!args->wasfromfl) {
 855                error = xfs_alloc_update_counters(args->tp, args->pag,
 856                                                  args->agbp,
 857                                                  -((long)(args->len)));
 858                if (error)
 859                        return error;
 860
 861                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 862                                              args->agbno, args->len));
 863        }
 864
 865        xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
 866
 867        XFS_STATS_INC(args->mp, xs_allocx);
 868        XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 869        return error;
 870}
 871
 872/*
 873 * Allocate a variable extent at exactly agno/bno.
 874 * Extent's length (returned in *len) will be between minlen and maxlen,
 875 * and of the form k * prod + mod unless there's nothing that large.
 876 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 877 */
 878STATIC int                      /* error */
 879xfs_alloc_ag_vextent_exact(
 880        xfs_alloc_arg_t *args)  /* allocation argument structure */
 881{
 882        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 883        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 884        int             error;
 885        xfs_agblock_t   fbno;   /* start block of found extent */
 886        xfs_extlen_t    flen;   /* length of found extent */
 887        xfs_agblock_t   tbno;   /* start block of busy extent */
 888        xfs_extlen_t    tlen;   /* length of busy extent */
 889        xfs_agblock_t   tend;   /* end block of busy extent */
 890        int             i;      /* success/failure of operation */
 891        unsigned        busy_gen;
 892
 893        ASSERT(args->alignment == 1);
 894
 895        /*
 896         * Allocate/initialize a cursor for the by-number freespace btree.
 897         */
 898        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 899                                          args->agno, XFS_BTNUM_BNO);
 900
 901        /*
 902         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 903         * Look for the closest free block <= bno, it must contain bno
 904         * if any free block does.
 905         */
 906        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 907        if (error)
 908                goto error0;
 909        if (!i)
 910                goto not_found;
 911
 912        /*
 913         * Grab the freespace record.
 914         */
 915        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 916        if (error)
 917                goto error0;
 918        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 919        ASSERT(fbno <= args->agbno);
 920
 921        /*
 922         * Check for overlapping busy extents.
 923         */
 924        tbno = fbno;
 925        tlen = flen;
 926        xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 927
 928        /*
 929         * Give up if the start of the extent is busy, or the freespace isn't
 930         * long enough for the minimum request.
 931         */
 932        if (tbno > args->agbno)
 933                goto not_found;
 934        if (tlen < args->minlen)
 935                goto not_found;
 936        tend = tbno + tlen;
 937        if (tend < args->agbno + args->minlen)
 938                goto not_found;
 939
 940        /*
 941         * End of extent will be smaller of the freespace end and the
 942         * maximal requested end.
 943         *
 944         * Fix the length according to mod and prod if given.
 945         */
 946        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 947                                                - args->agbno;
 948        xfs_alloc_fix_len(args);
 949        ASSERT(args->agbno + args->len <= tend);
 950
 951        /*
 952         * We are allocating agbno for args->len
 953         * Allocate/initialize a cursor for the by-size btree.
 954         */
 955        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 956                args->agno, XFS_BTNUM_CNT);
 957        ASSERT(args->agbno + args->len <=
 958                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 959        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 960                                      args->len, XFSA_FIXUP_BNO_OK);
 961        if (error) {
 962                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 963                goto error0;
 964        }
 965
 966        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 967        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 968
 969        args->wasfromfl = 0;
 970        trace_xfs_alloc_exact_done(args);
 971        return 0;
 972
 973not_found:
 974        /* Didn't find it, return null. */
 975        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 976        args->agbno = NULLAGBLOCK;
 977        trace_xfs_alloc_exact_notfound(args);
 978        return 0;
 979
 980error0:
 981        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 982        trace_xfs_alloc_exact_error(args);
 983        return error;
 984}
 985
 986/*
 987 * Search the btree in a given direction via the search cursor and compare
 988 * the records found against the good extent we've already found.
 989 */
 990STATIC int
 991xfs_alloc_find_best_extent(
 992        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 993        struct xfs_btree_cur    **gcur, /* good cursor */
 994        struct xfs_btree_cur    **scur, /* searching cursor */
 995        xfs_agblock_t           gdiff,  /* difference for search comparison */
 996        xfs_agblock_t           *sbno,  /* extent found by search */
 997        xfs_extlen_t            *slen,  /* extent length */
 998        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 999        xfs_extlen_t            *slena, /* aligned extent length */
1000        int                     dir)    /* 0 = search right, 1 = search left */
1001{
1002        xfs_agblock_t           new;
1003        xfs_agblock_t           sdiff;
1004        int                     error;
1005        int                     i;
1006        unsigned                busy_gen;
1007
1008        /* The good extent is perfect, no need to  search. */
1009        if (!gdiff)
1010                goto out_use_good;
1011
1012        /*
1013         * Look until we find a better one, run out of space or run off the end.
1014         */
1015        do {
1016                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
1017                if (error)
1018                        goto error0;
1019                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1020                xfs_alloc_compute_aligned(args, *sbno, *slen,
1021                                sbnoa, slena, &busy_gen);
1022
1023                /*
1024                 * The good extent is closer than this one.
1025                 */
1026                if (!dir) {
1027                        if (*sbnoa > args->max_agbno)
1028                                goto out_use_good;
1029                        if (*sbnoa >= args->agbno + gdiff)
1030                                goto out_use_good;
1031                } else {
1032                        if (*sbnoa < args->min_agbno)
1033                                goto out_use_good;
1034                        if (*sbnoa <= args->agbno - gdiff)
1035                                goto out_use_good;
1036                }
1037
1038                /*
1039                 * Same distance, compare length and pick the best.
1040                 */
1041                if (*slena >= args->minlen) {
1042                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
1043                        xfs_alloc_fix_len(args);
1044
1045                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1046                                                       args->alignment,
1047                                                       args->datatype, *sbnoa,
1048                                                       *slena, &new);
1049
1050                        /*
1051                         * Choose closer size and invalidate other cursor.
1052                         */
1053                        if (sdiff < gdiff)
1054                                goto out_use_search;
1055                        goto out_use_good;
1056                }
1057
1058                if (!dir)
1059                        error = xfs_btree_increment(*scur, 0, &i);
1060                else
1061                        error = xfs_btree_decrement(*scur, 0, &i);
1062                if (error)
1063                        goto error0;
1064        } while (i);
1065
1066out_use_good:
1067        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
1068        *scur = NULL;
1069        return 0;
1070
1071out_use_search:
1072        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
1073        *gcur = NULL;
1074        return 0;
1075
1076error0:
1077        /* caller invalidates cursors */
1078        return error;
1079}
1080
1081/*
1082 * Allocate a variable extent near bno in the allocation group agno.
1083 * Extent's length (returned in len) will be between minlen and maxlen,
1084 * and of the form k * prod + mod unless there's nothing that large.
1085 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1086 */
1087STATIC int                              /* error */
1088xfs_alloc_ag_vextent_near(
1089        xfs_alloc_arg_t *args)          /* allocation argument structure */
1090{
1091        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
1092        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
1093        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
1094        xfs_agblock_t   gtbno;          /* start bno of right side entry */
1095        xfs_agblock_t   gtbnoa;         /* aligned ... */
1096        xfs_extlen_t    gtdiff;         /* difference to right side entry */
1097        xfs_extlen_t    gtlen;          /* length of right side entry */
1098        xfs_extlen_t    gtlena;         /* aligned ... */
1099        xfs_agblock_t   gtnew;          /* useful start bno of right side */
1100        int             error;          /* error code */
1101        int             i;              /* result code, temporary */
1102        int             j;              /* result code, temporary */
1103        xfs_agblock_t   ltbno;          /* start bno of left side entry */
1104        xfs_agblock_t   ltbnoa;         /* aligned ... */
1105        xfs_extlen_t    ltdiff;         /* difference to left side entry */
1106        xfs_extlen_t    ltlen;          /* length of left side entry */
1107        xfs_extlen_t    ltlena;         /* aligned ... */
1108        xfs_agblock_t   ltnew;          /* useful start bno of left side */
1109        xfs_extlen_t    rlen;           /* length of returned extent */
1110        bool            busy;
1111        unsigned        busy_gen;
1112#ifdef DEBUG
1113        /*
1114         * Randomly don't execute the first algorithm.
1115         */
1116        int             dofirst;        /* set to do first algorithm */
1117
1118        dofirst = prandom_u32() & 1;
1119#endif
1120
1121        /* handle unitialized agbno range so caller doesn't have to */
1122        if (!args->min_agbno && !args->max_agbno)
1123                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1124        ASSERT(args->min_agbno <= args->max_agbno);
1125
1126        /* clamp agbno to the range if it's outside */
1127        if (args->agbno < args->min_agbno)
1128                args->agbno = args->min_agbno;
1129        if (args->agbno > args->max_agbno)
1130                args->agbno = args->max_agbno;
1131
1132restart:
1133        bno_cur_lt = NULL;
1134        bno_cur_gt = NULL;
1135        ltlen = 0;
1136        gtlena = 0;
1137        ltlena = 0;
1138        busy = false;
1139
1140        /*
1141         * Get a cursor for the by-size btree.
1142         */
1143        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1144                args->agno, XFS_BTNUM_CNT);
1145
1146        /*
1147         * See if there are any free extents as big as maxlen.
1148         */
1149        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1150                goto error0;
1151        /*
1152         * If none, then pick up the last entry in the tree unless the
1153         * tree is empty.
1154         */
1155        if (!i) {
1156                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1157                                &ltlen, &i)))
1158                        goto error0;
1159                if (i == 0 || ltlen == 0) {
1160                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1161                        trace_xfs_alloc_near_noentry(args);
1162                        return 0;
1163                }
1164                ASSERT(i == 1);
1165        }
1166        args->wasfromfl = 0;
1167
1168        /*
1169         * First algorithm.
1170         * If the requested extent is large wrt the freespaces available
1171         * in this a.g., then the cursor will be pointing to a btree entry
1172         * near the right edge of the tree.  If it's in the last btree leaf
1173         * block, then we just examine all the entries in that block
1174         * that are big enough, and pick the best one.
1175         * This is written as a while loop so we can break out of it,
1176         * but we never loop back to the top.
1177         */
1178        while (xfs_btree_islastblock(cnt_cur, 0)) {
1179                xfs_extlen_t    bdiff;
1180                int             besti=0;
1181                xfs_extlen_t    blen=0;
1182                xfs_agblock_t   bnew=0;
1183
1184#ifdef DEBUG
1185                if (dofirst)
1186                        break;
1187#endif
1188                /*
1189                 * Start from the entry that lookup found, sequence through
1190                 * all larger free blocks.  If we're actually pointing at a
1191                 * record smaller than maxlen, go to the start of this block,
1192                 * and skip all those smaller than minlen.
1193                 */
1194                if (ltlen || args->alignment > 1) {
1195                        cnt_cur->bc_ptrs[0] = 1;
1196                        do {
1197                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1198                                                &ltlen, &i)))
1199                                        goto error0;
1200                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1201                                if (ltlen >= args->minlen)
1202                                        break;
1203                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1204                                        goto error0;
1205                        } while (i);
1206                        ASSERT(ltlen >= args->minlen);
1207                        if (!i)
1208                                break;
1209                }
1210                i = cnt_cur->bc_ptrs[0];
1211                for (j = 1, blen = 0, bdiff = 0;
1212                     !error && j && (blen < args->maxlen || bdiff > 0);
1213                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1214                        /*
1215                         * For each entry, decide if it's better than
1216                         * the previous best entry.
1217                         */
1218                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1219                                goto error0;
1220                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1221                        busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1222                                        &ltbnoa, &ltlena, &busy_gen);
1223                        if (ltlena < args->minlen)
1224                                continue;
1225                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1226                                continue;
1227                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1228                        xfs_alloc_fix_len(args);
1229                        ASSERT(args->len >= args->minlen);
1230                        if (args->len < blen)
1231                                continue;
1232                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1233                                args->alignment, args->datatype, ltbnoa,
1234                                ltlena, &ltnew);
1235                        if (ltnew != NULLAGBLOCK &&
1236                            (args->len > blen || ltdiff < bdiff)) {
1237                                bdiff = ltdiff;
1238                                bnew = ltnew;
1239                                blen = args->len;
1240                                besti = cnt_cur->bc_ptrs[0];
1241                        }
1242                }
1243                /*
1244                 * It didn't work.  We COULD be in a case where
1245                 * there's a good record somewhere, so try again.
1246                 */
1247                if (blen == 0)
1248                        break;
1249                /*
1250                 * Point at the best entry, and retrieve it again.
1251                 */
1252                cnt_cur->bc_ptrs[0] = besti;
1253                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1254                        goto error0;
1255                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1256                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1257                args->len = blen;
1258
1259                /*
1260                 * We are allocating starting at bnew for blen blocks.
1261                 */
1262                args->agbno = bnew;
1263                ASSERT(bnew >= ltbno);
1264                ASSERT(bnew + blen <= ltbno + ltlen);
1265                /*
1266                 * Set up a cursor for the by-bno tree.
1267                 */
1268                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1269                        args->agbp, args->agno, XFS_BTNUM_BNO);
1270                /*
1271                 * Fix up the btree entries.
1272                 */
1273                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1274                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1275                        goto error0;
1276                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1277                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1278
1279                trace_xfs_alloc_near_first(args);
1280                return 0;
1281        }
1282        /*
1283         * Second algorithm.
1284         * Search in the by-bno tree to the left and to the right
1285         * simultaneously, until in each case we find a space big enough,
1286         * or run into the edge of the tree.  When we run into the edge,
1287         * we deallocate that cursor.
1288         * If both searches succeed, we compare the two spaces and pick
1289         * the better one.
1290         * With alignment, it's possible for both to fail; the upper
1291         * level algorithm that picks allocation groups for allocations
1292         * is not supposed to do this.
1293         */
1294        /*
1295         * Allocate and initialize the cursor for the leftward search.
1296         */
1297        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1298                args->agno, XFS_BTNUM_BNO);
1299        /*
1300         * Lookup <= bno to find the leftward search's starting point.
1301         */
1302        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1303                goto error0;
1304        if (!i) {
1305                /*
1306                 * Didn't find anything; use this cursor for the rightward
1307                 * search.
1308                 */
1309                bno_cur_gt = bno_cur_lt;
1310                bno_cur_lt = NULL;
1311        }
1312        /*
1313         * Found something.  Duplicate the cursor for the rightward search.
1314         */
1315        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1316                goto error0;
1317        /*
1318         * Increment the cursor, so we will point at the entry just right
1319         * of the leftward entry if any, or to the leftmost entry.
1320         */
1321        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1322                goto error0;
1323        if (!i) {
1324                /*
1325                 * It failed, there are no rightward entries.
1326                 */
1327                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1328                bno_cur_gt = NULL;
1329        }
1330        /*
1331         * Loop going left with the leftward cursor, right with the
1332         * rightward cursor, until either both directions give up or
1333         * we find an entry at least as big as minlen.
1334         */
1335        do {
1336                if (bno_cur_lt) {
1337                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1338                                goto error0;
1339                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1340                        busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1341                                        &ltbnoa, &ltlena, &busy_gen);
1342                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1343                                break;
1344                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1345                                goto error0;
1346                        if (!i || ltbnoa < args->min_agbno) {
1347                                xfs_btree_del_cursor(bno_cur_lt,
1348                                                     XFS_BTREE_NOERROR);
1349                                bno_cur_lt = NULL;
1350                        }
1351                }
1352                if (bno_cur_gt) {
1353                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1354                                goto error0;
1355                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1356                        busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1357                                        &gtbnoa, &gtlena, &busy_gen);
1358                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1359                                break;
1360                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1361                                goto error0;
1362                        if (!i || gtbnoa > args->max_agbno) {
1363                                xfs_btree_del_cursor(bno_cur_gt,
1364                                                     XFS_BTREE_NOERROR);
1365                                bno_cur_gt = NULL;
1366                        }
1367                }
1368        } while (bno_cur_lt || bno_cur_gt);
1369
1370        /*
1371         * Got both cursors still active, need to find better entry.
1372         */
1373        if (bno_cur_lt && bno_cur_gt) {
1374                if (ltlena >= args->minlen) {
1375                        /*
1376                         * Left side is good, look for a right side entry.
1377                         */
1378                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1379                        xfs_alloc_fix_len(args);
1380                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1381                                args->alignment, args->datatype, ltbnoa,
1382                                ltlena, &ltnew);
1383
1384                        error = xfs_alloc_find_best_extent(args,
1385                                                &bno_cur_lt, &bno_cur_gt,
1386                                                ltdiff, &gtbno, &gtlen,
1387                                                &gtbnoa, &gtlena,
1388                                                0 /* search right */);
1389                } else {
1390                        ASSERT(gtlena >= args->minlen);
1391
1392                        /*
1393                         * Right side is good, look for a left side entry.
1394                         */
1395                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1396                        xfs_alloc_fix_len(args);
1397                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1398                                args->alignment, args->datatype, gtbnoa,
1399                                gtlena, &gtnew);
1400
1401                        error = xfs_alloc_find_best_extent(args,
1402                                                &bno_cur_gt, &bno_cur_lt,
1403                                                gtdiff, &ltbno, &ltlen,
1404                                                &ltbnoa, &ltlena,
1405                                                1 /* search left */);
1406                }
1407
1408                if (error)
1409                        goto error0;
1410        }
1411
1412        /*
1413         * If we couldn't get anything, give up.
1414         */
1415        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1416                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1417
1418                if (busy) {
1419                        trace_xfs_alloc_near_busy(args);
1420                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1421                        goto restart;
1422                }
1423                trace_xfs_alloc_size_neither(args);
1424                args->agbno = NULLAGBLOCK;
1425                return 0;
1426        }
1427
1428        /*
1429         * At this point we have selected a freespace entry, either to the
1430         * left or to the right.  If it's on the right, copy all the
1431         * useful variables to the "left" set so we only have one
1432         * copy of this code.
1433         */
1434        if (bno_cur_gt) {
1435                bno_cur_lt = bno_cur_gt;
1436                bno_cur_gt = NULL;
1437                ltbno = gtbno;
1438                ltbnoa = gtbnoa;
1439                ltlen = gtlen;
1440                ltlena = gtlena;
1441                j = 1;
1442        } else
1443                j = 0;
1444
1445        /*
1446         * Fix up the length and compute the useful address.
1447         */
1448        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1449        xfs_alloc_fix_len(args);
1450        rlen = args->len;
1451        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1452                                     args->datatype, ltbnoa, ltlena, &ltnew);
1453        ASSERT(ltnew >= ltbno);
1454        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1455        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1456        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1457        args->agbno = ltnew;
1458
1459        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1460                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1461                goto error0;
1462
1463        if (j)
1464                trace_xfs_alloc_near_greater(args);
1465        else
1466                trace_xfs_alloc_near_lesser(args);
1467
1468        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1469        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1470        return 0;
1471
1472 error0:
1473        trace_xfs_alloc_near_error(args);
1474        if (cnt_cur != NULL)
1475                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1476        if (bno_cur_lt != NULL)
1477                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1478        if (bno_cur_gt != NULL)
1479                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1480        return error;
1481}
1482
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Works off the by-size (cnt) btree: looks for the smallest extent that
 * still satisfies maxlen+alignment, falling back to smaller extents if
 * necessary.  Extents trimmed by the busy list may force a log push and
 * a full restart.
 */
STATIC int                              /* error */
xfs_alloc_ag_vextent_size(
        xfs_alloc_arg_t *args)          /* allocation argument structure */
{
        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
        int             error;          /* error result */
        xfs_agblock_t   fbno;           /* start of found freespace */
        xfs_extlen_t    flen;           /* length of found freespace */
        int             i;              /* temp status variable */
        xfs_agblock_t   rbno;           /* returned block number */
        xfs_extlen_t    rlen;           /* length of returned extent */
        bool            busy;           /* candidate overlapped busy extents */
        unsigned        busy_gen;       /* busy generation, for flush/retry */

restart:
        /*
         * Allocate and initialize a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
        bno_cur = NULL;
        busy = false;

        /*
         * Look for an entry >= maxlen+alignment-1 blocks.
         */
        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
                        args->maxlen + args->alignment - 1, &i)))
                goto error0;

        /*
         * If none then we have to settle for a smaller extent. In the case that
         * there are no large extents, this will return the last entry in the
         * tree unless the tree is empty. In the case that there are only busy
         * large extents, this will return the largest small extent unless there
         * are no smaller extents available.
         */
        if (!i) {
                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
                                                   &fbno, &flen, &i);
                if (error)
                        goto error0;
                if (i == 0 || flen == 0) {
                        /* Nothing usable at all; caller sees NULLAGBLOCK. */
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                        trace_xfs_alloc_size_noentry(args);
                        return 0;
                }
                ASSERT(i == 1);
                busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
                                &rlen, &busy_gen);
        } else {
                /*
                 * Search for a non-busy extent that is large enough.
                 */
                for (;;) {
                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
                        if (error)
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
                                        &rbno, &rlen, &busy_gen);

                        if (rlen >= args->maxlen)
                                break;

                        error = xfs_btree_increment(cnt_cur, 0, &i);
                        if (error)
                                goto error0;
                        if (i == 0) {
                                /*
                                 * Our only valid extents must have been busy.
                                 * Make it unbusy by forcing the log out and
                                 * retrying.
                                 */
                                xfs_btree_del_cursor(cnt_cur,
                                                     XFS_BTREE_NOERROR);
                                trace_xfs_alloc_size_busy(args);
                                xfs_extent_busy_flush(args->mp,
                                                        args->pag, busy_gen);
                                goto restart;
                        }
                }
        }

        /*
         * In the first case above, we got the last entry in the
         * by-size btree.  Now we check to see if the space hits maxlen
         * once aligned; if not, we search left for something better.
         * This can't happen in the second case above.
         */
        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
        if (rlen < args->maxlen) {
                xfs_agblock_t   bestfbno;       /* best record's start */
                xfs_extlen_t    bestflen;       /* best record's length */
                xfs_agblock_t   bestrbno;       /* best aligned start */
                xfs_extlen_t    bestrlen;       /* best aligned length */

                bestrlen = rlen;
                bestrbno = rbno;
                bestflen = flen;
                bestfbno = fbno;
                /* Walk left through smaller records looking for a better fit. */
                for (;;) {
                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
                                goto error0;
                        if (i == 0)
                                break;
                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
                                        &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        if (flen < bestrlen)
                                break;
                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
                                        &rbno, &rlen, &busy_gen);
                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                                (rlen <= flen && rbno + rlen <= fbno + flen),
                                error0);
                        if (rlen > bestrlen) {
                                bestrlen = rlen;
                                bestrbno = rbno;
                                bestflen = flen;
                                bestfbno = fbno;
                                if (rlen == args->maxlen)
                                        break;
                        }
                }
                /* Reposition the cursor on the winning record. */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
                                &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                rlen = bestrlen;
                rbno = bestrbno;
                flen = bestflen;
                fbno = bestfbno;
        }
        args->wasfromfl = 0;
        /*
         * Fix up the length.
         */
        args->len = rlen;
        if (rlen < args->minlen) {
                if (busy) {
                        /*
                         * The candidates were shortened by busy trimming;
                         * force the log out and retry from scratch.
                         */
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                        trace_xfs_alloc_size_busy(args);
                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
                        goto restart;
                }
                goto out_nominleft;
        }
        xfs_alloc_fix_len(args);

        rlen = args->len;
        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
        /*
         * Allocate and initialize a cursor for the by-block tree.
         */
        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_BNO);
        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
                goto error0;
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        cnt_cur = bno_cur = NULL;
        args->len = rlen;
        args->agbno = rbno;
        /* The result must lie entirely inside the AG. */
        XFS_WANT_CORRUPTED_GOTO(args->mp,
                args->agbno + args->len <=
                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
                error0);
        trace_xfs_alloc_size_done(args);
        return 0;

error0:
        trace_xfs_alloc_size_error(args);
        if (cnt_cur)
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
        if (bno_cur)
                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
        return error;

out_nominleft:
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
        trace_xfs_alloc_size_nominleft(args);
        args->agbno = NULLAGBLOCK;
        return 0;
}
1681
/*
 * Free the extent starting at agno/bno for length.
 *
 * Removes the reverse mapping for the extent (unless the owner update is
 * to be skipped), merges the extent with any contiguous free space on its
 * left and/or right in the by-block btree, mirrors the resulting record in
 * the by-size btree, and finally credits the freed blocks to the perag and
 * superblock counters and to the given per-AG reservation type.
 */
STATIC int
xfs_free_ag_extent(
        struct xfs_trans                *tp,
        struct xfs_buf                  *agbp,
        xfs_agnumber_t                  agno,
        xfs_agblock_t                   bno,
        xfs_extlen_t                    len,
        const struct xfs_owner_info     *oinfo,
        enum xfs_ag_resv_type           type)
{
        struct xfs_mount                *mp;
        struct xfs_perag                *pag;
        struct xfs_btree_cur            *bno_cur;
        struct xfs_btree_cur            *cnt_cur;
        xfs_agblock_t                   gtbno; /* start of right neighbor */
        xfs_extlen_t                    gtlen; /* length of right neighbor */
        xfs_agblock_t                   ltbno; /* start of left neighbor */
        xfs_extlen_t                    ltlen; /* length of left neighbor */
        xfs_agblock_t                   nbno; /* new starting block of freesp */
        xfs_extlen_t                    nlen; /* new length of freespace */
        int                             haveleft; /* have a left neighbor */
        int                             haveright; /* have a right neighbor */
        int                             i;
        int                             error;

        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;

        /* Drop the rmap record first, unless the owner says to skip it. */
        if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
        }

        /*
         * Allocate and initialize a cursor for the by-block btree.
         */
        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
        /*
         * Look for a neighboring block on the left (lower block numbers)
         * that is contiguous with this space.
         */
        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
                goto error0;
        if (haveleft) {
                /*
                 * There is a block to our left.
                 */
                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * It's not contiguous, though.
                 */
                if (ltbno + ltlen < bno)
                        haveleft = 0;
                else {
                        /*
                         * If this failure happens the request to free this
                         * space was invalid, it's (partly) already free.
                         * Very bad.
                         */
                        XFS_WANT_CORRUPTED_GOTO(mp,
                                                ltbno + ltlen <= bno, error0);
                }
        }
        /*
         * Look for a neighboring block on the right (higher block numbers)
         * that is contiguous with this space.
         */
        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
                goto error0;
        if (haveright) {
                /*
                 * There is a block to our right.
                 */
                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * It's not contiguous, though.
                 */
                if (bno + len < gtbno)
                        haveright = 0;
                else {
                        /*
                         * If this failure happens the request to free this
                         * space was invalid, it's (partly) already free.
                         * Very bad.
                         */
                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
                }
        }
        /*
         * Now allocate and initialize a cursor for the by-size tree.
         */
        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
        /*
         * Have both left and right contiguous neighbors.
         * Merge all three into a single free block.
         */
        if (haveleft && haveright) {
                /*
                 * Delete the old by-size entry on the left.
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Delete the old by-size entry on the right.
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Delete the old by-block entry for the right block.
                 * (bno_cur is still positioned on the right neighbor.)
                 */
                if ((error = xfs_btree_delete(bno_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Move the by-block cursor back to the left neighbor.
                 */
                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
#ifdef DEBUG
                /*
                 * Check that this is the right record: delete didn't
                 * mangle the cursor.
                 */
                {
                        xfs_agblock_t   xxbno;
                        xfs_extlen_t    xxlen;

                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
                                        &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(mp,
                                i == 1 && xxbno == ltbno && xxlen == ltlen,
                                error0);
                }
#endif
                /*
                 * Update remaining by-block entry to the new, joined block.
                 */
                nbno = ltbno;
                nlen = len + ltlen + gtlen;
                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
                        goto error0;
        }
        /*
         * Have only a left contiguous neighbor.
         * Merge it together with the new freespace.
         */
        else if (haveleft) {
                /*
                 * Delete the old by-size entry on the left.
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Back up the by-block cursor to the left neighbor, and
                 * update its length.
                 */
                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                nbno = ltbno;
                nlen = len + ltlen;
                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
                        goto error0;
        }
        /*
         * Have only a right contiguous neighbor.
         * Merge it together with the new freespace.
         */
        else if (haveright) {
                /*
                 * Delete the old by-size entry on the right.
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Update the starting block and length of the right
                 * neighbor in the by-block tree.
                 */
                nbno = bno;
                nlen = len + gtlen;
                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
                        goto error0;
        }
        /*
         * No contiguous neighbors.
         * Insert the new freespace into the by-block tree.
         */
        else {
                nbno = bno;
                nlen = len;
                if ((error = xfs_btree_insert(bno_cur, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        }
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        bno_cur = NULL;
        /*
         * In all cases we need to insert the new freespace in the by-size
         * tree.  The lookup must NOT find an existing (nbno, nlen) record,
         * hence the i == 0 check before the insert.
         */
        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
                goto error0;
        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
        if ((error = xfs_btree_insert(cnt_cur, &i)))
                goto error0;
        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
        cnt_cur = NULL;

        /*
         * Update the freespace totals in the ag and superblock.
         */
        pag = xfs_perag_get(mp, agno);
        error = xfs_alloc_update_counters(tp, pag, agbp, len);
        xfs_ag_resv_free_extent(pag, type, tp, len);
        xfs_perag_put(pag);
        if (error)
                goto error0;

        XFS_STATS_INC(mp, xs_freex);
        XFS_STATS_ADD(mp, xs_freeb, len);

        trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);

        return 0;

 error0:
        trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
        if (bno_cur)
                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
        if (cnt_cur)
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
        return error;
}
1941
1942/*
1943 * Visible (exported) allocation/free functions.
1944 * Some of these are used just by xfs_alloc_btree.c and this file.
1945 */
1946
1947/*
1948 * Compute and fill in value of m_ag_maxlevels.
1949 */
1950void
1951xfs_alloc_compute_maxlevels(
1952        xfs_mount_t     *mp)    /* file system mount structure */
1953{
1954        mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
1955                        (mp->m_sb.sb_agblocks + 1) / 2);
1956}
1957
1958/*
1959 * Find the length of the longest extent in an AG.  The 'need' parameter
1960 * specifies how much space we're going to need for the AGFL and the
1961 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1962 * other callers.
1963 */
1964xfs_extlen_t
1965xfs_alloc_longest_free_extent(
1966        struct xfs_perag        *pag,
1967        xfs_extlen_t            need,
1968        xfs_extlen_t            reserved)
1969{
1970        xfs_extlen_t            delta = 0;
1971
1972        /*
1973         * If the AGFL needs a recharge, we'll have to subtract that from the
1974         * longest extent.
1975         */
1976        if (need > pag->pagf_flcount)
1977                delta = need - pag->pagf_flcount;
1978
1979        /*
1980         * If we cannot maintain others' reservations with space from the
1981         * not-longest freesp extents, we'll have to subtract /that/ from
1982         * the longest extent too.
1983         */
1984        if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1985                delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1986
1987        /*
1988         * If the longest extent is long enough to satisfy all the
1989         * reservations and AGFL rules in place, we can return this extent.
1990         */
1991        if (pag->pagf_longest > delta)
1992                return pag->pagf_longest - delta;
1993
1994        /* Otherwise, let the caller try for 1 block if there's space. */
1995        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1996}
1997
1998unsigned int
1999xfs_alloc_min_freelist(
2000        struct xfs_mount        *mp,
2001        struct xfs_perag        *pag)
2002{
2003        unsigned int            min_free;
2004
2005        /* space needed by-bno freespace btree */
2006        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2007                                       mp->m_ag_maxlevels);
2008        /* space needed by-size freespace btree */
2009        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2010                                       mp->m_ag_maxlevels);
2011        /* space needed reverse mapping used space btree */
2012        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2013                min_free += min_t(unsigned int,
2014                                  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2015                                  mp->m_rmap_maxlevels);
2016
2017        return min_free;
2018}
2019
/*
 * Check if the operation we are fixing up the freelist for should go ahead or
 * not. If we are freeing blocks, we always allow it, otherwise the allocation
 * is dependent on whether the size and shape of free space available will
 * permit the requested allocation to take place.
 *
 * Returns true if the request can proceed.  As a side effect, args->maxlen
 * may be clamped down to the available space (unless XFS_ALLOC_FLAG_CHECK
 * is set, in which case this is a read-only check).
 */
static bool
xfs_alloc_space_available(
        struct xfs_alloc_arg    *args,
        xfs_extlen_t            min_free,
        int                     flags)
{
        struct xfs_perag        *pag = args->pag;
        xfs_extlen_t            alloc_len, longest;
        xfs_extlen_t            reservation; /* blocks that are still reserved */
        int                     available;
        xfs_extlen_t            agflcount;

        /* Freeing blocks only adds space, so it is always allowed. */
        if (flags & XFS_ALLOC_FLAG_FREEING)
                return true;

        reservation = xfs_ag_resv_needed(pag, args->resv);

        /* do we have enough contiguous free space for the allocation? */
        alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
        longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
        if (longest < alloc_len)
                return false;

        /*
         * Do we have enough free space remaining for the allocation? Don't
         * account extra agfl blocks because we are about to defer free them,
         * making them unavailable until the current transaction commits.
         */
        agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
        available = (int)(pag->pagf_freeblks + agflcount -
                          reservation - min_free - args->minleft);
        if (available < (int)max(args->total, alloc_len))
                return false;

        /*
         * Clamp maxlen to the amount of free space available for the actual
         * extent allocation.
         */
        if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
                args->maxlen = available;
                ASSERT(args->maxlen > 0);
                ASSERT(args->maxlen >= args->minlen);
        }

        return true;
}
2072
2073int
2074xfs_free_agfl_block(
2075        struct xfs_trans        *tp,
2076        xfs_agnumber_t          agno,
2077        xfs_agblock_t           agbno,
2078        struct xfs_buf          *agbp,
2079        struct xfs_owner_info   *oinfo)
2080{
2081        int                     error;
2082        struct xfs_buf          *bp;
2083
2084        error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2085                                   XFS_AG_RESV_AGFL);
2086        if (error)
2087                return error;
2088
2089        bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
2090        if (!bp)
2091                return -EFSCORRUPTED;
2092        xfs_trans_binval(tp, bp);
2093
2094        return 0;
2095}
2096
2097/*
2098 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2099 * is to detect an agfl header padding mismatch between current and early v5
2100 * kernels. This problem manifests as a 1-slot size difference between the
2101 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2102 * may also catch variants of agfl count corruption unrelated to padding. Either
2103 * way, we'll reset the agfl and warn the user.
2104 *
2105 * Return true if a reset is required before the agfl can be used, false
2106 * otherwise.
2107 */
2108static bool
2109xfs_agfl_needs_reset(
2110        struct xfs_mount        *mp,
2111        struct xfs_agf          *agf)
2112{
2113        uint32_t                f = be32_to_cpu(agf->agf_flfirst);
2114        uint32_t                l = be32_to_cpu(agf->agf_fllast);
2115        uint32_t                c = be32_to_cpu(agf->agf_flcount);
2116        int                     agfl_size = xfs_agfl_size(mp);
2117        int                     active;
2118
2119        /* no agfl header on v4 supers */
2120        if (!xfs_sb_version_hascrc(&mp->m_sb))
2121                return false;
2122
2123        /*
2124         * The agf read verifier catches severe corruption of these fields.
2125         * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2126         * the verifier allows it.
2127         */
2128        if (f >= agfl_size || l >= agfl_size)
2129                return true;
2130        if (c > agfl_size)
2131                return true;
2132
2133        /*
2134         * Check consistency between the on-disk count and the active range. An
2135         * agfl padding mismatch manifests as an inconsistent flcount.
2136         */
2137        if (c && l >= f)
2138                active = l - f + 1;
2139        else if (c)
2140                active = agfl_size - f + l + 1;
2141        else
2142                active = 0;
2143
2144        return active != c;
2145}
2146
/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
        struct xfs_perag        *pag)
{
        struct xfs_mount        *mp = tp->t_mountp;
        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);

        /* Only called when xfs_agfl_needs_reset() flagged this perag. */
        ASSERT(pag->pagf_agflreset);
        trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

        xfs_warn(mp,
               "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
               "Please unmount and run xfs_repair.",
                 pag->pag_agno, pag->pagf_flcount);

        /*
         * Reset the on-disk agfl pointers to an empty, unwrapped state and
         * log the change.
         */
        agf->agf_flfirst = 0;
        agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
        agf->agf_flcount = 0;
        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
                                    XFS_AGF_FLCOUNT);

        /* Mirror the empty state in the in-core perag. */
        pag->pagf_flcount = 0;
        pag->pagf_agflreset = false;
}
2183
/*
 * Defer an AGFL block free. This is effectively equivalent to
 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
 *
 * Deferring AGFL frees helps prevent log reservation overruns due to too many
 * allocation operations in a transaction. AGFL frees are prone to this problem
 * because for one they are always freed one at a time. Further, an immediate
 * AGFL block free can cause a btree join and require another block free before
 * the real allocation can proceed. Deferring the free disconnects freeing up
 * the AGFL slot from freeing the block.
 */
STATIC void
xfs_defer_agfl_block(
        struct xfs_trans                *tp,
        xfs_agnumber_t                  agno,
        xfs_fsblock_t                   agbno,
        struct xfs_owner_info           *oinfo)
{
        struct xfs_mount                *mp = tp->t_mountp;
        struct xfs_extent_free_item     *new;           /* new element */

        ASSERT(xfs_bmap_free_item_zone != NULL);
        ASSERT(oinfo != NULL);

        /* Record the single-block free as a work item. */
        new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
        new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
        new->xefi_blockcount = 1;
        new->xefi_oinfo = *oinfo;

        trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);

        /* Queue it on the transaction's deferred-ops list for later. */
        xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
}
2217
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 *
 * Returns 0 with args->agbp set if the AG is usable (freelist fixed up),
 * 0 with args->agbp == NULL if the AG was rejected, or a negative errno.
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/* Read the AGF first if the perag info hasn't been initialised yet. */
	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!pag->pagf_init) {
			/* Can only remain uninitialised in trylock mode. */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * Speculative free-space check (XFS_ALLOC_FLAG_CHECK) so we can
	 * reject the AG cheaply before locking/reading the AGF buffer.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
	}

	/* Set up a local allocation arg for refilling the freelist. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2385
/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 *
 * Sets *bnop to NULLAGBLOCK (and returns 0) if the freelist is empty.
 * If @btreeblk is set the block is destined for an AGF btree and is
 * accounted in agf_btreeblks as well.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;


	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	/* agf_flfirst indexes a circular buffer; wrap it at the AGFL size. */
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Track blocks handed to the AGF btrees separately. */
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
2452
2453/*
2454 * Log the given fields from the agf structure.
2455 */
2456void
2457xfs_alloc_log_agf(
2458        xfs_trans_t     *tp,    /* transaction pointer */
2459        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2460        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2461{
2462        int     first;          /* first byte offset */
2463        int     last;           /* last byte offset */
2464        static const short      offsets[] = {
2465                offsetof(xfs_agf_t, agf_magicnum),
2466                offsetof(xfs_agf_t, agf_versionnum),
2467                offsetof(xfs_agf_t, agf_seqno),
2468                offsetof(xfs_agf_t, agf_length),
2469                offsetof(xfs_agf_t, agf_roots[0]),
2470                offsetof(xfs_agf_t, agf_levels[0]),
2471                offsetof(xfs_agf_t, agf_flfirst),
2472                offsetof(xfs_agf_t, agf_fllast),
2473                offsetof(xfs_agf_t, agf_flcount),
2474                offsetof(xfs_agf_t, agf_freeblks),
2475                offsetof(xfs_agf_t, agf_longest),
2476                offsetof(xfs_agf_t, agf_btreeblks),
2477                offsetof(xfs_agf_t, agf_uuid),
2478                offsetof(xfs_agf_t, agf_rmap_blocks),
2479                offsetof(xfs_agf_t, agf_refcount_blocks),
2480                offsetof(xfs_agf_t, agf_refcount_root),
2481                offsetof(xfs_agf_t, agf_refcount_level),
2482                /* needed so that we don't log the whole rest of the structure: */
2483                offsetof(xfs_agf_t, agf_spare64),
2484                sizeof(xfs_agf_t)
2485        };
2486
2487        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2488
2489        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2490
2491        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2492        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2493}
2494
2495/*
2496 * Interface for inode allocation to force the pag data to be initialized.
2497 */
2498int                                     /* error */
2499xfs_alloc_pagf_init(
2500        xfs_mount_t             *mp,    /* file system mount structure */
2501        xfs_trans_t             *tp,    /* transaction pointer */
2502        xfs_agnumber_t          agno,   /* allocation group number */
2503        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2504{
2505        xfs_buf_t               *bp;
2506        int                     error;
2507
2508        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2509                return error;
2510        if (bp)
2511                xfs_trans_brelse(tp, bp);
2512        return 0;
2513}
2514
/*
 * Put the block on the freelist for the allocation group.
 *
 * @agflbp may be NULL, in which case the AGFL buffer is read here.
 * If @btreeblk is set the block came from an AGF btree and agf_btreeblks
 * is decremented accordingly.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */
	__be32			*agfl_bno;
	int			startoff;

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* Read the AGFL if the caller didn't hand us the buffer. */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	be32_add_cpu(&agf->agf_fllast, 1);
	/* agf_fllast indexes a circular buffer; wrap it at the AGFL size. */
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block is returning from a btree; give it back to the AG. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	/* Store the freed block in the AGFL slot and log just that slot. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): the AGF was already logged with the same logflags a
	 * few lines above; this second call looks redundant (it re-logs an
	 * unchanged range) -- confirm against upstream history before
	 * removing.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
2575
2576static xfs_failaddr_t
2577xfs_agf_verify(
2578        struct xfs_buf          *bp)
2579{
2580        struct xfs_mount        *mp = bp->b_mount;
2581        struct xfs_agf          *agf = XFS_BUF_TO_AGF(bp);
2582
2583        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2584                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2585                        return __this_address;
2586                if (!xfs_log_check_lsn(mp,
2587                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2588                        return __this_address;
2589        }
2590
2591        if (!xfs_verify_magic(bp, agf->agf_magicnum))
2592                return __this_address;
2593
2594        if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2595              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2596              be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2597              be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2598              be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
2599                return __this_address;
2600
2601        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2602            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2603            be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2604            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2605                return __this_address;
2606
2607        if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2608            (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2609             be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2610                return __this_address;
2611
2612        /*
2613         * during growfs operations, the perag is not fully initialised,
2614         * so we can't use it for any useful checking. growfs ensures we can't
2615         * use it by using uncached buffers that don't have the perag attached
2616         * so we can detect and avoid this problem.
2617         */
2618        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2619                return __this_address;
2620
2621        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2622            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2623                return __this_address;
2624
2625        if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2626            (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2627             be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2628                return __this_address;
2629
2630        return NULL;
2631
2632}
2633
2634static void
2635xfs_agf_read_verify(
2636        struct xfs_buf  *bp)
2637{
2638        struct xfs_mount *mp = bp->b_mount;
2639        xfs_failaddr_t  fa;
2640
2641        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2642            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2643                xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2644        else {
2645                fa = xfs_agf_verify(bp);
2646                if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2647                        xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2648        }
2649}
2650
2651static void
2652xfs_agf_write_verify(
2653        struct xfs_buf  *bp)
2654{
2655        struct xfs_mount        *mp = bp->b_mount;
2656        struct xfs_buf_log_item *bip = bp->b_log_item;
2657        xfs_failaddr_t          fa;
2658
2659        fa = xfs_agf_verify(bp);
2660        if (fa) {
2661                xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2662                return;
2663        }
2664
2665        if (!xfs_sb_version_hascrc(&mp->m_sb))
2666                return;
2667
2668        if (bip)
2669                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2670
2671        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2672}
2673
/* Buffer ops wiring the AGF magic and read/write/struct verifiers. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
2681
2682/*
2683 * Read in the allocation group header (free/alloc section).
2684 */
2685int                                     /* error */
2686xfs_read_agf(
2687        struct xfs_mount        *mp,    /* mount point structure */
2688        struct xfs_trans        *tp,    /* transaction pointer */
2689        xfs_agnumber_t          agno,   /* allocation group number */
2690        int                     flags,  /* XFS_BUF_ */
2691        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2692{
2693        int             error;
2694
2695        trace_xfs_read_agf(mp, agno);
2696
2697        ASSERT(agno != NULLAGNUMBER);
2698        error = xfs_trans_read_buf(
2699                        mp, tp, mp->m_ddev_targp,
2700                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2701                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2702        if (error)
2703                return error;
2704        if (!*bpp)
2705                return 0;
2706
2707        ASSERT(!(*bpp)->b_error);
2708        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2709        return 0;
2710}
2711
/*
 * Read in the allocation group header (free/alloc section).
 *
 * Wraps xfs_read_agf() and additionally initialises the in-core perag
 * free-space summary from the on-disk AGF the first time it is read.
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;		/* ag freelist header */
	struct xfs_perag	*pag;		/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	/* NULL buffer (no error) means trylock failed; nothing to do. */
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	/* First read: seed the in-core perag from the on-disk AGF. */
	if (!pag->pagf_init) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		pag->pagf_init = 1;
		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
	}
#ifdef DEBUG
	/* Subsequent reads: cross-check the cache against the disk copy. */
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}
2771
/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 *
 * On return args->fsbno is the allocated extent's start block, or
 * NULLFSBLOCK if nothing could be allocated (not an error).
 */
int				/* error */
xfs_alloc_vextent(
	struct xfs_alloc_arg	*args)	/* allocation argument structure */
{
	xfs_agblock_t		agsize;	/* allocation group size */
	int			error;
	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
	struct xfs_mount	*mp;	/* mount structure pointer */
	xfs_agnumber_t		sagno;	/* starting allocation group number */
	xfs_alloctype_t		type;	/* input allocation type */
	int			bump_rotor = 0;
	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/* Same checks as the ASSERTs above, but fail soft in production. */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		error = xfs_alloc_fix_freelist(args, 0);
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		/* No AGF buffer means the AG was rejected; give up. */
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/* inode32: pick the starting AG from the rotor. */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			error = xfs_alloc_fix_freelist(args, flags);
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->tp->t_firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					/* Second pass exhausted: all AGs failed. */
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		/* Advance the inode32 rotor so the next caller spreads out. */
		if (bump_rotor) {
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif

		/* Zero the extent if we were asked to do so */
		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
			if (error)
				goto error0;
		}

	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
2973
/*
 * Ensure that the freelist is at full capacity.
 *
 * On success *agbp holds the locked AGF buffer for @agno; the caller is
 * responsible for releasing it.
 */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = agno;

	/*
	 * Validate that the AG number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	args.pag = xfs_perag_get(args.mp, args.agno);
	ASSERT(args.pag);

	/* FREEING: we must not fail just because the AG is nearly full. */
	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		goto out;

	*agbp = args.agbp;
out:
	xfs_perag_put(args.pag);
	return error;
}
3008
3009/*
3010 * Free an extent.
3011 * Just break up the extent address and hand off to xfs_free_ag_extent
3012 * after fixing up the freelist.
3013 */
3014int
3015__xfs_free_extent(
3016        struct xfs_trans                *tp,
3017        xfs_fsblock_t                   bno,
3018        xfs_extlen_t                    len,
3019        const struct xfs_owner_info     *oinfo,
3020        enum xfs_ag_resv_type           type,
3021        bool                            skip_discard)
3022{
3023        struct xfs_mount                *mp = tp->t_mountp;
3024        struct xfs_buf                  *agbp;
3025        xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, bno);
3026        xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp, bno);
3027        int                             error;
3028        unsigned int                    busy_flags = 0;
3029
3030        ASSERT(len != 0);
3031        ASSERT(type != XFS_AG_RESV_AGFL);
3032
3033        if (XFS_TEST_ERROR(false, mp,
3034                        XFS_ERRTAG_FREE_EXTENT))
3035                return -EIO;
3036
3037        error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3038        if (error)
3039                return error;
3040
3041        XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
3042
3043        /* validate the extent size is legal now we have the agf locked */
3044        XFS_WANT_CORRUPTED_GOTO(mp,
3045                agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
3046                                err);
3047
3048        error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
3049        if (error)
3050                goto err;
3051
3052        if (skip_discard)
3053                busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3054        xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
3055        return 0;
3056
3057err:
3058        xfs_trans_brelse(tp, agbp);
3059        return error;
3060}
3061
/*
 * Carries the caller's callback and private data through the generic btree
 * range query code to xfs_alloc_query_range_helper().
 */
struct xfs_alloc_query_range_info {
        xfs_alloc_query_range_fn        fn;     /* per-record callback */
        void                            *priv;  /* caller's private data */
};
3066
3067/* Format btree record and pass to our callback. */
3068STATIC int
3069xfs_alloc_query_range_helper(
3070        struct xfs_btree_cur            *cur,
3071        union xfs_btree_rec             *rec,
3072        void                            *priv)
3073{
3074        struct xfs_alloc_query_range_info       *query = priv;
3075        struct xfs_alloc_rec_incore             irec;
3076
3077        irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3078        irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3079        return query->fn(cur, &irec, query->priv);
3080}
3081
3082/* Find all free space within a given range of blocks. */
3083int
3084xfs_alloc_query_range(
3085        struct xfs_btree_cur                    *cur,
3086        struct xfs_alloc_rec_incore             *low_rec,
3087        struct xfs_alloc_rec_incore             *high_rec,
3088        xfs_alloc_query_range_fn                fn,
3089        void                                    *priv)
3090{
3091        union xfs_btree_irec                    low_brec;
3092        union xfs_btree_irec                    high_brec;
3093        struct xfs_alloc_query_range_info       query;
3094
3095        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3096        low_brec.a = *low_rec;
3097        high_brec.a = *high_rec;
3098        query.priv = priv;
3099        query.fn = fn;
3100        return xfs_btree_query_range(cur, &low_brec, &high_brec,
3101                        xfs_alloc_query_range_helper, &query);
3102}
3103
3104/* Find all free space records. */
3105int
3106xfs_alloc_query_all(
3107        struct xfs_btree_cur                    *cur,
3108        xfs_alloc_query_range_fn                fn,
3109        void                                    *priv)
3110{
3111        struct xfs_alloc_query_range_info       query;
3112
3113        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3114        query.priv = priv;
3115        query.fn = fn;
3116        return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3117}
3118
3119/* Is there a record covering a given extent? */
3120int
3121xfs_alloc_has_record(
3122        struct xfs_btree_cur    *cur,
3123        xfs_agblock_t           bno,
3124        xfs_extlen_t            len,
3125        bool                    *exists)
3126{
3127        union xfs_btree_irec    low;
3128        union xfs_btree_irec    high;
3129
3130        memset(&low, 0, sizeof(low));
3131        low.a.ar_startblock = bno;
3132        memset(&high, 0xFF, sizeof(high));
3133        high.a.ar_startblock = bno + len - 1;
3134
3135        return xfs_btree_has_record(cur, &low, &high, exists);
3136}
3137
3138/*
3139 * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
3140 * error code or XFS_ITER_*.
3141 */
3142int
3143xfs_agfl_walk(
3144        struct xfs_mount        *mp,
3145        struct xfs_agf          *agf,
3146        struct xfs_buf          *agflbp,
3147        xfs_agfl_walk_fn        walk_fn,
3148        void                    *priv)
3149{
3150        __be32                  *agfl_bno;
3151        unsigned int            i;
3152        int                     error;
3153
3154        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3155        i = be32_to_cpu(agf->agf_flfirst);
3156
3157        /* Nothing to walk in an empty AGFL. */
3158        if (agf->agf_flcount == cpu_to_be32(0))
3159                return 0;
3160
3161        /* Otherwise, walk from first to last, wrapping as needed. */
3162        for (;;) {
3163                error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3164                if (error)
3165                        return error;
3166                if (i == be32_to_cpu(agf->agf_fllast))
3167                        break;
3168                if (++i == xfs_agfl_size(mp))
3169                        i = 0;
3170        }
3171
3172        return 0;
3173}
3174