linux/fs/xfs/libxfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_rmap.h"
  31#include "xfs_alloc_btree.h"
  32#include "xfs_alloc.h"
  33#include "xfs_extent_busy.h"
  34#include "xfs_error.h"
  35#include "xfs_cksum.h"
  36#include "xfs_trace.h"
  37#include "xfs_trans.h"
  38#include "xfs_buf_item.h"
  39#include "xfs_log.h"
  40#include "xfs_ag_resv.h"
  41
/* Workqueue used for deferred/async allocation work. */
struct workqueue_struct *xfs_alloc_wq;

/* Absolute difference of two values without unsigned underflow. */
#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

/*
 * Flags for xfs_alloc_fixup_trees(): set when the corresponding cursor
 * already points at the target record, so the lookup can be skipped.
 */
#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

/* Forward declarations of the per-strategy AG extent allocators. */
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  54
  55unsigned int
  56xfs_refc_block(
  57        struct xfs_mount        *mp)
  58{
  59        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  60                return XFS_RMAP_BLOCK(mp) + 1;
  61        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  62                return XFS_FIBT_BLOCK(mp) + 1;
  63        return XFS_IBT_BLOCK(mp) + 1;
  64}
  65
  66xfs_extlen_t
  67xfs_prealloc_blocks(
  68        struct xfs_mount        *mp)
  69{
  70        if (xfs_sb_version_hasreflink(&mp->m_sb))
  71                return xfs_refc_block(mp) + 1;
  72        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  73                return XFS_RMAP_BLOCK(mp) + 1;
  74        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  75                return XFS_FIBT_BLOCK(mp) + 1;
  76        return XFS_IBT_BLOCK(mp) + 1;
  77}
  78
  79/*
  80 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  81 * AGF buffer (PV 947395), we place constraints on the relationship among
  82 * actual allocations for data blocks, freelist blocks, and potential file data
  83 * bmap btree blocks. However, these restrictions may result in no actual space
  84 * allocated for a delayed extent, for example, a data block in a certain AG is
  85 * allocated but there is no additional block for the additional bmap btree
  86 * block due to a split of the bmap btree of the file. The result of this may
  87 * lead to an infinite loop when the file gets flushed to disk and all delayed
  88 * extents need to be actually allocated. To get around this, we explicitly set
  89 * aside a few blocks which will not be reserved in delayed allocation.
  90 *
  91 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
  92 * potential split of the file's bmap btree.
  93 */
  94unsigned int
  95xfs_alloc_set_aside(
  96        struct xfs_mount        *mp)
  97{
  98        return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
  99}
 100
 101/*
 102 * When deciding how much space to allocate out of an AG, we limit the
 103 * allocation maximum size to the size the AG. However, we cannot use all the
 104 * blocks in the AG - some are permanently used by metadata. These
 105 * blocks are generally:
 106 *      - the AG superblock, AGF, AGI and AGFL
 107 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 108 *        the AGI free inode and rmap btree root blocks.
 109 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 110 *      - the rmapbt root block
 111 *
 112 * The AG headers are sector sized, so the amount of space they take up is
 113 * dependent on filesystem geometry. The others are all single blocks.
 114 */
 115unsigned int
 116xfs_alloc_ag_max_usable(
 117        struct xfs_mount        *mp)
 118{
 119        unsigned int            blocks;
 120
 121        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 122        blocks += XFS_ALLOC_AGFL_RESERVE;
 123        blocks += 3;                    /* AGF, AGI btree root blocks */
 124        if (xfs_sb_version_hasfinobt(&mp->m_sb))
 125                blocks++;               /* finobt root block */
 126        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 127                blocks++;               /* rmap root block */
 128        if (xfs_sb_version_hasreflink(&mp->m_sb))
 129                blocks++;               /* refcount root block */
 130
 131        return mp->m_sb.sb_agblocks - blocks;
 132}
 133
 134/*
 135 * Lookup the record equal to [bno, len] in the btree given by cur.
 136 */
 137STATIC int                              /* error */
 138xfs_alloc_lookup_eq(
 139        struct xfs_btree_cur    *cur,   /* btree cursor */
 140        xfs_agblock_t           bno,    /* starting block of extent */
 141        xfs_extlen_t            len,    /* length of extent */
 142        int                     *stat)  /* success/failure */
 143{
 144        cur->bc_rec.a.ar_startblock = bno;
 145        cur->bc_rec.a.ar_blockcount = len;
 146        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 147}
 148
 149/*
 150 * Lookup the first record greater than or equal to [bno, len]
 151 * in the btree given by cur.
 152 */
 153int                             /* error */
 154xfs_alloc_lookup_ge(
 155        struct xfs_btree_cur    *cur,   /* btree cursor */
 156        xfs_agblock_t           bno,    /* starting block of extent */
 157        xfs_extlen_t            len,    /* length of extent */
 158        int                     *stat)  /* success/failure */
 159{
 160        cur->bc_rec.a.ar_startblock = bno;
 161        cur->bc_rec.a.ar_blockcount = len;
 162        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 163}
 164
 165/*
 166 * Lookup the first record less than or equal to [bno, len]
 167 * in the btree given by cur.
 168 */
 169static int                              /* error */
 170xfs_alloc_lookup_le(
 171        struct xfs_btree_cur    *cur,   /* btree cursor */
 172        xfs_agblock_t           bno,    /* starting block of extent */
 173        xfs_extlen_t            len,    /* length of extent */
 174        int                     *stat)  /* success/failure */
 175{
 176        cur->bc_rec.a.ar_startblock = bno;
 177        cur->bc_rec.a.ar_blockcount = len;
 178        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 179}
 180
 181/*
 182 * Update the record referred to by cur to the value given
 183 * by [bno, len].
 184 * This either works (return 0) or gets an EFSCORRUPTED error.
 185 */
 186STATIC int                              /* error */
 187xfs_alloc_update(
 188        struct xfs_btree_cur    *cur,   /* btree cursor */
 189        xfs_agblock_t           bno,    /* starting block of extent */
 190        xfs_extlen_t            len)    /* length of extent */
 191{
 192        union xfs_btree_rec     rec;
 193
 194        rec.alloc.ar_startblock = cpu_to_be32(bno);
 195        rec.alloc.ar_blockcount = cpu_to_be32(len);
 196        return xfs_btree_update(cur, &rec);
 197}
 198
 199/*
 200 * Get the data from the pointed-to record.
 201 */
 202int                                     /* error */
 203xfs_alloc_get_rec(
 204        struct xfs_btree_cur    *cur,   /* btree cursor */
 205        xfs_agblock_t           *bno,   /* output: starting block of extent */
 206        xfs_extlen_t            *len,   /* output: length of extent */
 207        int                     *stat)  /* output: success/failure */
 208{
 209        union xfs_btree_rec     *rec;
 210        int                     error;
 211
 212        error = xfs_btree_get_rec(cur, &rec, stat);
 213        if (!error && *stat == 1) {
 214                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 215                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 216        }
 217        return error;
 218}
 219
/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 *
 * On return *resbno/*reslen hold the usable (busy-trimmed, min_agbno-shifted,
 * alignment-rounded) portion of [foundbno, foundlen].  *reslen may end up
 * shorter than args->minlen (even zero); callers must re-check it.  The
 * return value passes through xfs_extent_busy_trim()'s result, with the
 * current busy generation stored in *busy_gen.
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)	/* result: busy-extent generation */
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		/* Rounding up may consume the whole extent; report 0 then. */
		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
 267
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Returns the absolute distance between the chosen start block and
 * wantbno, or 0 when no aligned candidate fits (*newbnop set to
 * NULLAGBLOCK in that case).
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = xfs_alloc_is_userdata(datatype);

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The target lies inside the free extent and alignment
		 * matters: consider both the aligned block at/above wantbno
		 * and the one just below, picking whichever gives the longer
		 * usable length (ties broken by distance from wantbno).
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* Unaligned and the target fits: take it exactly. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/*
		 * Free extent ends before the target does; allocate from the
		 * tail end, rounded to alignment and kept within the extent.
		 */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
 336
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 *
 * Adjusts args->len in place, only ever shrinking it; the result is
 * discarded (function returns early) if rounding would take it below
 * args->minlen.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* Nothing to round: no prod constraint, too short, or at maxlen. */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* Shrink to the nearest value congruent to mod (modulo prod). */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
 373
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * Returns 0 on success or a negative error code; btree inconsistencies
 * surface as -EFSCORRUPTED via XFS_WANT_CORRUPTED_RETURN.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 * If the cursor is already positioned (XFSA_FIXUP_CNT_OK), only
	 * cross-check it against [fbno, flen] in DEBUG builds.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	/*
	 * When both trees are single-level, their leaf blocks must carry
	 * the same number of records (they index the same free extents).
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).  Each insert is preceded by a
	 * lookup that must fail (i == 0) to position the cursor.
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}
 521
/*
 * Structural verification of an AGFL buffer: UUID, magic number,
 * AG sequence number (when a perag is attached), every free-list slot
 * within the AG, and finally the metadata LSN.  Returns false on the
 * first failed check.
 */
static bool
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	int		i;

	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return false;
	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
		return false;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return false;

	/* Each slot is either unused (NULLAGBLOCK) or a valid AG block. */
	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return false;
	}

	return xfs_log_check_lsn(mp,
				 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
}
 552
 553static void
 554xfs_agfl_read_verify(
 555        struct xfs_buf  *bp)
 556{
 557        struct xfs_mount *mp = bp->b_target->bt_mount;
 558
 559        /*
 560         * There is no verification of non-crc AGFLs because mkfs does not
 561         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 562         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 563         * can't verify just those entries are valid.
 564         */
 565        if (!xfs_sb_version_hascrc(&mp->m_sb))
 566                return;
 567
 568        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 569                xfs_buf_ioerror(bp, -EFSBADCRC);
 570        else if (!xfs_agfl_verify(bp))
 571                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 572
 573        if (bp->b_error)
 574                xfs_verifier_error(bp);
 575}
 576
/*
 * Write-side verifier for AGFL buffers: validate the structure, stamp
 * the LSN from the attached log item (if any), then recompute the CRC.
 * The ordering matters - the LSN must be set before the checksum covers it.
 */
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item *bip = bp->b_fspriv;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_agfl_verify(bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
 599
/* Buffer verifier operations attached to every AGFL buffer. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
};
 605
 606/*
 607 * Read in the allocation group free block array.
 608 */
 609int                                     /* error */
 610xfs_alloc_read_agfl(
 611        xfs_mount_t     *mp,            /* mount point structure */
 612        xfs_trans_t     *tp,            /* transaction pointer */
 613        xfs_agnumber_t  agno,           /* allocation group number */
 614        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 615{
 616        xfs_buf_t       *bp;            /* return value */
 617        int             error;
 618
 619        ASSERT(agno != NULLAGNUMBER);
 620        error = xfs_trans_read_buf(
 621                        mp, tp, mp->m_ddev_targp,
 622                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 623                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 624        if (error)
 625                return error;
 626        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 627        *bpp = bp;
 628        return 0;
 629}
 630
/*
 * Apply a free-space delta (len may be negative for allocations) to the
 * per-AG counter, the on-disk AGF and the transaction's block delta,
 * then log the AGF.  Returns -EFSCORRUPTED if the updated free count
 * exceeds the AG size.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	/* Sanity check: free blocks can never exceed the AG length. */
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length)))
		return -EFSCORRUPTED;

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
 651
/*
 * Allocation group level functions.
 */

/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Dispatches to the size/near/exact allocator based on args->type, then
 * performs the common post-allocation bookkeeping: rmap insertion,
 * free-space counter update, AG reservation accounting and stats.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);

	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	/* NULLAGBLOCK means the allocator found nothing; not an error. */
	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
	ASSERT(args->agbno % args->alignment == 0);

	/* if not file data, insert new block into the reverse map btree */
	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			return error;
	}

	/*
	 * Freelist blocks are already accounted for; only adjust the
	 * counters for extents carved out of the free-space btrees.
	 */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					      args->agbno, args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(args->mp, xs_allocx);
	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
	return error;
}
 728
 729/*
 730 * Allocate a variable extent at exactly agno/bno.
 731 * Extent's length (returned in *len) will be between minlen and maxlen,
 732 * and of the form k * prod + mod unless there's nothing that large.
 733 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 734 */
STATIC int                      /* error */
xfs_alloc_ag_vextent_exact(
        xfs_alloc_arg_t *args)  /* allocation argument structure */
{
        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
        int             error;
        xfs_agblock_t   fbno;   /* start block of found extent */
        xfs_extlen_t    flen;   /* length of found extent */
        xfs_agblock_t   tbno;   /* start block of busy extent */
        xfs_extlen_t    tlen;   /* length of busy extent */
        xfs_agblock_t   tend;   /* end block of busy extent */
        int             i;      /* success/failure of operation */
        unsigned        busy_gen;       /* busy-extent generation (unused here after trim) */

        /* Exact-bno allocations never carry an alignment requirement. */
        ASSERT(args->alignment == 1);

        /*
         * Allocate/initialize a cursor for the by-number freespace btree.
         */
        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                                          args->agno, XFS_BTNUM_BNO);

        /*
         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
         * Look for the closest free block <= bno, it must contain bno
         * if any free block does.
         */
        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
        if (error)
                goto error0;
        if (!i)
                goto not_found;

        /*
         * Grab the freespace record.
         */
        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
        if (error)
                goto error0;
        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
        /* The <= lookup guarantees the record cannot start past agbno. */
        ASSERT(fbno <= args->agbno);

        /*
         * Check for overlapping busy extents.
         * xfs_extent_busy_trim() shrinks [tbno, tlen) in place to the
         * largest sub-range not overlapped by busy (not-yet-committed
         * freed) extents.
         */
        tbno = fbno;
        tlen = flen;
        xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

        /*
         * Give up if the start of the extent is busy, or the freespace isn't
         * long enough for the minimum request.
         */
        if (tbno > args->agbno)
                goto not_found;
        if (tlen < args->minlen)
                goto not_found;
        tend = tbno + tlen;
        if (tend < args->agbno + args->minlen)
                goto not_found;

        /*
         * End of extent will be smaller of the freespace end and the
         * maximal requested end.
         *
         * Fix the length according to mod and prod if given.
         */
        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
                                                - args->agbno;
        xfs_alloc_fix_len(args);
        ASSERT(args->agbno + args->len <= tend);

        /*
         * We are allocating agbno for args->len
         * Allocate/initialize a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
        ASSERT(args->agbno + args->len <=
                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
        /* Remove the allocated range from both freespace btrees. */
        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
                                      args->len, XFSA_FIXUP_BNO_OK);
        if (error) {
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
                goto error0;
        }

        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

        args->wasfromfl = 0;
        trace_xfs_alloc_exact_done(args);
        return 0;

not_found:
        /* Didn't find it, return null. */
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        args->agbno = NULLAGBLOCK;
        trace_xfs_alloc_exact_notfound(args);
        return 0;

error0:
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
        trace_xfs_alloc_exact_error(args);
        return error;
}
 842
/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 *
 * On a 0 return, exactly one of the two cursors has been deleted and its
 * pointer set to NULL: *scur if the already-found ("good") extent wins,
 * *gcur if the search found something strictly closer.  The caller decides
 * which extent to use by checking which cursor survived.  On error, both
 * cursors are left intact and the caller must tear them down.
 */
STATIC int
xfs_alloc_find_best_extent(
        struct xfs_alloc_arg    *args,  /* allocation argument structure */
        struct xfs_btree_cur    **gcur, /* good cursor */
        struct xfs_btree_cur    **scur, /* searching cursor */
        xfs_agblock_t           gdiff,  /* difference for search comparison */
        xfs_agblock_t           *sbno,  /* extent found by search */
        xfs_extlen_t            *slen,  /* extent length */
        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
        xfs_extlen_t            *slena, /* aligned extent length */
        int                     dir)    /* 0 = search right, 1 = search left */
{
        xfs_agblock_t           new;    /* aligned start of candidate (unused result) */
        xfs_agblock_t           sdiff;  /* distance of candidate from args->agbno */
        int                     error;
        int                     i;      /* btree operation status */
        unsigned                busy_gen;

        /* The good extent is perfect, no need to  search. */
        if (!gdiff)
                goto out_use_good;

        /*
         * Look until we find a better one, run out of space or run off the end.
         */
        do {
                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
                if (error)
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                xfs_alloc_compute_aligned(args, *sbno, *slen,
                                sbnoa, slena, &busy_gen);

                /*
                 * The good extent is closer than this one.
                 * Records are visited in bno order, so once the aligned
                 * start is at least gdiff away (or outside the allowed
                 * agbno range) no later record in this direction can win.
                 */
                if (!dir) {
                        if (*sbnoa > args->max_agbno)
                                goto out_use_good;
                        if (*sbnoa >= args->agbno + gdiff)
                                goto out_use_good;
                } else {
                        if (*sbnoa < args->min_agbno)
                                goto out_use_good;
                        if (*sbnoa <= args->agbno - gdiff)
                                goto out_use_good;
                }

                /*
                 * Same distance, compare length and pick the best.
                 */
                if (*slena >= args->minlen) {
                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
                        xfs_alloc_fix_len(args);

                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
                                                       args->alignment,
                                                       args->datatype, *sbnoa,
                                                       *slena, &new);

                        /*
                         * Choose closer size and invalidate other cursor.
                         */
                        if (sdiff < gdiff)
                                goto out_use_search;
                        goto out_use_good;
                }

                /* Candidate too short after alignment; step to the next record. */
                if (!dir)
                        error = xfs_btree_increment(*scur, 0, &i);
                else
                        error = xfs_btree_decrement(*scur, 0, &i);
                if (error)
                        goto error0;
        } while (i);

out_use_good:
        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
        *scur = NULL;
        return 0;

out_use_search:
        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
        *gcur = NULL;
        return 0;

error0:
        /* caller invalidates cursors */
        return error;
}
 937
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Two strategies are tried in order:
 *   1. If the cnt (by-size) btree lookup lands in its last leaf block,
 *      scan just that block for the best-fitting record.
 *   2. Otherwise walk the bno (by-block) btree left and right from
 *      args->agbno simultaneously and take the closer usable extent.
 * If everything usable turned out to be busy, force the log via
 * xfs_extent_busy_flush() and restart.
 */
STATIC int                              /* error */
xfs_alloc_ag_vextent_near(
        xfs_alloc_arg_t *args)          /* allocation argument structure */
{
        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
        xfs_agblock_t   gtbno;          /* start bno of right side entry */
        xfs_agblock_t   gtbnoa;         /* aligned ... */
        xfs_extlen_t    gtdiff;         /* difference to right side entry */
        xfs_extlen_t    gtlen;          /* length of right side entry */
        xfs_extlen_t    gtlena;         /* aligned ... */
        xfs_agblock_t   gtnew;          /* useful start bno of right side */
        int             error;          /* error code */
        int             i;              /* result code, temporary */
        int             j;              /* result code, temporary */
        xfs_agblock_t   ltbno;          /* start bno of left side entry */
        xfs_agblock_t   ltbnoa;         /* aligned ... */
        xfs_extlen_t    ltdiff;         /* difference to left side entry */
        xfs_extlen_t    ltlen;          /* length of left side entry */
        xfs_extlen_t    ltlena;         /* aligned ... */
        xfs_agblock_t   ltnew;          /* useful start bno of left side */
        xfs_extlen_t    rlen;           /* length of returned extent */
        bool            busy;           /* any candidate overlapped a busy extent */
        unsigned        busy_gen;       /* generation to pass to busy flush */
#ifdef DEBUG
        /*
         * Randomly don't execute the first algorithm.
         * This exercises the second algorithm's code paths in debug builds.
         */
        int             dofirst;        /* set to do first algorithm */

        dofirst = prandom_u32() & 1;
#endif

        /* handle unitialized agbno range so caller doesn't have to */
        if (!args->min_agbno && !args->max_agbno)
                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
        ASSERT(args->min_agbno <= args->max_agbno);

        /* clamp agbno to the range if it's outside */
        if (args->agbno < args->min_agbno)
                args->agbno = args->min_agbno;
        if (args->agbno > args->max_agbno)
                args->agbno = args->max_agbno;

restart:
        bno_cur_lt = NULL;
        bno_cur_gt = NULL;
        ltlen = 0;
        gtlena = 0;
        ltlena = 0;
        busy = false;

        /*
         * Get a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);

        /*
         * See if there are any free extents as big as maxlen.
         */
        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
                goto error0;
        /*
         * If none, then pick up the last entry in the tree unless the
         * tree is empty.
         */
        if (!i) {
                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
                                &ltlen, &i)))
                        goto error0;
                if (i == 0 || ltlen == 0) {
                        /* Nothing at all available; report NULLAGBLOCK via caller. */
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                        trace_xfs_alloc_near_noentry(args);
                        return 0;
                }
                ASSERT(i == 1);
        }
        args->wasfromfl = 0;

        /*
         * First algorithm.
         * If the requested extent is large wrt the freespaces available
         * in this a.g., then the cursor will be pointing to a btree entry
         * near the right edge of the tree.  If it's in the last btree leaf
         * block, then we just examine all the entries in that block
         * that are big enough, and pick the best one.
         * This is written as a while loop so we can break out of it,
         * but we never loop back to the top.
         */
        while (xfs_btree_islastblock(cnt_cur, 0)) {
                xfs_extlen_t    bdiff;          /* best distance so far */
                int             besti=0;        /* leaf slot of best record */
                xfs_extlen_t    blen=0;         /* best usable length so far */
                xfs_agblock_t   bnew=0;         /* best aligned start so far */

#ifdef DEBUG
                if (dofirst)
                        break;
#endif
                /*
                 * Start from the entry that lookup found, sequence through
                 * all larger free blocks.  If we're actually pointing at a
                 * record smaller than maxlen, go to the start of this block,
                 * and skip all those smaller than minlen.
                 */
                if (ltlen || args->alignment > 1) {
                        cnt_cur->bc_ptrs[0] = 1;
                        do {
                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
                                                &ltlen, &i)))
                                        goto error0;
                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                                if (ltlen >= args->minlen)
                                        break;
                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
                                        goto error0;
                        } while (i);
                        ASSERT(ltlen >= args->minlen);
                        if (!i)
                                break;
                }
                /* Scan forward from the current slot, tracking the best fit. */
                i = cnt_cur->bc_ptrs[0];
                for (j = 1, blen = 0, bdiff = 0;
                     !error && j && (blen < args->maxlen || bdiff > 0);
                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
                        /*
                         * For each entry, decide if it's better than
                         * the previous best entry.
                         */
                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                        &ltbnoa, &ltlena, &busy_gen);
                        if (ltlena < args->minlen)
                                continue;
                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
                                continue;
                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        ASSERT(args->len >= args->minlen);
                        if (args->len < blen)
                                continue;
                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
                                args->alignment, args->datatype, ltbnoa,
                                ltlena, &ltnew);
                        /* Prefer longer; at equal-or-better length prefer closer. */
                        if (ltnew != NULLAGBLOCK &&
                            (args->len > blen || ltdiff < bdiff)) {
                                bdiff = ltdiff;
                                bnew = ltnew;
                                blen = args->len;
                                besti = cnt_cur->bc_ptrs[0];
                        }
                }
                /*
                 * It didn't work.  We COULD be in a case where
                 * there's a good record somewhere, so try again.
                 */
                if (blen == 0)
                        break;
                /*
                 * Point at the best entry, and retrieve it again.
                 */
                cnt_cur->bc_ptrs[0] = besti;
                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
                args->len = blen;

                /*
                 * We are allocating starting at bnew for blen blocks.
                 */
                args->agbno = bnew;
                ASSERT(bnew >= ltbno);
                ASSERT(bnew + blen <= ltbno + ltlen);
                /*
                 * Set up a cursor for the by-bno tree.
                 */
                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
                        args->agbp, args->agno, XFS_BTNUM_BNO);
                /*
                 * Fix up the btree entries.
                 */
                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
                        goto error0;
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);

                trace_xfs_alloc_near_first(args);
                return 0;
        }
        /*
         * Second algorithm.
         * Search in the by-bno tree to the left and to the right
         * simultaneously, until in each case we find a space big enough,
         * or run into the edge of the tree.  When we run into the edge,
         * we deallocate that cursor.
         * If both searches succeed, we compare the two spaces and pick
         * the better one.
         * With alignment, it's possible for both to fail; the upper
         * level algorithm that picks allocation groups for allocations
         * is not supposed to do this.
         */
        /*
         * Allocate and initialize the cursor for the leftward search.
         */
        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_BNO);
        /*
         * Lookup <= bno to find the leftward search's starting point.
         */
        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
                goto error0;
        if (!i) {
                /*
                 * Didn't find anything; use this cursor for the rightward
                 * search.
                 */
                bno_cur_gt = bno_cur_lt;
                bno_cur_lt = NULL;
        }
        /*
         * Found something.  Duplicate the cursor for the rightward search.
         */
        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
                goto error0;
        /*
         * Increment the cursor, so we will point at the entry just right
         * of the leftward entry if any, or to the leftmost entry.
         */
        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
                goto error0;
        if (!i) {
                /*
                 * It failed, there are no rightward entries.
                 */
                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
                bno_cur_gt = NULL;
        }
        /*
         * Loop going left with the leftward cursor, right with the
         * rightward cursor, until either both directions give up or
         * we find an entry at least as big as minlen.
         */
        do {
                if (bno_cur_lt) {
                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                        &ltbnoa, &ltlena, &busy_gen);
                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
                                break;
                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
                                goto error0;
                        /* Ran off the left edge or below min_agbno: give up leftward. */
                        if (!i || ltbnoa < args->min_agbno) {
                                xfs_btree_del_cursor(bno_cur_lt,
                                                     XFS_BTREE_NOERROR);
                                bno_cur_lt = NULL;
                        }
                }
                if (bno_cur_gt) {
                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                goto error0;
                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
                                        &gtbnoa, &gtlena, &busy_gen);
                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
                                break;
                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
                                goto error0;
                        /* Ran off the right edge or above max_agbno: give up rightward. */
                        if (!i || gtbnoa > args->max_agbno) {
                                xfs_btree_del_cursor(bno_cur_gt,
                                                     XFS_BTREE_NOERROR);
                                bno_cur_gt = NULL;
                        }
                }
        } while (bno_cur_lt || bno_cur_gt);

        /*
         * Got both cursors still active, need to find better entry.
         * xfs_alloc_find_best_extent() frees whichever cursor loses and
         * NULLs it, so afterwards exactly one of the two remains.
         */
        if (bno_cur_lt && bno_cur_gt) {
                if (ltlena >= args->minlen) {
                        /*
                         * Left side is good, look for a right side entry.
                         */
                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
                                args->alignment, args->datatype, ltbnoa,
                                ltlena, &ltnew);

                        error = xfs_alloc_find_best_extent(args,
                                                &bno_cur_lt, &bno_cur_gt,
                                                ltdiff, &gtbno, &gtlen,
                                                &gtbnoa, &gtlena,
                                                0 /* search right */);
                } else {
                        ASSERT(gtlena >= args->minlen);

                        /*
                         * Right side is good, look for a left side entry.
                         */
                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
                                args->alignment, args->datatype, gtbnoa,
                                gtlena, &gtnew);

                        error = xfs_alloc_find_best_extent(args,
                                                &bno_cur_gt, &bno_cur_lt,
                                                gtdiff, &ltbno, &ltlen,
                                                &ltbnoa, &ltlena,
                                                1 /* search left */);
                }

                if (error)
                        goto error0;
        }

        /*
         * If we couldn't get anything, give up.
         * If we only failed because everything was busy, push the log to
         * commit the busy extents and retry from scratch.
         */
        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

                if (busy) {
                        trace_xfs_alloc_near_busy(args);
                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
                        goto restart;
                }
                trace_xfs_alloc_size_neither(args);
                args->agbno = NULLAGBLOCK;
                return 0;
        }

        /*
         * At this point we have selected a freespace entry, either to the
         * left or to the right.  If it's on the right, copy all the
         * useful variables to the "left" set so we only have one
         * copy of this code.
         */
        if (bno_cur_gt) {
                bno_cur_lt = bno_cur_gt;
                bno_cur_gt = NULL;
                ltbno = gtbno;
                ltbnoa = gtbnoa;
                ltlen = gtlen;
                ltlena = gtlena;
                j = 1;          /* remember it came from the right for tracing */
        } else
                j = 0;

        /*
         * Fix up the length and compute the useful address.
         */
        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
        xfs_alloc_fix_len(args);
        rlen = args->len;
        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
                                     args->datatype, ltbnoa, ltlena, &ltnew);
        ASSERT(ltnew >= ltbno);
        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
        args->agbno = ltnew;

        /* Remove [ltnew, rlen) from both freespace btrees. */
        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
                goto error0;

        if (j)
                trace_xfs_alloc_near_greater(args);
        else
                trace_xfs_alloc_near_lesser(args);

        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
        return 0;

 error0:
        trace_xfs_alloc_near_error(args);
        if (cnt_cur != NULL)
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
        if (bno_cur_lt != NULL)
                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
        if (bno_cur_gt != NULL)
                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
        return error;
}
1339
1340/*
1341 * Allocate a variable extent anywhere in the allocation group agno.
1342 * Extent's length (returned in len) will be between minlen and maxlen,
1343 * and of the form k * prod + mod unless there's nothing that large.
1344 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1345 */
1346STATIC int                              /* error */
1347xfs_alloc_ag_vextent_size(
1348        xfs_alloc_arg_t *args)          /* allocation argument structure */
1349{
1350        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1351        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1352        int             error;          /* error result */
1353        xfs_agblock_t   fbno;           /* start of found freespace */
1354        xfs_extlen_t    flen;           /* length of found freespace */
1355        int             i;              /* temp status variable */
1356        xfs_agblock_t   rbno;           /* returned block number */
1357        xfs_extlen_t    rlen;           /* length of returned extent */
1358        bool            busy;
1359        unsigned        busy_gen;
1360
1361restart:
1362        /*
1363         * Allocate and initialize a cursor for the by-size btree.
1364         */
1365        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1366                args->agno, XFS_BTNUM_CNT);
1367        bno_cur = NULL;
1368        busy = false;
1369
1370        /*
1371         * Look for an entry >= maxlen+alignment-1 blocks.
1372         */
1373        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1374                        args->maxlen + args->alignment - 1, &i)))
1375                goto error0;
1376
1377        /*
1378         * If none then we have to settle for a smaller extent. In the case that
1379         * there are no large extents, this will return the last entry in the
1380         * tree unless the tree is empty. In the case that there are only busy
1381         * large extents, this will return the largest small extent unless there
1382         * are no smaller extents available.
1383         */
1384        if (!i) {
1385                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1386                                                   &fbno, &flen, &i);
1387                if (error)
1388                        goto error0;
1389                if (i == 0 || flen == 0) {
1390                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1391                        trace_xfs_alloc_size_noentry(args);
1392                        return 0;
1393                }
1394                ASSERT(i == 1);
1395                busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1396                                &rlen, &busy_gen);
1397        } else {
1398                /*
1399                 * Search for a non-busy extent that is large enough.
1400                 */
1401                for (;;) {
1402                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1403                        if (error)
1404                                goto error0;
1405                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1406
1407                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1408                                        &rbno, &rlen, &busy_gen);
1409
1410                        if (rlen >= args->maxlen)
1411                                break;
1412
1413                        error = xfs_btree_increment(cnt_cur, 0, &i);
1414                        if (error)
1415                                goto error0;
1416                        if (i == 0) {
1417                                /*
1418                                 * Our only valid extents must have been busy.
1419                                 * Make it unbusy by forcing the log out and
1420                                 * retrying.
1421                                 */
1422                                xfs_btree_del_cursor(cnt_cur,
1423                                                     XFS_BTREE_NOERROR);
1424                                trace_xfs_alloc_size_busy(args);
1425                                xfs_extent_busy_flush(args->mp,
1426                                                        args->pag, busy_gen);
1427                                goto restart;
1428                        }
1429                }
1430        }
1431
1432        /*
1433         * In the first case above, we got the last entry in the
1434         * by-size btree.  Now we check to see if the space hits maxlen
1435         * once aligned; if not, we search left for something better.
1436         * This can't happen in the second case above.
1437         */
1438        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1439        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1440                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1441        if (rlen < args->maxlen) {
1442                xfs_agblock_t   bestfbno;
1443                xfs_extlen_t    bestflen;
1444                xfs_agblock_t   bestrbno;
1445                xfs_extlen_t    bestrlen;
1446
1447                bestrlen = rlen;
1448                bestrbno = rbno;
1449                bestflen = flen;
1450                bestfbno = fbno;
1451                for (;;) {
1452                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1453                                goto error0;
1454                        if (i == 0)
1455                                break;
1456                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1457                                        &i)))
1458                                goto error0;
1459                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1460                        if (flen < bestrlen)
1461                                break;
1462                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1463                                        &rbno, &rlen, &busy_gen);
1464                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1465                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1466                                (rlen <= flen && rbno + rlen <= fbno + flen),
1467                                error0);
1468                        if (rlen > bestrlen) {
1469                                bestrlen = rlen;
1470                                bestrbno = rbno;
1471                                bestflen = flen;
1472                                bestfbno = fbno;
1473                                if (rlen == args->maxlen)
1474                                        break;
1475                        }
1476                }
1477                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1478                                &i)))
1479                        goto error0;
1480                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1481                rlen = bestrlen;
1482                rbno = bestrbno;
1483                flen = bestflen;
1484                fbno = bestfbno;
1485        }
1486        args->wasfromfl = 0;
1487        /*
1488         * Fix up the length.
1489         */
1490        args->len = rlen;
1491        if (rlen < args->minlen) {
1492                if (busy) {
1493                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1494                        trace_xfs_alloc_size_busy(args);
1495                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1496                        goto restart;
1497                }
1498                goto out_nominleft;
1499        }
1500        xfs_alloc_fix_len(args);
1501
1502        rlen = args->len;
1503        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1504        /*
1505         * Allocate and initialize a cursor for the by-block tree.
1506         */
1507        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1508                args->agno, XFS_BTNUM_BNO);
1509        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1510                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1511                goto error0;
1512        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1513        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1514        cnt_cur = bno_cur = NULL;
1515        args->len = rlen;
1516        args->agbno = rbno;
1517        XFS_WANT_CORRUPTED_GOTO(args->mp,
1518                args->agbno + args->len <=
1519                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1520                error0);
1521        trace_xfs_alloc_size_done(args);
1522        return 0;
1523
1524error0:
1525        trace_xfs_alloc_size_error(args);
1526        if (cnt_cur)
1527                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1528        if (bno_cur)
1529                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1530        return error;
1531
1532out_nominleft:
1533        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1534        trace_xfs_alloc_size_nominleft(args);
1535        args->agbno = NULLAGBLOCK;
1536        return 0;
1537}
1538
1539/*
1540 * Deal with the case where only small freespaces remain.
1541 * Either return the contents of the last freespace record,
1542 * or allocate space from the freelist if there is nothing in the tree.
1543 */
1544STATIC int                      /* error */
1545xfs_alloc_ag_vextent_small(
1546        xfs_alloc_arg_t *args,  /* allocation argument structure */
1547        xfs_btree_cur_t *ccur,  /* by-size cursor */
1548        xfs_agblock_t   *fbnop, /* result block number */
1549        xfs_extlen_t    *flenp, /* result length */
1550        int             *stat)  /* status: 0-freelist, 1-normal/none */
1551{
1552        struct xfs_owner_info   oinfo;
1553        struct xfs_perag        *pag;
1554        int             error;
1555        xfs_agblock_t   fbno;
1556        xfs_extlen_t    flen;
1557        int             i;
1558
1559        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1560                goto error0;
1561        if (i) {
1562                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1563                        goto error0;
1564                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1565        }
1566        /*
1567         * Nothing in the btree, try the freelist.  Make sure
1568         * to respect minleft even when pulling from the
1569         * freelist.
1570         */
1571        else if (args->minlen == 1 && args->alignment == 1 &&
1572                 args->resv != XFS_AG_RESV_AGFL &&
1573                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1574                  > args->minleft)) {
1575                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1576                if (error)
1577                        goto error0;
1578                if (fbno != NULLAGBLOCK) {
1579                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1580                              xfs_alloc_allow_busy_reuse(args->datatype));
1581
1582                        if (xfs_alloc_is_userdata(args->datatype)) {
1583                                xfs_buf_t       *bp;
1584
1585                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1586                                        args->agno, fbno, 0);
1587                                xfs_trans_binval(args->tp, bp);
1588                        }
1589                        args->len = 1;
1590                        args->agbno = fbno;
1591                        XFS_WANT_CORRUPTED_GOTO(args->mp,
1592                                args->agbno + args->len <=
1593                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1594                                error0);
1595                        args->wasfromfl = 1;
1596                        trace_xfs_alloc_small_freelist(args);
1597
1598                        /*
1599                         * If we're feeding an AGFL block to something that
1600                         * doesn't live in the free space, we need to clear
1601                         * out the OWN_AG rmap and add the block back to
1602                         * the AGFL per-AG reservation.
1603                         */
1604                        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1605                        error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1606                                        fbno, 1, &oinfo);
1607                        if (error)
1608                                goto error0;
1609                        pag = xfs_perag_get(args->mp, args->agno);
1610                        xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1611                                        args->tp, 1);
1612                        xfs_perag_put(pag);
1613
1614                        *stat = 0;
1615                        return 0;
1616                }
1617                /*
1618                 * Nothing in the freelist.
1619                 */
1620                else
1621                        flen = 0;
1622        }
1623        /*
1624         * Can't allocate from the freelist for some reason.
1625         */
1626        else {
1627                fbno = NULLAGBLOCK;
1628                flen = 0;
1629        }
1630        /*
1631         * Can't do the allocation, give up.
1632         */
1633        if (flen < args->minlen) {
1634                args->agbno = NULLAGBLOCK;
1635                trace_xfs_alloc_small_notenough(args);
1636                flen = 0;
1637        }
1638        *fbnop = fbno;
1639        *flenp = flen;
1640        *stat = 1;
1641        trace_xfs_alloc_small_done(args);
1642        return 0;
1643
1644error0:
1645        trace_xfs_alloc_small_error(args);
1646        return error;
1647}
1648
1649/*
1650 * Free the extent starting at agno/bno for length.
1651 */
1652STATIC int
1653xfs_free_ag_extent(
1654        xfs_trans_t             *tp,
1655        xfs_buf_t               *agbp,
1656        xfs_agnumber_t          agno,
1657        xfs_agblock_t           bno,
1658        xfs_extlen_t            len,
1659        struct xfs_owner_info   *oinfo,
1660        enum xfs_ag_resv_type   type)
1661{
1662        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1663        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1664        int             error;          /* error return value */
1665        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1666        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1667        int             haveleft;       /* have a left neighbor block */
1668        int             haveright;      /* have a right neighbor block */
1669        int             i;              /* temp, result code */
1670        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1671        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1672        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1673        xfs_agblock_t   nbno;           /* new starting block of freespace */
1674        xfs_extlen_t    nlen;           /* new length of freespace */
1675        xfs_perag_t     *pag;           /* per allocation group data */
1676
1677        bno_cur = cnt_cur = NULL;
1678        mp = tp->t_mountp;
1679
1680        if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1681                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1682                if (error)
1683                        goto error0;
1684        }
1685
1686        /*
1687         * Allocate and initialize a cursor for the by-block btree.
1688         */
1689        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1690        /*
1691         * Look for a neighboring block on the left (lower block numbers)
1692         * that is contiguous with this space.
1693         */
1694        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1695                goto error0;
1696        if (haveleft) {
1697                /*
1698                 * There is a block to our left.
1699                 */
1700                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1701                        goto error0;
1702                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1703                /*
1704                 * It's not contiguous, though.
1705                 */
1706                if (ltbno + ltlen < bno)
1707                        haveleft = 0;
1708                else {
1709                        /*
1710                         * If this failure happens the request to free this
1711                         * space was invalid, it's (partly) already free.
1712                         * Very bad.
1713                         */
1714                        XFS_WANT_CORRUPTED_GOTO(mp,
1715                                                ltbno + ltlen <= bno, error0);
1716                }
1717        }
1718        /*
1719         * Look for a neighboring block on the right (higher block numbers)
1720         * that is contiguous with this space.
1721         */
1722        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1723                goto error0;
1724        if (haveright) {
1725                /*
1726                 * There is a block to our right.
1727                 */
1728                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1729                        goto error0;
1730                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1731                /*
1732                 * It's not contiguous, though.
1733                 */
1734                if (bno + len < gtbno)
1735                        haveright = 0;
1736                else {
1737                        /*
1738                         * If this failure happens the request to free this
1739                         * space was invalid, it's (partly) already free.
1740                         * Very bad.
1741                         */
1742                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1743                }
1744        }
1745        /*
1746         * Now allocate and initialize a cursor for the by-size tree.
1747         */
1748        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1749        /*
1750         * Have both left and right contiguous neighbors.
1751         * Merge all three into a single free block.
1752         */
1753        if (haveleft && haveright) {
1754                /*
1755                 * Delete the old by-size entry on the left.
1756                 */
1757                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1758                        goto error0;
1759                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1760                if ((error = xfs_btree_delete(cnt_cur, &i)))
1761                        goto error0;
1762                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1763                /*
1764                 * Delete the old by-size entry on the right.
1765                 */
1766                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1767                        goto error0;
1768                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1769                if ((error = xfs_btree_delete(cnt_cur, &i)))
1770                        goto error0;
1771                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1772                /*
1773                 * Delete the old by-block entry for the right block.
1774                 */
1775                if ((error = xfs_btree_delete(bno_cur, &i)))
1776                        goto error0;
1777                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1778                /*
1779                 * Move the by-block cursor back to the left neighbor.
1780                 */
1781                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1782                        goto error0;
1783                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1784#ifdef DEBUG
1785                /*
1786                 * Check that this is the right record: delete didn't
1787                 * mangle the cursor.
1788                 */
1789                {
1790                        xfs_agblock_t   xxbno;
1791                        xfs_extlen_t    xxlen;
1792
1793                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1794                                        &i)))
1795                                goto error0;
1796                        XFS_WANT_CORRUPTED_GOTO(mp,
1797                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1798                                error0);
1799                }
1800#endif
1801                /*
1802                 * Update remaining by-block entry to the new, joined block.
1803                 */
1804                nbno = ltbno;
1805                nlen = len + ltlen + gtlen;
1806                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1807                        goto error0;
1808        }
1809        /*
1810         * Have only a left contiguous neighbor.
1811         * Merge it together with the new freespace.
1812         */
1813        else if (haveleft) {
1814                /*
1815                 * Delete the old by-size entry on the left.
1816                 */
1817                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1818                        goto error0;
1819                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1820                if ((error = xfs_btree_delete(cnt_cur, &i)))
1821                        goto error0;
1822                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1823                /*
1824                 * Back up the by-block cursor to the left neighbor, and
1825                 * update its length.
1826                 */
1827                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1828                        goto error0;
1829                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1830                nbno = ltbno;
1831                nlen = len + ltlen;
1832                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1833                        goto error0;
1834        }
1835        /*
1836         * Have only a right contiguous neighbor.
1837         * Merge it together with the new freespace.
1838         */
1839        else if (haveright) {
1840                /*
1841                 * Delete the old by-size entry on the right.
1842                 */
1843                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1844                        goto error0;
1845                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1846                if ((error = xfs_btree_delete(cnt_cur, &i)))
1847                        goto error0;
1848                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1849                /*
1850                 * Update the starting block and length of the right
1851                 * neighbor in the by-block tree.
1852                 */
1853                nbno = bno;
1854                nlen = len + gtlen;
1855                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1856                        goto error0;
1857        }
1858        /*
1859         * No contiguous neighbors.
1860         * Insert the new freespace into the by-block tree.
1861         */
1862        else {
1863                nbno = bno;
1864                nlen = len;
1865                if ((error = xfs_btree_insert(bno_cur, &i)))
1866                        goto error0;
1867                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1868        }
1869        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1870        bno_cur = NULL;
1871        /*
1872         * In all cases we need to insert the new freespace in the by-size tree.
1873         */
1874        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1875                goto error0;
1876        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1877        if ((error = xfs_btree_insert(cnt_cur, &i)))
1878                goto error0;
1879        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1880        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1881        cnt_cur = NULL;
1882
1883        /*
1884         * Update the freespace totals in the ag and superblock.
1885         */
1886        pag = xfs_perag_get(mp, agno);
1887        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1888        xfs_ag_resv_free_extent(pag, type, tp, len);
1889        xfs_perag_put(pag);
1890        if (error)
1891                goto error0;
1892
1893        XFS_STATS_INC(mp, xs_freex);
1894        XFS_STATS_ADD(mp, xs_freeb, len);
1895
1896        trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1897                        haveleft, haveright);
1898
1899        return 0;
1900
1901 error0:
1902        trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1903                        -1, -1);
1904        if (bno_cur)
1905                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1906        if (cnt_cur)
1907                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1908        return error;
1909}
1910
1911/*
1912 * Visible (exported) allocation/free functions.
1913 * Some of these are used just by xfs_alloc_btree.c and this file.
1914 */
1915
1916/*
1917 * Compute and fill in value of m_ag_maxlevels.
1918 */
1919void
1920xfs_alloc_compute_maxlevels(
1921        xfs_mount_t     *mp)    /* file system mount structure */
1922{
1923        mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1924                        (mp->m_sb.sb_agblocks + 1) / 2);
1925}
1926
1927/*
1928 * Find the length of the longest extent in an AG.  The 'need' parameter
1929 * specifies how much space we're going to need for the AGFL and the
1930 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1931 * other callers.
1932 */
1933xfs_extlen_t
1934xfs_alloc_longest_free_extent(
1935        struct xfs_mount        *mp,
1936        struct xfs_perag        *pag,
1937        xfs_extlen_t            need,
1938        xfs_extlen_t            reserved)
1939{
1940        xfs_extlen_t            delta = 0;
1941
1942        /*
1943         * If the AGFL needs a recharge, we'll have to subtract that from the
1944         * longest extent.
1945         */
1946        if (need > pag->pagf_flcount)
1947                delta = need - pag->pagf_flcount;
1948
1949        /*
1950         * If we cannot maintain others' reservations with space from the
1951         * not-longest freesp extents, we'll have to subtract /that/ from
1952         * the longest extent too.
1953         */
1954        if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1955                delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1956
1957        /*
1958         * If the longest extent is long enough to satisfy all the
1959         * reservations and AGFL rules in place, we can return this extent.
1960         */
1961        if (pag->pagf_longest > delta)
1962                return pag->pagf_longest - delta;
1963
1964        /* Otherwise, let the caller try for 1 block if there's space. */
1965        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1966}
1967
1968unsigned int
1969xfs_alloc_min_freelist(
1970        struct xfs_mount        *mp,
1971        struct xfs_perag        *pag)
1972{
1973        unsigned int            min_free;
1974
1975        /* space needed by-bno freespace btree */
1976        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1977                                       mp->m_ag_maxlevels);
1978        /* space needed by-size freespace btree */
1979        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1980                                       mp->m_ag_maxlevels);
1981        /* space needed reverse mapping used space btree */
1982        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1983                min_free += min_t(unsigned int,
1984                                  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1985                                  mp->m_rmap_maxlevels);
1986
1987        return min_free;
1988}
1989
1990/*
1991 * Check if the operation we are fixing up the freelist for should go ahead or
1992 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1993 * is dependent on whether the size and shape of free space available will
1994 * permit the requested allocation to take place.
1995 */
1996static bool
1997xfs_alloc_space_available(
1998        struct xfs_alloc_arg    *args,
1999        xfs_extlen_t            min_free,
2000        int                     flags)
2001{
2002        struct xfs_perag        *pag = args->pag;
2003        xfs_extlen_t            alloc_len, longest;
2004        xfs_extlen_t            reservation; /* blocks that are still reserved */
2005        int                     available;
2006
2007        if (flags & XFS_ALLOC_FLAG_FREEING)
2008                return true;
2009
2010        reservation = xfs_ag_resv_needed(pag, args->resv);
2011
2012        /* do we have enough contiguous free space for the allocation? */
2013        alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2014        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2015                        reservation);
2016        if (longest < alloc_len)
2017                return false;
2018
2019        /* do we have enough free space remaining for the allocation? */
2020        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2021                          reservation - min_free - args->minleft);
2022        if (available < (int)max(args->total, alloc_len))
2023                return false;
2024
2025        /*
2026         * Clamp maxlen to the amount of free space available for the actual
2027         * extent allocation.
2028         */
2029        if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2030                args->maxlen = available;
2031                ASSERT(args->maxlen > 0);
2032                ASSERT(args->maxlen >= args->minlen);
2033        }
2034
2035        return true;
2036}
2037
2038/*
2039 * Decide whether to use this allocation group for this allocation.
2040 * If so, fix up the btree freelist's size.
2041 */
2042int                     /* error */
2043xfs_alloc_fix_freelist(
2044        struct xfs_alloc_arg    *args,  /* allocation argument structure */
2045        int                     flags)  /* XFS_ALLOC_FLAG_... */
2046{
2047        struct xfs_mount        *mp = args->mp;
2048        struct xfs_perag        *pag = args->pag;
2049        struct xfs_trans        *tp = args->tp;
2050        struct xfs_buf          *agbp = NULL;
2051        struct xfs_buf          *agflbp = NULL;
2052        struct xfs_alloc_arg    targs;  /* local allocation arguments */
2053        xfs_agblock_t           bno;    /* freelist block */
2054        xfs_extlen_t            need;   /* total blocks needed in freelist */
2055        int                     error = 0;
2056
2057        if (!pag->pagf_init) {
2058                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2059                if (error)
2060                        goto out_no_agbp;
2061                if (!pag->pagf_init) {
2062                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2063                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2064                        goto out_agbp_relse;
2065                }
2066        }
2067
2068        /*
2069         * If this is a metadata preferred pag and we are user data then try
2070         * somewhere else if we are not being asked to try harder at this
2071         * point
2072         */
2073        if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
2074            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2075                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2076                goto out_agbp_relse;
2077        }
2078
2079        need = xfs_alloc_min_freelist(mp, pag);
2080        if (!xfs_alloc_space_available(args, need, flags |
2081                        XFS_ALLOC_FLAG_CHECK))
2082                goto out_agbp_relse;
2083
2084        /*
2085         * Get the a.g. freespace buffer.
2086         * Can fail if we're not blocking on locks, and it's held.
2087         */
2088        if (!agbp) {
2089                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2090                if (error)
2091                        goto out_no_agbp;
2092                if (!agbp) {
2093                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2094                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2095                        goto out_no_agbp;
2096                }
2097        }
2098
2099        /* If there isn't enough total space or single-extent, reject it. */
2100        need = xfs_alloc_min_freelist(mp, pag);
2101        if (!xfs_alloc_space_available(args, need, flags))
2102                goto out_agbp_relse;
2103
2104        /*
2105         * Make the freelist shorter if it's too long.
2106         *
2107         * Note that from this point onwards, we will always release the agf and
2108         * agfl buffers on error. This handles the case where we error out and
2109         * the buffers are clean or may not have been joined to the transaction
2110         * and hence need to be released manually. If they have been joined to
2111         * the transaction, then xfs_trans_brelse() will handle them
2112         * appropriately based on the recursion count and dirty state of the
2113         * buffer.
2114         *
2115         * XXX (dgc): When we have lots of free space, does this buy us
2116         * anything other than extra overhead when we need to put more blocks
2117         * back on the free list? Maybe we should only do this when space is
2118         * getting low or the AGFL is more than half full?
2119         *
2120         * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2121         * big; the NORMAP flag prevents AGFL expand/shrink operations from
2122         * updating the rmapbt.  Both flags are used in xfs_repair while we're
2123         * rebuilding the rmapbt, and neither are used by the kernel.  They're
2124         * both required to ensure that rmaps are correctly recorded for the
2125         * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
2126         * repair/rmap.c in xfsprogs for details.
2127         */
2128        memset(&targs, 0, sizeof(targs));
2129        if (flags & XFS_ALLOC_FLAG_NORMAP)
2130                xfs_rmap_skip_owner_update(&targs.oinfo);
2131        else
2132                xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2133        while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
2134                struct xfs_buf  *bp;
2135
2136                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2137                if (error)
2138                        goto out_agbp_relse;
2139                error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2140                                           &targs.oinfo, XFS_AG_RESV_AGFL);
2141                if (error)
2142                        goto out_agbp_relse;
2143                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2144                xfs_trans_binval(tp, bp);
2145        }
2146
2147        targs.tp = tp;
2148        targs.mp = mp;
2149        targs.agbp = agbp;
2150        targs.agno = args->agno;
2151        targs.alignment = targs.minlen = targs.prod = 1;
2152        targs.type = XFS_ALLOCTYPE_THIS_AG;
2153        targs.pag = pag;
2154        error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2155        if (error)
2156                goto out_agbp_relse;
2157
2158        /* Make the freelist longer if it's too short. */
2159        while (pag->pagf_flcount < need) {
2160                targs.agbno = 0;
2161                targs.maxlen = need - pag->pagf_flcount;
2162                targs.resv = XFS_AG_RESV_AGFL;
2163
2164                /* Allocate as many blocks as possible at once. */
2165                error = xfs_alloc_ag_vextent(&targs);
2166                if (error)
2167                        goto out_agflbp_relse;
2168
2169                /*
2170                 * Stop if we run out.  Won't happen if callers are obeying
2171                 * the restrictions correctly.  Can happen for free calls
2172                 * on a completely full ag.
2173                 */
2174                if (targs.agbno == NULLAGBLOCK) {
2175                        if (flags & XFS_ALLOC_FLAG_FREEING)
2176                                break;
2177                        goto out_agflbp_relse;
2178                }
2179                /*
2180                 * Put each allocated block on the list.
2181                 */
2182                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2183                        error = xfs_alloc_put_freelist(tp, agbp,
2184                                                        agflbp, bno, 0);
2185                        if (error)
2186                                goto out_agflbp_relse;
2187                }
2188        }
2189        xfs_trans_brelse(tp, agflbp);
2190        args->agbp = agbp;
2191        return 0;
2192
2193out_agflbp_relse:
2194        xfs_trans_brelse(tp, agflbp);
2195out_agbp_relse:
2196        if (agbp)
2197                xfs_trans_brelse(tp, agbp);
2198out_no_agbp:
2199        args->agbp = NULL;
2200        return error;
2201}
2202
2203/*
2204 * Get a block from the freelist.
2205 * Returns with the buffer for the block gotten.
2206 */
2207int                             /* error */
2208xfs_alloc_get_freelist(
2209        xfs_trans_t     *tp,    /* transaction pointer */
2210        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2211        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2212        int             btreeblk) /* destination is a AGF btree */
2213{
2214        xfs_agf_t       *agf;   /* a.g. freespace structure */
2215        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2216        xfs_agblock_t   bno;    /* block number returned */
2217        __be32          *agfl_bno;
2218        int             error;
2219        int             logflags;
2220        xfs_mount_t     *mp = tp->t_mountp;
2221        xfs_perag_t     *pag;   /* per allocation group data */
2222
2223        /*
2224         * Freelist is empty, give up.
2225         */
2226        agf = XFS_BUF_TO_AGF(agbp);
2227        if (!agf->agf_flcount) {
2228                *bnop = NULLAGBLOCK;
2229                return 0;
2230        }
2231        /*
2232         * Read the array of free blocks.
2233         */
2234        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2235                                    &agflbp);
2236        if (error)
2237                return error;
2238
2239
2240        /*
2241         * Get the block number and update the data structures.
2242         */
2243        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2244        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2245        be32_add_cpu(&agf->agf_flfirst, 1);
2246        xfs_trans_brelse(tp, agflbp);
2247        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2248                agf->agf_flfirst = 0;
2249
2250        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2251        be32_add_cpu(&agf->agf_flcount, -1);
2252        xfs_trans_agflist_delta(tp, -1);
2253        pag->pagf_flcount--;
2254        xfs_perag_put(pag);
2255
2256        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2257        if (btreeblk) {
2258                be32_add_cpu(&agf->agf_btreeblks, 1);
2259                pag->pagf_btreeblks++;
2260                logflags |= XFS_AGF_BTREEBLKS;
2261        }
2262
2263        xfs_alloc_log_agf(tp, agbp, logflags);
2264        *bnop = bno;
2265
2266        return 0;
2267}
2268
2269/*
2270 * Log the given fields from the agf structure.
2271 */
2272void
2273xfs_alloc_log_agf(
2274        xfs_trans_t     *tp,    /* transaction pointer */
2275        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2276        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2277{
2278        int     first;          /* first byte offset */
2279        int     last;           /* last byte offset */
2280        static const short      offsets[] = {
2281                offsetof(xfs_agf_t, agf_magicnum),
2282                offsetof(xfs_agf_t, agf_versionnum),
2283                offsetof(xfs_agf_t, agf_seqno),
2284                offsetof(xfs_agf_t, agf_length),
2285                offsetof(xfs_agf_t, agf_roots[0]),
2286                offsetof(xfs_agf_t, agf_levels[0]),
2287                offsetof(xfs_agf_t, agf_flfirst),
2288                offsetof(xfs_agf_t, agf_fllast),
2289                offsetof(xfs_agf_t, agf_flcount),
2290                offsetof(xfs_agf_t, agf_freeblks),
2291                offsetof(xfs_agf_t, agf_longest),
2292                offsetof(xfs_agf_t, agf_btreeblks),
2293                offsetof(xfs_agf_t, agf_uuid),
2294                offsetof(xfs_agf_t, agf_rmap_blocks),
2295                offsetof(xfs_agf_t, agf_refcount_blocks),
2296                offsetof(xfs_agf_t, agf_refcount_root),
2297                offsetof(xfs_agf_t, agf_refcount_level),
2298                /* needed so that we don't log the whole rest of the structure: */
2299                offsetof(xfs_agf_t, agf_spare64),
2300                sizeof(xfs_agf_t)
2301        };
2302
2303        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2304
2305        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2306
2307        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2308        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2309}
2310
2311/*
2312 * Interface for inode allocation to force the pag data to be initialized.
2313 */
2314int                                     /* error */
2315xfs_alloc_pagf_init(
2316        xfs_mount_t             *mp,    /* file system mount structure */
2317        xfs_trans_t             *tp,    /* transaction pointer */
2318        xfs_agnumber_t          agno,   /* allocation group number */
2319        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2320{
2321        xfs_buf_t               *bp;
2322        int                     error;
2323
2324        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2325                return error;
2326        if (bp)
2327                xfs_trans_brelse(tp, bp);
2328        return 0;
2329}
2330
/*
 * Put the block on the freelist for the allocation group.
 *
 * Appends @bno at the tail of the circular AGFL for the AG described by
 * @agbp, updating both the on-disk AGF counters and the in-core perag,
 * and logs the modified regions of both buffers.  If @agflbp is NULL the
 * AGFL buffer is read here.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */
	__be32			*agfl_bno;
	int			startoff;

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* Read the AGFL ourselves if the caller didn't hand us its buffer. */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	/* Advance the tail index, wrapping around the circular AGFL. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;

	/*
	 * Bump the on-disk count, the transaction's freelist delta and the
	 * in-core perag count together so they stay consistent.
	 */
	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block is returning from btree use: one fewer on loan. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));

	/* Store the freed block in its AGFL slot and log just that slot. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): the AGF was already logged above with the same
	 * logflags; this second call looks redundant — confirm against
	 * upstream history before removing.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
2390
2391static bool
2392xfs_agf_verify(
2393        struct xfs_mount *mp,
2394        struct xfs_buf  *bp)
2395 {
2396        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2397
2398        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2399                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2400                        return false;
2401                if (!xfs_log_check_lsn(mp,
2402                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2403                        return false;
2404        }
2405
2406        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2407              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2408              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2409              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2410              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2411              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2412                return false;
2413
2414        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2415            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2416            be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2417            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2418                return false;
2419
2420        if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2421            (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2422             be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2423                return false;
2424
2425        /*
2426         * during growfs operations, the perag is not fully initialised,
2427         * so we can't use it for any useful checking. growfs ensures we can't
2428         * use it by using uncached buffers that don't have the perag attached
2429         * so we can detect and avoid this problem.
2430         */
2431        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2432                return false;
2433
2434        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2435            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2436                return false;
2437
2438        if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2439            (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2440             be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2441                return false;
2442
2443        return true;;
2444
2445}
2446
2447static void
2448xfs_agf_read_verify(
2449        struct xfs_buf  *bp)
2450{
2451        struct xfs_mount *mp = bp->b_target->bt_mount;
2452
2453        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2454            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2455                xfs_buf_ioerror(bp, -EFSBADCRC);
2456        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2457                                XFS_ERRTAG_ALLOC_READ_AGF))
2458                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2459
2460        if (bp->b_error)
2461                xfs_verifier_error(bp);
2462}
2463
2464static void
2465xfs_agf_write_verify(
2466        struct xfs_buf  *bp)
2467{
2468        struct xfs_mount *mp = bp->b_target->bt_mount;
2469        struct xfs_buf_log_item *bip = bp->b_fspriv;
2470
2471        if (!xfs_agf_verify(mp, bp)) {
2472                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2473                xfs_verifier_error(bp);
2474                return;
2475        }
2476
2477        if (!xfs_sb_version_hascrc(&mp->m_sb))
2478                return;
2479
2480        if (bip)
2481                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2482
2483        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2484}
2485
/* Buffer verifier operations for AGF header buffers. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
};
2491
2492/*
2493 * Read in the allocation group header (free/alloc section).
2494 */
2495int                                     /* error */
2496xfs_read_agf(
2497        struct xfs_mount        *mp,    /* mount point structure */
2498        struct xfs_trans        *tp,    /* transaction pointer */
2499        xfs_agnumber_t          agno,   /* allocation group number */
2500        int                     flags,  /* XFS_BUF_ */
2501        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2502{
2503        int             error;
2504
2505        trace_xfs_read_agf(mp, agno);
2506
2507        ASSERT(agno != NULLAGNUMBER);
2508        error = xfs_trans_read_buf(
2509                        mp, tp, mp->m_ddev_targp,
2510                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2511                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2512        if (error)
2513                return error;
2514        if (!*bpp)
2515                return 0;
2516
2517        ASSERT(!(*bpp)->b_error);
2518        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2519        return 0;
2520}
2521
/*
 * Read in the allocation group header (free/alloc section).
 *
 * On the first read of an AG this also initializes the in-core perag
 * freespace summary from the on-disk AGF.  On later reads (DEBUG builds
 * only, and only if the fs hasn't shut down) it cross-checks that the
 * perag still agrees with the on-disk values.
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;		/* ag freelist header */
	struct xfs_perag	*pag;		/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	/* Map the allocation flag onto the buffer-layer trylock flag. */
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	/* Trylock miss: no buffer, no error - caller must cope. */
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	if (!pag->pagf_init) {
		/* First read: seed the in-core summary from disk. */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		/* Already initialized: verify the cached values still match. */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}
2583
/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 *
 * On success args->fsbno is the allocated extent's start (or NULLFSBLOCK
 * if nothing could be allocated); a zero return with NULLFSBLOCK is thus
 * "no space", not an error.
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/* Same checks as the ASSERTs above, but fail soft on non-DEBUG. */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		error = xfs_alloc_fix_freelist(args, 0);
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		/* No AGF buffer means the freelist couldn't be fixed up. */
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/* inode32: start from the AG rotor instead. */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			error = xfs_alloc_fix_freelist(args, flags);
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor) {
			/* Advance the inode32 rotor for the next caller. */
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif

		/* Zero the extent if we were asked to do so */
		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
			if (error)
				goto error0;
		}

	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
2785
/*
 * Ensure that the freelist is at full capacity.
 *
 * On success *agbp holds the locked AGF buffer for @agno; ownership
 * passes to the caller, who must release it.
 */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = agno;

	/*
	 * Validate that the AG number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	args.pag = xfs_perag_get(args.mp, args.agno);
	ASSERT(args.pag);

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		goto out;

	*agbp = args.agbp;
out:
	xfs_perag_put(args.pag);
	return error;
}
2820
/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 */
int				/* error */
xfs_free_extent(
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_fsblock_t		bno,	/* starting block number of extent */
	xfs_extlen_t		len,	/* length of extent */
	struct xfs_owner_info	*oinfo,	/* extent owner */
	enum xfs_ag_resv_type	type)	/* block reservation type */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
	int			error;

	ASSERT(len != 0);
	/* AGFL blocks go through xfs_alloc_put_freelist, not here. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error-injection hook for testing free-space failure paths. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	/* Lock the AGF and top up the freelist before modifying btrees. */
	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
	if (error)
		return error;

	XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);

	/* validate the extent size is legal now we have the agf locked */
	XFS_WANT_CORRUPTED_GOTO(mp,
		agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
				err);

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
	if (error)
		goto err;

	/* Keep the range busy until the freeing transaction commits. */
	xfs_extent_busy_insert(tp, agno, agbno, len, 0);
	return 0;

err:
	xfs_trans_brelse(tp, agbp);
	return error;
}
2869
/*
 * Context carried through the generic btree range query on behalf of
 * xfs_alloc_query_range()/xfs_alloc_query_all().
 */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* per-record callback */
	void				*priv;	/* caller's opaque context */
};
2874
2875/* Format btree record and pass to our callback. */
2876STATIC int
2877xfs_alloc_query_range_helper(
2878        struct xfs_btree_cur            *cur,
2879        union xfs_btree_rec             *rec,
2880        void                            *priv)
2881{
2882        struct xfs_alloc_query_range_info       *query = priv;
2883        struct xfs_alloc_rec_incore             irec;
2884
2885        irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
2886        irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
2887        return query->fn(cur, &irec, query->priv);
2888}
2889
2890/* Find all free space within a given range of blocks. */
2891int
2892xfs_alloc_query_range(
2893        struct xfs_btree_cur                    *cur,
2894        struct xfs_alloc_rec_incore             *low_rec,
2895        struct xfs_alloc_rec_incore             *high_rec,
2896        xfs_alloc_query_range_fn                fn,
2897        void                                    *priv)
2898{
2899        union xfs_btree_irec                    low_brec;
2900        union xfs_btree_irec                    high_brec;
2901        struct xfs_alloc_query_range_info       query;
2902
2903        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2904        low_brec.a = *low_rec;
2905        high_brec.a = *high_rec;
2906        query.priv = priv;
2907        query.fn = fn;
2908        return xfs_btree_query_range(cur, &low_brec, &high_brec,
2909                        xfs_alloc_query_range_helper, &query);
2910}
2911
2912/* Find all free space records. */
2913int
2914xfs_alloc_query_all(
2915        struct xfs_btree_cur                    *cur,
2916        xfs_alloc_query_range_fn                fn,
2917        void                                    *priv)
2918{
2919        struct xfs_alloc_query_range_info       query;
2920
2921        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2922        query.priv = priv;
2923        query.fn = fn;
2924        return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
2925}
2926