linux/fs/xfs/libxfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_rmap.h"
  31#include "xfs_alloc_btree.h"
  32#include "xfs_alloc.h"
  33#include "xfs_extent_busy.h"
  34#include "xfs_error.h"
  35#include "xfs_cksum.h"
  36#include "xfs_trace.h"
  37#include "xfs_trans.h"
  38#include "xfs_buf_item.h"
  39#include "xfs_log.h"
  40#include "xfs_ag_resv.h"
  41
  42struct workqueue_struct *xfs_alloc_wq;
  43
  44#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  45
  46#define XFSA_FIXUP_BNO_OK       1
  47#define XFSA_FIXUP_CNT_OK       2
  48
  49STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  50STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  51STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  52STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  53                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  54
  55unsigned int
  56xfs_refc_block(
  57        struct xfs_mount        *mp)
  58{
  59        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  60                return XFS_RMAP_BLOCK(mp) + 1;
  61        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  62                return XFS_FIBT_BLOCK(mp) + 1;
  63        return XFS_IBT_BLOCK(mp) + 1;
  64}
  65
  66xfs_extlen_t
  67xfs_prealloc_blocks(
  68        struct xfs_mount        *mp)
  69{
  70        if (xfs_sb_version_hasreflink(&mp->m_sb))
  71                return xfs_refc_block(mp) + 1;
  72        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  73                return XFS_RMAP_BLOCK(mp) + 1;
  74        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  75                return XFS_FIBT_BLOCK(mp) + 1;
  76        return XFS_IBT_BLOCK(mp) + 1;
  77}
  78
  79/*
  80 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  81 * AGF buffer (PV 947395), we place constraints on the relationship among
  82 * actual allocations for data blocks, freelist blocks, and potential file data
  83 * bmap btree blocks. However, these restrictions may result in no actual space
  84 * allocated for a delayed extent, for example, a data block in a certain AG is
  85 * allocated but there is no additional block for the additional bmap btree
  86 * block due to a split of the bmap btree of the file. The result of this may
  87 * lead to an infinite loop when the file gets flushed to disk and all delayed
  88 * extents need to be actually allocated. To get around this, we explicitly set
  89 * aside a few blocks which will not be reserved in delayed allocation.
  90 *
  91 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
  92 * potential split of the file's bmap btree.
  93 */
  94unsigned int
  95xfs_alloc_set_aside(
  96        struct xfs_mount        *mp)
  97{
  98        return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
  99}
 100
 101/*
 102 * When deciding how much space to allocate out of an AG, we limit the
 103 * allocation maximum size to the size the AG. However, we cannot use all the
 104 * blocks in the AG - some are permanently used by metadata. These
 105 * blocks are generally:
 106 *      - the AG superblock, AGF, AGI and AGFL
 107 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 108 *        the AGI free inode and rmap btree root blocks.
 109 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 110 *      - the rmapbt root block
 111 *
 112 * The AG headers are sector sized, so the amount of space they take up is
 113 * dependent on filesystem geometry. The others are all single blocks.
 114 */
 115unsigned int
 116xfs_alloc_ag_max_usable(
 117        struct xfs_mount        *mp)
 118{
 119        unsigned int            blocks;
 120
 121        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 122        blocks += XFS_ALLOC_AGFL_RESERVE;
 123        blocks += 3;                    /* AGF, AGI btree root blocks */
 124        if (xfs_sb_version_hasfinobt(&mp->m_sb))
 125                blocks++;               /* finobt root block */
 126        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 127                blocks++;               /* rmap root block */
 128        if (xfs_sb_version_hasreflink(&mp->m_sb))
 129                blocks++;               /* refcount root block */
 130
 131        return mp->m_sb.sb_agblocks - blocks;
 132}
 133
 134/*
 135 * Lookup the record equal to [bno, len] in the btree given by cur.
 136 */
 137STATIC int                              /* error */
 138xfs_alloc_lookup_eq(
 139        struct xfs_btree_cur    *cur,   /* btree cursor */
 140        xfs_agblock_t           bno,    /* starting block of extent */
 141        xfs_extlen_t            len,    /* length of extent */
 142        int                     *stat)  /* success/failure */
 143{
 144        cur->bc_rec.a.ar_startblock = bno;
 145        cur->bc_rec.a.ar_blockcount = len;
 146        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 147}
 148
 149/*
 150 * Lookup the first record greater than or equal to [bno, len]
 151 * in the btree given by cur.
 152 */
 153int                             /* error */
 154xfs_alloc_lookup_ge(
 155        struct xfs_btree_cur    *cur,   /* btree cursor */
 156        xfs_agblock_t           bno,    /* starting block of extent */
 157        xfs_extlen_t            len,    /* length of extent */
 158        int                     *stat)  /* success/failure */
 159{
 160        cur->bc_rec.a.ar_startblock = bno;
 161        cur->bc_rec.a.ar_blockcount = len;
 162        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 163}
 164
 165/*
 166 * Lookup the first record less than or equal to [bno, len]
 167 * in the btree given by cur.
 168 */
 169static int                              /* error */
 170xfs_alloc_lookup_le(
 171        struct xfs_btree_cur    *cur,   /* btree cursor */
 172        xfs_agblock_t           bno,    /* starting block of extent */
 173        xfs_extlen_t            len,    /* length of extent */
 174        int                     *stat)  /* success/failure */
 175{
 176        cur->bc_rec.a.ar_startblock = bno;
 177        cur->bc_rec.a.ar_blockcount = len;
 178        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 179}
 180
 181/*
 182 * Update the record referred to by cur to the value given
 183 * by [bno, len].
 184 * This either works (return 0) or gets an EFSCORRUPTED error.
 185 */
 186STATIC int                              /* error */
 187xfs_alloc_update(
 188        struct xfs_btree_cur    *cur,   /* btree cursor */
 189        xfs_agblock_t           bno,    /* starting block of extent */
 190        xfs_extlen_t            len)    /* length of extent */
 191{
 192        union xfs_btree_rec     rec;
 193
 194        rec.alloc.ar_startblock = cpu_to_be32(bno);
 195        rec.alloc.ar_blockcount = cpu_to_be32(len);
 196        return xfs_btree_update(cur, &rec);
 197}
 198
 199/*
 200 * Get the data from the pointed-to record.
 201 */
 202int                                     /* error */
 203xfs_alloc_get_rec(
 204        struct xfs_btree_cur    *cur,   /* btree cursor */
 205        xfs_agblock_t           *bno,   /* output: starting block of extent */
 206        xfs_extlen_t            *len,   /* output: length of extent */
 207        int                     *stat)  /* output: success/failure */
 208{
 209        union xfs_btree_rec     *rec;
 210        int                     error;
 211
 212        error = xfs_btree_get_rec(cur, &rec, stat);
 213        if (!error && *stat == 1) {
 214                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 215                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 216        }
 217        return error;
 218}
 219
 220/*
 221 * Compute aligned version of the found extent.
 222 * Takes alignment and min length into account.
 223 */
 224STATIC bool
 225xfs_alloc_compute_aligned(
 226        xfs_alloc_arg_t *args,          /* allocation argument structure */
 227        xfs_agblock_t   foundbno,       /* starting block in found extent */
 228        xfs_extlen_t    foundlen,       /* length in found extent */
 229        xfs_agblock_t   *resbno,        /* result block number */
 230        xfs_extlen_t    *reslen,        /* result length */
 231        unsigned        *busy_gen)
 232{
 233        xfs_agblock_t   bno = foundbno;
 234        xfs_extlen_t    len = foundlen;
 235        xfs_extlen_t    diff;
 236        bool            busy;
 237
 238        /* Trim busy sections out of found extent */
 239        busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
 240
 241        /*
 242         * If we have a largish extent that happens to start before min_agbno,
 243         * see if we can shift it into range...
 244         */
 245        if (bno < args->min_agbno && bno + len > args->min_agbno) {
 246                diff = args->min_agbno - bno;
 247                if (len > diff) {
 248                        bno += diff;
 249                        len -= diff;
 250                }
 251        }
 252
 253        if (args->alignment > 1 && len >= args->minlen) {
 254                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 255
 256                diff = aligned_bno - bno;
 257
 258                *resbno = aligned_bno;
 259                *reslen = diff >= len ? 0 : len - diff;
 260        } else {
 261                *resbno = bno;
 262                *reslen = len;
 263        }
 264
 265        return busy;
 266}
 267
 268/*
 269 * Compute best start block and diff for "near" allocations.
 270 * freelen >= wantlen already checked by caller.
 271 */
 272STATIC xfs_extlen_t                     /* difference value (absolute) */
 273xfs_alloc_compute_diff(
 274        xfs_agblock_t   wantbno,        /* target starting block */
 275        xfs_extlen_t    wantlen,        /* target length */
 276        xfs_extlen_t    alignment,      /* target alignment */
 277        int             datatype,       /* are we allocating data? */
 278        xfs_agblock_t   freebno,        /* freespace's starting block */
 279        xfs_extlen_t    freelen,        /* freespace's length */
 280        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 281{
 282        xfs_agblock_t   freeend;        /* end of freespace extent */
 283        xfs_agblock_t   newbno1;        /* return block number */
 284        xfs_agblock_t   newbno2;        /* other new block number */
 285        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 286        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 287        xfs_agblock_t   wantend;        /* end of target extent */
 288        bool            userdata = xfs_alloc_is_userdata(datatype);
 289
 290        ASSERT(freelen >= wantlen);
 291        freeend = freebno + freelen;
 292        wantend = wantbno + wantlen;
 293        /*
 294         * We want to allocate from the start of a free extent if it is past
 295         * the desired block or if we are allocating user data and the free
 296         * extent is before desired block. The second case is there to allow
 297         * for contiguous allocation from the remaining free space if the file
 298         * grows in the short term.
 299         */
 300        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 301                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 302                        newbno1 = NULLAGBLOCK;
 303        } else if (freeend >= wantend && alignment > 1) {
 304                newbno1 = roundup(wantbno, alignment);
 305                newbno2 = newbno1 - alignment;
 306                if (newbno1 >= freeend)
 307                        newbno1 = NULLAGBLOCK;
 308                else
 309                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 310                if (newbno2 < freebno)
 311                        newbno2 = NULLAGBLOCK;
 312                else
 313                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 314                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 315                        if (newlen1 < newlen2 ||
 316                            (newlen1 == newlen2 &&
 317                             XFS_ABSDIFF(newbno1, wantbno) >
 318                             XFS_ABSDIFF(newbno2, wantbno)))
 319                                newbno1 = newbno2;
 320                } else if (newbno2 != NULLAGBLOCK)
 321                        newbno1 = newbno2;
 322        } else if (freeend >= wantend) {
 323                newbno1 = wantbno;
 324        } else if (alignment > 1) {
 325                newbno1 = roundup(freeend - wantlen, alignment);
 326                if (newbno1 > freeend - wantlen &&
 327                    newbno1 - alignment >= freebno)
 328                        newbno1 -= alignment;
 329                else if (newbno1 >= freeend)
 330                        newbno1 = NULLAGBLOCK;
 331        } else
 332                newbno1 = freeend - wantlen;
 333        *newbnop = newbno1;
 334        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 335}
 336
 337/*
 338 * Fix up the length, based on mod and prod.
 339 * len should be k * prod + mod for some k.
 340 * If len is too small it is returned unchanged.
 341 * If len hits maxlen it is left alone.
 342 */
 343STATIC void
 344xfs_alloc_fix_len(
 345        xfs_alloc_arg_t *args)          /* allocation argument structure */
 346{
 347        xfs_extlen_t    k;
 348        xfs_extlen_t    rlen;
 349
 350        ASSERT(args->mod < args->prod);
 351        rlen = args->len;
 352        ASSERT(rlen >= args->minlen);
 353        ASSERT(rlen <= args->maxlen);
 354        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 355            (args->mod == 0 && rlen < args->prod))
 356                return;
 357        k = rlen % args->prod;
 358        if (k == args->mod)
 359                return;
 360        if (k > args->mod)
 361                rlen = rlen - (k - args->mod);
 362        else
 363                rlen = rlen - args->prod + (args->mod - k);
 364        /* casts to (int) catch length underflows */
 365        if ((int)rlen < (int)args->minlen)
 366                return;
 367        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 368        ASSERT(rlen % args->prod == args->mod);
 369        ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
 370                rlen + args->minleft);
 371        args->len = rlen;
 372}
 373
 374/*
 375 * Update the two btrees, logically removing from freespace the extent
 376 * starting at rbno, rlen blocks.  The extent is contained within the
 377 * actual (current) free extent fbno for flen blocks.
 378 * Flags are passed in indicating whether the cursors are set to the
 379 * relevant records.
 380 */
 381STATIC int                              /* error code */
 382xfs_alloc_fixup_trees(
 383        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 384        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 385        xfs_agblock_t   fbno,           /* starting block of free extent */
 386        xfs_extlen_t    flen,           /* length of free extent */
 387        xfs_agblock_t   rbno,           /* starting block of returned extent */
 388        xfs_extlen_t    rlen,           /* length of returned extent */
 389        int             flags)          /* flags, XFSA_FIXUP_... */
 390{
 391        int             error;          /* error code */
 392        int             i;              /* operation results */
 393        xfs_agblock_t   nfbno1;         /* first new free startblock */
 394        xfs_agblock_t   nfbno2;         /* second new free startblock */
 395        xfs_extlen_t    nflen1=0;       /* first new free length */
 396        xfs_extlen_t    nflen2=0;       /* second new free length */
 397        struct xfs_mount *mp;
 398
 399        mp = cnt_cur->bc_mp;
 400
 401        /*
 402         * Look up the record in the by-size tree if necessary.
 403         */
 404        if (flags & XFSA_FIXUP_CNT_OK) {
 405#ifdef DEBUG
 406                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 407                        return error;
 408                XFS_WANT_CORRUPTED_RETURN(mp,
 409                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 410#endif
 411        } else {
 412                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 413                        return error;
 414                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 415        }
 416        /*
 417         * Look up the record in the by-block tree if necessary.
 418         */
 419        if (flags & XFSA_FIXUP_BNO_OK) {
 420#ifdef DEBUG
 421                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 422                        return error;
 423                XFS_WANT_CORRUPTED_RETURN(mp,
 424                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 425#endif
 426        } else {
 427                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 428                        return error;
 429                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 430        }
 431
 432#ifdef DEBUG
 433        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 434                struct xfs_btree_block  *bnoblock;
 435                struct xfs_btree_block  *cntblock;
 436
 437                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 438                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 439
 440                XFS_WANT_CORRUPTED_RETURN(mp,
 441                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 442        }
 443#endif
 444
 445        /*
 446         * Deal with all four cases: the allocated record is contained
 447         * within the freespace record, so we can have new freespace
 448         * at either (or both) end, or no freespace remaining.
 449         */
 450        if (rbno == fbno && rlen == flen)
 451                nfbno1 = nfbno2 = NULLAGBLOCK;
 452        else if (rbno == fbno) {
 453                nfbno1 = rbno + rlen;
 454                nflen1 = flen - rlen;
 455                nfbno2 = NULLAGBLOCK;
 456        } else if (rbno + rlen == fbno + flen) {
 457                nfbno1 = fbno;
 458                nflen1 = flen - rlen;
 459                nfbno2 = NULLAGBLOCK;
 460        } else {
 461                nfbno1 = fbno;
 462                nflen1 = rbno - fbno;
 463                nfbno2 = rbno + rlen;
 464                nflen2 = (fbno + flen) - nfbno2;
 465        }
 466        /*
 467         * Delete the entry from the by-size btree.
 468         */
 469        if ((error = xfs_btree_delete(cnt_cur, &i)))
 470                return error;
 471        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 472        /*
 473         * Add new by-size btree entry(s).
 474         */
 475        if (nfbno1 != NULLAGBLOCK) {
 476                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 477                        return error;
 478                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 479                if ((error = xfs_btree_insert(cnt_cur, &i)))
 480                        return error;
 481                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 482        }
 483        if (nfbno2 != NULLAGBLOCK) {
 484                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 485                        return error;
 486                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 487                if ((error = xfs_btree_insert(cnt_cur, &i)))
 488                        return error;
 489                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 490        }
 491        /*
 492         * Fix up the by-block btree entry(s).
 493         */
 494        if (nfbno1 == NULLAGBLOCK) {
 495                /*
 496                 * No remaining freespace, just delete the by-block tree entry.
 497                 */
 498                if ((error = xfs_btree_delete(bno_cur, &i)))
 499                        return error;
 500                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 501        } else {
 502                /*
 503                 * Update the by-block entry to start later|be shorter.
 504                 */
 505                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 506                        return error;
 507        }
 508        if (nfbno2 != NULLAGBLOCK) {
 509                /*
 510                 * 2 resulting free entries, need to add one.
 511                 */
 512                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 513                        return error;
 514                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 515                if ((error = xfs_btree_insert(bno_cur, &i)))
 516                        return error;
 517                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 518        }
 519        return 0;
 520}
 521
 522static bool
 523xfs_agfl_verify(
 524        struct xfs_buf  *bp)
 525{
 526        struct xfs_mount *mp = bp->b_target->bt_mount;
 527        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 528        int             i;
 529
 530        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
 531                return false;
 532        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 533                return false;
 534        /*
 535         * during growfs operations, the perag is not fully initialised,
 536         * so we can't use it for any useful checking. growfs ensures we can't
 537         * use it by using uncached buffers that don't have the perag attached
 538         * so we can detect and avoid this problem.
 539         */
 540        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 541                return false;
 542
 543        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 544                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 545                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 546                        return false;
 547        }
 548
 549        return xfs_log_check_lsn(mp,
 550                                 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
 551}
 552
 553static void
 554xfs_agfl_read_verify(
 555        struct xfs_buf  *bp)
 556{
 557        struct xfs_mount *mp = bp->b_target->bt_mount;
 558
 559        /*
 560         * There is no verification of non-crc AGFLs because mkfs does not
 561         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 562         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 563         * can't verify just those entries are valid.
 564         */
 565        if (!xfs_sb_version_hascrc(&mp->m_sb))
 566                return;
 567
 568        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 569                xfs_buf_ioerror(bp, -EFSBADCRC);
 570        else if (!xfs_agfl_verify(bp))
 571                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 572
 573        if (bp->b_error)
 574                xfs_verifier_error(bp);
 575}
 576
 577static void
 578xfs_agfl_write_verify(
 579        struct xfs_buf  *bp)
 580{
 581        struct xfs_mount *mp = bp->b_target->bt_mount;
 582        struct xfs_buf_log_item *bip = bp->b_fspriv;
 583
 584        /* no verification of non-crc AGFLs */
 585        if (!xfs_sb_version_hascrc(&mp->m_sb))
 586                return;
 587
 588        if (!xfs_agfl_verify(bp)) {
 589                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 590                xfs_verifier_error(bp);
 591                return;
 592        }
 593
 594        if (bip)
 595                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 596
 597        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 598}
 599
 600const struct xfs_buf_ops xfs_agfl_buf_ops = {
 601        .name = "xfs_agfl",
 602        .verify_read = xfs_agfl_read_verify,
 603        .verify_write = xfs_agfl_write_verify,
 604};
 605
 606/*
 607 * Read in the allocation group free block array.
 608 */
 609int                                     /* error */
 610xfs_alloc_read_agfl(
 611        xfs_mount_t     *mp,            /* mount point structure */
 612        xfs_trans_t     *tp,            /* transaction pointer */
 613        xfs_agnumber_t  agno,           /* allocation group number */
 614        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 615{
 616        xfs_buf_t       *bp;            /* return value */
 617        int             error;
 618
 619        ASSERT(agno != NULLAGNUMBER);
 620        error = xfs_trans_read_buf(
 621                        mp, tp, mp->m_ddev_targp,
 622                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 623                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 624        if (error)
 625                return error;
 626        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 627        *bpp = bp;
 628        return 0;
 629}
 630
 631STATIC int
 632xfs_alloc_update_counters(
 633        struct xfs_trans        *tp,
 634        struct xfs_perag        *pag,
 635        struct xfs_buf          *agbp,
 636        long                    len)
 637{
 638        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 639
 640        pag->pagf_freeblks += len;
 641        be32_add_cpu(&agf->agf_freeblks, len);
 642
 643        xfs_trans_agblocks_delta(tp, len);
 644        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 645                     be32_to_cpu(agf->agf_length)))
 646                return -EFSCORRUPTED;
 647
 648        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 649        return 0;
 650}
 651
 652/*
 653 * Allocation group level functions.
 654 */
 655
 656/*
 657 * Allocate a variable extent in the allocation group agno.
 658 * Type and bno are used to determine where in the allocation group the
 659 * extent will start.
 660 * Extent's length (returned in *len) will be between minlen and maxlen,
 661 * and of the form k * prod + mod unless there's nothing that large.
 662 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 663 */
 664STATIC int                      /* error */
 665xfs_alloc_ag_vextent(
 666        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 667{
 668        int             error=0;
 669
 670        ASSERT(args->minlen > 0);
 671        ASSERT(args->maxlen > 0);
 672        ASSERT(args->minlen <= args->maxlen);
 673        ASSERT(args->mod < args->prod);
 674        ASSERT(args->alignment > 0);
 675
 676        /*
 677         * Branch to correct routine based on the type.
 678         */
 679        args->wasfromfl = 0;
 680        switch (args->type) {
 681        case XFS_ALLOCTYPE_THIS_AG:
 682                error = xfs_alloc_ag_vextent_size(args);
 683                break;
 684        case XFS_ALLOCTYPE_NEAR_BNO:
 685                error = xfs_alloc_ag_vextent_near(args);
 686                break;
 687        case XFS_ALLOCTYPE_THIS_BNO:
 688                error = xfs_alloc_ag_vextent_exact(args);
 689                break;
 690        default:
 691                ASSERT(0);
 692                /* NOTREACHED */
 693        }
 694
 695        if (error || args->agbno == NULLAGBLOCK)
 696                return error;
 697
 698        ASSERT(args->len >= args->minlen);
 699        ASSERT(args->len <= args->maxlen);
 700        ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
 701        ASSERT(args->agbno % args->alignment == 0);
 702
 703        /* if not file data, insert new block into the reverse map btree */
 704        if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
 705                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 706                                       args->agbno, args->len, &args->oinfo);
 707                if (error)
 708                        return error;
 709        }
 710
 711        if (!args->wasfromfl) {
 712                error = xfs_alloc_update_counters(args->tp, args->pag,
 713                                                  args->agbp,
 714                                                  -((long)(args->len)));
 715                if (error)
 716                        return error;
 717
 718                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 719                                              args->agbno, args->len));
 720        }
 721
 722        xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
 723
 724        XFS_STATS_INC(args->mp, xs_allocx);
 725        XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 726        return error;
 727}
 728
 729/*
 730 * Allocate a variable extent at exactly agno/bno.
 731 * Extent's length (returned in *len) will be between minlen and maxlen,
 732 * and of the form k * prod + mod unless there's nothing that large.
 733 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 734 */
 735STATIC int                      /* error */
 736xfs_alloc_ag_vextent_exact(
 737        xfs_alloc_arg_t *args)  /* allocation argument structure */
 738{
 739        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 740        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 741        int             error;
 742        xfs_agblock_t   fbno;   /* start block of found extent */
 743        xfs_extlen_t    flen;   /* length of found extent */
 744        xfs_agblock_t   tbno;   /* start block of busy extent */
 745        xfs_extlen_t    tlen;   /* length of busy extent */
 746        xfs_agblock_t   tend;   /* end block of busy extent */
 747        int             i;      /* success/failure of operation */
 748        unsigned        busy_gen;
 749
 750        ASSERT(args->alignment == 1);
 751
 752        /*
 753         * Allocate/initialize a cursor for the by-number freespace btree.
 754         */
 755        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 756                                          args->agno, XFS_BTNUM_BNO);
 757
 758        /*
 759         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 760         * Look for the closest free block <= bno, it must contain bno
 761         * if any free block does.
 762         */
 763        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 764        if (error)
 765                goto error0;
 766        if (!i)
 767                goto not_found;
 768
 769        /*
 770         * Grab the freespace record.
 771         */
 772        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 773        if (error)
 774                goto error0;
 775        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 776        ASSERT(fbno <= args->agbno);
 777
 778        /*
 779         * Check for overlapping busy extents.
 780         */
 781        tbno = fbno;
 782        tlen = flen;
 783        xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 784
 785        /*
 786         * Give up if the start of the extent is busy, or the freespace isn't
 787         * long enough for the minimum request.
 788         */
 789        if (tbno > args->agbno)
 790                goto not_found;
 791        if (tlen < args->minlen)
 792                goto not_found;
 793        tend = tbno + tlen;
 794        if (tend < args->agbno + args->minlen)
 795                goto not_found;
 796
 797        /*
 798         * End of extent will be smaller of the freespace end and the
 799         * maximal requested end.
 800         *
 801         * Fix the length according to mod and prod if given.
 802         */
 803        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 804                                                - args->agbno;
 805        xfs_alloc_fix_len(args);
 806        ASSERT(args->agbno + args->len <= tend);
 807
 808        /*
 809         * We are allocating agbno for args->len
 810         * Allocate/initialize a cursor for the by-size btree.
 811         */
 812        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 813                args->agno, XFS_BTNUM_CNT);
 814        ASSERT(args->agbno + args->len <=
 815                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 816        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 817                                      args->len, XFSA_FIXUP_BNO_OK);
 818        if (error) {
 819                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 820                goto error0;
 821        }
 822
 823        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 824        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 825
 826        args->wasfromfl = 0;
 827        trace_xfs_alloc_exact_done(args);
 828        return 0;
 829
 830not_found:
 831        /* Didn't find it, return null. */
 832        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 833        args->agbno = NULLAGBLOCK;
 834        trace_xfs_alloc_exact_notfound(args);
 835        return 0;
 836
 837error0:
 838        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 839        trace_xfs_alloc_exact_error(args);
 840        return error;
 841}
 842
 843/*
 844 * Search the btree in a given direction via the search cursor and compare
 845 * the records found against the good extent we've already found.
 846 */
 847STATIC int
 848xfs_alloc_find_best_extent(
 849        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 850        struct xfs_btree_cur    **gcur, /* good cursor */
 851        struct xfs_btree_cur    **scur, /* searching cursor */
 852        xfs_agblock_t           gdiff,  /* difference for search comparison */
 853        xfs_agblock_t           *sbno,  /* extent found by search */
 854        xfs_extlen_t            *slen,  /* extent length */
 855        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 856        xfs_extlen_t            *slena, /* aligned extent length */
 857        int                     dir)    /* 0 = search right, 1 = search left */
 858{
 859        xfs_agblock_t           new;
 860        xfs_agblock_t           sdiff;
 861        int                     error;
 862        int                     i;
 863        unsigned                busy_gen;
 864
 865        /* The good extent is perfect, no need to  search. */
 866        if (!gdiff)
 867                goto out_use_good;
 868
 869        /*
 870         * Look until we find a better one, run out of space or run off the end.
 871         */
 872        do {
 873                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 874                if (error)
 875                        goto error0;
 876                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 877                xfs_alloc_compute_aligned(args, *sbno, *slen,
 878                                sbnoa, slena, &busy_gen);
 879
 880                /*
 881                 * The good extent is closer than this one.
 882                 */
 883                if (!dir) {
 884                        if (*sbnoa > args->max_agbno)
 885                                goto out_use_good;
 886                        if (*sbnoa >= args->agbno + gdiff)
 887                                goto out_use_good;
 888                } else {
 889                        if (*sbnoa < args->min_agbno)
 890                                goto out_use_good;
 891                        if (*sbnoa <= args->agbno - gdiff)
 892                                goto out_use_good;
 893                }
 894
 895                /*
 896                 * Same distance, compare length and pick the best.
 897                 */
 898                if (*slena >= args->minlen) {
 899                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 900                        xfs_alloc_fix_len(args);
 901
 902                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 903                                                       args->alignment,
 904                                                       args->datatype, *sbnoa,
 905                                                       *slena, &new);
 906
 907                        /*
 908                         * Choose closer size and invalidate other cursor.
 909                         */
 910                        if (sdiff < gdiff)
 911                                goto out_use_search;
 912                        goto out_use_good;
 913                }
 914
 915                if (!dir)
 916                        error = xfs_btree_increment(*scur, 0, &i);
 917                else
 918                        error = xfs_btree_decrement(*scur, 0, &i);
 919                if (error)
 920                        goto error0;
 921        } while (i);
 922
 923out_use_good:
 924        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 925        *scur = NULL;
 926        return 0;
 927
 928out_use_search:
 929        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 930        *gcur = NULL;
 931        return 0;
 932
 933error0:
 934        /* caller invalidates cursors */
 935        return error;
 936}
 937
 938/*
 939 * Allocate a variable extent near bno in the allocation group agno.
 940 * Extent's length (returned in len) will be between minlen and maxlen,
 941 * and of the form k * prod + mod unless there's nothing that large.
 942 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 943 */
 944STATIC int                              /* error */
 945xfs_alloc_ag_vextent_near(
 946        xfs_alloc_arg_t *args)          /* allocation argument structure */
 947{
 948        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 949        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 950        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 951        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 952        xfs_agblock_t   gtbnoa;         /* aligned ... */
 953        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 954        xfs_extlen_t    gtlen;          /* length of right side entry */
 955        xfs_extlen_t    gtlena;         /* aligned ... */
 956        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 957        int             error;          /* error code */
 958        int             i;              /* result code, temporary */
 959        int             j;              /* result code, temporary */
 960        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 961        xfs_agblock_t   ltbnoa;         /* aligned ... */
 962        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 963        xfs_extlen_t    ltlen;          /* length of left side entry */
 964        xfs_extlen_t    ltlena;         /* aligned ... */
 965        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 966        xfs_extlen_t    rlen;           /* length of returned extent */
 967        bool            busy;
 968        unsigned        busy_gen;
 969#ifdef DEBUG
 970        /*
 971         * Randomly don't execute the first algorithm.
 972         */
 973        int             dofirst;        /* set to do first algorithm */
 974
 975        dofirst = prandom_u32() & 1;
 976#endif
 977
 978        /* handle unitialized agbno range so caller doesn't have to */
 979        if (!args->min_agbno && !args->max_agbno)
 980                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 981        ASSERT(args->min_agbno <= args->max_agbno);
 982
 983        /* clamp agbno to the range if it's outside */
 984        if (args->agbno < args->min_agbno)
 985                args->agbno = args->min_agbno;
 986        if (args->agbno > args->max_agbno)
 987                args->agbno = args->max_agbno;
 988
 989restart:
 990        bno_cur_lt = NULL;
 991        bno_cur_gt = NULL;
 992        ltlen = 0;
 993        gtlena = 0;
 994        ltlena = 0;
 995        busy = false;
 996
 997        /*
 998         * Get a cursor for the by-size btree.
 999         */
1000        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1001                args->agno, XFS_BTNUM_CNT);
1002
1003        /*
1004         * See if there are any free extents as big as maxlen.
1005         */
1006        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1007                goto error0;
1008        /*
1009         * If none, then pick up the last entry in the tree unless the
1010         * tree is empty.
1011         */
1012        if (!i) {
1013                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1014                                &ltlen, &i)))
1015                        goto error0;
1016                if (i == 0 || ltlen == 0) {
1017                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1018                        trace_xfs_alloc_near_noentry(args);
1019                        return 0;
1020                }
1021                ASSERT(i == 1);
1022        }
1023        args->wasfromfl = 0;
1024
1025        /*
1026         * First algorithm.
1027         * If the requested extent is large wrt the freespaces available
1028         * in this a.g., then the cursor will be pointing to a btree entry
1029         * near the right edge of the tree.  If it's in the last btree leaf
1030         * block, then we just examine all the entries in that block
1031         * that are big enough, and pick the best one.
1032         * This is written as a while loop so we can break out of it,
1033         * but we never loop back to the top.
1034         */
1035        while (xfs_btree_islastblock(cnt_cur, 0)) {
1036                xfs_extlen_t    bdiff;
1037                int             besti=0;
1038                xfs_extlen_t    blen=0;
1039                xfs_agblock_t   bnew=0;
1040
1041#ifdef DEBUG
1042                if (dofirst)
1043                        break;
1044#endif
1045                /*
1046                 * Start from the entry that lookup found, sequence through
1047                 * all larger free blocks.  If we're actually pointing at a
1048                 * record smaller than maxlen, go to the start of this block,
1049                 * and skip all those smaller than minlen.
1050                 */
1051                if (ltlen || args->alignment > 1) {
1052                        cnt_cur->bc_ptrs[0] = 1;
1053                        do {
1054                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1055                                                &ltlen, &i)))
1056                                        goto error0;
1057                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1058                                if (ltlen >= args->minlen)
1059                                        break;
1060                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1061                                        goto error0;
1062                        } while (i);
1063                        ASSERT(ltlen >= args->minlen);
1064                        if (!i)
1065                                break;
1066                }
1067                i = cnt_cur->bc_ptrs[0];
1068                for (j = 1, blen = 0, bdiff = 0;
1069                     !error && j && (blen < args->maxlen || bdiff > 0);
1070                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1071                        /*
1072                         * For each entry, decide if it's better than
1073                         * the previous best entry.
1074                         */
1075                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1076                                goto error0;
1077                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1078                        busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1079                                        &ltbnoa, &ltlena, &busy_gen);
1080                        if (ltlena < args->minlen)
1081                                continue;
1082                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1083                                continue;
1084                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1085                        xfs_alloc_fix_len(args);
1086                        ASSERT(args->len >= args->minlen);
1087                        if (args->len < blen)
1088                                continue;
1089                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1090                                args->alignment, args->datatype, ltbnoa,
1091                                ltlena, &ltnew);
1092                        if (ltnew != NULLAGBLOCK &&
1093                            (args->len > blen || ltdiff < bdiff)) {
1094                                bdiff = ltdiff;
1095                                bnew = ltnew;
1096                                blen = args->len;
1097                                besti = cnt_cur->bc_ptrs[0];
1098                        }
1099                }
1100                /*
1101                 * It didn't work.  We COULD be in a case where
1102                 * there's a good record somewhere, so try again.
1103                 */
1104                if (blen == 0)
1105                        break;
1106                /*
1107                 * Point at the best entry, and retrieve it again.
1108                 */
1109                cnt_cur->bc_ptrs[0] = besti;
1110                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1111                        goto error0;
1112                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1113                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1114                args->len = blen;
1115
1116                /*
1117                 * We are allocating starting at bnew for blen blocks.
1118                 */
1119                args->agbno = bnew;
1120                ASSERT(bnew >= ltbno);
1121                ASSERT(bnew + blen <= ltbno + ltlen);
1122                /*
1123                 * Set up a cursor for the by-bno tree.
1124                 */
1125                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1126                        args->agbp, args->agno, XFS_BTNUM_BNO);
1127                /*
1128                 * Fix up the btree entries.
1129                 */
1130                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1131                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1132                        goto error0;
1133                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1134                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1135
1136                trace_xfs_alloc_near_first(args);
1137                return 0;
1138        }
1139        /*
1140         * Second algorithm.
1141         * Search in the by-bno tree to the left and to the right
1142         * simultaneously, until in each case we find a space big enough,
1143         * or run into the edge of the tree.  When we run into the edge,
1144         * we deallocate that cursor.
1145         * If both searches succeed, we compare the two spaces and pick
1146         * the better one.
1147         * With alignment, it's possible for both to fail; the upper
1148         * level algorithm that picks allocation groups for allocations
1149         * is not supposed to do this.
1150         */
1151        /*
1152         * Allocate and initialize the cursor for the leftward search.
1153         */
1154        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1155                args->agno, XFS_BTNUM_BNO);
1156        /*
1157         * Lookup <= bno to find the leftward search's starting point.
1158         */
1159        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1160                goto error0;
1161        if (!i) {
1162                /*
1163                 * Didn't find anything; use this cursor for the rightward
1164                 * search.
1165                 */
1166                bno_cur_gt = bno_cur_lt;
1167                bno_cur_lt = NULL;
1168        }
1169        /*
1170         * Found something.  Duplicate the cursor for the rightward search.
1171         */
1172        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1173                goto error0;
1174        /*
1175         * Increment the cursor, so we will point at the entry just right
1176         * of the leftward entry if any, or to the leftmost entry.
1177         */
1178        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1179                goto error0;
1180        if (!i) {
1181                /*
1182                 * It failed, there are no rightward entries.
1183                 */
1184                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1185                bno_cur_gt = NULL;
1186        }
1187        /*
1188         * Loop going left with the leftward cursor, right with the
1189         * rightward cursor, until either both directions give up or
1190         * we find an entry at least as big as minlen.
1191         */
1192        do {
1193                if (bno_cur_lt) {
1194                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1195                                goto error0;
1196                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1197                        busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1198                                        &ltbnoa, &ltlena, &busy_gen);
1199                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1200                                break;
1201                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1202                                goto error0;
1203                        if (!i || ltbnoa < args->min_agbno) {
1204                                xfs_btree_del_cursor(bno_cur_lt,
1205                                                     XFS_BTREE_NOERROR);
1206                                bno_cur_lt = NULL;
1207                        }
1208                }
1209                if (bno_cur_gt) {
1210                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1211                                goto error0;
1212                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1213                        busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1214                                        &gtbnoa, &gtlena, &busy_gen);
1215                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1216                                break;
1217                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1218                                goto error0;
1219                        if (!i || gtbnoa > args->max_agbno) {
1220                                xfs_btree_del_cursor(bno_cur_gt,
1221                                                     XFS_BTREE_NOERROR);
1222                                bno_cur_gt = NULL;
1223                        }
1224                }
1225        } while (bno_cur_lt || bno_cur_gt);
1226
1227        /*
1228         * Got both cursors still active, need to find better entry.
1229         */
1230        if (bno_cur_lt && bno_cur_gt) {
1231                if (ltlena >= args->minlen) {
1232                        /*
1233                         * Left side is good, look for a right side entry.
1234                         */
1235                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1236                        xfs_alloc_fix_len(args);
1237                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1238                                args->alignment, args->datatype, ltbnoa,
1239                                ltlena, &ltnew);
1240
1241                        error = xfs_alloc_find_best_extent(args,
1242                                                &bno_cur_lt, &bno_cur_gt,
1243                                                ltdiff, &gtbno, &gtlen,
1244                                                &gtbnoa, &gtlena,
1245                                                0 /* search right */);
1246                } else {
1247                        ASSERT(gtlena >= args->minlen);
1248
1249                        /*
1250                         * Right side is good, look for a left side entry.
1251                         */
1252                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1253                        xfs_alloc_fix_len(args);
1254                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1255                                args->alignment, args->datatype, gtbnoa,
1256                                gtlena, &gtnew);
1257
1258                        error = xfs_alloc_find_best_extent(args,
1259                                                &bno_cur_gt, &bno_cur_lt,
1260                                                gtdiff, &ltbno, &ltlen,
1261                                                &ltbnoa, &ltlena,
1262                                                1 /* search left */);
1263                }
1264
1265                if (error)
1266                        goto error0;
1267        }
1268
1269        /*
1270         * If we couldn't get anything, give up.
1271         */
1272        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1273                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1274
1275                if (busy) {
1276                        trace_xfs_alloc_near_busy(args);
1277                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1278                        goto restart;
1279                }
1280                trace_xfs_alloc_size_neither(args);
1281                args->agbno = NULLAGBLOCK;
1282                return 0;
1283        }
1284
1285        /*
1286         * At this point we have selected a freespace entry, either to the
1287         * left or to the right.  If it's on the right, copy all the
1288         * useful variables to the "left" set so we only have one
1289         * copy of this code.
1290         */
1291        if (bno_cur_gt) {
1292                bno_cur_lt = bno_cur_gt;
1293                bno_cur_gt = NULL;
1294                ltbno = gtbno;
1295                ltbnoa = gtbnoa;
1296                ltlen = gtlen;
1297                ltlena = gtlena;
1298                j = 1;
1299        } else
1300                j = 0;
1301
1302        /*
1303         * Fix up the length and compute the useful address.
1304         */
1305        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1306        xfs_alloc_fix_len(args);
1307        rlen = args->len;
1308        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1309                                     args->datatype, ltbnoa, ltlena, &ltnew);
1310        ASSERT(ltnew >= ltbno);
1311        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1312        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1313        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1314        args->agbno = ltnew;
1315
1316        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1317                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1318                goto error0;
1319
1320        if (j)
1321                trace_xfs_alloc_near_greater(args);
1322        else
1323                trace_xfs_alloc_near_lesser(args);
1324
1325        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1326        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1327        return 0;
1328
1329 error0:
1330        trace_xfs_alloc_near_error(args);
1331        if (cnt_cur != NULL)
1332                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1333        if (bno_cur_lt != NULL)
1334                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1335        if (bno_cur_gt != NULL)
1336                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1337        return error;
1338}
1339
1340/*
1341 * Allocate a variable extent anywhere in the allocation group agno.
1342 * Extent's length (returned in len) will be between minlen and maxlen,
1343 * and of the form k * prod + mod unless there's nothing that large.
1344 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1345 */
1346STATIC int                              /* error */
1347xfs_alloc_ag_vextent_size(
1348        xfs_alloc_arg_t *args)          /* allocation argument structure */
1349{
1350        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1351        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1352        int             error;          /* error result */
1353        xfs_agblock_t   fbno;           /* start of found freespace */
1354        xfs_extlen_t    flen;           /* length of found freespace */
1355        int             i;              /* temp status variable */
1356        xfs_agblock_t   rbno;           /* returned block number */
1357        xfs_extlen_t    rlen;           /* length of returned extent */
1358        bool            busy;
1359        unsigned        busy_gen;
1360
1361restart:
1362        /*
1363         * Allocate and initialize a cursor for the by-size btree.
1364         */
1365        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1366                args->agno, XFS_BTNUM_CNT);
1367        bno_cur = NULL;
1368        busy = false;
1369
1370        /*
1371         * Look for an entry >= maxlen+alignment-1 blocks.
1372         */
1373        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1374                        args->maxlen + args->alignment - 1, &i)))
1375                goto error0;
1376
1377        /*
1378         * If none then we have to settle for a smaller extent. In the case that
1379         * there are no large extents, this will return the last entry in the
1380         * tree unless the tree is empty. In the case that there are only busy
1381         * large extents, this will return the largest small extent unless there
1382         * are no smaller extents available.
1383         */
1384        if (!i) {
1385                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1386                                                   &fbno, &flen, &i);
1387                if (error)
1388                        goto error0;
1389                if (i == 0 || flen == 0) {
1390                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1391                        trace_xfs_alloc_size_noentry(args);
1392                        return 0;
1393                }
1394                ASSERT(i == 1);
1395                busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1396                                &rlen, &busy_gen);
1397        } else {
1398                /*
1399                 * Search for a non-busy extent that is large enough.
1400                 */
1401                for (;;) {
1402                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1403                        if (error)
1404                                goto error0;
1405                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1406
1407                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1408                                        &rbno, &rlen, &busy_gen);
1409
1410                        if (rlen >= args->maxlen)
1411                                break;
1412
1413                        error = xfs_btree_increment(cnt_cur, 0, &i);
1414                        if (error)
1415                                goto error0;
1416                        if (i == 0) {
1417                                /*
1418                                 * Our only valid extents must have been busy.
1419                                 * Make it unbusy by forcing the log out and
1420                                 * retrying.
1421                                 */
1422                                xfs_btree_del_cursor(cnt_cur,
1423                                                     XFS_BTREE_NOERROR);
1424                                trace_xfs_alloc_size_busy(args);
1425                                xfs_extent_busy_flush(args->mp,
1426                                                        args->pag, busy_gen);
1427                                goto restart;
1428                        }
1429                }
1430        }
1431
1432        /*
1433         * In the first case above, we got the last entry in the
1434         * by-size btree.  Now we check to see if the space hits maxlen
1435         * once aligned; if not, we search left for something better.
1436         * This can't happen in the second case above.
1437         */
1438        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1439        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1440                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1441        if (rlen < args->maxlen) {
1442                xfs_agblock_t   bestfbno;
1443                xfs_extlen_t    bestflen;
1444                xfs_agblock_t   bestrbno;
1445                xfs_extlen_t    bestrlen;
1446
1447                bestrlen = rlen;
1448                bestrbno = rbno;
1449                bestflen = flen;
1450                bestfbno = fbno;
1451                for (;;) {
1452                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1453                                goto error0;
1454                        if (i == 0)
1455                                break;
1456                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1457                                        &i)))
1458                                goto error0;
1459                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1460                        if (flen < bestrlen)
1461                                break;
1462                        busy = xfs_alloc_compute_aligned(args, fbno, flen,
1463                                        &rbno, &rlen, &busy_gen);
1464                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1465                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1466                                (rlen <= flen && rbno + rlen <= fbno + flen),
1467                                error0);
1468                        if (rlen > bestrlen) {
1469                                bestrlen = rlen;
1470                                bestrbno = rbno;
1471                                bestflen = flen;
1472                                bestfbno = fbno;
1473                                if (rlen == args->maxlen)
1474                                        break;
1475                        }
1476                }
1477                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1478                                &i)))
1479                        goto error0;
1480                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1481                rlen = bestrlen;
1482                rbno = bestrbno;
1483                flen = bestflen;
1484                fbno = bestfbno;
1485        }
1486        args->wasfromfl = 0;
1487        /*
1488         * Fix up the length.
1489         */
1490        args->len = rlen;
1491        if (rlen < args->minlen) {
1492                if (busy) {
1493                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1494                        trace_xfs_alloc_size_busy(args);
1495                        xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1496                        goto restart;
1497                }
1498                goto out_nominleft;
1499        }
1500        xfs_alloc_fix_len(args);
1501
1502        rlen = args->len;
1503        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1504        /*
1505         * Allocate and initialize a cursor for the by-block tree.
1506         */
1507        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1508                args->agno, XFS_BTNUM_BNO);
1509        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1510                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1511                goto error0;
1512        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1513        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1514        cnt_cur = bno_cur = NULL;
1515        args->len = rlen;
1516        args->agbno = rbno;
1517        XFS_WANT_CORRUPTED_GOTO(args->mp,
1518                args->agbno + args->len <=
1519                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1520                error0);
1521        trace_xfs_alloc_size_done(args);
1522        return 0;
1523
1524error0:
1525        trace_xfs_alloc_size_error(args);
1526        if (cnt_cur)
1527                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1528        if (bno_cur)
1529                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1530        return error;
1531
1532out_nominleft:
1533        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1534        trace_xfs_alloc_size_nominleft(args);
1535        args->agbno = NULLAGBLOCK;
1536        return 0;
1537}
1538
1539/*
1540 * Deal with the case where only small freespaces remain.
1541 * Either return the contents of the last freespace record,
1542 * or allocate space from the freelist if there is nothing in the tree.
1543 */
1544STATIC int                      /* error */
1545xfs_alloc_ag_vextent_small(
1546        xfs_alloc_arg_t *args,  /* allocation argument structure */
1547        xfs_btree_cur_t *ccur,  /* by-size cursor */
1548        xfs_agblock_t   *fbnop, /* result block number */
1549        xfs_extlen_t    *flenp, /* result length */
1550        int             *stat)  /* status: 0-freelist, 1-normal/none */
1551{
1552        struct xfs_owner_info   oinfo;
1553        struct xfs_perag        *pag;
1554        int             error;
1555        xfs_agblock_t   fbno;
1556        xfs_extlen_t    flen;
1557        int             i;
1558
1559        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1560                goto error0;
1561        if (i) {
1562                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1563                        goto error0;
1564                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1565        }
1566        /*
1567         * Nothing in the btree, try the freelist.  Make sure
1568         * to respect minleft even when pulling from the
1569         * freelist.
1570         */
1571        else if (args->minlen == 1 && args->alignment == 1 &&
1572                 args->resv != XFS_AG_RESV_AGFL &&
1573                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1574                  > args->minleft)) {
1575                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1576                if (error)
1577                        goto error0;
1578                if (fbno != NULLAGBLOCK) {
1579                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1580                              xfs_alloc_allow_busy_reuse(args->datatype));
1581
1582                        if (xfs_alloc_is_userdata(args->datatype)) {
1583                                xfs_buf_t       *bp;
1584
1585                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1586                                        args->agno, fbno, 0);
1587                                if (!bp) {
1588                                        error = -EFSCORRUPTED;
1589                                        goto error0;
1590                                }
1591                                xfs_trans_binval(args->tp, bp);
1592                        }
1593                        args->len = 1;
1594                        args->agbno = fbno;
1595                        XFS_WANT_CORRUPTED_GOTO(args->mp,
1596                                args->agbno + args->len <=
1597                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1598                                error0);
1599                        args->wasfromfl = 1;
1600                        trace_xfs_alloc_small_freelist(args);
1601
1602                        /*
1603                         * If we're feeding an AGFL block to something that
1604                         * doesn't live in the free space, we need to clear
1605                         * out the OWN_AG rmap and add the block back to
1606                         * the AGFL per-AG reservation.
1607                         */
1608                        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1609                        error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1610                                        fbno, 1, &oinfo);
1611                        if (error)
1612                                goto error0;
1613                        pag = xfs_perag_get(args->mp, args->agno);
1614                        xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1615                                        args->tp, 1);
1616                        xfs_perag_put(pag);
1617
1618                        *stat = 0;
1619                        return 0;
1620                }
1621                /*
1622                 * Nothing in the freelist.
1623                 */
1624                else
1625                        flen = 0;
1626        }
1627        /*
1628         * Can't allocate from the freelist for some reason.
1629         */
1630        else {
1631                fbno = NULLAGBLOCK;
1632                flen = 0;
1633        }
1634        /*
1635         * Can't do the allocation, give up.
1636         */
1637        if (flen < args->minlen) {
1638                args->agbno = NULLAGBLOCK;
1639                trace_xfs_alloc_small_notenough(args);
1640                flen = 0;
1641        }
1642        *fbnop = fbno;
1643        *flenp = flen;
1644        *stat = 1;
1645        trace_xfs_alloc_small_done(args);
1646        return 0;
1647
1648error0:
1649        trace_xfs_alloc_small_error(args);
1650        return error;
1651}
1652
1653/*
1654 * Free the extent starting at agno/bno for length.
1655 */
1656STATIC int
1657xfs_free_ag_extent(
1658        xfs_trans_t             *tp,
1659        xfs_buf_t               *agbp,
1660        xfs_agnumber_t          agno,
1661        xfs_agblock_t           bno,
1662        xfs_extlen_t            len,
1663        struct xfs_owner_info   *oinfo,
1664        enum xfs_ag_resv_type   type)
1665{
1666        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1667        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1668        int             error;          /* error return value */
1669        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1670        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1671        int             haveleft;       /* have a left neighbor block */
1672        int             haveright;      /* have a right neighbor block */
1673        int             i;              /* temp, result code */
1674        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1675        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1676        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1677        xfs_agblock_t   nbno;           /* new starting block of freespace */
1678        xfs_extlen_t    nlen;           /* new length of freespace */
1679        xfs_perag_t     *pag;           /* per allocation group data */
1680
1681        bno_cur = cnt_cur = NULL;
1682        mp = tp->t_mountp;
1683
1684        if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1685                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1686                if (error)
1687                        goto error0;
1688        }
1689
1690        /*
1691         * Allocate and initialize a cursor for the by-block btree.
1692         */
1693        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1694        /*
1695         * Look for a neighboring block on the left (lower block numbers)
1696         * that is contiguous with this space.
1697         */
1698        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1699                goto error0;
1700        if (haveleft) {
1701                /*
1702                 * There is a block to our left.
1703                 */
1704                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1705                        goto error0;
1706                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1707                /*
1708                 * It's not contiguous, though.
1709                 */
1710                if (ltbno + ltlen < bno)
1711                        haveleft = 0;
1712                else {
1713                        /*
1714                         * If this failure happens the request to free this
1715                         * space was invalid, it's (partly) already free.
1716                         * Very bad.
1717                         */
1718                        XFS_WANT_CORRUPTED_GOTO(mp,
1719                                                ltbno + ltlen <= bno, error0);
1720                }
1721        }
1722        /*
1723         * Look for a neighboring block on the right (higher block numbers)
1724         * that is contiguous with this space.
1725         */
1726        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1727                goto error0;
1728        if (haveright) {
1729                /*
1730                 * There is a block to our right.
1731                 */
1732                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1733                        goto error0;
1734                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1735                /*
1736                 * It's not contiguous, though.
1737                 */
1738                if (bno + len < gtbno)
1739                        haveright = 0;
1740                else {
1741                        /*
1742                         * If this failure happens the request to free this
1743                         * space was invalid, it's (partly) already free.
1744                         * Very bad.
1745                         */
1746                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1747                }
1748        }
1749        /*
1750         * Now allocate and initialize a cursor for the by-size tree.
1751         */
1752        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1753        /*
1754         * Have both left and right contiguous neighbors.
1755         * Merge all three into a single free block.
1756         */
1757        if (haveleft && haveright) {
1758                /*
1759                 * Delete the old by-size entry on the left.
1760                 */
1761                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1762                        goto error0;
1763                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1764                if ((error = xfs_btree_delete(cnt_cur, &i)))
1765                        goto error0;
1766                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1767                /*
1768                 * Delete the old by-size entry on the right.
1769                 */
1770                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1771                        goto error0;
1772                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1773                if ((error = xfs_btree_delete(cnt_cur, &i)))
1774                        goto error0;
1775                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1776                /*
1777                 * Delete the old by-block entry for the right block.
1778                 */
1779                if ((error = xfs_btree_delete(bno_cur, &i)))
1780                        goto error0;
1781                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1782                /*
1783                 * Move the by-block cursor back to the left neighbor.
1784                 */
1785                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1786                        goto error0;
1787                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1788#ifdef DEBUG
1789                /*
1790                 * Check that this is the right record: delete didn't
1791                 * mangle the cursor.
1792                 */
1793                {
1794                        xfs_agblock_t   xxbno;
1795                        xfs_extlen_t    xxlen;
1796
1797                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1798                                        &i)))
1799                                goto error0;
1800                        XFS_WANT_CORRUPTED_GOTO(mp,
1801                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1802                                error0);
1803                }
1804#endif
1805                /*
1806                 * Update remaining by-block entry to the new, joined block.
1807                 */
1808                nbno = ltbno;
1809                nlen = len + ltlen + gtlen;
1810                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1811                        goto error0;
1812        }
1813        /*
1814         * Have only a left contiguous neighbor.
1815         * Merge it together with the new freespace.
1816         */
1817        else if (haveleft) {
1818                /*
1819                 * Delete the old by-size entry on the left.
1820                 */
1821                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1822                        goto error0;
1823                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1824                if ((error = xfs_btree_delete(cnt_cur, &i)))
1825                        goto error0;
1826                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1827                /*
1828                 * Back up the by-block cursor to the left neighbor, and
1829                 * update its length.
1830                 */
1831                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1832                        goto error0;
1833                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1834                nbno = ltbno;
1835                nlen = len + ltlen;
1836                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1837                        goto error0;
1838        }
1839        /*
1840         * Have only a right contiguous neighbor.
1841         * Merge it together with the new freespace.
1842         */
1843        else if (haveright) {
1844                /*
1845                 * Delete the old by-size entry on the right.
1846                 */
1847                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1848                        goto error0;
1849                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1850                if ((error = xfs_btree_delete(cnt_cur, &i)))
1851                        goto error0;
1852                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1853                /*
1854                 * Update the starting block and length of the right
1855                 * neighbor in the by-block tree.
1856                 */
1857                nbno = bno;
1858                nlen = len + gtlen;
1859                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1860                        goto error0;
1861        }
1862        /*
1863         * No contiguous neighbors.
1864         * Insert the new freespace into the by-block tree.
1865         */
1866        else {
1867                nbno = bno;
1868                nlen = len;
1869                if ((error = xfs_btree_insert(bno_cur, &i)))
1870                        goto error0;
1871                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1872        }
1873        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1874        bno_cur = NULL;
1875        /*
1876         * In all cases we need to insert the new freespace in the by-size tree.
1877         */
1878        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1879                goto error0;
1880        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1881        if ((error = xfs_btree_insert(cnt_cur, &i)))
1882                goto error0;
1883        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1884        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1885        cnt_cur = NULL;
1886
1887        /*
1888         * Update the freespace totals in the ag and superblock.
1889         */
1890        pag = xfs_perag_get(mp, agno);
1891        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1892        xfs_ag_resv_free_extent(pag, type, tp, len);
1893        xfs_perag_put(pag);
1894        if (error)
1895                goto error0;
1896
1897        XFS_STATS_INC(mp, xs_freex);
1898        XFS_STATS_ADD(mp, xs_freeb, len);
1899
1900        trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1901                        haveleft, haveright);
1902
1903        return 0;
1904
1905 error0:
1906        trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1907                        -1, -1);
1908        if (bno_cur)
1909                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1910        if (cnt_cur)
1911                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1912        return error;
1913}
1914
1915/*
1916 * Visible (exported) allocation/free functions.
1917 * Some of these are used just by xfs_alloc_btree.c and this file.
1918 */
1919
1920/*
1921 * Compute and fill in value of m_ag_maxlevels.
1922 */
1923void
1924xfs_alloc_compute_maxlevels(
1925        xfs_mount_t     *mp)    /* file system mount structure */
1926{
1927        mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1928                        (mp->m_sb.sb_agblocks + 1) / 2);
1929}
1930
1931/*
1932 * Find the length of the longest extent in an AG.  The 'need' parameter
1933 * specifies how much space we're going to need for the AGFL and the
1934 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1935 * other callers.
1936 */
1937xfs_extlen_t
1938xfs_alloc_longest_free_extent(
1939        struct xfs_mount        *mp,
1940        struct xfs_perag        *pag,
1941        xfs_extlen_t            need,
1942        xfs_extlen_t            reserved)
1943{
1944        xfs_extlen_t            delta = 0;
1945
1946        /*
1947         * If the AGFL needs a recharge, we'll have to subtract that from the
1948         * longest extent.
1949         */
1950        if (need > pag->pagf_flcount)
1951                delta = need - pag->pagf_flcount;
1952
1953        /*
1954         * If we cannot maintain others' reservations with space from the
1955         * not-longest freesp extents, we'll have to subtract /that/ from
1956         * the longest extent too.
1957         */
1958        if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1959                delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1960
1961        /*
1962         * If the longest extent is long enough to satisfy all the
1963         * reservations and AGFL rules in place, we can return this extent.
1964         */
1965        if (pag->pagf_longest > delta)
1966                return pag->pagf_longest - delta;
1967
1968        /* Otherwise, let the caller try for 1 block if there's space. */
1969        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1970}
1971
1972unsigned int
1973xfs_alloc_min_freelist(
1974        struct xfs_mount        *mp,
1975        struct xfs_perag        *pag)
1976{
1977        unsigned int            min_free;
1978
1979        /* space needed by-bno freespace btree */
1980        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1981                                       mp->m_ag_maxlevels);
1982        /* space needed by-size freespace btree */
1983        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1984                                       mp->m_ag_maxlevels);
1985        /* space needed reverse mapping used space btree */
1986        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1987                min_free += min_t(unsigned int,
1988                                  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1989                                  mp->m_rmap_maxlevels);
1990
1991        return min_free;
1992}
1993
1994/*
1995 * Check if the operation we are fixing up the freelist for should go ahead or
1996 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1997 * is dependent on whether the size and shape of free space available will
1998 * permit the requested allocation to take place.
1999 */
2000static bool
2001xfs_alloc_space_available(
2002        struct xfs_alloc_arg    *args,
2003        xfs_extlen_t            min_free,
2004        int                     flags)
2005{
2006        struct xfs_perag        *pag = args->pag;
2007        xfs_extlen_t            alloc_len, longest;
2008        xfs_extlen_t            reservation; /* blocks that are still reserved */
2009        int                     available;
2010
2011        if (flags & XFS_ALLOC_FLAG_FREEING)
2012                return true;
2013
2014        reservation = xfs_ag_resv_needed(pag, args->resv);
2015
2016        /* do we have enough contiguous free space for the allocation? */
2017        alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2018        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2019                        reservation);
2020        if (longest < alloc_len)
2021                return false;
2022
2023        /* do we have enough free space remaining for the allocation? */
2024        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2025                          reservation - min_free - args->minleft);
2026        if (available < (int)max(args->total, alloc_len))
2027                return false;
2028
2029        /*
2030         * Clamp maxlen to the amount of free space available for the actual
2031         * extent allocation.
2032         */
2033        if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2034                args->maxlen = available;
2035                ASSERT(args->maxlen > 0);
2036                ASSERT(args->maxlen >= args->minlen);
2037        }
2038
2039        return true;
2040}
2041
2042/*
2043 * Decide whether to use this allocation group for this allocation.
2044 * If so, fix up the btree freelist's size.
2045 */
2046int                     /* error */
2047xfs_alloc_fix_freelist(
2048        struct xfs_alloc_arg    *args,  /* allocation argument structure */
2049        int                     flags)  /* XFS_ALLOC_FLAG_... */
2050{
2051        struct xfs_mount        *mp = args->mp;
2052        struct xfs_perag        *pag = args->pag;
2053        struct xfs_trans        *tp = args->tp;
2054        struct xfs_buf          *agbp = NULL;
2055        struct xfs_buf          *agflbp = NULL;
2056        struct xfs_alloc_arg    targs;  /* local allocation arguments */
2057        xfs_agblock_t           bno;    /* freelist block */
2058        xfs_extlen_t            need;   /* total blocks needed in freelist */
2059        int                     error = 0;
2060
2061        if (!pag->pagf_init) {
2062                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2063                if (error)
2064                        goto out_no_agbp;
2065                if (!pag->pagf_init) {
2066                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2067                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2068                        goto out_agbp_relse;
2069                }
2070        }
2071
2072        /*
2073         * If this is a metadata preferred pag and we are user data then try
2074         * somewhere else if we are not being asked to try harder at this
2075         * point
2076         */
2077        if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
2078            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2079                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2080                goto out_agbp_relse;
2081        }
2082
2083        need = xfs_alloc_min_freelist(mp, pag);
2084        if (!xfs_alloc_space_available(args, need, flags |
2085                        XFS_ALLOC_FLAG_CHECK))
2086                goto out_agbp_relse;
2087
2088        /*
2089         * Get the a.g. freespace buffer.
2090         * Can fail if we're not blocking on locks, and it's held.
2091         */
2092        if (!agbp) {
2093                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2094                if (error)
2095                        goto out_no_agbp;
2096                if (!agbp) {
2097                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2098                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2099                        goto out_no_agbp;
2100                }
2101        }
2102
2103        /* If there isn't enough total space or single-extent, reject it. */
2104        need = xfs_alloc_min_freelist(mp, pag);
2105        if (!xfs_alloc_space_available(args, need, flags))
2106                goto out_agbp_relse;
2107
2108        /*
2109         * Make the freelist shorter if it's too long.
2110         *
2111         * Note that from this point onwards, we will always release the agf and
2112         * agfl buffers on error. This handles the case where we error out and
2113         * the buffers are clean or may not have been joined to the transaction
2114         * and hence need to be released manually. If they have been joined to
2115         * the transaction, then xfs_trans_brelse() will handle them
2116         * appropriately based on the recursion count and dirty state of the
2117         * buffer.
2118         *
2119         * XXX (dgc): When we have lots of free space, does this buy us
2120         * anything other than extra overhead when we need to put more blocks
2121         * back on the free list? Maybe we should only do this when space is
2122         * getting low or the AGFL is more than half full?
2123         *
2124         * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2125         * big; the NORMAP flag prevents AGFL expand/shrink operations from
2126         * updating the rmapbt.  Both flags are used in xfs_repair while we're
2127         * rebuilding the rmapbt, and neither are used by the kernel.  They're
2128         * both required to ensure that rmaps are correctly recorded for the
2129         * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
2130         * repair/rmap.c in xfsprogs for details.
2131         */
2132        memset(&targs, 0, sizeof(targs));
2133        if (flags & XFS_ALLOC_FLAG_NORMAP)
2134                xfs_rmap_skip_owner_update(&targs.oinfo);
2135        else
2136                xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2137        while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
2138                struct xfs_buf  *bp;
2139
2140                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2141                if (error)
2142                        goto out_agbp_relse;
2143                error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2144                                           &targs.oinfo, XFS_AG_RESV_AGFL);
2145                if (error)
2146                        goto out_agbp_relse;
2147                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2148                if (!bp) {
2149                        error = -EFSCORRUPTED;
2150                        goto out_agbp_relse;
2151                }
2152                xfs_trans_binval(tp, bp);
2153        }
2154
2155        targs.tp = tp;
2156        targs.mp = mp;
2157        targs.agbp = agbp;
2158        targs.agno = args->agno;
2159        targs.alignment = targs.minlen = targs.prod = 1;
2160        targs.type = XFS_ALLOCTYPE_THIS_AG;
2161        targs.pag = pag;
2162        error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2163        if (error)
2164                goto out_agbp_relse;
2165
2166        /* Make the freelist longer if it's too short. */
2167        while (pag->pagf_flcount < need) {
2168                targs.agbno = 0;
2169                targs.maxlen = need - pag->pagf_flcount;
2170                targs.resv = XFS_AG_RESV_AGFL;
2171
2172                /* Allocate as many blocks as possible at once. */
2173                error = xfs_alloc_ag_vextent(&targs);
2174                if (error)
2175                        goto out_agflbp_relse;
2176
2177                /*
2178                 * Stop if we run out.  Won't happen if callers are obeying
2179                 * the restrictions correctly.  Can happen for free calls
2180                 * on a completely full ag.
2181                 */
2182                if (targs.agbno == NULLAGBLOCK) {
2183                        if (flags & XFS_ALLOC_FLAG_FREEING)
2184                                break;
2185                        goto out_agflbp_relse;
2186                }
2187                /*
2188                 * Put each allocated block on the list.
2189                 */
2190                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2191                        error = xfs_alloc_put_freelist(tp, agbp,
2192                                                        agflbp, bno, 0);
2193                        if (error)
2194                                goto out_agflbp_relse;
2195                }
2196        }
2197        xfs_trans_brelse(tp, agflbp);
2198        args->agbp = agbp;
2199        return 0;
2200
2201out_agflbp_relse:
2202        xfs_trans_brelse(tp, agflbp);
2203out_agbp_relse:
2204        if (agbp)
2205                xfs_trans_brelse(tp, agbp);
2206out_no_agbp:
2207        args->agbp = NULL;
2208        return error;
2209}
2210
2211/*
2212 * Get a block from the freelist.
2213 * Returns with the buffer for the block gotten.
2214 */
2215int                             /* error */
2216xfs_alloc_get_freelist(
2217        xfs_trans_t     *tp,    /* transaction pointer */
2218        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2219        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2220        int             btreeblk) /* destination is a AGF btree */
2221{
2222        xfs_agf_t       *agf;   /* a.g. freespace structure */
2223        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2224        xfs_agblock_t   bno;    /* block number returned */
2225        __be32          *agfl_bno;
2226        int             error;
2227        int             logflags;
2228        xfs_mount_t     *mp = tp->t_mountp;
2229        xfs_perag_t     *pag;   /* per allocation group data */
2230
2231        /*
2232         * Freelist is empty, give up.
2233         */
2234        agf = XFS_BUF_TO_AGF(agbp);
2235        if (!agf->agf_flcount) {
2236                *bnop = NULLAGBLOCK;
2237                return 0;
2238        }
2239        /*
2240         * Read the array of free blocks.
2241         */
2242        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2243                                    &agflbp);
2244        if (error)
2245                return error;
2246
2247
2248        /*
2249         * Get the block number and update the data structures.
2250         */
2251        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2252        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2253        be32_add_cpu(&agf->agf_flfirst, 1);
2254        xfs_trans_brelse(tp, agflbp);
2255        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2256                agf->agf_flfirst = 0;
2257
2258        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2259        be32_add_cpu(&agf->agf_flcount, -1);
2260        xfs_trans_agflist_delta(tp, -1);
2261        pag->pagf_flcount--;
2262        xfs_perag_put(pag);
2263
2264        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2265        if (btreeblk) {
2266                be32_add_cpu(&agf->agf_btreeblks, 1);
2267                pag->pagf_btreeblks++;
2268                logflags |= XFS_AGF_BTREEBLKS;
2269        }
2270
2271        xfs_alloc_log_agf(tp, agbp, logflags);
2272        *bnop = bno;
2273
2274        return 0;
2275}
2276
2277/*
2278 * Log the given fields from the agf structure.
2279 */
2280void
2281xfs_alloc_log_agf(
2282        xfs_trans_t     *tp,    /* transaction pointer */
2283        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2284        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2285{
2286        int     first;          /* first byte offset */
2287        int     last;           /* last byte offset */
2288        static const short      offsets[] = {
2289                offsetof(xfs_agf_t, agf_magicnum),
2290                offsetof(xfs_agf_t, agf_versionnum),
2291                offsetof(xfs_agf_t, agf_seqno),
2292                offsetof(xfs_agf_t, agf_length),
2293                offsetof(xfs_agf_t, agf_roots[0]),
2294                offsetof(xfs_agf_t, agf_levels[0]),
2295                offsetof(xfs_agf_t, agf_flfirst),
2296                offsetof(xfs_agf_t, agf_fllast),
2297                offsetof(xfs_agf_t, agf_flcount),
2298                offsetof(xfs_agf_t, agf_freeblks),
2299                offsetof(xfs_agf_t, agf_longest),
2300                offsetof(xfs_agf_t, agf_btreeblks),
2301                offsetof(xfs_agf_t, agf_uuid),
2302                offsetof(xfs_agf_t, agf_rmap_blocks),
2303                offsetof(xfs_agf_t, agf_refcount_blocks),
2304                offsetof(xfs_agf_t, agf_refcount_root),
2305                offsetof(xfs_agf_t, agf_refcount_level),
2306                /* needed so that we don't log the whole rest of the structure: */
2307                offsetof(xfs_agf_t, agf_spare64),
2308                sizeof(xfs_agf_t)
2309        };
2310
2311        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2312
2313        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2314
2315        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2316        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2317}
2318
2319/*
2320 * Interface for inode allocation to force the pag data to be initialized.
2321 */
2322int                                     /* error */
2323xfs_alloc_pagf_init(
2324        xfs_mount_t             *mp,    /* file system mount structure */
2325        xfs_trans_t             *tp,    /* transaction pointer */
2326        xfs_agnumber_t          agno,   /* allocation group number */
2327        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2328{
2329        xfs_buf_t               *bp;
2330        int                     error;
2331
2332        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2333                return error;
2334        if (bp)
2335                xfs_trans_brelse(tp, bp);
2336        return 0;
2337}
2338
2339/*
2340 * Put the block on the freelist for the allocation group.
2341 */
2342int                                     /* error */
2343xfs_alloc_put_freelist(
2344        xfs_trans_t             *tp,    /* transaction pointer */
2345        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2346        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2347        xfs_agblock_t           bno,    /* block being freed */
2348        int                     btreeblk) /* block came from a AGF btree */
2349{
2350        xfs_agf_t               *agf;   /* a.g. freespace structure */
2351        __be32                  *blockp;/* pointer to array entry */
2352        int                     error;
2353        int                     logflags;
2354        xfs_mount_t             *mp;    /* mount structure */
2355        xfs_perag_t             *pag;   /* per allocation group data */
2356        __be32                  *agfl_bno;
2357        int                     startoff;
2358
2359        agf = XFS_BUF_TO_AGF(agbp);
2360        mp = tp->t_mountp;
2361
2362        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2363                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2364                return error;
2365        be32_add_cpu(&agf->agf_fllast, 1);
2366        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2367                agf->agf_fllast = 0;
2368
2369        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2370        be32_add_cpu(&agf->agf_flcount, 1);
2371        xfs_trans_agflist_delta(tp, 1);
2372        pag->pagf_flcount++;
2373
2374        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2375        if (btreeblk) {
2376                be32_add_cpu(&agf->agf_btreeblks, -1);
2377                pag->pagf_btreeblks--;
2378                logflags |= XFS_AGF_BTREEBLKS;
2379        }
2380        xfs_perag_put(pag);
2381
2382        xfs_alloc_log_agf(tp, agbp, logflags);
2383
2384        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2385
2386        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2387        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2388        *blockp = cpu_to_be32(bno);
2389        startoff = (char *)blockp - (char *)agflbp->b_addr;
2390
2391        xfs_alloc_log_agf(tp, agbp, logflags);
2392
2393        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2394        xfs_trans_log_buf(tp, agflbp, startoff,
2395                          startoff + sizeof(xfs_agblock_t) - 1);
2396        return 0;
2397}
2398
2399static bool
2400xfs_agf_verify(
2401        struct xfs_mount *mp,
2402        struct xfs_buf  *bp)
2403 {
2404        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2405
2406        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2407                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2408                        return false;
2409                if (!xfs_log_check_lsn(mp,
2410                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2411                        return false;
2412        }
2413
2414        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2415              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2416              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2417              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2418              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2419              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2420                return false;
2421
2422        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2423            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2424            be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2425            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2426                return false;
2427
2428        if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2429            (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2430             be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2431                return false;
2432
2433        /*
2434         * during growfs operations, the perag is not fully initialised,
2435         * so we can't use it for any useful checking. growfs ensures we can't
2436         * use it by using uncached buffers that don't have the perag attached
2437         * so we can detect and avoid this problem.
2438         */
2439        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2440                return false;
2441
2442        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2443            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2444                return false;
2445
2446        if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2447            (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2448             be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2449                return false;
2450
2451        return true;;
2452
2453}
2454
2455static void
2456xfs_agf_read_verify(
2457        struct xfs_buf  *bp)
2458{
2459        struct xfs_mount *mp = bp->b_target->bt_mount;
2460
2461        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2462            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2463                xfs_buf_ioerror(bp, -EFSBADCRC);
2464        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2465                                XFS_ERRTAG_ALLOC_READ_AGF))
2466                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2467
2468        if (bp->b_error)
2469                xfs_verifier_error(bp);
2470}
2471
2472static void
2473xfs_agf_write_verify(
2474        struct xfs_buf  *bp)
2475{
2476        struct xfs_mount *mp = bp->b_target->bt_mount;
2477        struct xfs_buf_log_item *bip = bp->b_fspriv;
2478
2479        if (!xfs_agf_verify(mp, bp)) {
2480                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2481                xfs_verifier_error(bp);
2482                return;
2483        }
2484
2485        if (!xfs_sb_version_hascrc(&mp->m_sb))
2486                return;
2487
2488        if (bip)
2489                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2490
2491        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2492}
2493
2494const struct xfs_buf_ops xfs_agf_buf_ops = {
2495        .name = "xfs_agf",
2496        .verify_read = xfs_agf_read_verify,
2497        .verify_write = xfs_agf_write_verify,
2498};
2499
2500/*
2501 * Read in the allocation group header (free/alloc section).
2502 */
2503int                                     /* error */
2504xfs_read_agf(
2505        struct xfs_mount        *mp,    /* mount point structure */
2506        struct xfs_trans        *tp,    /* transaction pointer */
2507        xfs_agnumber_t          agno,   /* allocation group number */
2508        int                     flags,  /* XFS_BUF_ */
2509        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2510{
2511        int             error;
2512
2513        trace_xfs_read_agf(mp, agno);
2514
2515        ASSERT(agno != NULLAGNUMBER);
2516        error = xfs_trans_read_buf(
2517                        mp, tp, mp->m_ddev_targp,
2518                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2519                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2520        if (error)
2521                return error;
2522        if (!*bpp)
2523                return 0;
2524
2525        ASSERT(!(*bpp)->b_error);
2526        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2527        return 0;
2528}
2529
2530/*
2531 * Read in the allocation group header (free/alloc section).
2532 */
2533int                                     /* error */
2534xfs_alloc_read_agf(
2535        struct xfs_mount        *mp,    /* mount point structure */
2536        struct xfs_trans        *tp,    /* transaction pointer */
2537        xfs_agnumber_t          agno,   /* allocation group number */
2538        int                     flags,  /* XFS_ALLOC_FLAG_... */
2539        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2540{
2541        struct xfs_agf          *agf;           /* ag freelist header */
2542        struct xfs_perag        *pag;           /* per allocation group data */
2543        int                     error;
2544
2545        trace_xfs_alloc_read_agf(mp, agno);
2546
2547        ASSERT(agno != NULLAGNUMBER);
2548        error = xfs_read_agf(mp, tp, agno,
2549                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2550                        bpp);
2551        if (error)
2552                return error;
2553        if (!*bpp)
2554                return 0;
2555        ASSERT(!(*bpp)->b_error);
2556
2557        agf = XFS_BUF_TO_AGF(*bpp);
2558        pag = xfs_perag_get(mp, agno);
2559        if (!pag->pagf_init) {
2560                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2561                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2562                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2563                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2564                pag->pagf_levels[XFS_BTNUM_BNOi] =
2565                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2566                pag->pagf_levels[XFS_BTNUM_CNTi] =
2567                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2568                pag->pagf_levels[XFS_BTNUM_RMAPi] =
2569                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
2570                pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
2571                spin_lock_init(&pag->pagb_lock);
2572                pag->pagb_count = 0;
2573                pag->pagb_tree = RB_ROOT;
2574                pag->pagf_init = 1;
2575        }
2576#ifdef DEBUG
2577        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2578                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2579                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2580                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2581                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2582                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2583                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2584                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2585                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2586        }
2587#endif
2588        xfs_perag_put(pag);
2589        return 0;
2590}
2591
2592/*
2593 * Allocate an extent (variable-size).
2594 * Depending on the allocation type, we either look in a single allocation
2595 * group or loop over the allocation groups to find the result.
2596 */
2597int                             /* error */
2598xfs_alloc_vextent(
2599        xfs_alloc_arg_t *args)  /* allocation argument structure */
2600{
2601        xfs_agblock_t   agsize; /* allocation group size */
2602        int             error;
2603        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2604        xfs_mount_t     *mp;    /* mount structure pointer */
2605        xfs_agnumber_t  sagno;  /* starting allocation group number */
2606        xfs_alloctype_t type;   /* input allocation type */
2607        int             bump_rotor = 0;
2608        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2609
2610        mp = args->mp;
2611        type = args->otype = args->type;
2612        args->agbno = NULLAGBLOCK;
2613        /*
2614         * Just fix this up, for the case where the last a.g. is shorter
2615         * (or there's only one a.g.) and the caller couldn't easily figure
2616         * that out (xfs_bmap_alloc).
2617         */
2618        agsize = mp->m_sb.sb_agblocks;
2619        if (args->maxlen > agsize)
2620                args->maxlen = agsize;
2621        if (args->alignment == 0)
2622                args->alignment = 1;
2623        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2624        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2625        ASSERT(args->minlen <= args->maxlen);
2626        ASSERT(args->minlen <= agsize);
2627        ASSERT(args->mod < args->prod);
2628        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2629            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2630            args->minlen > args->maxlen || args->minlen > agsize ||
2631            args->mod >= args->prod) {
2632                args->fsbno = NULLFSBLOCK;
2633                trace_xfs_alloc_vextent_badargs(args);
2634                return 0;
2635        }
2636
2637        switch (type) {
2638        case XFS_ALLOCTYPE_THIS_AG:
2639        case XFS_ALLOCTYPE_NEAR_BNO:
2640        case XFS_ALLOCTYPE_THIS_BNO:
2641                /*
2642                 * These three force us into a single a.g.
2643                 */
2644                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2645                args->pag = xfs_perag_get(mp, args->agno);
2646                error = xfs_alloc_fix_freelist(args, 0);
2647                if (error) {
2648                        trace_xfs_alloc_vextent_nofix(args);
2649                        goto error0;
2650                }
2651                if (!args->agbp) {
2652                        trace_xfs_alloc_vextent_noagbp(args);
2653                        break;
2654                }
2655                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2656                if ((error = xfs_alloc_ag_vextent(args)))
2657                        goto error0;
2658                break;
2659        case XFS_ALLOCTYPE_START_BNO:
2660                /*
2661                 * Try near allocation first, then anywhere-in-ag after
2662                 * the first a.g. fails.
2663                 */
2664                if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
2665                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2666                        args->fsbno = XFS_AGB_TO_FSB(mp,
2667                                        ((mp->m_agfrotor / rotorstep) %
2668                                        mp->m_sb.sb_agcount), 0);
2669                        bump_rotor = 1;
2670                }
2671                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2672                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2673                /* FALLTHROUGH */
2674        case XFS_ALLOCTYPE_FIRST_AG:
2675                /*
2676                 * Rotate through the allocation groups looking for a winner.
2677                 */
2678                if (type == XFS_ALLOCTYPE_FIRST_AG) {
2679                        /*
2680                         * Start with allocation group given by bno.
2681                         */
2682                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2683                        args->type = XFS_ALLOCTYPE_THIS_AG;
2684                        sagno = 0;
2685                        flags = 0;
2686                } else {
2687                        /*
2688                         * Start with the given allocation group.
2689                         */
2690                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2691                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2692                }
2693                /*
2694                 * Loop over allocation groups twice; first time with
2695                 * trylock set, second time without.
2696                 */
2697                for (;;) {
2698                        args->pag = xfs_perag_get(mp, args->agno);
2699                        error = xfs_alloc_fix_freelist(args, flags);
2700                        if (error) {
2701                                trace_xfs_alloc_vextent_nofix(args);
2702                                goto error0;
2703                        }
2704                        /*
2705                         * If we get a buffer back then the allocation will fly.
2706                         */
2707                        if (args->agbp) {
2708                                if ((error = xfs_alloc_ag_vextent(args)))
2709                                        goto error0;
2710                                break;
2711                        }
2712
2713                        trace_xfs_alloc_vextent_loopfailed(args);
2714
2715                        /*
2716                         * Didn't work, figure out the next iteration.
2717                         */
2718                        if (args->agno == sagno &&
2719                            type == XFS_ALLOCTYPE_START_BNO)
2720                                args->type = XFS_ALLOCTYPE_THIS_AG;
2721                        /*
2722                        * For the first allocation, we can try any AG to get
2723                        * space.  However, if we already have allocated a
2724                        * block, we don't want to try AGs whose number is below
2725                        * sagno. Otherwise, we may end up with out-of-order
2726                        * locking of AGF, which might cause deadlock.
2727                        */
2728                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2729                                if (args->firstblock != NULLFSBLOCK)
2730                                        args->agno = sagno;
2731                                else
2732                                        args->agno = 0;
2733                        }
2734                        /*
2735                         * Reached the starting a.g., must either be done
2736                         * or switch to non-trylock mode.
2737                         */
2738                        if (args->agno == sagno) {
2739                                if (flags == 0) {
2740                                        args->agbno = NULLAGBLOCK;
2741                                        trace_xfs_alloc_vextent_allfailed(args);
2742                                        break;
2743                                }
2744
2745                                flags = 0;
2746                                if (type == XFS_ALLOCTYPE_START_BNO) {
2747                                        args->agbno = XFS_FSB_TO_AGBNO(mp,
2748                                                args->fsbno);
2749                                        args->type = XFS_ALLOCTYPE_NEAR_BNO;
2750                                }
2751                        }
2752                        xfs_perag_put(args->pag);
2753                }
2754                if (bump_rotor) {
2755                        if (args->agno == sagno)
2756                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2757                                        (mp->m_sb.sb_agcount * rotorstep);
2758                        else
2759                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2760                                        (mp->m_sb.sb_agcount * rotorstep);
2761                }
2762                break;
2763        default:
2764                ASSERT(0);
2765                /* NOTREACHED */
2766        }
2767        if (args->agbno == NULLAGBLOCK)
2768                args->fsbno = NULLFSBLOCK;
2769        else {
2770                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2771#ifdef DEBUG
2772                ASSERT(args->len >= args->minlen);
2773                ASSERT(args->len <= args->maxlen);
2774                ASSERT(args->agbno % args->alignment == 0);
2775                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2776                        args->len);
2777#endif
2778
2779                /* Zero the extent if we were asked to do so */
2780                if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
2781                        error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2782                        if (error)
2783                                goto error0;
2784                }
2785
2786        }
2787        xfs_perag_put(args->pag);
2788        return 0;
2789error0:
2790        xfs_perag_put(args->pag);
2791        return error;
2792}
2793
2794/* Ensure that the freelist is at full capacity. */
2795int
2796xfs_free_extent_fix_freelist(
2797        struct xfs_trans        *tp,
2798        xfs_agnumber_t          agno,
2799        struct xfs_buf          **agbp)
2800{
2801        struct xfs_alloc_arg    args;
2802        int                     error;
2803
2804        memset(&args, 0, sizeof(struct xfs_alloc_arg));
2805        args.tp = tp;
2806        args.mp = tp->t_mountp;
2807        args.agno = agno;
2808
2809        /*
2810         * validate that the block number is legal - the enables us to detect
2811         * and handle a silent filesystem corruption rather than crashing.
2812         */
2813        if (args.agno >= args.mp->m_sb.sb_agcount)
2814                return -EFSCORRUPTED;
2815
2816        args.pag = xfs_perag_get(args.mp, args.agno);
2817        ASSERT(args.pag);
2818
2819        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2820        if (error)
2821                goto out;
2822
2823        *agbp = args.agbp;
2824out:
2825        xfs_perag_put(args.pag);
2826        return error;
2827}
2828
2829/*
2830 * Free an extent.
2831 * Just break up the extent address and hand off to xfs_free_ag_extent
2832 * after fixing up the freelist.
2833 */
2834int                             /* error */
2835xfs_free_extent(
2836        struct xfs_trans        *tp,    /* transaction pointer */
2837        xfs_fsblock_t           bno,    /* starting block number of extent */
2838        xfs_extlen_t            len,    /* length of extent */
2839        struct xfs_owner_info   *oinfo, /* extent owner */
2840        enum xfs_ag_resv_type   type)   /* block reservation type */
2841{
2842        struct xfs_mount        *mp = tp->t_mountp;
2843        struct xfs_buf          *agbp;
2844        xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, bno);
2845        xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(mp, bno);
2846        int                     error;
2847
2848        ASSERT(len != 0);
2849        ASSERT(type != XFS_AG_RESV_AGFL);
2850
2851        if (XFS_TEST_ERROR(false, mp,
2852                        XFS_ERRTAG_FREE_EXTENT))
2853                return -EIO;
2854
2855        error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2856        if (error)
2857                return error;
2858
2859        XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
2860
2861        /* validate the extent size is legal now we have the agf locked */
2862        XFS_WANT_CORRUPTED_GOTO(mp,
2863                agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2864                                err);
2865
2866        error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2867        if (error)
2868                goto err;
2869
2870        xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2871        return 0;
2872
2873err:
2874        xfs_trans_brelse(tp, agbp);
2875        return error;
2876}
2877
2878struct xfs_alloc_query_range_info {
2879        xfs_alloc_query_range_fn        fn;
2880        void                            *priv;
2881};
2882
2883/* Format btree record and pass to our callback. */
2884STATIC int
2885xfs_alloc_query_range_helper(
2886        struct xfs_btree_cur            *cur,
2887        union xfs_btree_rec             *rec,
2888        void                            *priv)
2889{
2890        struct xfs_alloc_query_range_info       *query = priv;
2891        struct xfs_alloc_rec_incore             irec;
2892
2893        irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
2894        irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
2895        return query->fn(cur, &irec, query->priv);
2896}
2897
2898/* Find all free space within a given range of blocks. */
2899int
2900xfs_alloc_query_range(
2901        struct xfs_btree_cur                    *cur,
2902        struct xfs_alloc_rec_incore             *low_rec,
2903        struct xfs_alloc_rec_incore             *high_rec,
2904        xfs_alloc_query_range_fn                fn,
2905        void                                    *priv)
2906{
2907        union xfs_btree_irec                    low_brec;
2908        union xfs_btree_irec                    high_brec;
2909        struct xfs_alloc_query_range_info       query;
2910
2911        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2912        low_brec.a = *low_rec;
2913        high_brec.a = *high_rec;
2914        query.priv = priv;
2915        query.fn = fn;
2916        return xfs_btree_query_range(cur, &low_brec, &high_brec,
2917                        xfs_alloc_query_range_helper, &query);
2918}
2919
2920/* Find all free space records. */
2921int
2922xfs_alloc_query_all(
2923        struct xfs_btree_cur                    *cur,
2924        xfs_alloc_query_range_fn                fn,
2925        void                                    *priv)
2926{
2927        struct xfs_alloc_query_range_info       query;
2928
2929        ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2930        query.priv = priv;
2931        query.fn = fn;
2932        return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
2933}
2934