linux/fs/xfs/libxfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_rmap.h"
  31#include "xfs_alloc_btree.h"
  32#include "xfs_alloc.h"
  33#include "xfs_extent_busy.h"
  34#include "xfs_error.h"
  35#include "xfs_cksum.h"
  36#include "xfs_trace.h"
  37#include "xfs_trans.h"
  38#include "xfs_buf_item.h"
  39#include "xfs_log.h"
  40
  41struct workqueue_struct *xfs_alloc_wq;
  42
  43#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  44
  45#define XFSA_FIXUP_BNO_OK       1
  46#define XFSA_FIXUP_CNT_OK       2
  47
  48STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  49STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  50STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  52                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  53
  54xfs_extlen_t
  55xfs_prealloc_blocks(
  56        struct xfs_mount        *mp)
  57{
  58        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  59                return XFS_RMAP_BLOCK(mp) + 1;
  60        if (xfs_sb_version_hasfinobt(&mp->m_sb))
  61                return XFS_FIBT_BLOCK(mp) + 1;
  62        return XFS_IBT_BLOCK(mp) + 1;
  63}
  64
  65/*
  66 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  67 * AGF buffer (PV 947395), we place constraints on the relationship among
  68 * actual allocations for data blocks, freelist blocks, and potential file data
  69 * bmap btree blocks. However, these restrictions may result in no actual space
  70 * allocated for a delayed extent, for example, a data block in a certain AG is
  71 * allocated but there is no additional block for the additional bmap btree
  72 * block due to a split of the bmap btree of the file. The result of this may
  73 * lead to an infinite loop when the file gets flushed to disk and all delayed
  74 * extents need to be actually allocated. To get around this, we explicitly set
  75 * aside a few blocks which will not be reserved in delayed allocation.
  76 *
  77 * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
  78 * and 4 more to handle a potential split of the file's bmap btree.
  79 *
  80 * When rmap is enabled, we must also be able to handle two rmap btree inserts
  81 * to record both the file data extent and a new bmbt block.  The bmbt block
  82 * might not be in the same AG as the file data extent.  In the worst case
  83 * the bmap btree splits multiple levels and all the new blocks come from
  84 * different AGs, so set aside enough to handle rmap btree splits in all AGs.
  85 */
  86unsigned int
  87xfs_alloc_set_aside(
  88        struct xfs_mount        *mp)
  89{
  90        unsigned int            blocks;
  91
  92        blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
  93        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  94                blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
  95        return blocks;
  96}
  97
  98/*
  99 * When deciding how much space to allocate out of an AG, we limit the
 100 * allocation maximum size to the size the AG. However, we cannot use all the
 101 * blocks in the AG - some are permanently used by metadata. These
 102 * blocks are generally:
 103 *      - the AG superblock, AGF, AGI and AGFL
 104 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 105 *        the AGI free inode and rmap btree root blocks.
 106 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 107 *      - the rmapbt root block
 108 *
 109 * The AG headers are sector sized, so the amount of space they take up is
 110 * dependent on filesystem geometry. The others are all single blocks.
 111 */
 112unsigned int
 113xfs_alloc_ag_max_usable(
 114        struct xfs_mount        *mp)
 115{
 116        unsigned int            blocks;
 117
 118        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 119        blocks += XFS_ALLOC_AGFL_RESERVE;
 120        blocks += 3;                    /* AGF, AGI btree root blocks */
 121        if (xfs_sb_version_hasfinobt(&mp->m_sb))
 122                blocks++;               /* finobt root block */
 123        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 124                blocks++;               /* rmap root block */
 125
 126        return mp->m_sb.sb_agblocks - blocks;
 127}
 128
 129/*
 130 * Lookup the record equal to [bno, len] in the btree given by cur.
 131 */
 132STATIC int                              /* error */
 133xfs_alloc_lookup_eq(
 134        struct xfs_btree_cur    *cur,   /* btree cursor */
 135        xfs_agblock_t           bno,    /* starting block of extent */
 136        xfs_extlen_t            len,    /* length of extent */
 137        int                     *stat)  /* success/failure */
 138{
 139        cur->bc_rec.a.ar_startblock = bno;
 140        cur->bc_rec.a.ar_blockcount = len;
 141        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 142}
 143
 144/*
 145 * Lookup the first record greater than or equal to [bno, len]
 146 * in the btree given by cur.
 147 */
 148int                             /* error */
 149xfs_alloc_lookup_ge(
 150        struct xfs_btree_cur    *cur,   /* btree cursor */
 151        xfs_agblock_t           bno,    /* starting block of extent */
 152        xfs_extlen_t            len,    /* length of extent */
 153        int                     *stat)  /* success/failure */
 154{
 155        cur->bc_rec.a.ar_startblock = bno;
 156        cur->bc_rec.a.ar_blockcount = len;
 157        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 158}
 159
 160/*
 161 * Lookup the first record less than or equal to [bno, len]
 162 * in the btree given by cur.
 163 */
 164static int                              /* error */
 165xfs_alloc_lookup_le(
 166        struct xfs_btree_cur    *cur,   /* btree cursor */
 167        xfs_agblock_t           bno,    /* starting block of extent */
 168        xfs_extlen_t            len,    /* length of extent */
 169        int                     *stat)  /* success/failure */
 170{
 171        cur->bc_rec.a.ar_startblock = bno;
 172        cur->bc_rec.a.ar_blockcount = len;
 173        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 174}
 175
 176/*
 177 * Update the record referred to by cur to the value given
 178 * by [bno, len].
 179 * This either works (return 0) or gets an EFSCORRUPTED error.
 180 */
 181STATIC int                              /* error */
 182xfs_alloc_update(
 183        struct xfs_btree_cur    *cur,   /* btree cursor */
 184        xfs_agblock_t           bno,    /* starting block of extent */
 185        xfs_extlen_t            len)    /* length of extent */
 186{
 187        union xfs_btree_rec     rec;
 188
 189        rec.alloc.ar_startblock = cpu_to_be32(bno);
 190        rec.alloc.ar_blockcount = cpu_to_be32(len);
 191        return xfs_btree_update(cur, &rec);
 192}
 193
 194/*
 195 * Get the data from the pointed-to record.
 196 */
 197int                                     /* error */
 198xfs_alloc_get_rec(
 199        struct xfs_btree_cur    *cur,   /* btree cursor */
 200        xfs_agblock_t           *bno,   /* output: starting block of extent */
 201        xfs_extlen_t            *len,   /* output: length of extent */
 202        int                     *stat)  /* output: success/failure */
 203{
 204        union xfs_btree_rec     *rec;
 205        int                     error;
 206
 207        error = xfs_btree_get_rec(cur, &rec, stat);
 208        if (!error && *stat == 1) {
 209                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 210                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 211        }
 212        return error;
 213}
 214
 215/*
 216 * Compute aligned version of the found extent.
 217 * Takes alignment and min length into account.
 218 */
 219STATIC void
 220xfs_alloc_compute_aligned(
 221        xfs_alloc_arg_t *args,          /* allocation argument structure */
 222        xfs_agblock_t   foundbno,       /* starting block in found extent */
 223        xfs_extlen_t    foundlen,       /* length in found extent */
 224        xfs_agblock_t   *resbno,        /* result block number */
 225        xfs_extlen_t    *reslen)        /* result length */
 226{
 227        xfs_agblock_t   bno;
 228        xfs_extlen_t    len;
 229        xfs_extlen_t    diff;
 230
 231        /* Trim busy sections out of found extent */
 232        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 233
 234        /*
 235         * If we have a largish extent that happens to start before min_agbno,
 236         * see if we can shift it into range...
 237         */
 238        if (bno < args->min_agbno && bno + len > args->min_agbno) {
 239                diff = args->min_agbno - bno;
 240                if (len > diff) {
 241                        bno += diff;
 242                        len -= diff;
 243                }
 244        }
 245
 246        if (args->alignment > 1 && len >= args->minlen) {
 247                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 248
 249                diff = aligned_bno - bno;
 250
 251                *resbno = aligned_bno;
 252                *reslen = diff >= len ? 0 : len - diff;
 253        } else {
 254                *resbno = bno;
 255                *reslen = len;
 256        }
 257}
 258
 259/*
 260 * Compute best start block and diff for "near" allocations.
 261 * freelen >= wantlen already checked by caller.
 262 */
 263STATIC xfs_extlen_t                     /* difference value (absolute) */
 264xfs_alloc_compute_diff(
 265        xfs_agblock_t   wantbno,        /* target starting block */
 266        xfs_extlen_t    wantlen,        /* target length */
 267        xfs_extlen_t    alignment,      /* target alignment */
 268        char            userdata,       /* are we allocating data? */
 269        xfs_agblock_t   freebno,        /* freespace's starting block */
 270        xfs_extlen_t    freelen,        /* freespace's length */
 271        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 272{
 273        xfs_agblock_t   freeend;        /* end of freespace extent */
 274        xfs_agblock_t   newbno1;        /* return block number */
 275        xfs_agblock_t   newbno2;        /* other new block number */
 276        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 277        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 278        xfs_agblock_t   wantend;        /* end of target extent */
 279
 280        ASSERT(freelen >= wantlen);
 281        freeend = freebno + freelen;
 282        wantend = wantbno + wantlen;
 283        /*
 284         * We want to allocate from the start of a free extent if it is past
 285         * the desired block or if we are allocating user data and the free
 286         * extent is before desired block. The second case is there to allow
 287         * for contiguous allocation from the remaining free space if the file
 288         * grows in the short term.
 289         */
 290        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 291                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 292                        newbno1 = NULLAGBLOCK;
 293        } else if (freeend >= wantend && alignment > 1) {
 294                newbno1 = roundup(wantbno, alignment);
 295                newbno2 = newbno1 - alignment;
 296                if (newbno1 >= freeend)
 297                        newbno1 = NULLAGBLOCK;
 298                else
 299                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 300                if (newbno2 < freebno)
 301                        newbno2 = NULLAGBLOCK;
 302                else
 303                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 304                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 305                        if (newlen1 < newlen2 ||
 306                            (newlen1 == newlen2 &&
 307                             XFS_ABSDIFF(newbno1, wantbno) >
 308                             XFS_ABSDIFF(newbno2, wantbno)))
 309                                newbno1 = newbno2;
 310                } else if (newbno2 != NULLAGBLOCK)
 311                        newbno1 = newbno2;
 312        } else if (freeend >= wantend) {
 313                newbno1 = wantbno;
 314        } else if (alignment > 1) {
 315                newbno1 = roundup(freeend - wantlen, alignment);
 316                if (newbno1 > freeend - wantlen &&
 317                    newbno1 - alignment >= freebno)
 318                        newbno1 -= alignment;
 319                else if (newbno1 >= freeend)
 320                        newbno1 = NULLAGBLOCK;
 321        } else
 322                newbno1 = freeend - wantlen;
 323        *newbnop = newbno1;
 324        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 325}
 326
 327/*
 328 * Fix up the length, based on mod and prod.
 329 * len should be k * prod + mod for some k.
 330 * If len is too small it is returned unchanged.
 331 * If len hits maxlen it is left alone.
 332 */
 333STATIC void
 334xfs_alloc_fix_len(
 335        xfs_alloc_arg_t *args)          /* allocation argument structure */
 336{
 337        xfs_extlen_t    k;
 338        xfs_extlen_t    rlen;
 339
 340        ASSERT(args->mod < args->prod);
 341        rlen = args->len;
 342        ASSERT(rlen >= args->minlen);
 343        ASSERT(rlen <= args->maxlen);
 344        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 345            (args->mod == 0 && rlen < args->prod))
 346                return;
 347        k = rlen % args->prod;
 348        if (k == args->mod)
 349                return;
 350        if (k > args->mod)
 351                rlen = rlen - (k - args->mod);
 352        else
 353                rlen = rlen - args->prod + (args->mod - k);
 354        /* casts to (int) catch length underflows */
 355        if ((int)rlen < (int)args->minlen)
 356                return;
 357        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 358        ASSERT(rlen % args->prod == args->mod);
 359        args->len = rlen;
 360}
 361
 362/*
 363 * Fix up length if there is too little space left in the a.g.
 364 * Return 1 if ok, 0 if too little, should give up.
 365 */
 366STATIC int
 367xfs_alloc_fix_minleft(
 368        xfs_alloc_arg_t *args)          /* allocation argument structure */
 369{
 370        xfs_agf_t       *agf;           /* a.g. freelist header */
 371        int             diff;           /* free space difference */
 372
 373        if (args->minleft == 0)
 374                return 1;
 375        agf = XFS_BUF_TO_AGF(args->agbp);
 376        diff = be32_to_cpu(agf->agf_freeblks)
 377                - args->len - args->minleft;
 378        if (diff >= 0)
 379                return 1;
 380        args->len += diff;              /* shrink the allocated space */
 381        /* casts to (int) catch length underflows */
 382        if ((int)args->len >= (int)args->minlen)
 383                return 1;
 384        args->agbno = NULLAGBLOCK;
 385        return 0;
 386}
 387
 388/*
 389 * Update the two btrees, logically removing from freespace the extent
 390 * starting at rbno, rlen blocks.  The extent is contained within the
 391 * actual (current) free extent fbno for flen blocks.
 392 * Flags are passed in indicating whether the cursors are set to the
 393 * relevant records.
 394 */
 395STATIC int                              /* error code */
 396xfs_alloc_fixup_trees(
 397        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 398        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 399        xfs_agblock_t   fbno,           /* starting block of free extent */
 400        xfs_extlen_t    flen,           /* length of free extent */
 401        xfs_agblock_t   rbno,           /* starting block of returned extent */
 402        xfs_extlen_t    rlen,           /* length of returned extent */
 403        int             flags)          /* flags, XFSA_FIXUP_... */
 404{
 405        int             error;          /* error code */
 406        int             i;              /* operation results */
 407        xfs_agblock_t   nfbno1;         /* first new free startblock */
 408        xfs_agblock_t   nfbno2;         /* second new free startblock */
 409        xfs_extlen_t    nflen1=0;       /* first new free length */
 410        xfs_extlen_t    nflen2=0;       /* second new free length */
 411        struct xfs_mount *mp;
 412
 413        mp = cnt_cur->bc_mp;
 414
 415        /*
 416         * Look up the record in the by-size tree if necessary.
 417         */
 418        if (flags & XFSA_FIXUP_CNT_OK) {
 419#ifdef DEBUG
 420                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 421                        return error;
 422                XFS_WANT_CORRUPTED_RETURN(mp,
 423                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 424#endif
 425        } else {
 426                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 427                        return error;
 428                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 429        }
 430        /*
 431         * Look up the record in the by-block tree if necessary.
 432         */
 433        if (flags & XFSA_FIXUP_BNO_OK) {
 434#ifdef DEBUG
 435                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 436                        return error;
 437                XFS_WANT_CORRUPTED_RETURN(mp,
 438                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 439#endif
 440        } else {
 441                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 442                        return error;
 443                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 444        }
 445
 446#ifdef DEBUG
 447        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 448                struct xfs_btree_block  *bnoblock;
 449                struct xfs_btree_block  *cntblock;
 450
 451                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 452                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 453
 454                XFS_WANT_CORRUPTED_RETURN(mp,
 455                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 456        }
 457#endif
 458
 459        /*
 460         * Deal with all four cases: the allocated record is contained
 461         * within the freespace record, so we can have new freespace
 462         * at either (or both) end, or no freespace remaining.
 463         */
 464        if (rbno == fbno && rlen == flen)
 465                nfbno1 = nfbno2 = NULLAGBLOCK;
 466        else if (rbno == fbno) {
 467                nfbno1 = rbno + rlen;
 468                nflen1 = flen - rlen;
 469                nfbno2 = NULLAGBLOCK;
 470        } else if (rbno + rlen == fbno + flen) {
 471                nfbno1 = fbno;
 472                nflen1 = flen - rlen;
 473                nfbno2 = NULLAGBLOCK;
 474        } else {
 475                nfbno1 = fbno;
 476                nflen1 = rbno - fbno;
 477                nfbno2 = rbno + rlen;
 478                nflen2 = (fbno + flen) - nfbno2;
 479        }
 480        /*
 481         * Delete the entry from the by-size btree.
 482         */
 483        if ((error = xfs_btree_delete(cnt_cur, &i)))
 484                return error;
 485        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 486        /*
 487         * Add new by-size btree entry(s).
 488         */
 489        if (nfbno1 != NULLAGBLOCK) {
 490                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 491                        return error;
 492                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 493                if ((error = xfs_btree_insert(cnt_cur, &i)))
 494                        return error;
 495                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 496        }
 497        if (nfbno2 != NULLAGBLOCK) {
 498                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 499                        return error;
 500                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 501                if ((error = xfs_btree_insert(cnt_cur, &i)))
 502                        return error;
 503                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 504        }
 505        /*
 506         * Fix up the by-block btree entry(s).
 507         */
 508        if (nfbno1 == NULLAGBLOCK) {
 509                /*
 510                 * No remaining freespace, just delete the by-block tree entry.
 511                 */
 512                if ((error = xfs_btree_delete(bno_cur, &i)))
 513                        return error;
 514                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 515        } else {
 516                /*
 517                 * Update the by-block entry to start later|be shorter.
 518                 */
 519                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 520                        return error;
 521        }
 522        if (nfbno2 != NULLAGBLOCK) {
 523                /*
 524                 * 2 resulting free entries, need to add one.
 525                 */
 526                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 527                        return error;
 528                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 529                if ((error = xfs_btree_insert(bno_cur, &i)))
 530                        return error;
 531                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 532        }
 533        return 0;
 534}
 535
 536static bool
 537xfs_agfl_verify(
 538        struct xfs_buf  *bp)
 539{
 540        struct xfs_mount *mp = bp->b_target->bt_mount;
 541        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 542        int             i;
 543
 544        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
 545                return false;
 546        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 547                return false;
 548        /*
 549         * during growfs operations, the perag is not fully initialised,
 550         * so we can't use it for any useful checking. growfs ensures we can't
 551         * use it by using uncached buffers that don't have the perag attached
 552         * so we can detect and avoid this problem.
 553         */
 554        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 555                return false;
 556
 557        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 558                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 559                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 560                        return false;
 561        }
 562
 563        return xfs_log_check_lsn(mp,
 564                                 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
 565}
 566
 567static void
 568xfs_agfl_read_verify(
 569        struct xfs_buf  *bp)
 570{
 571        struct xfs_mount *mp = bp->b_target->bt_mount;
 572
 573        /*
 574         * There is no verification of non-crc AGFLs because mkfs does not
 575         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 576         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 577         * can't verify just those entries are valid.
 578         */
 579        if (!xfs_sb_version_hascrc(&mp->m_sb))
 580                return;
 581
 582        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 583                xfs_buf_ioerror(bp, -EFSBADCRC);
 584        else if (!xfs_agfl_verify(bp))
 585                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 586
 587        if (bp->b_error)
 588                xfs_verifier_error(bp);
 589}
 590
 591static void
 592xfs_agfl_write_verify(
 593        struct xfs_buf  *bp)
 594{
 595        struct xfs_mount *mp = bp->b_target->bt_mount;
 596        struct xfs_buf_log_item *bip = bp->b_fspriv;
 597
 598        /* no verification of non-crc AGFLs */
 599        if (!xfs_sb_version_hascrc(&mp->m_sb))
 600                return;
 601
 602        if (!xfs_agfl_verify(bp)) {
 603                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 604                xfs_verifier_error(bp);
 605                return;
 606        }
 607
 608        if (bip)
 609                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 610
 611        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 612}
 613
 614const struct xfs_buf_ops xfs_agfl_buf_ops = {
 615        .name = "xfs_agfl",
 616        .verify_read = xfs_agfl_read_verify,
 617        .verify_write = xfs_agfl_write_verify,
 618};
 619
 620/*
 621 * Read in the allocation group free block array.
 622 */
 623STATIC int                              /* error */
 624xfs_alloc_read_agfl(
 625        xfs_mount_t     *mp,            /* mount point structure */
 626        xfs_trans_t     *tp,            /* transaction pointer */
 627        xfs_agnumber_t  agno,           /* allocation group number */
 628        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 629{
 630        xfs_buf_t       *bp;            /* return value */
 631        int             error;
 632
 633        ASSERT(agno != NULLAGNUMBER);
 634        error = xfs_trans_read_buf(
 635                        mp, tp, mp->m_ddev_targp,
 636                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 637                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 638        if (error)
 639                return error;
 640        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 641        *bpp = bp;
 642        return 0;
 643}
 644
 645STATIC int
 646xfs_alloc_update_counters(
 647        struct xfs_trans        *tp,
 648        struct xfs_perag        *pag,
 649        struct xfs_buf          *agbp,
 650        long                    len)
 651{
 652        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 653
 654        pag->pagf_freeblks += len;
 655        be32_add_cpu(&agf->agf_freeblks, len);
 656
 657        xfs_trans_agblocks_delta(tp, len);
 658        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 659                     be32_to_cpu(agf->agf_length)))
 660                return -EFSCORRUPTED;
 661
 662        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 663        return 0;
 664}
 665
 666/*
 667 * Allocation group level functions.
 668 */
 669
 670/*
 671 * Allocate a variable extent in the allocation group agno.
 672 * Type and bno are used to determine where in the allocation group the
 673 * extent will start.
 674 * Extent's length (returned in *len) will be between minlen and maxlen,
 675 * and of the form k * prod + mod unless there's nothing that large.
 676 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 677 */
 678STATIC int                      /* error */
 679xfs_alloc_ag_vextent(
 680        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 681{
 682        int             error=0;
 683
 684        ASSERT(args->minlen > 0);
 685        ASSERT(args->maxlen > 0);
 686        ASSERT(args->minlen <= args->maxlen);
 687        ASSERT(args->mod < args->prod);
 688        ASSERT(args->alignment > 0);
 689        /*
 690         * Branch to correct routine based on the type.
 691         */
 692        args->wasfromfl = 0;
 693        switch (args->type) {
 694        case XFS_ALLOCTYPE_THIS_AG:
 695                error = xfs_alloc_ag_vextent_size(args);
 696                break;
 697        case XFS_ALLOCTYPE_NEAR_BNO:
 698                error = xfs_alloc_ag_vextent_near(args);
 699                break;
 700        case XFS_ALLOCTYPE_THIS_BNO:
 701                error = xfs_alloc_ag_vextent_exact(args);
 702                break;
 703        default:
 704                ASSERT(0);
 705                /* NOTREACHED */
 706        }
 707
 708        if (error || args->agbno == NULLAGBLOCK)
 709                return error;
 710
 711        ASSERT(args->len >= args->minlen);
 712        ASSERT(args->len <= args->maxlen);
 713        ASSERT(!args->wasfromfl || !args->isfl);
 714        ASSERT(args->agbno % args->alignment == 0);
 715
 716        /* if not file data, insert new block into the reverse map btree */
 717        if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
 718                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 719                                       args->agbno, args->len, &args->oinfo);
 720                if (error)
 721                        return error;
 722        }
 723
 724        if (!args->wasfromfl) {
 725                error = xfs_alloc_update_counters(args->tp, args->pag,
 726                                                  args->agbp,
 727                                                  -((long)(args->len)));
 728                if (error)
 729                        return error;
 730
 731                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 732                                              args->agbno, args->len));
 733        }
 734
 735        if (!args->isfl) {
 736                xfs_trans_mod_sb(args->tp, args->wasdel ?
 737                                 XFS_TRANS_SB_RES_FDBLOCKS :
 738                                 XFS_TRANS_SB_FDBLOCKS,
 739                                 -((long)(args->len)));
 740        }
 741
 742        XFS_STATS_INC(args->mp, xs_allocx);
 743        XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 744        return error;
 745}
 746
 747/*
 748 * Allocate a variable extent at exactly agno/bno.
 749 * Extent's length (returned in *len) will be between minlen and maxlen,
 750 * and of the form k * prod + mod unless there's nothing that large.
 751 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 752 */
 753STATIC int                      /* error */
 754xfs_alloc_ag_vextent_exact(
 755        xfs_alloc_arg_t *args)  /* allocation argument structure */
 756{
 757        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 758        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 759        int             error;
 760        xfs_agblock_t   fbno;   /* start block of found extent */
 761        xfs_extlen_t    flen;   /* length of found extent */
 762        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 763        xfs_extlen_t    tlen;   /* length of trimmed extent */
 764        xfs_agblock_t   tend;   /* end block of trimmed extent */
 765        int             i;      /* success/failure of operation */
 766
 767        ASSERT(args->alignment == 1);
 768
 769        /*
 770         * Allocate/initialize a cursor for the by-number freespace btree.
 771         */
 772        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 773                                          args->agno, XFS_BTNUM_BNO);
 774
 775        /*
 776         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 777         * Look for the closest free block <= bno, it must contain bno
 778         * if any free block does.
 779         */
 780        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 781        if (error)
 782                goto error0;
 783        if (!i)
 784                goto not_found;
 785
 786        /*
 787         * Grab the freespace record.
 788         */
 789        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 790        if (error)
 791                goto error0;
 792        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 793        ASSERT(fbno <= args->agbno);
 794
 795        /*
 796         * Check for overlapping busy extents.
 797         */
 798        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 799
 800        /*
 801         * Give up if the start of the extent is busy, or the freespace isn't
 802         * long enough for the minimum request.
 803         */
 804        if (tbno > args->agbno)
 805                goto not_found;
 806        if (tlen < args->minlen)
 807                goto not_found;
 808        tend = tbno + tlen;
 809        if (tend < args->agbno + args->minlen)
 810                goto not_found;
 811
 812        /*
 813         * End of extent will be smaller of the freespace end and the
 814         * maximal requested end.
 815         *
 816         * Fix the length according to mod and prod if given.
 817         */
 818        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 819                                                - args->agbno;
 820        xfs_alloc_fix_len(args);
 821        if (!xfs_alloc_fix_minleft(args))
 822                goto not_found;
 823
 824        ASSERT(args->agbno + args->len <= tend);
 825
 826        /*
 827         * We are allocating agbno for args->len
 828         * Allocate/initialize a cursor for the by-size btree.
 829         */
 830        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 831                args->agno, XFS_BTNUM_CNT);
 832        ASSERT(args->agbno + args->len <=
 833                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 834        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 835                                      args->len, XFSA_FIXUP_BNO_OK);
 836        if (error) {
 837                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 838                goto error0;
 839        }
 840
 841        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 842        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 843
 844        args->wasfromfl = 0;
 845        trace_xfs_alloc_exact_done(args);
 846        return 0;
 847
 848not_found:
 849        /* Didn't find it, return null. */
 850        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 851        args->agbno = NULLAGBLOCK;
 852        trace_xfs_alloc_exact_notfound(args);
 853        return 0;
 854
 855error0:
 856        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 857        trace_xfs_alloc_exact_error(args);
 858        return error;
 859}
 860
 861/*
 862 * Search the btree in a given direction via the search cursor and compare
 863 * the records found against the good extent we've already found.
 864 */
 865STATIC int
 866xfs_alloc_find_best_extent(
 867        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 868        struct xfs_btree_cur    **gcur, /* good cursor */
 869        struct xfs_btree_cur    **scur, /* searching cursor */
 870        xfs_agblock_t           gdiff,  /* difference for search comparison */
 871        xfs_agblock_t           *sbno,  /* extent found by search */
 872        xfs_extlen_t            *slen,  /* extent length */
 873        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 874        xfs_extlen_t            *slena, /* aligned extent length */
 875        int                     dir)    /* 0 = search right, 1 = search left */
 876{
 877        xfs_agblock_t           new;
 878        xfs_agblock_t           sdiff;
 879        int                     error;
 880        int                     i;
 881
 882        /* The good extent is perfect, no need to  search. */
 883        if (!gdiff)
 884                goto out_use_good;
 885
 886        /*
 887         * Look until we find a better one, run out of space or run off the end.
 888         */
 889        do {
 890                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 891                if (error)
 892                        goto error0;
 893                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 894                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 895
 896                /*
 897                 * The good extent is closer than this one.
 898                 */
 899                if (!dir) {
 900                        if (*sbnoa > args->max_agbno)
 901                                goto out_use_good;
 902                        if (*sbnoa >= args->agbno + gdiff)
 903                                goto out_use_good;
 904                } else {
 905                        if (*sbnoa < args->min_agbno)
 906                                goto out_use_good;
 907                        if (*sbnoa <= args->agbno - gdiff)
 908                                goto out_use_good;
 909                }
 910
 911                /*
 912                 * Same distance, compare length and pick the best.
 913                 */
 914                if (*slena >= args->minlen) {
 915                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 916                        xfs_alloc_fix_len(args);
 917
 918                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 919                                                       args->alignment,
 920                                                       args->userdata, *sbnoa,
 921                                                       *slena, &new);
 922
 923                        /*
 924                         * Choose closer size and invalidate other cursor.
 925                         */
 926                        if (sdiff < gdiff)
 927                                goto out_use_search;
 928                        goto out_use_good;
 929                }
 930
 931                if (!dir)
 932                        error = xfs_btree_increment(*scur, 0, &i);
 933                else
 934                        error = xfs_btree_decrement(*scur, 0, &i);
 935                if (error)
 936                        goto error0;
 937        } while (i);
 938
 939out_use_good:
 940        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 941        *scur = NULL;
 942        return 0;
 943
 944out_use_search:
 945        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 946        *gcur = NULL;
 947        return 0;
 948
 949error0:
 950        /* caller invalidates cursors */
 951        return error;
 952}
 953
 954/*
 955 * Allocate a variable extent near bno in the allocation group agno.
 956 * Extent's length (returned in len) will be between minlen and maxlen,
 957 * and of the form k * prod + mod unless there's nothing that large.
 958 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 959 */
 960STATIC int                              /* error */
 961xfs_alloc_ag_vextent_near(
 962        xfs_alloc_arg_t *args)          /* allocation argument structure */
 963{
 964        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 965        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 966        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 967        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 968        xfs_agblock_t   gtbnoa;         /* aligned ... */
 969        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 970        xfs_extlen_t    gtlen;          /* length of right side entry */
 971        xfs_extlen_t    gtlena;         /* aligned ... */
 972        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 973        int             error;          /* error code */
 974        int             i;              /* result code, temporary */
 975        int             j;              /* result code, temporary */
 976        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 977        xfs_agblock_t   ltbnoa;         /* aligned ... */
 978        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 979        xfs_extlen_t    ltlen;          /* length of left side entry */
 980        xfs_extlen_t    ltlena;         /* aligned ... */
 981        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 982        xfs_extlen_t    rlen;           /* length of returned extent */
 983        int             forced = 0;
 984#ifdef DEBUG
 985        /*
 986         * Randomly don't execute the first algorithm.
 987         */
 988        int             dofirst;        /* set to do first algorithm */
 989
 990        dofirst = prandom_u32() & 1;
 991#endif
 992
 993        /* handle unitialized agbno range so caller doesn't have to */
 994        if (!args->min_agbno && !args->max_agbno)
 995                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 996        ASSERT(args->min_agbno <= args->max_agbno);
 997
 998        /* clamp agbno to the range if it's outside */
 999        if (args->agbno < args->min_agbno)
1000                args->agbno = args->min_agbno;
1001        if (args->agbno > args->max_agbno)
1002                args->agbno = args->max_agbno;
1003
1004restart:
1005        bno_cur_lt = NULL;
1006        bno_cur_gt = NULL;
1007        ltlen = 0;
1008        gtlena = 0;
1009        ltlena = 0;
1010
1011        /*
1012         * Get a cursor for the by-size btree.
1013         */
1014        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1015                args->agno, XFS_BTNUM_CNT);
1016
1017        /*
1018         * See if there are any free extents as big as maxlen.
1019         */
1020        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1021                goto error0;
1022        /*
1023         * If none, then pick up the last entry in the tree unless the
1024         * tree is empty.
1025         */
1026        if (!i) {
1027                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1028                                &ltlen, &i)))
1029                        goto error0;
1030                if (i == 0 || ltlen == 0) {
1031                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1032                        trace_xfs_alloc_near_noentry(args);
1033                        return 0;
1034                }
1035                ASSERT(i == 1);
1036        }
1037        args->wasfromfl = 0;
1038
1039        /*
1040         * First algorithm.
1041         * If the requested extent is large wrt the freespaces available
1042         * in this a.g., then the cursor will be pointing to a btree entry
1043         * near the right edge of the tree.  If it's in the last btree leaf
1044         * block, then we just examine all the entries in that block
1045         * that are big enough, and pick the best one.
1046         * This is written as a while loop so we can break out of it,
1047         * but we never loop back to the top.
1048         */
1049        while (xfs_btree_islastblock(cnt_cur, 0)) {
1050                xfs_extlen_t    bdiff;
1051                int             besti=0;
1052                xfs_extlen_t    blen=0;
1053                xfs_agblock_t   bnew=0;
1054
1055#ifdef DEBUG
1056                if (dofirst)
1057                        break;
1058#endif
1059                /*
1060                 * Start from the entry that lookup found, sequence through
1061                 * all larger free blocks.  If we're actually pointing at a
1062                 * record smaller than maxlen, go to the start of this block,
1063                 * and skip all those smaller than minlen.
1064                 */
1065                if (ltlen || args->alignment > 1) {
1066                        cnt_cur->bc_ptrs[0] = 1;
1067                        do {
1068                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1069                                                &ltlen, &i)))
1070                                        goto error0;
1071                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1072                                if (ltlen >= args->minlen)
1073                                        break;
1074                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1075                                        goto error0;
1076                        } while (i);
1077                        ASSERT(ltlen >= args->minlen);
1078                        if (!i)
1079                                break;
1080                }
1081                i = cnt_cur->bc_ptrs[0];
1082                for (j = 1, blen = 0, bdiff = 0;
1083                     !error && j && (blen < args->maxlen || bdiff > 0);
1084                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1085                        /*
1086                         * For each entry, decide if it's better than
1087                         * the previous best entry.
1088                         */
1089                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1090                                goto error0;
1091                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1092                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1093                                                  &ltbnoa, &ltlena);
1094                        if (ltlena < args->minlen)
1095                                continue;
1096                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1097                                continue;
1098                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1099                        xfs_alloc_fix_len(args);
1100                        ASSERT(args->len >= args->minlen);
1101                        if (args->len < blen)
1102                                continue;
1103                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1104                                args->alignment, args->userdata, ltbnoa,
1105                                ltlena, &ltnew);
1106                        if (ltnew != NULLAGBLOCK &&
1107                            (args->len > blen || ltdiff < bdiff)) {
1108                                bdiff = ltdiff;
1109                                bnew = ltnew;
1110                                blen = args->len;
1111                                besti = cnt_cur->bc_ptrs[0];
1112                        }
1113                }
1114                /*
1115                 * It didn't work.  We COULD be in a case where
1116                 * there's a good record somewhere, so try again.
1117                 */
1118                if (blen == 0)
1119                        break;
1120                /*
1121                 * Point at the best entry, and retrieve it again.
1122                 */
1123                cnt_cur->bc_ptrs[0] = besti;
1124                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1125                        goto error0;
1126                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1127                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1128                args->len = blen;
1129                if (!xfs_alloc_fix_minleft(args)) {
1130                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1131                        trace_xfs_alloc_near_nominleft(args);
1132                        return 0;
1133                }
1134                blen = args->len;
1135                /*
1136                 * We are allocating starting at bnew for blen blocks.
1137                 */
1138                args->agbno = bnew;
1139                ASSERT(bnew >= ltbno);
1140                ASSERT(bnew + blen <= ltbno + ltlen);
1141                /*
1142                 * Set up a cursor for the by-bno tree.
1143                 */
1144                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1145                        args->agbp, args->agno, XFS_BTNUM_BNO);
1146                /*
1147                 * Fix up the btree entries.
1148                 */
1149                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1150                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1151                        goto error0;
1152                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1153                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1154
1155                trace_xfs_alloc_near_first(args);
1156                return 0;
1157        }
1158        /*
1159         * Second algorithm.
1160         * Search in the by-bno tree to the left and to the right
1161         * simultaneously, until in each case we find a space big enough,
1162         * or run into the edge of the tree.  When we run into the edge,
1163         * we deallocate that cursor.
1164         * If both searches succeed, we compare the two spaces and pick
1165         * the better one.
1166         * With alignment, it's possible for both to fail; the upper
1167         * level algorithm that picks allocation groups for allocations
1168         * is not supposed to do this.
1169         */
1170        /*
1171         * Allocate and initialize the cursor for the leftward search.
1172         */
1173        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1174                args->agno, XFS_BTNUM_BNO);
1175        /*
1176         * Lookup <= bno to find the leftward search's starting point.
1177         */
1178        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1179                goto error0;
1180        if (!i) {
1181                /*
1182                 * Didn't find anything; use this cursor for the rightward
1183                 * search.
1184                 */
1185                bno_cur_gt = bno_cur_lt;
1186                bno_cur_lt = NULL;
1187        }
1188        /*
1189         * Found something.  Duplicate the cursor for the rightward search.
1190         */
1191        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1192                goto error0;
1193        /*
1194         * Increment the cursor, so we will point at the entry just right
1195         * of the leftward entry if any, or to the leftmost entry.
1196         */
1197        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1198                goto error0;
1199        if (!i) {
1200                /*
1201                 * It failed, there are no rightward entries.
1202                 */
1203                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1204                bno_cur_gt = NULL;
1205        }
1206        /*
1207         * Loop going left with the leftward cursor, right with the
1208         * rightward cursor, until either both directions give up or
1209         * we find an entry at least as big as minlen.
1210         */
1211        do {
1212                if (bno_cur_lt) {
1213                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1214                                goto error0;
1215                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1216                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1217                                                  &ltbnoa, &ltlena);
1218                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1219                                break;
1220                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1221                                goto error0;
1222                        if (!i || ltbnoa < args->min_agbno) {
1223                                xfs_btree_del_cursor(bno_cur_lt,
1224                                                     XFS_BTREE_NOERROR);
1225                                bno_cur_lt = NULL;
1226                        }
1227                }
1228                if (bno_cur_gt) {
1229                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1230                                goto error0;
1231                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1232                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1233                                                  &gtbnoa, &gtlena);
1234                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1235                                break;
1236                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1237                                goto error0;
1238                        if (!i || gtbnoa > args->max_agbno) {
1239                                xfs_btree_del_cursor(bno_cur_gt,
1240                                                     XFS_BTREE_NOERROR);
1241                                bno_cur_gt = NULL;
1242                        }
1243                }
1244        } while (bno_cur_lt || bno_cur_gt);
1245
1246        /*
1247         * Got both cursors still active, need to find better entry.
1248         */
1249        if (bno_cur_lt && bno_cur_gt) {
1250                if (ltlena >= args->minlen) {
1251                        /*
1252                         * Left side is good, look for a right side entry.
1253                         */
1254                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1255                        xfs_alloc_fix_len(args);
1256                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1257                                args->alignment, args->userdata, ltbnoa,
1258                                ltlena, &ltnew);
1259
1260                        error = xfs_alloc_find_best_extent(args,
1261                                                &bno_cur_lt, &bno_cur_gt,
1262                                                ltdiff, &gtbno, &gtlen,
1263                                                &gtbnoa, &gtlena,
1264                                                0 /* search right */);
1265                } else {
1266                        ASSERT(gtlena >= args->minlen);
1267
1268                        /*
1269                         * Right side is good, look for a left side entry.
1270                         */
1271                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1272                        xfs_alloc_fix_len(args);
1273                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1274                                args->alignment, args->userdata, gtbnoa,
1275                                gtlena, &gtnew);
1276
1277                        error = xfs_alloc_find_best_extent(args,
1278                                                &bno_cur_gt, &bno_cur_lt,
1279                                                gtdiff, &ltbno, &ltlen,
1280                                                &ltbnoa, &ltlena,
1281                                                1 /* search left */);
1282                }
1283
1284                if (error)
1285                        goto error0;
1286        }
1287
1288        /*
1289         * If we couldn't get anything, give up.
1290         */
1291        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1292                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1293
1294                if (!forced++) {
1295                        trace_xfs_alloc_near_busy(args);
1296                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1297                        goto restart;
1298                }
1299                trace_xfs_alloc_size_neither(args);
1300                args->agbno = NULLAGBLOCK;
1301                return 0;
1302        }
1303
1304        /*
1305         * At this point we have selected a freespace entry, either to the
1306         * left or to the right.  If it's on the right, copy all the
1307         * useful variables to the "left" set so we only have one
1308         * copy of this code.
1309         */
1310        if (bno_cur_gt) {
1311                bno_cur_lt = bno_cur_gt;
1312                bno_cur_gt = NULL;
1313                ltbno = gtbno;
1314                ltbnoa = gtbnoa;
1315                ltlen = gtlen;
1316                ltlena = gtlena;
1317                j = 1;
1318        } else
1319                j = 0;
1320
1321        /*
1322         * Fix up the length and compute the useful address.
1323         */
1324        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1325        xfs_alloc_fix_len(args);
1326        if (!xfs_alloc_fix_minleft(args)) {
1327                trace_xfs_alloc_near_nominleft(args);
1328                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1329                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1330                return 0;
1331        }
1332        rlen = args->len;
1333        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1334                                     args->userdata, ltbnoa, ltlena, &ltnew);
1335        ASSERT(ltnew >= ltbno);
1336        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1337        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1338        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1339        args->agbno = ltnew;
1340
1341        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1342                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1343                goto error0;
1344
1345        if (j)
1346                trace_xfs_alloc_near_greater(args);
1347        else
1348                trace_xfs_alloc_near_lesser(args);
1349
1350        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1351        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1352        return 0;
1353
1354 error0:
1355        trace_xfs_alloc_near_error(args);
1356        if (cnt_cur != NULL)
1357                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1358        if (bno_cur_lt != NULL)
1359                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1360        if (bno_cur_gt != NULL)
1361                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1362        return error;
1363}
1364
1365/*
1366 * Allocate a variable extent anywhere in the allocation group agno.
1367 * Extent's length (returned in len) will be between minlen and maxlen,
1368 * and of the form k * prod + mod unless there's nothing that large.
1369 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1370 */
1371STATIC int                              /* error */
1372xfs_alloc_ag_vextent_size(
1373        xfs_alloc_arg_t *args)          /* allocation argument structure */
1374{
1375        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1376        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1377        int             error;          /* error result */
1378        xfs_agblock_t   fbno;           /* start of found freespace */
1379        xfs_extlen_t    flen;           /* length of found freespace */
1380        int             i;              /* temp status variable */
1381        xfs_agblock_t   rbno;           /* returned block number */
1382        xfs_extlen_t    rlen;           /* length of returned extent */
1383        int             forced = 0;
1384
1385restart:
1386        /*
1387         * Allocate and initialize a cursor for the by-size btree.
1388         */
1389        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1390                args->agno, XFS_BTNUM_CNT);
1391        bno_cur = NULL;
1392
1393        /*
1394         * Look for an entry >= maxlen+alignment-1 blocks.
1395         */
1396        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1397                        args->maxlen + args->alignment - 1, &i)))
1398                goto error0;
1399
1400        /*
1401         * If none or we have busy extents that we cannot allocate from, then
1402         * we have to settle for a smaller extent. In the case that there are
1403         * no large extents, this will return the last entry in the tree unless
1404         * the tree is empty. In the case that there are only busy large
1405         * extents, this will return the largest small extent unless there
1406         * are no smaller extents available.
1407         */
1408        if (!i || forced > 1) {
1409                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1410                                                   &fbno, &flen, &i);
1411                if (error)
1412                        goto error0;
1413                if (i == 0 || flen == 0) {
1414                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1415                        trace_xfs_alloc_size_noentry(args);
1416                        return 0;
1417                }
1418                ASSERT(i == 1);
1419                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1420        } else {
1421                /*
1422                 * Search for a non-busy extent that is large enough.
1423                 * If we are at low space, don't check, or if we fall of
1424                 * the end of the btree, turn off the busy check and
1425                 * restart.
1426                 */
1427                for (;;) {
1428                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1429                        if (error)
1430                                goto error0;
1431                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1432
1433                        xfs_alloc_compute_aligned(args, fbno, flen,
1434                                                  &rbno, &rlen);
1435
1436                        if (rlen >= args->maxlen)
1437                                break;
1438
1439                        error = xfs_btree_increment(cnt_cur, 0, &i);
1440                        if (error)
1441                                goto error0;
1442                        if (i == 0) {
1443                                /*
1444                                 * Our only valid extents must have been busy.
1445                                 * Make it unbusy by forcing the log out and
1446                                 * retrying. If we've been here before, forcing
1447                                 * the log isn't making the extents available,
1448                                 * which means they have probably been freed in
1449                                 * this transaction.  In that case, we have to
1450                                 * give up on them and we'll attempt a minlen
1451                                 * allocation the next time around.
1452                                 */
1453                                xfs_btree_del_cursor(cnt_cur,
1454                                                     XFS_BTREE_NOERROR);
1455                                trace_xfs_alloc_size_busy(args);
1456                                if (!forced++)
1457                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1458                                goto restart;
1459                        }
1460                }
1461        }
1462
1463        /*
1464         * In the first case above, we got the last entry in the
1465         * by-size btree.  Now we check to see if the space hits maxlen
1466         * once aligned; if not, we search left for something better.
1467         * This can't happen in the second case above.
1468         */
1469        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1470        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1471                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1472        if (rlen < args->maxlen) {
1473                xfs_agblock_t   bestfbno;
1474                xfs_extlen_t    bestflen;
1475                xfs_agblock_t   bestrbno;
1476                xfs_extlen_t    bestrlen;
1477
1478                bestrlen = rlen;
1479                bestrbno = rbno;
1480                bestflen = flen;
1481                bestfbno = fbno;
1482                for (;;) {
1483                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1484                                goto error0;
1485                        if (i == 0)
1486                                break;
1487                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1488                                        &i)))
1489                                goto error0;
1490                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1491                        if (flen < bestrlen)
1492                                break;
1493                        xfs_alloc_compute_aligned(args, fbno, flen,
1494                                                  &rbno, &rlen);
1495                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1496                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1497                                (rlen <= flen && rbno + rlen <= fbno + flen),
1498                                error0);
1499                        if (rlen > bestrlen) {
1500                                bestrlen = rlen;
1501                                bestrbno = rbno;
1502                                bestflen = flen;
1503                                bestfbno = fbno;
1504                                if (rlen == args->maxlen)
1505                                        break;
1506                        }
1507                }
1508                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1509                                &i)))
1510                        goto error0;
1511                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1512                rlen = bestrlen;
1513                rbno = bestrbno;
1514                flen = bestflen;
1515                fbno = bestfbno;
1516        }
1517        args->wasfromfl = 0;
1518        /*
1519         * Fix up the length.
1520         */
1521        args->len = rlen;
1522        if (rlen < args->minlen) {
1523                if (!forced++) {
1524                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1525                        trace_xfs_alloc_size_busy(args);
1526                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1527                        goto restart;
1528                }
1529                goto out_nominleft;
1530        }
1531        xfs_alloc_fix_len(args);
1532
1533        if (!xfs_alloc_fix_minleft(args))
1534                goto out_nominleft;
1535        rlen = args->len;
1536        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1537        /*
1538         * Allocate and initialize a cursor for the by-block tree.
1539         */
1540        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1541                args->agno, XFS_BTNUM_BNO);
1542        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1543                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1544                goto error0;
1545        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1546        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1547        cnt_cur = bno_cur = NULL;
1548        args->len = rlen;
1549        args->agbno = rbno;
1550        XFS_WANT_CORRUPTED_GOTO(args->mp,
1551                args->agbno + args->len <=
1552                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1553                error0);
1554        trace_xfs_alloc_size_done(args);
1555        return 0;
1556
1557error0:
1558        trace_xfs_alloc_size_error(args);
1559        if (cnt_cur)
1560                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1561        if (bno_cur)
1562                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1563        return error;
1564
1565out_nominleft:
1566        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1567        trace_xfs_alloc_size_nominleft(args);
1568        args->agbno = NULLAGBLOCK;
1569        return 0;
1570}
1571
1572/*
1573 * Deal with the case where only small freespaces remain.
1574 * Either return the contents of the last freespace record,
1575 * or allocate space from the freelist if there is nothing in the tree.
1576 */
1577STATIC int                      /* error */
1578xfs_alloc_ag_vextent_small(
1579        xfs_alloc_arg_t *args,  /* allocation argument structure */
1580        xfs_btree_cur_t *ccur,  /* by-size cursor */
1581        xfs_agblock_t   *fbnop, /* result block number */
1582        xfs_extlen_t    *flenp, /* result length */
1583        int             *stat)  /* status: 0-freelist, 1-normal/none */
1584{
1585        struct xfs_owner_info   oinfo;
1586        int             error;
1587        xfs_agblock_t   fbno;
1588        xfs_extlen_t    flen;
1589        int             i;
1590
1591        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1592                goto error0;
1593        if (i) {
1594                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1595                        goto error0;
1596                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1597        }
1598        /*
1599         * Nothing in the btree, try the freelist.  Make sure
1600         * to respect minleft even when pulling from the
1601         * freelist.
1602         */
1603        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1604                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1605                  > args->minleft)) {
1606                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1607                if (error)
1608                        goto error0;
1609                if (fbno != NULLAGBLOCK) {
1610                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1611                                             args->userdata);
1612
1613                        if (args->userdata) {
1614                                xfs_buf_t       *bp;
1615
1616                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1617                                        args->agno, fbno, 0);
1618                                xfs_trans_binval(args->tp, bp);
1619                        }
1620                        args->len = 1;
1621                        args->agbno = fbno;
1622                        XFS_WANT_CORRUPTED_GOTO(args->mp,
1623                                args->agbno + args->len <=
1624                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1625                                error0);
1626                        args->wasfromfl = 1;
1627                        trace_xfs_alloc_small_freelist(args);
1628
1629                        /*
1630                         * If we're feeding an AGFL block to something that
1631                         * doesn't live in the free space, we need to clear
1632                         * out the OWN_AG rmap.
1633                         */
1634                        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1635                        error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1636                                        fbno, 1, &oinfo);
1637                        if (error)
1638                                goto error0;
1639
1640                        *stat = 0;
1641                        return 0;
1642                }
1643                /*
1644                 * Nothing in the freelist.
1645                 */
1646                else
1647                        flen = 0;
1648        }
1649        /*
1650         * Can't allocate from the freelist for some reason.
1651         */
1652        else {
1653                fbno = NULLAGBLOCK;
1654                flen = 0;
1655        }
1656        /*
1657         * Can't do the allocation, give up.
1658         */
1659        if (flen < args->minlen) {
1660                args->agbno = NULLAGBLOCK;
1661                trace_xfs_alloc_small_notenough(args);
1662                flen = 0;
1663        }
1664        *fbnop = fbno;
1665        *flenp = flen;
1666        *stat = 1;
1667        trace_xfs_alloc_small_done(args);
1668        return 0;
1669
1670error0:
1671        trace_xfs_alloc_small_error(args);
1672        return error;
1673}
1674
1675/*
1676 * Free the extent starting at agno/bno for length.
1677 */
1678STATIC int
1679xfs_free_ag_extent(
1680        xfs_trans_t             *tp,
1681        xfs_buf_t               *agbp,
1682        xfs_agnumber_t          agno,
1683        xfs_agblock_t           bno,
1684        xfs_extlen_t            len,
1685        struct xfs_owner_info   *oinfo,
1686        int                     isfl)
1687{
1688        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1689        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1690        int             error;          /* error return value */
1691        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1692        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1693        int             haveleft;       /* have a left neighbor block */
1694        int             haveright;      /* have a right neighbor block */
1695        int             i;              /* temp, result code */
1696        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1697        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1698        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1699        xfs_agblock_t   nbno;           /* new starting block of freespace */
1700        xfs_extlen_t    nlen;           /* new length of freespace */
1701        xfs_perag_t     *pag;           /* per allocation group data */
1702
1703        bno_cur = cnt_cur = NULL;
1704        mp = tp->t_mountp;
1705
1706        if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1707                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1708                if (error)
1709                        goto error0;
1710        }
1711
1712        /*
1713         * Allocate and initialize a cursor for the by-block btree.
1714         */
1715        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1716        /*
1717         * Look for a neighboring block on the left (lower block numbers)
1718         * that is contiguous with this space.
1719         */
1720        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1721                goto error0;
1722        if (haveleft) {
1723                /*
1724                 * There is a block to our left.
1725                 */
1726                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1727                        goto error0;
1728                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1729                /*
1730                 * It's not contiguous, though.
1731                 */
1732                if (ltbno + ltlen < bno)
1733                        haveleft = 0;
1734                else {
1735                        /*
1736                         * If this failure happens the request to free this
1737                         * space was invalid, it's (partly) already free.
1738                         * Very bad.
1739                         */
1740                        XFS_WANT_CORRUPTED_GOTO(mp,
1741                                                ltbno + ltlen <= bno, error0);
1742                }
1743        }
1744        /*
1745         * Look for a neighboring block on the right (higher block numbers)
1746         * that is contiguous with this space.
1747         */
1748        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1749                goto error0;
1750        if (haveright) {
1751                /*
1752                 * There is a block to our right.
1753                 */
1754                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1755                        goto error0;
1756                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1757                /*
1758                 * It's not contiguous, though.
1759                 */
1760                if (bno + len < gtbno)
1761                        haveright = 0;
1762                else {
1763                        /*
1764                         * If this failure happens the request to free this
1765                         * space was invalid, it's (partly) already free.
1766                         * Very bad.
1767                         */
1768                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1769                }
1770        }
1771        /*
1772         * Now allocate and initialize a cursor for the by-size tree.
1773         */
1774        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1775        /*
1776         * Have both left and right contiguous neighbors.
1777         * Merge all three into a single free block.
1778         */
1779        if (haveleft && haveright) {
1780                /*
1781                 * Delete the old by-size entry on the left.
1782                 */
1783                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1784                        goto error0;
1785                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1786                if ((error = xfs_btree_delete(cnt_cur, &i)))
1787                        goto error0;
1788                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1789                /*
1790                 * Delete the old by-size entry on the right.
1791                 */
1792                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1793                        goto error0;
1794                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1795                if ((error = xfs_btree_delete(cnt_cur, &i)))
1796                        goto error0;
1797                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1798                /*
1799                 * Delete the old by-block entry for the right block.
1800                 */
1801                if ((error = xfs_btree_delete(bno_cur, &i)))
1802                        goto error0;
1803                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1804                /*
1805                 * Move the by-block cursor back to the left neighbor.
1806                 */
1807                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1808                        goto error0;
1809                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1810#ifdef DEBUG
1811                /*
1812                 * Check that this is the right record: delete didn't
1813                 * mangle the cursor.
1814                 */
1815                {
1816                        xfs_agblock_t   xxbno;
1817                        xfs_extlen_t    xxlen;
1818
1819                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1820                                        &i)))
1821                                goto error0;
1822                        XFS_WANT_CORRUPTED_GOTO(mp,
1823                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1824                                error0);
1825                }
1826#endif
1827                /*
1828                 * Update remaining by-block entry to the new, joined block.
1829                 */
1830                nbno = ltbno;
1831                nlen = len + ltlen + gtlen;
1832                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1833                        goto error0;
1834        }
1835        /*
1836         * Have only a left contiguous neighbor.
1837         * Merge it together with the new freespace.
1838         */
1839        else if (haveleft) {
1840                /*
1841                 * Delete the old by-size entry on the left.
1842                 */
1843                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1844                        goto error0;
1845                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1846                if ((error = xfs_btree_delete(cnt_cur, &i)))
1847                        goto error0;
1848                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1849                /*
1850                 * Back up the by-block cursor to the left neighbor, and
1851                 * update its length.
1852                 */
1853                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1854                        goto error0;
1855                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1856                nbno = ltbno;
1857                nlen = len + ltlen;
1858                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1859                        goto error0;
1860        }
1861        /*
1862         * Have only a right contiguous neighbor.
1863         * Merge it together with the new freespace.
1864         */
1865        else if (haveright) {
1866                /*
1867                 * Delete the old by-size entry on the right.
1868                 */
1869                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1870                        goto error0;
1871                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1872                if ((error = xfs_btree_delete(cnt_cur, &i)))
1873                        goto error0;
1874                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1875                /*
1876                 * Update the starting block and length of the right
1877                 * neighbor in the by-block tree.
1878                 */
1879                nbno = bno;
1880                nlen = len + gtlen;
1881                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1882                        goto error0;
1883        }
1884        /*
1885         * No contiguous neighbors.
1886         * Insert the new freespace into the by-block tree.
1887         */
1888        else {
1889                nbno = bno;
1890                nlen = len;
1891                if ((error = xfs_btree_insert(bno_cur, &i)))
1892                        goto error0;
1893                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1894        }
1895        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1896        bno_cur = NULL;
1897        /*
1898         * In all cases we need to insert the new freespace in the by-size tree.
1899         */
1900        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1901                goto error0;
1902        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1903        if ((error = xfs_btree_insert(cnt_cur, &i)))
1904                goto error0;
1905        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1906        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1907        cnt_cur = NULL;
1908
1909        /*
1910         * Update the freespace totals in the ag and superblock.
1911         */
1912        pag = xfs_perag_get(mp, agno);
1913        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1914        xfs_perag_put(pag);
1915        if (error)
1916                goto error0;
1917
1918        if (!isfl)
1919                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1920        XFS_STATS_INC(mp, xs_freex);
1921        XFS_STATS_ADD(mp, xs_freeb, len);
1922
1923        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1924
1925        return 0;
1926
1927 error0:
1928        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1929        if (bno_cur)
1930                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931        if (cnt_cur)
1932                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1933        return error;
1934}
1935
1936/*
1937 * Visible (exported) allocation/free functions.
1938 * Some of these are used just by xfs_alloc_btree.c and this file.
1939 */
1940
1941/*
1942 * Compute and fill in value of m_ag_maxlevels.
1943 */
1944void
1945xfs_alloc_compute_maxlevels(
1946        xfs_mount_t     *mp)    /* file system mount structure */
1947{
1948        mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1949                        (mp->m_sb.sb_agblocks + 1) / 2);
1950}
1951
1952/*
1953 * Find the length of the longest extent in an AG.
1954 */
1955xfs_extlen_t
1956xfs_alloc_longest_free_extent(
1957        struct xfs_mount        *mp,
1958        struct xfs_perag        *pag,
1959        xfs_extlen_t            need)
1960{
1961        xfs_extlen_t            delta = 0;
1962
1963        if (need > pag->pagf_flcount)
1964                delta = need - pag->pagf_flcount;
1965
1966        if (pag->pagf_longest > delta)
1967                return pag->pagf_longest - delta;
1968        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1969}
1970
1971unsigned int
1972xfs_alloc_min_freelist(
1973        struct xfs_mount        *mp,
1974        struct xfs_perag        *pag)
1975{
1976        unsigned int            min_free;
1977
1978        /* space needed by-bno freespace btree */
1979        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1980                                       mp->m_ag_maxlevels);
1981        /* space needed by-size freespace btree */
1982        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1983                                       mp->m_ag_maxlevels);
1984        /* space needed reverse mapping used space btree */
1985        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1986                min_free += min_t(unsigned int,
1987                                  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1988                                  mp->m_rmap_maxlevels);
1989
1990        return min_free;
1991}
1992
1993/*
1994 * Check if the operation we are fixing up the freelist for should go ahead or
1995 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1996 * is dependent on whether the size and shape of free space available will
1997 * permit the requested allocation to take place.
1998 */
1999static bool
2000xfs_alloc_space_available(
2001        struct xfs_alloc_arg    *args,
2002        xfs_extlen_t            min_free,
2003        int                     flags)
2004{
2005        struct xfs_perag        *pag = args->pag;
2006        xfs_extlen_t            longest;
2007        int                     available;
2008
2009        if (flags & XFS_ALLOC_FLAG_FREEING)
2010                return true;
2011
2012        /* do we have enough contiguous free space for the allocation? */
2013        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
2014        if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
2015                return false;
2016
2017        /* do have enough free space remaining for the allocation? */
2018        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2019                          min_free - args->total);
2020        if (available < (int)args->minleft)
2021                return false;
2022
2023        return true;
2024}
2025
2026/*
2027 * Decide whether to use this allocation group for this allocation.
2028 * If so, fix up the btree freelist's size.
2029 */
2030int                     /* error */
2031xfs_alloc_fix_freelist(
2032        struct xfs_alloc_arg    *args,  /* allocation argument structure */
2033        int                     flags)  /* XFS_ALLOC_FLAG_... */
2034{
2035        struct xfs_mount        *mp = args->mp;
2036        struct xfs_perag        *pag = args->pag;
2037        struct xfs_trans        *tp = args->tp;
2038        struct xfs_buf          *agbp = NULL;
2039        struct xfs_buf          *agflbp = NULL;
2040        struct xfs_alloc_arg    targs;  /* local allocation arguments */
2041        xfs_agblock_t           bno;    /* freelist block */
2042        xfs_extlen_t            need;   /* total blocks needed in freelist */
2043        int                     error = 0;
2044
2045        if (!pag->pagf_init) {
2046                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2047                if (error)
2048                        goto out_no_agbp;
2049                if (!pag->pagf_init) {
2050                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2051                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2052                        goto out_agbp_relse;
2053                }
2054        }
2055
2056        /*
2057         * If this is a metadata preferred pag and we are user data then try
2058         * somewhere else if we are not being asked to try harder at this
2059         * point
2060         */
2061        if (pag->pagf_metadata && args->userdata &&
2062            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2063                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2064                goto out_agbp_relse;
2065        }
2066
2067        need = xfs_alloc_min_freelist(mp, pag);
2068        if (!xfs_alloc_space_available(args, need, flags))
2069                goto out_agbp_relse;
2070
2071        /*
2072         * Get the a.g. freespace buffer.
2073         * Can fail if we're not blocking on locks, and it's held.
2074         */
2075        if (!agbp) {
2076                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2077                if (error)
2078                        goto out_no_agbp;
2079                if (!agbp) {
2080                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2081                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2082                        goto out_no_agbp;
2083                }
2084        }
2085
2086        /* If there isn't enough total space or single-extent, reject it. */
2087        need = xfs_alloc_min_freelist(mp, pag);
2088        if (!xfs_alloc_space_available(args, need, flags))
2089                goto out_agbp_relse;
2090
2091        /*
2092         * Make the freelist shorter if it's too long.
2093         *
2094         * Note that from this point onwards, we will always release the agf and
2095         * agfl buffers on error. This handles the case where we error out and
2096         * the buffers are clean or may not have been joined to the transaction
2097         * and hence need to be released manually. If they have been joined to
2098         * the transaction, then xfs_trans_brelse() will handle them
2099         * appropriately based on the recursion count and dirty state of the
2100         * buffer.
2101         *
2102         * XXX (dgc): When we have lots of free space, does this buy us
2103         * anything other than extra overhead when we need to put more blocks
2104         * back on the free list? Maybe we should only do this when space is
2105         * getting low or the AGFL is more than half full?
2106         *
2107         * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2108         * big; the NORMAP flag prevents AGFL expand/shrink operations from
2109         * updating the rmapbt.  Both flags are used in xfs_repair while we're
2110         * rebuilding the rmapbt, and neither are used by the kernel.  They're
2111         * both required to ensure that rmaps are correctly recorded for the
2112         * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
2113         * repair/rmap.c in xfsprogs for details.
2114         */
2115        memset(&targs, 0, sizeof(targs));
2116        if (flags & XFS_ALLOC_FLAG_NORMAP)
2117                xfs_rmap_skip_owner_update(&targs.oinfo);
2118        else
2119                xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2120        while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
2121                struct xfs_buf  *bp;
2122
2123                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2124                if (error)
2125                        goto out_agbp_relse;
2126                error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2127                                           &targs.oinfo, 1);
2128                if (error)
2129                        goto out_agbp_relse;
2130                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2131                xfs_trans_binval(tp, bp);
2132        }
2133
2134        targs.tp = tp;
2135        targs.mp = mp;
2136        targs.agbp = agbp;
2137        targs.agno = args->agno;
2138        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
2139        targs.type = XFS_ALLOCTYPE_THIS_AG;
2140        targs.pag = pag;
2141        error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2142        if (error)
2143                goto out_agbp_relse;
2144
2145        /* Make the freelist longer if it's too short. */
2146        while (pag->pagf_flcount < need) {
2147                targs.agbno = 0;
2148                targs.maxlen = need - pag->pagf_flcount;
2149
2150                /* Allocate as many blocks as possible at once. */
2151                error = xfs_alloc_ag_vextent(&targs);
2152                if (error)
2153                        goto out_agflbp_relse;
2154
2155                /*
2156                 * Stop if we run out.  Won't happen if callers are obeying
2157                 * the restrictions correctly.  Can happen for free calls
2158                 * on a completely full ag.
2159                 */
2160                if (targs.agbno == NULLAGBLOCK) {
2161                        if (flags & XFS_ALLOC_FLAG_FREEING)
2162                                break;
2163                        goto out_agflbp_relse;
2164                }
2165                /*
2166                 * Put each allocated block on the list.
2167                 */
2168                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2169                        error = xfs_alloc_put_freelist(tp, agbp,
2170                                                        agflbp, bno, 0);
2171                        if (error)
2172                                goto out_agflbp_relse;
2173                }
2174        }
2175        xfs_trans_brelse(tp, agflbp);
2176        args->agbp = agbp;
2177        return 0;
2178
2179out_agflbp_relse:
2180        xfs_trans_brelse(tp, agflbp);
2181out_agbp_relse:
2182        if (agbp)
2183                xfs_trans_brelse(tp, agbp);
2184out_no_agbp:
2185        args->agbp = NULL;
2186        return error;
2187}
2188
2189/*
2190 * Get a block from the freelist.
2191 * Returns with the buffer for the block gotten.
2192 */
2193int                             /* error */
2194xfs_alloc_get_freelist(
2195        xfs_trans_t     *tp,    /* transaction pointer */
2196        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2197        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2198        int             btreeblk) /* destination is a AGF btree */
2199{
2200        xfs_agf_t       *agf;   /* a.g. freespace structure */
2201        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2202        xfs_agblock_t   bno;    /* block number returned */
2203        __be32          *agfl_bno;
2204        int             error;
2205        int             logflags;
2206        xfs_mount_t     *mp = tp->t_mountp;
2207        xfs_perag_t     *pag;   /* per allocation group data */
2208
2209        /*
2210         * Freelist is empty, give up.
2211         */
2212        agf = XFS_BUF_TO_AGF(agbp);
2213        if (!agf->agf_flcount) {
2214                *bnop = NULLAGBLOCK;
2215                return 0;
2216        }
2217        /*
2218         * Read the array of free blocks.
2219         */
2220        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2221                                    &agflbp);
2222        if (error)
2223                return error;
2224
2225
2226        /*
2227         * Get the block number and update the data structures.
2228         */
2229        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2230        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2231        be32_add_cpu(&agf->agf_flfirst, 1);
2232        xfs_trans_brelse(tp, agflbp);
2233        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2234                agf->agf_flfirst = 0;
2235
2236        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2237        be32_add_cpu(&agf->agf_flcount, -1);
2238        xfs_trans_agflist_delta(tp, -1);
2239        pag->pagf_flcount--;
2240        xfs_perag_put(pag);
2241
2242        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2243        if (btreeblk) {
2244                be32_add_cpu(&agf->agf_btreeblks, 1);
2245                pag->pagf_btreeblks++;
2246                logflags |= XFS_AGF_BTREEBLKS;
2247        }
2248
2249        xfs_alloc_log_agf(tp, agbp, logflags);
2250        *bnop = bno;
2251
2252        return 0;
2253}
2254
2255/*
2256 * Log the given fields from the agf structure.
2257 */
2258void
2259xfs_alloc_log_agf(
2260        xfs_trans_t     *tp,    /* transaction pointer */
2261        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2262        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2263{
2264        int     first;          /* first byte offset */
2265        int     last;           /* last byte offset */
2266        static const short      offsets[] = {
2267                offsetof(xfs_agf_t, agf_magicnum),
2268                offsetof(xfs_agf_t, agf_versionnum),
2269                offsetof(xfs_agf_t, agf_seqno),
2270                offsetof(xfs_agf_t, agf_length),
2271                offsetof(xfs_agf_t, agf_roots[0]),
2272                offsetof(xfs_agf_t, agf_levels[0]),
2273                offsetof(xfs_agf_t, agf_flfirst),
2274                offsetof(xfs_agf_t, agf_fllast),
2275                offsetof(xfs_agf_t, agf_flcount),
2276                offsetof(xfs_agf_t, agf_freeblks),
2277                offsetof(xfs_agf_t, agf_longest),
2278                offsetof(xfs_agf_t, agf_btreeblks),
2279                offsetof(xfs_agf_t, agf_uuid),
2280                offsetof(xfs_agf_t, agf_rmap_blocks),
2281                /* needed so that we don't log the whole rest of the structure: */
2282                offsetof(xfs_agf_t, agf_spare64),
2283                sizeof(xfs_agf_t)
2284        };
2285
2286        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2287
2288        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2289
2290        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2291        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2292}
2293
2294/*
2295 * Interface for inode allocation to force the pag data to be initialized.
2296 */
2297int                                     /* error */
2298xfs_alloc_pagf_init(
2299        xfs_mount_t             *mp,    /* file system mount structure */
2300        xfs_trans_t             *tp,    /* transaction pointer */
2301        xfs_agnumber_t          agno,   /* allocation group number */
2302        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2303{
2304        xfs_buf_t               *bp;
2305        int                     error;
2306
2307        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2308                return error;
2309        if (bp)
2310                xfs_trans_brelse(tp, bp);
2311        return 0;
2312}
2313
2314/*
2315 * Put the block on the freelist for the allocation group.
2316 */
2317int                                     /* error */
2318xfs_alloc_put_freelist(
2319        xfs_trans_t             *tp,    /* transaction pointer */
2320        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2321        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2322        xfs_agblock_t           bno,    /* block being freed */
2323        int                     btreeblk) /* block came from a AGF btree */
2324{
2325        xfs_agf_t               *agf;   /* a.g. freespace structure */
2326        __be32                  *blockp;/* pointer to array entry */
2327        int                     error;
2328        int                     logflags;
2329        xfs_mount_t             *mp;    /* mount structure */
2330        xfs_perag_t             *pag;   /* per allocation group data */
2331        __be32                  *agfl_bno;
2332        int                     startoff;
2333
2334        agf = XFS_BUF_TO_AGF(agbp);
2335        mp = tp->t_mountp;
2336
2337        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2338                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2339                return error;
2340        be32_add_cpu(&agf->agf_fllast, 1);
2341        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2342                agf->agf_fllast = 0;
2343
2344        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2345        be32_add_cpu(&agf->agf_flcount, 1);
2346        xfs_trans_agflist_delta(tp, 1);
2347        pag->pagf_flcount++;
2348
2349        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2350        if (btreeblk) {
2351                be32_add_cpu(&agf->agf_btreeblks, -1);
2352                pag->pagf_btreeblks--;
2353                logflags |= XFS_AGF_BTREEBLKS;
2354        }
2355        xfs_perag_put(pag);
2356
2357        xfs_alloc_log_agf(tp, agbp, logflags);
2358
2359        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2360
2361        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2362        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2363        *blockp = cpu_to_be32(bno);
2364        startoff = (char *)blockp - (char *)agflbp->b_addr;
2365
2366        xfs_alloc_log_agf(tp, agbp, logflags);
2367
2368        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2369        xfs_trans_log_buf(tp, agflbp, startoff,
2370                          startoff + sizeof(xfs_agblock_t) - 1);
2371        return 0;
2372}
2373
2374static bool
2375xfs_agf_verify(
2376        struct xfs_mount *mp,
2377        struct xfs_buf  *bp)
2378 {
2379        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2380
2381        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2382                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2383                        return false;
2384                if (!xfs_log_check_lsn(mp,
2385                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2386                        return false;
2387        }
2388
2389        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2390              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2391              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2392              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2393              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2394              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2395                return false;
2396
2397        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2398            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2399                return false;
2400
2401        if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2402            be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
2403                return false;
2404
2405        /*
2406         * during growfs operations, the perag is not fully initialised,
2407         * so we can't use it for any useful checking. growfs ensures we can't
2408         * use it by using uncached buffers that don't have the perag attached
2409         * so we can detect and avoid this problem.
2410         */
2411        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2412                return false;
2413
2414        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2415            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2416                return false;
2417
2418        return true;;
2419
2420}
2421
2422static void
2423xfs_agf_read_verify(
2424        struct xfs_buf  *bp)
2425{
2426        struct xfs_mount *mp = bp->b_target->bt_mount;
2427
2428        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2429            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2430                xfs_buf_ioerror(bp, -EFSBADCRC);
2431        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2432                                XFS_ERRTAG_ALLOC_READ_AGF,
2433                                XFS_RANDOM_ALLOC_READ_AGF))
2434                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2435
2436        if (bp->b_error)
2437                xfs_verifier_error(bp);
2438}
2439
2440static void
2441xfs_agf_write_verify(
2442        struct xfs_buf  *bp)
2443{
2444        struct xfs_mount *mp = bp->b_target->bt_mount;
2445        struct xfs_buf_log_item *bip = bp->b_fspriv;
2446
2447        if (!xfs_agf_verify(mp, bp)) {
2448                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2449                xfs_verifier_error(bp);
2450                return;
2451        }
2452
2453        if (!xfs_sb_version_hascrc(&mp->m_sb))
2454                return;
2455
2456        if (bip)
2457                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2458
2459        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2460}
2461
2462const struct xfs_buf_ops xfs_agf_buf_ops = {
2463        .name = "xfs_agf",
2464        .verify_read = xfs_agf_read_verify,
2465        .verify_write = xfs_agf_write_verify,
2466};
2467
2468/*
2469 * Read in the allocation group header (free/alloc section).
2470 */
2471int                                     /* error */
2472xfs_read_agf(
2473        struct xfs_mount        *mp,    /* mount point structure */
2474        struct xfs_trans        *tp,    /* transaction pointer */
2475        xfs_agnumber_t          agno,   /* allocation group number */
2476        int                     flags,  /* XFS_BUF_ */
2477        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2478{
2479        int             error;
2480
2481        trace_xfs_read_agf(mp, agno);
2482
2483        ASSERT(agno != NULLAGNUMBER);
2484        error = xfs_trans_read_buf(
2485                        mp, tp, mp->m_ddev_targp,
2486                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2487                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2488        if (error)
2489                return error;
2490        if (!*bpp)
2491                return 0;
2492
2493        ASSERT(!(*bpp)->b_error);
2494        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2495        return 0;
2496}
2497
2498/*
2499 * Read in the allocation group header (free/alloc section).
2500 */
2501int                                     /* error */
2502xfs_alloc_read_agf(
2503        struct xfs_mount        *mp,    /* mount point structure */
2504        struct xfs_trans        *tp,    /* transaction pointer */
2505        xfs_agnumber_t          agno,   /* allocation group number */
2506        int                     flags,  /* XFS_ALLOC_FLAG_... */
2507        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2508{
2509        struct xfs_agf          *agf;           /* ag freelist header */
2510        struct xfs_perag        *pag;           /* per allocation group data */
2511        int                     error;
2512
2513        trace_xfs_alloc_read_agf(mp, agno);
2514
2515        ASSERT(agno != NULLAGNUMBER);
2516        error = xfs_read_agf(mp, tp, agno,
2517                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2518                        bpp);
2519        if (error)
2520                return error;
2521        if (!*bpp)
2522                return 0;
2523        ASSERT(!(*bpp)->b_error);
2524
2525        agf = XFS_BUF_TO_AGF(*bpp);
2526        pag = xfs_perag_get(mp, agno);
2527        if (!pag->pagf_init) {
2528                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2529                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2530                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2531                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2532                pag->pagf_levels[XFS_BTNUM_BNOi] =
2533                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2534                pag->pagf_levels[XFS_BTNUM_CNTi] =
2535                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2536                pag->pagf_levels[XFS_BTNUM_RMAPi] =
2537                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
2538                spin_lock_init(&pag->pagb_lock);
2539                pag->pagb_count = 0;
2540                pag->pagb_tree = RB_ROOT;
2541                pag->pagf_init = 1;
2542        }
2543#ifdef DEBUG
2544        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2545                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2546                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2547                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2548                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2549                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2550                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2551                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2552                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2553        }
2554#endif
2555        xfs_perag_put(pag);
2556        return 0;
2557}
2558
2559/*
2560 * Allocate an extent (variable-size).
2561 * Depending on the allocation type, we either look in a single allocation
2562 * group or loop over the allocation groups to find the result.
2563 */
2564int                             /* error */
2565xfs_alloc_vextent(
2566        xfs_alloc_arg_t *args)  /* allocation argument structure */
2567{
2568        xfs_agblock_t   agsize; /* allocation group size */
2569        int             error;
2570        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2571        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2572        xfs_mount_t     *mp;    /* mount structure pointer */
2573        xfs_agnumber_t  sagno;  /* starting allocation group number */
2574        xfs_alloctype_t type;   /* input allocation type */
2575        int             bump_rotor = 0;
2576        int             no_min = 0;
2577        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2578
2579        mp = args->mp;
2580        type = args->otype = args->type;
2581        args->agbno = NULLAGBLOCK;
2582        /*
2583         * Just fix this up, for the case where the last a.g. is shorter
2584         * (or there's only one a.g.) and the caller couldn't easily figure
2585         * that out (xfs_bmap_alloc).
2586         */
2587        agsize = mp->m_sb.sb_agblocks;
2588        if (args->maxlen > agsize)
2589                args->maxlen = agsize;
2590        if (args->alignment == 0)
2591                args->alignment = 1;
2592        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2593        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2594        ASSERT(args->minlen <= args->maxlen);
2595        ASSERT(args->minlen <= agsize);
2596        ASSERT(args->mod < args->prod);
2597        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2598            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2599            args->minlen > args->maxlen || args->minlen > agsize ||
2600            args->mod >= args->prod) {
2601                args->fsbno = NULLFSBLOCK;
2602                trace_xfs_alloc_vextent_badargs(args);
2603                return 0;
2604        }
2605        minleft = args->minleft;
2606
2607        switch (type) {
2608        case XFS_ALLOCTYPE_THIS_AG:
2609        case XFS_ALLOCTYPE_NEAR_BNO:
2610        case XFS_ALLOCTYPE_THIS_BNO:
2611                /*
2612                 * These three force us into a single a.g.
2613                 */
2614                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2615                args->pag = xfs_perag_get(mp, args->agno);
2616                args->minleft = 0;
2617                error = xfs_alloc_fix_freelist(args, 0);
2618                args->minleft = minleft;
2619                if (error) {
2620                        trace_xfs_alloc_vextent_nofix(args);
2621                        goto error0;
2622                }
2623                if (!args->agbp) {
2624                        trace_xfs_alloc_vextent_noagbp(args);
2625                        break;
2626                }
2627                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2628                if ((error = xfs_alloc_ag_vextent(args)))
2629                        goto error0;
2630                break;
2631        case XFS_ALLOCTYPE_START_BNO:
2632                /*
2633                 * Try near allocation first, then anywhere-in-ag after
2634                 * the first a.g. fails.
2635                 */
2636                if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
2637                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2638                        args->fsbno = XFS_AGB_TO_FSB(mp,
2639                                        ((mp->m_agfrotor / rotorstep) %
2640                                        mp->m_sb.sb_agcount), 0);
2641                        bump_rotor = 1;
2642                }
2643                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2644                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2645                /* FALLTHROUGH */
2646        case XFS_ALLOCTYPE_ANY_AG:
2647        case XFS_ALLOCTYPE_START_AG:
2648        case XFS_ALLOCTYPE_FIRST_AG:
2649                /*
2650                 * Rotate through the allocation groups looking for a winner.
2651                 */
2652                if (type == XFS_ALLOCTYPE_ANY_AG) {
2653                        /*
2654                         * Start with the last place we left off.
2655                         */
2656                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2657                                        mp->m_sb.sb_agcount;
2658                        args->type = XFS_ALLOCTYPE_THIS_AG;
2659                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2660                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2661                        /*
2662                         * Start with allocation group given by bno.
2663                         */
2664                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2665                        args->type = XFS_ALLOCTYPE_THIS_AG;
2666                        sagno = 0;
2667                        flags = 0;
2668                } else {
2669                        if (type == XFS_ALLOCTYPE_START_AG)
2670                                args->type = XFS_ALLOCTYPE_THIS_AG;
2671                        /*
2672                         * Start with the given allocation group.
2673                         */
2674                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2675                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2676                }
2677                /*
2678                 * Loop over allocation groups twice; first time with
2679                 * trylock set, second time without.
2680                 */
2681                for (;;) {
2682                        args->pag = xfs_perag_get(mp, args->agno);
2683                        if (no_min) args->minleft = 0;
2684                        error = xfs_alloc_fix_freelist(args, flags);
2685                        args->minleft = minleft;
2686                        if (error) {
2687                                trace_xfs_alloc_vextent_nofix(args);
2688                                goto error0;
2689                        }
2690                        /*
2691                         * If we get a buffer back then the allocation will fly.
2692                         */
2693                        if (args->agbp) {
2694                                if ((error = xfs_alloc_ag_vextent(args)))
2695                                        goto error0;
2696                                break;
2697                        }
2698
2699                        trace_xfs_alloc_vextent_loopfailed(args);
2700
2701                        /*
2702                         * Didn't work, figure out the next iteration.
2703                         */
2704                        if (args->agno == sagno &&
2705                            type == XFS_ALLOCTYPE_START_BNO)
2706                                args->type = XFS_ALLOCTYPE_THIS_AG;
2707                        /*
2708                        * For the first allocation, we can try any AG to get
2709                        * space.  However, if we already have allocated a
2710                        * block, we don't want to try AGs whose number is below
2711                        * sagno. Otherwise, we may end up with out-of-order
2712                        * locking of AGF, which might cause deadlock.
2713                        */
2714                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2715                                if (args->firstblock != NULLFSBLOCK)
2716                                        args->agno = sagno;
2717                                else
2718                                        args->agno = 0;
2719                        }
2720                        /*
2721                         * Reached the starting a.g., must either be done
2722                         * or switch to non-trylock mode.
2723                         */
2724                        if (args->agno == sagno) {
2725                                if (no_min == 1) {
2726                                        args->agbno = NULLAGBLOCK;
2727                                        trace_xfs_alloc_vextent_allfailed(args);
2728                                        break;
2729                                }
2730                                if (flags == 0) {
2731                                        no_min = 1;
2732                                } else {
2733                                        flags = 0;
2734                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2735                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2736                                                        args->fsbno);
2737                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2738                                        }
2739                                }
2740                        }
2741                        xfs_perag_put(args->pag);
2742                }
2743                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2744                        if (args->agno == sagno)
2745                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2746                                        (mp->m_sb.sb_agcount * rotorstep);
2747                        else
2748                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2749                                        (mp->m_sb.sb_agcount * rotorstep);
2750                }
2751                break;
2752        default:
2753                ASSERT(0);
2754                /* NOTREACHED */
2755        }
2756        if (args->agbno == NULLAGBLOCK)
2757                args->fsbno = NULLFSBLOCK;
2758        else {
2759                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2760#ifdef DEBUG
2761                ASSERT(args->len >= args->minlen);
2762                ASSERT(args->len <= args->maxlen);
2763                ASSERT(args->agbno % args->alignment == 0);
2764                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2765                        args->len);
2766#endif
2767
2768                /* Zero the extent if we were asked to do so */
2769                if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2770                        error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2771                        if (error)
2772                                goto error0;
2773                }
2774
2775        }
2776        xfs_perag_put(args->pag);
2777        return 0;
2778error0:
2779        xfs_perag_put(args->pag);
2780        return error;
2781}
2782
2783/* Ensure that the freelist is at full capacity. */
2784int
2785xfs_free_extent_fix_freelist(
2786        struct xfs_trans        *tp,
2787        xfs_agnumber_t          agno,
2788        struct xfs_buf          **agbp)
2789{
2790        struct xfs_alloc_arg    args;
2791        int                     error;
2792
2793        memset(&args, 0, sizeof(struct xfs_alloc_arg));
2794        args.tp = tp;
2795        args.mp = tp->t_mountp;
2796        args.agno = agno;
2797
2798        /*
2799         * validate that the block number is legal - the enables us to detect
2800         * and handle a silent filesystem corruption rather than crashing.
2801         */
2802        if (args.agno >= args.mp->m_sb.sb_agcount)
2803                return -EFSCORRUPTED;
2804
2805        args.pag = xfs_perag_get(args.mp, args.agno);
2806        ASSERT(args.pag);
2807
2808        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2809        if (error)
2810                goto out;
2811
2812        *agbp = args.agbp;
2813out:
2814        xfs_perag_put(args.pag);
2815        return error;
2816}
2817
2818/*
2819 * Free an extent.
2820 * Just break up the extent address and hand off to xfs_free_ag_extent
2821 * after fixing up the freelist.
2822 */
2823int                             /* error */
2824xfs_free_extent(
2825        struct xfs_trans        *tp,    /* transaction pointer */
2826        xfs_fsblock_t           bno,    /* starting block number of extent */
2827        xfs_extlen_t            len,    /* length of extent */
2828        struct xfs_owner_info   *oinfo) /* extent owner */
2829{
2830        struct xfs_mount        *mp = tp->t_mountp;
2831        struct xfs_buf          *agbp;
2832        xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(mp, bno);
2833        xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(mp, bno);
2834        int                     error;
2835
2836        ASSERT(len != 0);
2837
2838        if (XFS_TEST_ERROR(false, mp,
2839                        XFS_ERRTAG_FREE_EXTENT,
2840                        XFS_RANDOM_FREE_EXTENT))
2841                return -EIO;
2842
2843        error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2844        if (error)
2845                return error;
2846
2847        XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
2848
2849        /* validate the extent size is legal now we have the agf locked */
2850        XFS_WANT_CORRUPTED_GOTO(mp,
2851                agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2852                                err);
2853
2854        error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
2855        if (error)
2856                goto err;
2857
2858        xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2859        return 0;
2860
2861err:
2862        xfs_trans_brelse(tp, agbp);
2863        return error;
2864}
2865