linux/fs/xfs/libxfs/xfs_alloc.c
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_inode.h"
  28#include "xfs_btree.h"
  29#include "xfs_alloc_btree.h"
  30#include "xfs_alloc.h"
  31#include "xfs_extent_busy.h"
  32#include "xfs_error.h"
  33#include "xfs_cksum.h"
  34#include "xfs_trace.h"
  35#include "xfs_trans.h"
  36#include "xfs_buf_item.h"
  37#include "xfs_log.h"
  38
  39struct workqueue_struct *xfs_alloc_wq;
  40
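/* Absolute difference between two block numbers, e.g. XFS_ABSDIFF(7, 19) == 12. */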
  41#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  42
  43#define XFSA_FIXUP_BNO_OK       1
  44#define XFSA_FIXUP_CNT_OK       2
  45
  46STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  47STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  48STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  49STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  50                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  51
  52/*
  53 * Lookup the record equal to [bno, len] in the btree given by cur.
  54 */
  55STATIC int                              /* error */
  56xfs_alloc_lookup_eq(
  57        struct xfs_btree_cur    *cur,   /* btree cursor */
  58        xfs_agblock_t           bno,    /* starting block of extent */
  59        xfs_extlen_t            len,    /* length of extent */
  60        int                     *stat)  /* success/failure */
  61{
  62        cur->bc_rec.a.ar_startblock = bno;
  63        cur->bc_rec.a.ar_blockcount = len;
  64        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  65}
  66
  67/*
  68 * Lookup the first record greater than or equal to [bno, len]
  69 * in the btree given by cur.
  70 */
  71int                             /* error */
  72xfs_alloc_lookup_ge(
  73        struct xfs_btree_cur    *cur,   /* btree cursor */
  74        xfs_agblock_t           bno,    /* starting block of extent */
  75        xfs_extlen_t            len,    /* length of extent */
  76        int                     *stat)  /* success/failure */
  77{
  78        cur->bc_rec.a.ar_startblock = bno;
  79        cur->bc_rec.a.ar_blockcount = len;
  80        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  81}
  82
  83/*
  84 * Lookup the first record less than or equal to [bno, len]
  85 * in the btree given by cur.
  86 */
  87int                                     /* error */
  88xfs_alloc_lookup_le(
  89        struct xfs_btree_cur    *cur,   /* btree cursor */
  90        xfs_agblock_t           bno,    /* starting block of extent */
  91        xfs_extlen_t            len,    /* length of extent */
  92        int                     *stat)  /* success/failure */
  93{
  94        cur->bc_rec.a.ar_startblock = bno;
  95        cur->bc_rec.a.ar_blockcount = len;
  96        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  97}
  98
  99/*
 100 * Update the record referred to by cur to the value given
 101 * by [bno, len].
 102 * This either works (return 0) or gets an EFSCORRUPTED error.
 103 */
 104STATIC int                              /* error */
 105xfs_alloc_update(
 106        struct xfs_btree_cur    *cur,   /* btree cursor */
 107        xfs_agblock_t           bno,    /* starting block of extent */
 108        xfs_extlen_t            len)    /* length of extent */
 109{
 110        union xfs_btree_rec     rec;
 111
 112        rec.alloc.ar_startblock = cpu_to_be32(bno);
 113        rec.alloc.ar_blockcount = cpu_to_be32(len);
 114        return xfs_btree_update(cur, &rec);
 115}
 116
 117/*
 118 * Get the data from the pointed-to record.
 119 */
 120int                                     /* error */
 121xfs_alloc_get_rec(
 122        struct xfs_btree_cur    *cur,   /* btree cursor */
 123        xfs_agblock_t           *bno,   /* output: starting block of extent */
 124        xfs_extlen_t            *len,   /* output: length of extent */
 125        int                     *stat)  /* output: success/failure */
 126{
 127        union xfs_btree_rec     *rec;
 128        int                     error;
 129
 130        error = xfs_btree_get_rec(cur, &rec, stat);
 131        if (!error && *stat == 1) {
 132                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 133                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 134        }
 135        return error;
 136}
 137
 138/*
 139 * Compute aligned version of the found extent.
 140 * Takes alignment and min length into account.
 141 */
 142STATIC void
 143xfs_alloc_compute_aligned(
 144        xfs_alloc_arg_t *args,          /* allocation argument structure */
 145        xfs_agblock_t   foundbno,       /* starting block in found extent */
 146        xfs_extlen_t    foundlen,       /* length in found extent */
 147        xfs_agblock_t   *resbno,        /* result block number */
 148        xfs_extlen_t    *reslen)        /* result length */
 149{
 150        xfs_agblock_t   bno;
 151        xfs_extlen_t    len;
 152        xfs_extlen_t    diff;
 153
 154        /* Trim busy sections out of found extent */
 155        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 156
 157        /*
 158         * If we have a largish extent that happens to start before min_agbno,
 159         * see if we can shift it into range...
 160         */
 161        if (bno < args->min_agbno && bno + len > args->min_agbno) {
 162                diff = args->min_agbno - bno;
 163                if (len > diff) {
 164                        bno += diff;
 165                        len -= diff;
 166                }
 167        }
 168
 169        if (args->alignment > 1 && len >= args->minlen) {
 170                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 171
 172                diff = aligned_bno - bno;
 173
 174                *resbno = aligned_bno;
 175                *reslen = diff >= len ? 0 : len - diff;
 176        } else {
 177                *resbno = bno;
 178                *reslen = len;
 179        }
 180}
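/*
 * Worked example of xfs_alloc_compute_aligned() above, assuming nothing
 * is trimmed away as busy and the extent is at least minlen long: with
 * an alignment of 4, a found extent starting at block 10 with length 20
 * becomes [12, 18] -- roundup(10, 4) == 12, and the two blocks skipped
 * to reach the aligned start are subtracted from the length.
 */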
 181
 182/*
 183 * Compute best start block and diff for "near" allocations.
 184 * freelen >= wantlen already checked by caller.
 185 */
 186STATIC xfs_extlen_t                     /* difference value (absolute) */
 187xfs_alloc_compute_diff(
 188        xfs_agblock_t   wantbno,        /* target starting block */
 189        xfs_extlen_t    wantlen,        /* target length */
 190        xfs_extlen_t    alignment,      /* target alignment */
 191        char            userdata,       /* are we allocating data? */
 192        xfs_agblock_t   freebno,        /* freespace's starting block */
 193        xfs_extlen_t    freelen,        /* freespace's length */
 194        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 195{
 196        xfs_agblock_t   freeend;        /* end of freespace extent */
 197        xfs_agblock_t   newbno1;        /* return block number */
 198        xfs_agblock_t   newbno2;        /* other new block number */
 199        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 200        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 201        xfs_agblock_t   wantend;        /* end of target extent */
 202
 203        ASSERT(freelen >= wantlen);
 204        freeend = freebno + freelen;
 205        wantend = wantbno + wantlen;
 206        /*
 207         * We want to allocate from the start of a free extent if it is past
 208         * the desired block or if we are allocating user data and the free
 209         * extent is before desired block. The second case is there to allow
 210         * for contiguous allocation from the remaining free space if the file
 211         * grows in the short term.
 212         */
 213        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 214                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 215                        newbno1 = NULLAGBLOCK;
 216        } else if (freeend >= wantend && alignment > 1) {
 217                newbno1 = roundup(wantbno, alignment);
 218                newbno2 = newbno1 - alignment;
 219                if (newbno1 >= freeend)
 220                        newbno1 = NULLAGBLOCK;
 221                else
 222                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 223                if (newbno2 < freebno)
 224                        newbno2 = NULLAGBLOCK;
 225                else
 226                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 227                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 228                        if (newlen1 < newlen2 ||
 229                            (newlen1 == newlen2 &&
 230                             XFS_ABSDIFF(newbno1, wantbno) >
 231                             XFS_ABSDIFF(newbno2, wantbno)))
 232                                newbno1 = newbno2;
 233                } else if (newbno2 != NULLAGBLOCK)
 234                        newbno1 = newbno2;
 235        } else if (freeend >= wantend) {
 236                newbno1 = wantbno;
 237        } else if (alignment > 1) {
 238                newbno1 = roundup(freeend - wantlen, alignment);
 239                if (newbno1 > freeend - wantlen &&
 240                    newbno1 - alignment >= freebno)
 241                        newbno1 -= alignment;
 242                else if (newbno1 >= freeend)
 243                        newbno1 = NULLAGBLOCK;
 244        } else
 245                newbno1 = freeend - wantlen;
 246        *newbnop = newbno1;
 247        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 248}
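/*
 * Two worked examples of xfs_alloc_compute_diff() above, both with an
 * alignment of 1 and a non-userdata allocation:
 *
 *  - want [100, 10], free [120, 50]: the free extent starts past the
 *    desired block, so we allocate from its start; *newbnop = 120 and
 *    the returned diff is XFS_ABSDIFF(120, 100) = 20.
 *
 *  - want [100, 10], free [60, 30]: the free extent ends at block 90,
 *    before the desired range, so we allocate from its tail;
 *    *newbnop = 90 - 10 = 80 and the returned diff is 20.
 */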
 249
 250/*
 251 * Fix up the length, based on mod and prod.
 252 * len should be k * prod + mod for some k.
 253 * If len is too small it is returned unchanged.
 254 * If len hits maxlen it is left alone.
 255 */
 256STATIC void
 257xfs_alloc_fix_len(
 258        xfs_alloc_arg_t *args)          /* allocation argument structure */
 259{
 260        xfs_extlen_t    k;
 261        xfs_extlen_t    rlen;
 262
 263        ASSERT(args->mod < args->prod);
 264        rlen = args->len;
 265        ASSERT(rlen >= args->minlen);
 266        ASSERT(rlen <= args->maxlen);
 267        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 268            (args->mod == 0 && rlen < args->prod))
 269                return;
 270        k = rlen % args->prod;
 271        if (k == args->mod)
 272                return;
 273        if (k > args->mod)
 274                rlen = rlen - (k - args->mod);
 275        else
 276                rlen = rlen - args->prod + (args->mod - k);
 277        /* casts to (int) catch length underflows */
 278        if ((int)rlen < (int)args->minlen)
 279                return;
 280        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 281        ASSERT(rlen % args->prod == args->mod);
 282        args->len = rlen;
 283}
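/*
 * For example, with args->prod == 4, args->mod == 1, args->len == 15,
 * minlen <= 13 and maxlen > 15, xfs_alloc_fix_len() above computes
 * k = 15 % 4 = 3 > mod, so the length is trimmed by k - mod = 2 blocks
 * to 13, which satisfies 13 % 4 == 1.
 */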
 284
 285/*
 286 * Fix up length if there is too little space left in the a.g.
 287 * Return 1 if ok, 0 if too little, should give up.
 288 */
 289STATIC int
 290xfs_alloc_fix_minleft(
 291        xfs_alloc_arg_t *args)          /* allocation argument structure */
 292{
 293        xfs_agf_t       *agf;           /* a.g. freelist header */
 294        int             diff;           /* free space difference */
 295
 296        if (args->minleft == 0)
 297                return 1;
 298        agf = XFS_BUF_TO_AGF(args->agbp);
 299        diff = be32_to_cpu(agf->agf_freeblks)
 300                - args->len - args->minleft;
 301        if (diff >= 0)
 302                return 1;
 303        args->len += diff;              /* shrink the allocated space */
 304        /* casts to (int) catch length underflows */
 305        if ((int)args->len >= (int)args->minlen)
 306                return 1;
 307        args->agbno = NULLAGBLOCK;
 308        return 0;
 309}
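/*
 * For example, if the AGF shows 100 free blocks while args->len is 60
 * and args->minleft is 50, then diff = 100 - 60 - 50 = -10 and
 * xfs_alloc_fix_minleft() above shrinks the allocation to 50 blocks;
 * it only gives up (returns 0) if the shrunken length drops below
 * args->minlen.
 */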
 310
 311/*
 312 * Update the two btrees, logically removing from freespace the extent
 313 * starting at rbno, rlen blocks.  The extent is contained within the
 314 * actual (current) free extent fbno for flen blocks.
 315 * Flags are passed in indicating whether the cursors are set to the
 316 * relevant records.
 317 */
 318STATIC int                              /* error code */
 319xfs_alloc_fixup_trees(
 320        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 321        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 322        xfs_agblock_t   fbno,           /* starting block of free extent */
 323        xfs_extlen_t    flen,           /* length of free extent */
 324        xfs_agblock_t   rbno,           /* starting block of returned extent */
 325        xfs_extlen_t    rlen,           /* length of returned extent */
 326        int             flags)          /* flags, XFSA_FIXUP_... */
 327{
 328        int             error;          /* error code */
 329        int             i;              /* operation results */
 330        xfs_agblock_t   nfbno1;         /* first new free startblock */
 331        xfs_agblock_t   nfbno2;         /* second new free startblock */
 332        xfs_extlen_t    nflen1=0;       /* first new free length */
 333        xfs_extlen_t    nflen2=0;       /* second new free length */
 334        struct xfs_mount *mp;
 335
 336        mp = cnt_cur->bc_mp;
 337
 338        /*
 339         * Look up the record in the by-size tree if necessary.
 340         */
 341        if (flags & XFSA_FIXUP_CNT_OK) {
 342#ifdef DEBUG
 343                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 344                        return error;
 345                XFS_WANT_CORRUPTED_RETURN(mp,
 346                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 347#endif
 348        } else {
 349                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 350                        return error;
 351                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 352        }
 353        /*
 354         * Look up the record in the by-block tree if necessary.
 355         */
 356        if (flags & XFSA_FIXUP_BNO_OK) {
 357#ifdef DEBUG
 358                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 359                        return error;
 360                XFS_WANT_CORRUPTED_RETURN(mp,
 361                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 362#endif
 363        } else {
 364                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 365                        return error;
 366                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 367        }
 368
 369#ifdef DEBUG
 370        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 371                struct xfs_btree_block  *bnoblock;
 372                struct xfs_btree_block  *cntblock;
 373
 374                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 375                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 376
 377                XFS_WANT_CORRUPTED_RETURN(mp,
 378                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 379        }
 380#endif
 381
 382        /*
 383         * Deal with all four cases: the allocated record is contained
 384         * within the freespace record, so we can have new freespace
 385         * at either (or both) end, or no freespace remaining.
 386         */
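        /*
         * For example, carving the allocation [110, 20] out of the free
         * extent [100, 50] is the last case: it leaves [100, 10] in
         * front of the allocation and [130, 20] behind it.
         */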
 387        if (rbno == fbno && rlen == flen)
 388                nfbno1 = nfbno2 = NULLAGBLOCK;
 389        else if (rbno == fbno) {
 390                nfbno1 = rbno + rlen;
 391                nflen1 = flen - rlen;
 392                nfbno2 = NULLAGBLOCK;
 393        } else if (rbno + rlen == fbno + flen) {
 394                nfbno1 = fbno;
 395                nflen1 = flen - rlen;
 396                nfbno2 = NULLAGBLOCK;
 397        } else {
 398                nfbno1 = fbno;
 399                nflen1 = rbno - fbno;
 400                nfbno2 = rbno + rlen;
 401                nflen2 = (fbno + flen) - nfbno2;
 402        }
 403        /*
 404         * Delete the entry from the by-size btree.
 405         */
 406        if ((error = xfs_btree_delete(cnt_cur, &i)))
 407                return error;
 408        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 409        /*
 410         * Add new by-size btree entry(s).
 411         */
 412        if (nfbno1 != NULLAGBLOCK) {
 413                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 414                        return error;
 415                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 416                if ((error = xfs_btree_insert(cnt_cur, &i)))
 417                        return error;
 418                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 419        }
 420        if (nfbno2 != NULLAGBLOCK) {
 421                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 422                        return error;
 423                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 424                if ((error = xfs_btree_insert(cnt_cur, &i)))
 425                        return error;
 426                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 427        }
 428        /*
 429         * Fix up the by-block btree entry(s).
 430         */
 431        if (nfbno1 == NULLAGBLOCK) {
 432                /*
 433                 * No remaining freespace, just delete the by-block tree entry.
 434                 */
 435                if ((error = xfs_btree_delete(bno_cur, &i)))
 436                        return error;
 437                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 438        } else {
 439                /*
 440                 * Update the by-block entry to start later and/or be shorter.
 441                 */
 442                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 443                        return error;
 444        }
 445        if (nfbno2 != NULLAGBLOCK) {
 446                /*
 447                 * 2 resulting free entries, need to add one.
 448                 */
 449                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 450                        return error;
 451                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 452                if ((error = xfs_btree_insert(bno_cur, &i)))
 453                        return error;
 454                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 455        }
 456        return 0;
 457}
 458
 459static bool
 460xfs_agfl_verify(
 461        struct xfs_buf  *bp)
 462{
 463        struct xfs_mount *mp = bp->b_target->bt_mount;
 464        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 465        int             i;
 466
 467        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
 468                return false;
 469        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 470                return false;
 471        /*
 472         * During growfs operations the perag is not fully initialised,
 473         * so we can't use it for any useful checking. growfs uses
 474         * uncached buffers that don't have the perag attached, which
 475         * lets us detect that case here and skip the check.
 476         */
 477        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 478                return false;
 479
 480        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 481                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 482                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 483                        return false;
 484        }
 485        return true;
 486}
 487
 488static void
 489xfs_agfl_read_verify(
 490        struct xfs_buf  *bp)
 491{
 492        struct xfs_mount *mp = bp->b_target->bt_mount;
 493
 494        /*
 495         * There is no verification of non-crc AGFLs because mkfs does not
 496         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 497         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 498         * can't verify just those entries are valid.
 499         */
 500        if (!xfs_sb_version_hascrc(&mp->m_sb))
 501                return;
 502
 503        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 504                xfs_buf_ioerror(bp, -EFSBADCRC);
 505        else if (!xfs_agfl_verify(bp))
 506                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 507
 508        if (bp->b_error)
 509                xfs_verifier_error(bp);
 510}
 511
 512static void
 513xfs_agfl_write_verify(
 514        struct xfs_buf  *bp)
 515{
 516        struct xfs_mount *mp = bp->b_target->bt_mount;
 517        struct xfs_buf_log_item *bip = bp->b_fspriv;
 518
 519        /* no verification of non-crc AGFLs */
 520        if (!xfs_sb_version_hascrc(&mp->m_sb))
 521                return;
 522
 523        if (!xfs_agfl_verify(bp)) {
 524                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 525                xfs_verifier_error(bp);
 526                return;
 527        }
 528
 529        if (bip)
 530                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 531
 532        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 533}
 534
 535const struct xfs_buf_ops xfs_agfl_buf_ops = {
 536        .verify_read = xfs_agfl_read_verify,
 537        .verify_write = xfs_agfl_write_verify,
 538};
 539
 540/*
 541 * Read in the allocation group free block array.
 542 */
 543STATIC int                              /* error */
 544xfs_alloc_read_agfl(
 545        xfs_mount_t     *mp,            /* mount point structure */
 546        xfs_trans_t     *tp,            /* transaction pointer */
 547        xfs_agnumber_t  agno,           /* allocation group number */
 548        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 549{
 550        xfs_buf_t       *bp;            /* return value */
 551        int             error;
 552
 553        ASSERT(agno != NULLAGNUMBER);
 554        error = xfs_trans_read_buf(
 555                        mp, tp, mp->m_ddev_targp,
 556                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 557                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 558        if (error)
 559                return error;
 560        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 561        *bpp = bp;
 562        return 0;
 563}
 564
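/*
 * Adjust the AG free block counters (both the in-core perag and the
 * on-disk AGF) by "len" blocks and log the AGF.  Callers pass a
 * negative length when allocating (see xfs_alloc_ag_vextent() below)
 * and a positive length when returning space to the AG.
 */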
 565STATIC int
 566xfs_alloc_update_counters(
 567        struct xfs_trans        *tp,
 568        struct xfs_perag        *pag,
 569        struct xfs_buf          *agbp,
 570        long                    len)
 571{
 572        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 573
 574        pag->pagf_freeblks += len;
 575        be32_add_cpu(&agf->agf_freeblks, len);
 576
 577        xfs_trans_agblocks_delta(tp, len);
 578        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 579                     be32_to_cpu(agf->agf_length)))
 580                return -EFSCORRUPTED;
 581
 582        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 583        return 0;
 584}
 585
 586/*
 587 * Allocation group level functions.
 588 */
 589
 590/*
 591 * Allocate a variable extent in the allocation group agno.
 592 * Type and bno are used to determine where in the allocation group the
 593 * extent will start.
 594 * Extent's length (returned in *len) will be between minlen and maxlen,
 595 * and of the form k * prod + mod unless there's nothing that large.
 596 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 597 */
 598STATIC int                      /* error */
 599xfs_alloc_ag_vextent(
 600        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 601{
 602        int             error=0;
 603
 604        ASSERT(args->minlen > 0);
 605        ASSERT(args->maxlen > 0);
 606        ASSERT(args->minlen <= args->maxlen);
 607        ASSERT(args->mod < args->prod);
 608        ASSERT(args->alignment > 0);
 609        /*
 610         * Branch to correct routine based on the type.
 611         */
 612        args->wasfromfl = 0;
 613        switch (args->type) {
 614        case XFS_ALLOCTYPE_THIS_AG:
 615                error = xfs_alloc_ag_vextent_size(args);
 616                break;
 617        case XFS_ALLOCTYPE_NEAR_BNO:
 618                error = xfs_alloc_ag_vextent_near(args);
 619                break;
 620        case XFS_ALLOCTYPE_THIS_BNO:
 621                error = xfs_alloc_ag_vextent_exact(args);
 622                break;
 623        default:
 624                ASSERT(0);
 625                /* NOTREACHED */
 626        }
 627
 628        if (error || args->agbno == NULLAGBLOCK)
 629                return error;
 630
 631        ASSERT(args->len >= args->minlen);
 632        ASSERT(args->len <= args->maxlen);
 633        ASSERT(!args->wasfromfl || !args->isfl);
 634        ASSERT(args->agbno % args->alignment == 0);
 635
 636        if (!args->wasfromfl) {
 637                error = xfs_alloc_update_counters(args->tp, args->pag,
 638                                                  args->agbp,
 639                                                  -((long)(args->len)));
 640                if (error)
 641                        return error;
 642
 643                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 644                                              args->agbno, args->len));
 645        }
 646
 647        if (!args->isfl) {
 648                xfs_trans_mod_sb(args->tp, args->wasdel ?
 649                                 XFS_TRANS_SB_RES_FDBLOCKS :
 650                                 XFS_TRANS_SB_FDBLOCKS,
 651                                 -((long)(args->len)));
 652        }
 653
 654        XFS_STATS_INC(xs_allocx);
 655        XFS_STATS_ADD(xs_allocb, args->len);
 656        return error;
 657}
 658
 659/*
 660 * Allocate a variable extent at exactly agno/bno.
 661 * Extent's length (returned in *len) will be between minlen and maxlen,
 662 * and of the form k * prod + mod unless there's nothing that large.
 663 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 664 */
 665STATIC int                      /* error */
 666xfs_alloc_ag_vextent_exact(
 667        xfs_alloc_arg_t *args)  /* allocation argument structure */
 668{
 669        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 670        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 671        int             error;
 672        xfs_agblock_t   fbno;   /* start block of found extent */
 673        xfs_extlen_t    flen;   /* length of found extent */
 674        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 675        xfs_extlen_t    tlen;   /* length of trimmed extent */
 676        xfs_agblock_t   tend;   /* end block of trimmed extent */
 677        int             i;      /* success/failure of operation */
 678
 679        ASSERT(args->alignment == 1);
 680
 681        /*
 682         * Allocate/initialize a cursor for the by-number freespace btree.
 683         */
 684        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 685                                          args->agno, XFS_BTNUM_BNO);
 686
 687        /*
 688         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 689         * Look for the closest free block <= bno, it must contain bno
 690         * if any free block does.
 691         */
 692        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 693        if (error)
 694                goto error0;
 695        if (!i)
 696                goto not_found;
 697
 698        /*
 699         * Grab the freespace record.
 700         */
 701        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 702        if (error)
 703                goto error0;
 704        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 705        ASSERT(fbno <= args->agbno);
 706
 707        /*
 708         * Check for overlapping busy extents.
 709         */
 710        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 711
 712        /*
 713         * Give up if the start of the extent is busy, or the freespace isn't
 714         * long enough for the minimum request.
 715         */
 716        if (tbno > args->agbno)
 717                goto not_found;
 718        if (tlen < args->minlen)
 719                goto not_found;
 720        tend = tbno + tlen;
 721        if (tend < args->agbno + args->minlen)
 722                goto not_found;
 723
 724        /*
 725         * End of extent will be smaller of the freespace end and the
 726         * maximal requested end.
 727         *
 728         * Fix the length according to mod and prod if given.
 729         */
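        /*
         * For example, with args->agbno == 100, args->maxlen == 20 and a
         * trimmed free extent ending at block 115, the initial length is
         * min(115, 120) - 100 = 15 blocks.
         */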
 730        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 731                                                - args->agbno;
 732        xfs_alloc_fix_len(args);
 733        if (!xfs_alloc_fix_minleft(args))
 734                goto not_found;
 735
 736        ASSERT(args->agbno + args->len <= tend);
 737
 738        /*
 739         * We are allocating agbno for args->len
 740         * Allocate/initialize a cursor for the by-size btree.
 741         */
 742        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 743                args->agno, XFS_BTNUM_CNT);
 744        ASSERT(args->agbno + args->len <=
 745                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 746        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 747                                      args->len, XFSA_FIXUP_BNO_OK);
 748        if (error) {
 749                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 750                goto error0;
 751        }
 752
 753        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 754        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 755
 756        args->wasfromfl = 0;
 757        trace_xfs_alloc_exact_done(args);
 758        return 0;
 759
 760not_found:
 761        /* Didn't find it, return null. */
 762        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 763        args->agbno = NULLAGBLOCK;
 764        trace_xfs_alloc_exact_notfound(args);
 765        return 0;
 766
 767error0:
 768        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 769        trace_xfs_alloc_exact_error(args);
 770        return error;
 771}
 772
 773/*
 774 * Search the btree in a given direction via the search cursor and compare
 775 * the records found against the good extent we've already found.
 776 */
 777STATIC int
 778xfs_alloc_find_best_extent(
 779        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 780        struct xfs_btree_cur    **gcur, /* good cursor */
 781        struct xfs_btree_cur    **scur, /* searching cursor */
 782        xfs_agblock_t           gdiff,  /* difference for search comparison */
 783        xfs_agblock_t           *sbno,  /* extent found by search */
 784        xfs_extlen_t            *slen,  /* extent length */
 785        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 786        xfs_extlen_t            *slena, /* aligned extent length */
 787        int                     dir)    /* 0 = search right, 1 = search left */
 788{
 789        xfs_agblock_t           new;
 790        xfs_agblock_t           sdiff;
 791        int                     error;
 792        int                     i;
 793
 794        /* The good extent is perfect, no need to search. */
 795        if (!gdiff)
 796                goto out_use_good;
 797
 798        /*
 799         * Look until we find a better one, run out of space or run off the end.
 800         */
 801        do {
 802                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 803                if (error)
 804                        goto error0;
 805                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 806                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 807
 808                /*
 809                 * The good extent is closer than this one.
 810                 */
 811                if (!dir) {
 812                        if (*sbnoa > args->max_agbno)
 813                                goto out_use_good;
 814                        if (*sbnoa >= args->agbno + gdiff)
 815                                goto out_use_good;
 816                } else {
 817                        if (*sbnoa < args->min_agbno)
 818                                goto out_use_good;
 819                        if (*sbnoa <= args->agbno - gdiff)
 820                                goto out_use_good;
 821                }
 822
 823                /*
 824                 * Same distance, compare length and pick the best.
 825                 */
 826                if (*slena >= args->minlen) {
 827                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 828                        xfs_alloc_fix_len(args);
 829
 830                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 831                                                       args->alignment,
 832                                                       args->userdata, *sbnoa,
 833                                                       *slena, &new);
 834
 835                        /*
 836                         * Choose closer size and invalidate other cursor.
 837                         */
 838                        if (sdiff < gdiff)
 839                                goto out_use_search;
 840                        goto out_use_good;
 841                }
 842
 843                if (!dir)
 844                        error = xfs_btree_increment(*scur, 0, &i);
 845                else
 846                        error = xfs_btree_decrement(*scur, 0, &i);
 847                if (error)
 848                        goto error0;
 849        } while (i);
 850
 851out_use_good:
 852        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 853        *scur = NULL;
 854        return 0;
 855
 856out_use_search:
 857        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 858        *gcur = NULL;
 859        return 0;
 860
 861error0:
 862        /* caller invalidates cursors */
 863        return error;
 864}
 865
 866/*
 867 * Allocate a variable extent near bno in the allocation group agno.
 868 * Extent's length (returned in len) will be between minlen and maxlen,
 869 * and of the form k * prod + mod unless there's nothing that large.
 870 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 871 */
 872STATIC int                              /* error */
 873xfs_alloc_ag_vextent_near(
 874        xfs_alloc_arg_t *args)          /* allocation argument structure */
 875{
 876        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 877        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 878        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 879        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 880        xfs_agblock_t   gtbnoa;         /* aligned ... */
 881        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 882        xfs_extlen_t    gtlen;          /* length of right side entry */
 883        xfs_extlen_t    gtlena;         /* aligned ... */
 884        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 885        int             error;          /* error code */
 886        int             i;              /* result code, temporary */
 887        int             j;              /* result code, temporary */
 888        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 889        xfs_agblock_t   ltbnoa;         /* aligned ... */
 890        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 891        xfs_extlen_t    ltlen;          /* length of left side entry */
 892        xfs_extlen_t    ltlena;         /* aligned ... */
 893        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 894        xfs_extlen_t    rlen;           /* length of returned extent */
 895        int             forced = 0;
 896#ifdef DEBUG
 897        /*
 898         * Randomly don't execute the first algorithm.
 899         */
 900        int             dofirst;        /* set to do first algorithm */
 901
 902        dofirst = prandom_u32() & 1;
 903#endif
 904
  905        /* handle uninitialized agbno range so caller doesn't have to */
 906        if (!args->min_agbno && !args->max_agbno)
 907                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 908        ASSERT(args->min_agbno <= args->max_agbno);
 909
 910        /* clamp agbno to the range if it's outside */
 911        if (args->agbno < args->min_agbno)
 912                args->agbno = args->min_agbno;
 913        if (args->agbno > args->max_agbno)
 914                args->agbno = args->max_agbno;
 915
 916restart:
 917        bno_cur_lt = NULL;
 918        bno_cur_gt = NULL;
 919        ltlen = 0;
 920        gtlena = 0;
 921        ltlena = 0;
 922
 923        /*
 924         * Get a cursor for the by-size btree.
 925         */
 926        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 927                args->agno, XFS_BTNUM_CNT);
 928
 929        /*
 930         * See if there are any free extents as big as maxlen.
 931         */
 932        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 933                goto error0;
 934        /*
 935         * If none, then pick up the last entry in the tree unless the
 936         * tree is empty.
 937         */
 938        if (!i) {
 939                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
 940                                &ltlen, &i)))
 941                        goto error0;
 942                if (i == 0 || ltlen == 0) {
 943                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 944                        trace_xfs_alloc_near_noentry(args);
 945                        return 0;
 946                }
 947                ASSERT(i == 1);
 948        }
 949        args->wasfromfl = 0;
 950
 951        /*
 952         * First algorithm.
 953         * If the requested extent is large wrt the freespaces available
 954         * in this a.g., then the cursor will be pointing to a btree entry
 955         * near the right edge of the tree.  If it's in the last btree leaf
 956         * block, then we just examine all the entries in that block
 957         * that are big enough, and pick the best one.
 958         * This is written as a while loop so we can break out of it,
 959         * but we never loop back to the top.
 960         */
 961        while (xfs_btree_islastblock(cnt_cur, 0)) {
 962                xfs_extlen_t    bdiff;
 963                int             besti=0;
 964                xfs_extlen_t    blen=0;
 965                xfs_agblock_t   bnew=0;
 966
 967#ifdef DEBUG
 968                if (dofirst)
 969                        break;
 970#endif
 971                /*
 972                 * Start from the entry that lookup found, sequence through
 973                 * all larger free blocks.  If we're actually pointing at a
 974                 * record smaller than maxlen, go to the start of this block,
 975                 * and skip all those smaller than minlen.
 976                 */
 977                if (ltlen || args->alignment > 1) {
 978                        cnt_cur->bc_ptrs[0] = 1;
 979                        do {
 980                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 981                                                &ltlen, &i)))
 982                                        goto error0;
 983                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 984                                if (ltlen >= args->minlen)
 985                                        break;
 986                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 987                                        goto error0;
 988                        } while (i);
 989                        ASSERT(ltlen >= args->minlen);
 990                        if (!i)
 991                                break;
 992                }
 993                i = cnt_cur->bc_ptrs[0];
 994                for (j = 1, blen = 0, bdiff = 0;
 995                     !error && j && (blen < args->maxlen || bdiff > 0);
 996                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
 997                        /*
 998                         * For each entry, decide if it's better than
 999                         * the previous best entry.
1000                         */
1001                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1002                                goto error0;
1003                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1004                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1005                                                  &ltbnoa, &ltlena);
1006                        if (ltlena < args->minlen)
1007                                continue;
1008                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1009                                continue;
1010                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1011                        xfs_alloc_fix_len(args);
1012                        ASSERT(args->len >= args->minlen);
1013                        if (args->len < blen)
1014                                continue;
1015                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1016                                args->alignment, args->userdata, ltbnoa,
1017                                ltlena, &ltnew);
1018                        if (ltnew != NULLAGBLOCK &&
1019                            (args->len > blen || ltdiff < bdiff)) {
1020                                bdiff = ltdiff;
1021                                bnew = ltnew;
1022                                blen = args->len;
1023                                besti = cnt_cur->bc_ptrs[0];
1024                        }
1025                }
1026                /*
1027                 * It didn't work.  We COULD be in a case where
1028                 * there's a good record somewhere, so try again.
1029                 */
1030                if (blen == 0)
1031                        break;
1032                /*
1033                 * Point at the best entry, and retrieve it again.
1034                 */
1035                cnt_cur->bc_ptrs[0] = besti;
1036                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1037                        goto error0;
1038                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1039                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1040                args->len = blen;
1041                if (!xfs_alloc_fix_minleft(args)) {
1042                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1043                        trace_xfs_alloc_near_nominleft(args);
1044                        return 0;
1045                }
1046                blen = args->len;
1047                /*
1048                 * We are allocating starting at bnew for blen blocks.
1049                 */
1050                args->agbno = bnew;
1051                ASSERT(bnew >= ltbno);
1052                ASSERT(bnew + blen <= ltbno + ltlen);
1053                /*
1054                 * Set up a cursor for the by-bno tree.
1055                 */
1056                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1057                        args->agbp, args->agno, XFS_BTNUM_BNO);
1058                /*
1059                 * Fix up the btree entries.
1060                 */
1061                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1062                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1063                        goto error0;
1064                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1065                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1066
1067                trace_xfs_alloc_near_first(args);
1068                return 0;
1069        }
1070        /*
1071         * Second algorithm.
1072         * Search in the by-bno tree to the left and to the right
1073         * simultaneously, until in each case we find a space big enough,
1074         * or run into the edge of the tree.  When we run into the edge,
1075         * we deallocate that cursor.
1076         * If both searches succeed, we compare the two spaces and pick
1077         * the better one.
1078         * With alignment, it's possible for both to fail; the upper
1079         * level algorithm that picks allocation groups for allocations
1080         * is not supposed to do this.
1081         */
1082        /*
1083         * Allocate and initialize the cursor for the leftward search.
1084         */
1085        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1086                args->agno, XFS_BTNUM_BNO);
1087        /*
1088         * Lookup <= bno to find the leftward search's starting point.
1089         */
1090        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1091                goto error0;
1092        if (!i) {
1093                /*
1094                 * Didn't find anything; use this cursor for the rightward
1095                 * search.
1096                 */
1097                bno_cur_gt = bno_cur_lt;
1098                bno_cur_lt = NULL;
1099        }
1100        /*
1101         * Found something.  Duplicate the cursor for the rightward search.
1102         */
1103        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1104                goto error0;
1105        /*
1106         * Increment the cursor, so we will point at the entry just right
1107         * of the leftward entry if any, or to the leftmost entry.
1108         */
1109        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1110                goto error0;
1111        if (!i) {
1112                /*
1113                 * It failed, there are no rightward entries.
1114                 */
1115                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1116                bno_cur_gt = NULL;
1117        }
1118        /*
1119         * Loop going left with the leftward cursor, right with the
1120         * rightward cursor, until either both directions give up or
1121         * we find an entry at least as big as minlen.
1122         */
1123        do {
1124                if (bno_cur_lt) {
1125                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1126                                goto error0;
1127                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1128                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1129                                                  &ltbnoa, &ltlena);
1130                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1131                                break;
1132                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1133                                goto error0;
1134                        if (!i || ltbnoa < args->min_agbno) {
1135                                xfs_btree_del_cursor(bno_cur_lt,
1136                                                     XFS_BTREE_NOERROR);
1137                                bno_cur_lt = NULL;
1138                        }
1139                }
1140                if (bno_cur_gt) {
1141                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1142                                goto error0;
1143                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1144                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1145                                                  &gtbnoa, &gtlena);
1146                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1147                                break;
1148                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1149                                goto error0;
1150                        if (!i || gtbnoa > args->max_agbno) {
1151                                xfs_btree_del_cursor(bno_cur_gt,
1152                                                     XFS_BTREE_NOERROR);
1153                                bno_cur_gt = NULL;
1154                        }
1155                }
1156        } while (bno_cur_lt || bno_cur_gt);
1157
1158        /*
1159         * Got both cursors still active, need to find better entry.
1160         */
1161        if (bno_cur_lt && bno_cur_gt) {
1162                if (ltlena >= args->minlen) {
1163                        /*
1164                         * Left side is good, look for a right side entry.
1165                         */
1166                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1167                        xfs_alloc_fix_len(args);
1168                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1169                                args->alignment, args->userdata, ltbnoa,
1170                                ltlena, &ltnew);
1171
1172                        error = xfs_alloc_find_best_extent(args,
1173                                                &bno_cur_lt, &bno_cur_gt,
1174                                                ltdiff, &gtbno, &gtlen,
1175                                                &gtbnoa, &gtlena,
1176                                                0 /* search right */);
1177                } else {
1178                        ASSERT(gtlena >= args->minlen);
1179
1180                        /*
1181                         * Right side is good, look for a left side entry.
1182                         */
1183                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1184                        xfs_alloc_fix_len(args);
1185                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1186                                args->alignment, args->userdata, gtbnoa,
1187                                gtlena, &gtnew);
1188
1189                        error = xfs_alloc_find_best_extent(args,
1190                                                &bno_cur_gt, &bno_cur_lt,
1191                                                gtdiff, &ltbno, &ltlen,
1192                                                &ltbnoa, &ltlena,
1193                                                1 /* search left */);
1194                }
1195
1196                if (error)
1197                        goto error0;
1198        }
1199
1200        /*
1201         * If we couldn't get anything, give up.
1202         */
1203        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1204                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1205
1206                if (!forced++) {
1207                        trace_xfs_alloc_near_busy(args);
1208                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1209                        goto restart;
1210                }
1211                trace_xfs_alloc_size_neither(args);
1212                args->agbno = NULLAGBLOCK;
1213                return 0;
1214        }
1215
1216        /*
1217         * At this point we have selected a freespace entry, either to the
1218         * left or to the right.  If it's on the right, copy all the
1219         * useful variables to the "left" set so we only have one
1220         * copy of this code.
1221         */
1222        if (bno_cur_gt) {
1223                bno_cur_lt = bno_cur_gt;
1224                bno_cur_gt = NULL;
1225                ltbno = gtbno;
1226                ltbnoa = gtbnoa;
1227                ltlen = gtlen;
1228                ltlena = gtlena;
1229                j = 1;
1230        } else
1231                j = 0;
1232
1233        /*
1234         * Fix up the length and compute the useful address.
1235         */
1236        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1237        xfs_alloc_fix_len(args);
1238        if (!xfs_alloc_fix_minleft(args)) {
1239                trace_xfs_alloc_near_nominleft(args);
1240                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1241                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1242                return 0;
1243        }
1244        rlen = args->len;
1245        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1246                                     args->userdata, ltbnoa, ltlena, &ltnew);
1247        ASSERT(ltnew >= ltbno);
1248        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1249        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1250        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1251        args->agbno = ltnew;
1252
1253        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1254                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1255                goto error0;
1256
1257        if (j)
1258                trace_xfs_alloc_near_greater(args);
1259        else
1260                trace_xfs_alloc_near_lesser(args);
1261
1262        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1263        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1264        return 0;
1265
1266 error0:
1267        trace_xfs_alloc_near_error(args);
1268        if (cnt_cur != NULL)
1269                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1270        if (bno_cur_lt != NULL)
1271                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1272        if (bno_cur_gt != NULL)
1273                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1274        return error;
1275}
1276
1277/*
1278 * Allocate a variable extent anywhere in the allocation group agno.
1279 * Extent's length (returned in len) will be between minlen and maxlen,
1280 * and of the form k * prod + mod unless there's nothing that large.
1281 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1282 */
1283STATIC int                              /* error */
1284xfs_alloc_ag_vextent_size(
1285        xfs_alloc_arg_t *args)          /* allocation argument structure */
1286{
1287        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1288        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1289        int             error;          /* error result */
1290        xfs_agblock_t   fbno;           /* start of found freespace */
1291        xfs_extlen_t    flen;           /* length of found freespace */
1292        int             i;              /* temp status variable */
1293        xfs_agblock_t   rbno;           /* returned block number */
1294        xfs_extlen_t    rlen;           /* length of returned extent */
1295        int             forced = 0;
1296
1297restart:
1298        /*
1299         * Allocate and initialize a cursor for the by-size btree.
1300         */
1301        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1302                args->agno, XFS_BTNUM_CNT);
1303        bno_cur = NULL;
1304
1305        /*
1306         * Look for an entry >= maxlen+alignment-1 blocks.
1307         */
1308        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1309                        args->maxlen + args->alignment - 1, &i)))
1310                goto error0;
1311
1312        /*
1313         * If none or we have busy extents that we cannot allocate from, then
1314         * we have to settle for a smaller extent. In the case that there are
1315         * no large extents, this will return the last entry in the tree unless
1316         * the tree is empty. In the case that there are only busy large
1317         * extents, this will return the largest small extent unless there
1318         * are no smaller extents available.
1319         */
1320        if (!i || forced > 1) {
1321                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1322                                                   &fbno, &flen, &i);
1323                if (error)
1324                        goto error0;
1325                if (i == 0 || flen == 0) {
1326                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1327                        trace_xfs_alloc_size_noentry(args);
1328                        return 0;
1329                }
1330                ASSERT(i == 1);
1331                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1332        } else {
1333                /*
1334                 * Search for a non-busy extent that is large enough.
1335                 * If we are at low space, don't check, or if we fall of
1336                 * the end of the btree, turn off the busy check and
1337                 * restart.
1338                 */
1339                 * If we are at low space, don't check, or if we fall off
1340                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1341                        if (error)
1342                                goto error0;
1343                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1344
1345                        xfs_alloc_compute_aligned(args, fbno, flen,
1346                                                  &rbno, &rlen);
1347
1348                        if (rlen >= args->maxlen)
1349                                break;
1350
1351                        error = xfs_btree_increment(cnt_cur, 0, &i);
1352                        if (error)
1353                                goto error0;
1354                        if (i == 0) {
1355                                /*
1356                                 * Our only valid extents must have been busy.
1357                                 * Make it unbusy by forcing the log out and
1358                                 * retrying. If we've been here before, forcing
1359                                 * the log isn't making the extents available,
1360                                 * which means they have probably been freed in
1361                                 * this transaction.  In that case, we have to
1362                                 * give up on them and we'll attempt a minlen
1363                                 * allocation the next time around.
1364                                 */
1365                                xfs_btree_del_cursor(cnt_cur,
1366                                                     XFS_BTREE_NOERROR);
1367                                trace_xfs_alloc_size_busy(args);
1368                                if (!forced++)
1369                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1370                                goto restart;
1371                        }
1372                }
1373        }
1374
1375        /*
1376         * In the first case above, we got the last entry in the
1377         * by-size btree.  Now we check to see if the space hits maxlen
1378         * once aligned; if not, we search left for something better.
1379         * This can't happen in the second case above.
1380         */
1381        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1382        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1383                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1384        if (rlen < args->maxlen) {
1385                xfs_agblock_t   bestfbno;
1386                xfs_extlen_t    bestflen;
1387                xfs_agblock_t   bestrbno;
1388                xfs_extlen_t    bestrlen;
1389
1390                bestrlen = rlen;
1391                bestrbno = rbno;
1392                bestflen = flen;
1393                bestfbno = fbno;
1394                for (;;) {
1395                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1396                                goto error0;
1397                        if (i == 0)
1398                                break;
1399                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1400                                        &i)))
1401                                goto error0;
1402                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1403                        if (flen < bestrlen)
1404                                break;
1405                        xfs_alloc_compute_aligned(args, fbno, flen,
1406                                                  &rbno, &rlen);
1407                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1408                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1409                                (rlen <= flen && rbno + rlen <= fbno + flen),
1410                                error0);
1411                        if (rlen > bestrlen) {
1412                                bestrlen = rlen;
1413                                bestrbno = rbno;
1414                                bestflen = flen;
1415                                bestfbno = fbno;
1416                                if (rlen == args->maxlen)
1417                                        break;
1418                        }
1419                }
1420                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1421                                &i)))
1422                        goto error0;
1423                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1424                rlen = bestrlen;
1425                rbno = bestrbno;
1426                flen = bestflen;
1427                fbno = bestfbno;
1428        }
1429        args->wasfromfl = 0;
1430        /*
1431         * Fix up the length.
1432         */
1433        args->len = rlen;
1434        if (rlen < args->minlen) {
1435                if (!forced++) {
1436                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1437                        trace_xfs_alloc_size_busy(args);
1438                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1439                        goto restart;
1440                }
1441                goto out_nominleft;
1442        }
1443        xfs_alloc_fix_len(args);
1444
1445        if (!xfs_alloc_fix_minleft(args))
1446                goto out_nominleft;
1447        rlen = args->len;
1448        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1449        /*
1450         * Allocate and initialize a cursor for the by-block tree.
1451         */
1452        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1453                args->agno, XFS_BTNUM_BNO);
1454        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1455                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1456                goto error0;
1457        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1458        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1459        cnt_cur = bno_cur = NULL;
1460        args->len = rlen;
1461        args->agbno = rbno;
1462        XFS_WANT_CORRUPTED_GOTO(args->mp,
1463                args->agbno + args->len <=
1464                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1465                error0);
1466        trace_xfs_alloc_size_done(args);
1467        return 0;
1468
1469error0:
1470        trace_xfs_alloc_size_error(args);
1471        if (cnt_cur)
1472                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1473        if (bno_cur)
1474                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1475        return error;
1476
1477out_nominleft:
1478        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1479        trace_xfs_alloc_size_nominleft(args);
1480        args->agbno = NULLAGBLOCK;
1481        return 0;
1482}
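
/*
 * A minimal standalone sketch of why the by-size lookup above asks for
 * maxlen + alignment - 1 blocks: rounding the start of a candidate free
 * extent up to the alignment can lose up to alignment - 1 blocks from the
 * front, so only extents at least that much larger than maxlen are
 * guaranteed to still satisfy maxlen once aligned.  The helper name below
 * is illustrative only; the real code uses xfs_alloc_compute_aligned(),
 * which also trims busy extents (omitted here).
 */
static void
example_compute_aligned(
        unsigned int    fbno,           /* start of found free extent */
        unsigned int    flen,           /* length of found free extent */
        unsigned int    alignment,      /* required alignment, >= 1 */
        unsigned int    *rbno,          /* aligned start */
        unsigned int    *rlen)          /* usable length from aligned start */
{
        /* round the start up to the next multiple of alignment */
        unsigned int    aligned = ((fbno + alignment - 1) / alignment) * alignment;
        unsigned int    lost = aligned - fbno;

        *rbno = aligned;
        *rlen = (lost < flen) ? flen - lost : 0;
        /*
         * If flen >= maxlen + alignment - 1 then lost <= alignment - 1
         * guarantees *rlen >= maxlen, which is exactly the key passed to
         * xfs_alloc_lookup_ge() in the function above.
         */
}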
1483
1484/*
1485 * Deal with the case where only small freespaces remain.
1486 * Either return the contents of the last freespace record,
1487 * or allocate space from the freelist if there is nothing in the tree.
1488 */
1489STATIC int                      /* error */
1490xfs_alloc_ag_vextent_small(
1491        xfs_alloc_arg_t *args,  /* allocation argument structure */
1492        xfs_btree_cur_t *ccur,  /* by-size cursor */
1493        xfs_agblock_t   *fbnop, /* result block number */
1494        xfs_extlen_t    *flenp, /* result length */
1495        int             *stat)  /* status: 0-freelist, 1-normal/none */
1496{
1497        int             error;
1498        xfs_agblock_t   fbno;
1499        xfs_extlen_t    flen;
1500        int             i;
1501
1502        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1503                goto error0;
1504        if (i) {
1505                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1506                        goto error0;
1507                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1508        }
1509        /*
1510         * Nothing in the btree, try the freelist.  Make sure
1511         * to respect minleft even when pulling from the
1512         * freelist.
1513         */
1514        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1515                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1516                  > args->minleft)) {
1517                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1518                if (error)
1519                        goto error0;
1520                if (fbno != NULLAGBLOCK) {
1521                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1522                                             args->userdata);
1523
1524                        if (args->userdata) {
1525                                xfs_buf_t       *bp;
1526
1527                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1528                                        args->agno, fbno, 0);
1529                                xfs_trans_binval(args->tp, bp);
1530                        }
1531                        args->len = 1;
1532                        args->agbno = fbno;
1533                        XFS_WANT_CORRUPTED_GOTO(args->mp,
1534                                args->agbno + args->len <=
1535                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1536                                error0);
1537                        args->wasfromfl = 1;
1538                        trace_xfs_alloc_small_freelist(args);
1539                        *stat = 0;
1540                        return 0;
1541                }
1542                /*
1543                 * Nothing in the freelist.
1544                 */
1545                else
1546                        flen = 0;
1547        }
1548        /*
1549         * Can't allocate from the freelist for some reason.
1550         */
1551        else {
1552                fbno = NULLAGBLOCK;
1553                flen = 0;
1554        }
1555        /*
1556         * Can't do the allocation, give up.
1557         */
1558        if (flen < args->minlen) {
1559                args->agbno = NULLAGBLOCK;
1560                trace_xfs_alloc_small_notenough(args);
1561                flen = 0;
1562        }
1563        *fbnop = fbno;
1564        *flenp = flen;
1565        *stat = 1;
1566        trace_xfs_alloc_small_done(args);
1567        return 0;
1568
1569error0:
1570        trace_xfs_alloc_small_error(args);
1571        return error;
1572}
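
/*
 * A minimal sketch of the gate used above before falling back to the AGFL:
 * a single block can be handed out from the freelist only when the caller
 * needs exactly one block with no alignment, is not itself refilling the
 * freelist, and enough AGFL blocks remain to honour minleft.  The struct
 * and helper here are illustrative only; the field names mirror the
 * xfs_alloc_arg and AGF fields used above.
 */
struct example_small_args {
        unsigned int    minlen;         /* minimum acceptable length */
        unsigned int    alignment;      /* required alignment */
        int             isfl;           /* allocation is for the freelist */
        unsigned int    flcount;        /* current AGFL length (agf_flcount) */
        unsigned int    minleft;        /* blocks that must stay available */
};

static int
example_can_use_freelist(const struct example_small_args *a)
{
        return a->minlen == 1 && a->alignment == 1 && !a->isfl &&
               a->flcount > a->minleft;
}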
1573
1574/*
1575 * Free the extent starting at agno/bno for length.
1576 */
1577STATIC int                      /* error */
1578xfs_free_ag_extent(
1579        xfs_trans_t     *tp,    /* transaction pointer */
1580        xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
1581        xfs_agnumber_t  agno,   /* allocation group number */
1582        xfs_agblock_t   bno,    /* starting block number */
1583        xfs_extlen_t    len,    /* length of extent */
1584        int             isfl)   /* set if is freelist blocks - no sb acctg */
1585{
1586        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1587        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1588        int             error;          /* error return value */
1589        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1590        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1591        int             haveleft;       /* have a left neighbor block */
1592        int             haveright;      /* have a right neighbor block */
1593        int             i;              /* temp, result code */
1594        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1595        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1596        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1597        xfs_agblock_t   nbno;           /* new starting block of freespace */
1598        xfs_extlen_t    nlen;           /* new length of freespace */
1599        xfs_perag_t     *pag;           /* per allocation group data */
1600
1601        mp = tp->t_mountp;
1602        /*
1603         * Allocate and initialize a cursor for the by-block btree.
1604         */
1605        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1606        cnt_cur = NULL;
1607        /*
1608         * Look for a neighboring block on the left (lower block numbers)
1609         * that is contiguous with this space.
1610         */
1611        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1612                goto error0;
1613        if (haveleft) {
1614                /*
1615                 * There is a block to our left.
1616                 */
1617                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1618                        goto error0;
1619                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1620                /*
1621                 * It's not contiguous, though.
1622                 */
1623                if (ltbno + ltlen < bno)
1624                        haveleft = 0;
1625                else {
1626                        /*
1627                         * If this failure happens the request to free this
1628                         * space was invalid, it's (partly) already free.
1629                         * Very bad.
1630                         */
1631                        XFS_WANT_CORRUPTED_GOTO(mp,
1632                                                ltbno + ltlen <= bno, error0);
1633                }
1634        }
1635        /*
1636         * Look for a neighboring block on the right (higher block numbers)
1637         * that is contiguous with this space.
1638         */
1639        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1640                goto error0;
1641        if (haveright) {
1642                /*
1643                 * There is a block to our right.
1644                 */
1645                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1646                        goto error0;
1647                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1648                /*
1649                 * It's not contiguous, though.
1650                 */
1651                if (bno + len < gtbno)
1652                        haveright = 0;
1653                else {
1654                        /*
1655                         * If this failure happens the request to free this
1656                         * space was invalid, it's (partly) already free.
1657                         * Very bad.
1658                         */
1659                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1660                }
1661        }
1662        /*
1663         * Now allocate and initialize a cursor for the by-size tree.
1664         */
1665        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1666        /*
1667         * Have both left and right contiguous neighbors.
1668         * Merge all three into a single free block.
1669         */
1670        if (haveleft && haveright) {
1671                /*
1672                 * Delete the old by-size entry on the left.
1673                 */
1674                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1675                        goto error0;
1676                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1677                if ((error = xfs_btree_delete(cnt_cur, &i)))
1678                        goto error0;
1679                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1680                /*
1681                 * Delete the old by-size entry on the right.
1682                 */
1683                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1684                        goto error0;
1685                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1686                if ((error = xfs_btree_delete(cnt_cur, &i)))
1687                        goto error0;
1688                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1689                /*
1690                 * Delete the old by-block entry for the right block.
1691                 */
1692                if ((error = xfs_btree_delete(bno_cur, &i)))
1693                        goto error0;
1694                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1695                /*
1696                 * Move the by-block cursor back to the left neighbor.
1697                 */
1698                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1699                        goto error0;
1700                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1701#ifdef DEBUG
1702                /*
1703                 * Check that this is the right record: delete didn't
1704                 * mangle the cursor.
1705                 */
1706                {
1707                        xfs_agblock_t   xxbno;
1708                        xfs_extlen_t    xxlen;
1709
1710                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1711                                        &i)))
1712                                goto error0;
1713                        XFS_WANT_CORRUPTED_GOTO(mp,
1714                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1715                                error0);
1716                }
1717#endif
1718                /*
1719                 * Update remaining by-block entry to the new, joined block.
1720                 */
1721                nbno = ltbno;
1722                nlen = len + ltlen + gtlen;
1723                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1724                        goto error0;
1725        }
1726        /*
1727         * Have only a left contiguous neighbor.
1728         * Merge it together with the new freespace.
1729         */
1730        else if (haveleft) {
1731                /*
1732                 * Delete the old by-size entry on the left.
1733                 */
1734                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1735                        goto error0;
1736                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1737                if ((error = xfs_btree_delete(cnt_cur, &i)))
1738                        goto error0;
1739                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1740                /*
1741                 * Back up the by-block cursor to the left neighbor, and
1742                 * update its length.
1743                 */
1744                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1745                        goto error0;
1746                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1747                nbno = ltbno;
1748                nlen = len + ltlen;
1749                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1750                        goto error0;
1751        }
1752        /*
1753         * Have only a right contiguous neighbor.
1754         * Merge it together with the new freespace.
1755         */
1756        else if (haveright) {
1757                /*
1758                 * Delete the old by-size entry on the right.
1759                 */
1760                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1761                        goto error0;
1762                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1763                if ((error = xfs_btree_delete(cnt_cur, &i)))
1764                        goto error0;
1765                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1766                /*
1767                 * Update the starting block and length of the right
1768                 * neighbor in the by-block tree.
1769                 */
1770                nbno = bno;
1771                nlen = len + gtlen;
1772                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1773                        goto error0;
1774        }
1775        /*
1776         * No contiguous neighbors.
1777         * Insert the new freespace into the by-block tree.
1778         */
1779        else {
1780                nbno = bno;
1781                nlen = len;
1782                if ((error = xfs_btree_insert(bno_cur, &i)))
1783                        goto error0;
1784                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1785        }
1786        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1787        bno_cur = NULL;
1788        /*
1789         * In all cases we need to insert the new freespace in the by-size tree.
1790         */
1791        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1792                goto error0;
1793        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1794        if ((error = xfs_btree_insert(cnt_cur, &i)))
1795                goto error0;
1796        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1797        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1798        cnt_cur = NULL;
1799
1800        /*
1801         * Update the freespace totals in the ag and superblock.
1802         */
1803        pag = xfs_perag_get(mp, agno);
1804        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1805        xfs_perag_put(pag);
1806        if (error)
1807                goto error0;
1808
1809        if (!isfl)
1810                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1811        XFS_STATS_INC(xs_freex);
1812        XFS_STATS_ADD(xs_freeb, len);
1813
1814        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1815
1816        return 0;
1817
1818 error0:
1819        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1820        if (bno_cur)
1821                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1822        if (cnt_cur)
1823                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1824        return error;
1825}
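
/*
 * A minimal sketch of the coalescing arithmetic above: given the freed
 * range [bno, bno + len) and optional contiguous neighbours on the left
 * ([ltbno, ltbno + ltlen)) and right ([gtbno, gtbno + gtlen)), the merged
 * record starts at the left-most start and covers every contiguous piece.
 * Cursors, btree record manipulation and corruption checks are omitted;
 * only the nbno/nlen computation from the four cases above is reproduced.
 */
static void
example_merge_freed_extent(
        unsigned int    bno,            /* start of freed extent */
        unsigned int    len,            /* length of freed extent */
        int             haveleft,       /* left neighbour is contiguous */
        unsigned int    ltbno,          /* left neighbour start */
        unsigned int    ltlen,          /* left neighbour length */
        int             haveright,      /* right neighbour is contiguous */
        unsigned int    gtlen,          /* right neighbour length */
        unsigned int    *nbno,          /* resulting record start */
        unsigned int    *nlen)          /* resulting record length */
{
        *nbno = haveleft ? ltbno : bno;
        *nlen = len;
        if (haveleft)
                *nlen += ltlen;
        if (haveright)
                *nlen += gtlen;
}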
1826
1827/*
1828 * Visible (exported) allocation/free functions.
1829 * Some of these are used just by xfs_alloc_btree.c and this file.
1830 */
1831
1832/*
1833 * Compute and fill in value of m_ag_maxlevels.
1834 */
1835void
1836xfs_alloc_compute_maxlevels(
1837        xfs_mount_t     *mp)    /* file system mount structure */
1838{
1839        int             level;
1840        uint            maxblocks;
1841        uint            maxleafents;
1842        int             minleafrecs;
1843        int             minnoderecs;
1844
1845        maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1846        minleafrecs = mp->m_alloc_mnr[0];
1847        minnoderecs = mp->m_alloc_mnr[1];
1848        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1849        for (level = 1; maxblocks > 1; level++)
1850                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1851        mp->m_ag_maxlevels = level;
1852}
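
/*
 * A minimal standalone sketch of the level computation above, with made-up
 * numbers (not taken from any particular filesystem): the worst-case free
 * space btree has one record for every other block in the AG, and each
 * level divides the block count by the minimum records per block until a
 * single root block remains.
 */
static int
example_compute_maxlevels(
        unsigned int    agblocks,       /* blocks per AG, e.g. 1000000 */
        unsigned int    minleafrecs,    /* min records per leaf, e.g. 250 */
        unsigned int    minnoderecs)    /* min keys/ptrs per node, e.g. 125 */
{
        unsigned int    maxleafents = (agblocks + 1) / 2;
        unsigned int    maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
        int             level;

        for (level = 1; maxblocks > 1; level++)
                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
        /*
         * With the hypothetical numbers above: 500000 worst-case records
         * -> 2000 leaves -> 16 nodes -> 1 root, i.e. 3 levels.
         */
        return level;
}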
1853
1854/*
1855 * Find the length of the longest extent in an AG.
1856 */
1857xfs_extlen_t
1858xfs_alloc_longest_free_extent(
1859        struct xfs_mount        *mp,
1860        struct xfs_perag        *pag,
1861        xfs_extlen_t            need)
1862{
1863        xfs_extlen_t            delta = 0;
1864
1865        if (need > pag->pagf_flcount)
1866                delta = need - pag->pagf_flcount;
1867
1868        if (pag->pagf_longest > delta)
1869                return pag->pagf_longest - delta;
1870        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1871}
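
/*
 * A minimal sketch of the calculation above with hypothetical numbers: if
 * the freelist needs 8 blocks but only holds 3, the 5-block shortfall may
 * have to be carved out of the longest free extent, so the usable longest
 * extent is reported 5 blocks smaller.  The values are made up for
 * illustration.
 */
static unsigned int
example_longest_free_extent(
        unsigned int    pagf_longest,   /* e.g. 100 blocks */
        unsigned int    pagf_flcount,   /* e.g. 3 blocks on the AGFL */
        unsigned int    need)           /* e.g. 8 blocks wanted on the AGFL */
{
        unsigned int    delta = (need > pagf_flcount) ? need - pagf_flcount : 0;

        if (pagf_longest > delta)
                return pagf_longest - delta;    /* 100 - 5 = 95 here */
        /* otherwise: 1 if any free space is known at all, else 0 */
        return pagf_flcount > 0 || pagf_longest > 0;
}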
1872
1873unsigned int
1874xfs_alloc_min_freelist(
1875        struct xfs_mount        *mp,
1876        struct xfs_perag        *pag)
1877{
1878        unsigned int            min_free;
1879
1880        /* space needed by-bno freespace btree */
1881        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1882                                       mp->m_ag_maxlevels);
1883        /* space needed by-size freespace btree */
1884        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1885                                       mp->m_ag_maxlevels);
1886
1887        return min_free;
1888}
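
/*
 * A minimal sketch of the reservation above with hypothetical level counts:
 * a worst-case split can consume roughly one new block per existing level
 * plus a new root, hence "levels + 1" per free space btree, clamped to the
 * filesystem-wide maximum.  With both btrees at 2 levels and
 * m_ag_maxlevels = 5 this gives 3 + 3 = 6 AGFL blocks.
 */
static unsigned int
example_min_freelist(
        unsigned int    bno_levels,     /* by-block btree height, e.g. 2 */
        unsigned int    cnt_levels,     /* by-size btree height, e.g. 2 */
        unsigned int    ag_maxlevels)   /* e.g. 5 */
{
        unsigned int    min_free;

        min_free  = (bno_levels + 1 < ag_maxlevels) ? bno_levels + 1 : ag_maxlevels;
        min_free += (cnt_levels + 1 < ag_maxlevels) ? cnt_levels + 1 : ag_maxlevels;
        return min_free;
}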
1889
1890/*
1891 * Check if the operation we are fixing up the freelist for should go ahead or
1892 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1893 * is dependent on whether the size and shape of free space available will
1894 * permit the requested allocation to take place.
1895 */
1896static bool
1897xfs_alloc_space_available(
1898        struct xfs_alloc_arg    *args,
1899        xfs_extlen_t            min_free,
1900        int                     flags)
1901{
1902        struct xfs_perag        *pag = args->pag;
1903        xfs_extlen_t            longest;
1904        int                     available;
1905
1906        if (flags & XFS_ALLOC_FLAG_FREEING)
1907                return true;
1908
1909        /* do we have enough contiguous free space for the allocation? */
1910        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1911        if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
1912                return false;
1913
1914        /* do we have enough free space remaining for the allocation? */
1915        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
1916                          min_free - args->total);
1917        if (available < (int)args->minleft)
1918                return false;
1919
1920        return true;
1921}
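
/*
 * A minimal sketch of the two gates above, with made-up numbers: a request
 * for minlen = 16 with alignment = 4 and minalignslop = 0 needs a
 * contiguous run of at least 16 + 4 - 1 = 19 blocks, and after reserving
 * min_free for the AGFL and args->total for the transaction the AG must
 * still keep at least args->minleft blocks free.  Only the arithmetic is
 * reproduced; the perag lookups and the freeing short-circuit are omitted.
 */
static int
example_space_available(
        unsigned int    longest,        /* longest usable free extent */
        unsigned int    freeblks,       /* pagf_freeblks */
        unsigned int    flcount,        /* pagf_flcount */
        unsigned int    min_free,       /* from xfs_alloc_min_freelist() */
        unsigned int    minlen,
        unsigned int    alignment,
        unsigned int    minalignslop,
        unsigned int    total,
        unsigned int    minleft)
{
        int             available;

        /* is there a single extent big enough once aligned? */
        if (minlen + alignment + minalignslop - 1 > longest)
                return 0;

        /* is there enough total space left over afterwards? */
        available = (int)(freeblks + flcount - min_free - total);
        return available >= (int)minleft;
}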
1922
1923/*
1924 * Decide whether to use this allocation group for this allocation.
1925 * If so, fix up the btree freelist's size.
1926 */
1927STATIC int                      /* error */
1928xfs_alloc_fix_freelist(
1929        struct xfs_alloc_arg    *args,  /* allocation argument structure */
1930        int                     flags)  /* XFS_ALLOC_FLAG_... */
1931{
1932        struct xfs_mount        *mp = args->mp;
1933        struct xfs_perag        *pag = args->pag;
1934        struct xfs_trans        *tp = args->tp;
1935        struct xfs_buf          *agbp = NULL;
1936        struct xfs_buf          *agflbp = NULL;
1937        struct xfs_alloc_arg    targs;  /* local allocation arguments */
1938        xfs_agblock_t           bno;    /* freelist block */
1939        xfs_extlen_t            need;   /* total blocks needed in freelist */
1940        int                     error;
1941
1942        if (!pag->pagf_init) {
1943                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1944                if (error)
1945                        goto out_no_agbp;
1946                if (!pag->pagf_init) {
1947                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1948                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1949                        goto out_agbp_relse;
1950                }
1951        }
1952
1953        /*
1954         * If this is a metadata-preferred pag and we are allocating user
1955         * data, try somewhere else unless we are being asked to try harder
1956         * at this point.
1957         */
1958        if (pag->pagf_metadata && args->userdata &&
1959            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1960                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1961                goto out_agbp_relse;
1962        }
1963
1964        need = xfs_alloc_min_freelist(mp, pag);
1965        if (!xfs_alloc_space_available(args, need, flags))
1966                goto out_agbp_relse;
1967
1968        /*
1969         * Get the a.g. freespace buffer.
1970         * Can fail if we're not blocking on locks, and it's held.
1971         */
1972        if (!agbp) {
1973                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1974                if (error)
1975                        goto out_no_agbp;
1976                if (!agbp) {
1977                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1978                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1979                        goto out_no_agbp;
1980                }
1981        }
1982
1983        /* If there isn't enough total space or a big enough single extent, reject it. */
1984        need = xfs_alloc_min_freelist(mp, pag);
1985        if (!xfs_alloc_space_available(args, need, flags))
1986                goto out_agbp_relse;
1987
1988        /*
1989         * Make the freelist shorter if it's too long.
1990         *
1991         * Note that from this point onwards, we will always release the agf and
1992         * agfl buffers on error. This handles the case where we error out and
1993         * the buffers are clean or may not have been joined to the transaction
1994         * and hence need to be released manually. If they have been joined to
1995         * the transaction, then xfs_trans_brelse() will handle them
1996         * appropriately based on the recursion count and dirty state of the
1997         * buffer.
1998         *
1999         * XXX (dgc): When we have lots of free space, does this buy us
2000         * anything other than extra overhead when we need to put more blocks
2001         * back on the free list? Maybe we should only do this when space is
2002         * getting low or the AGFL is more than half full?
2003         */
2004        while (pag->pagf_flcount > need) {
2005                struct xfs_buf  *bp;
2006
2007                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2008                if (error)
2009                        goto out_agbp_relse;
2010                error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
2011                if (error)
2012                        goto out_agbp_relse;
2013                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2014                xfs_trans_binval(tp, bp);
2015        }
2016
2017        memset(&targs, 0, sizeof(targs));
2018        targs.tp = tp;
2019        targs.mp = mp;
2020        targs.agbp = agbp;
2021        targs.agno = args->agno;
2022        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
2023        targs.type = XFS_ALLOCTYPE_THIS_AG;
2024        targs.pag = pag;
2025        error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2026        if (error)
2027                goto out_agbp_relse;
2028
2029        /* Make the freelist longer if it's too short. */
2030        while (pag->pagf_flcount < need) {
2031                targs.agbno = 0;
2032                targs.maxlen = need - pag->pagf_flcount;
2033
2034                /* Allocate as many blocks as possible at once. */
2035                error = xfs_alloc_ag_vextent(&targs);
2036                if (error)
2037                        goto out_agflbp_relse;
2038
2039                /*
2040                 * Stop if we run out.  Won't happen if callers are obeying
2041                 * the restrictions correctly.  Can happen for free calls
2042                 * on a completely full ag.
2043                 */
2044                if (targs.agbno == NULLAGBLOCK) {
2045                        if (flags & XFS_ALLOC_FLAG_FREEING)
2046                                break;
2047                        goto out_agflbp_relse;
2048                }
2049                /*
2050                 * Put each allocated block on the list.
2051                 */
2052                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2053                        error = xfs_alloc_put_freelist(tp, agbp,
2054                                                        agflbp, bno, 0);
2055                        if (error)
2056                                goto out_agflbp_relse;
2057                }
2058        }
2059        xfs_trans_brelse(tp, agflbp);
2060        args->agbp = agbp;
2061        return 0;
2062
2063out_agflbp_relse:
2064        xfs_trans_brelse(tp, agflbp);
2065out_agbp_relse:
2066        if (agbp)
2067                xfs_trans_brelse(tp, agbp);
2068out_no_agbp:
2069        args->agbp = NULL;
2070        return error;
2071}
2072
2073/*
2074 * Get a block from the freelist.
2075 * Returns with the buffer for the block gotten.
2076 */
2077int                             /* error */
2078xfs_alloc_get_freelist(
2079        xfs_trans_t     *tp,    /* transaction pointer */
2080        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2081        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2082        int             btreeblk) /* destination is an AGF btree */
2083{
2084        xfs_agf_t       *agf;   /* a.g. freespace structure */
2085        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2086        xfs_agblock_t   bno;    /* block number returned */
2087        __be32          *agfl_bno;
2088        int             error;
2089        int             logflags;
2090        xfs_mount_t     *mp = tp->t_mountp;
2091        xfs_perag_t     *pag;   /* per allocation group data */
2092
2093        /*
2094         * Freelist is empty, give up.
2095         */
2096        agf = XFS_BUF_TO_AGF(agbp);
2097        if (!agf->agf_flcount) {
2098                *bnop = NULLAGBLOCK;
2099                return 0;
2100        }
2101        /*
2102         * Read the array of free blocks.
2103         */
2104        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2105                                    &agflbp);
2106        if (error)
2107                return error;
2108
2109
2110        /*
2111         * Get the block number and update the data structures.
2112         */
2113        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2114        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2115        be32_add_cpu(&agf->agf_flfirst, 1);
2116        xfs_trans_brelse(tp, agflbp);
2117        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2118                agf->agf_flfirst = 0;
2119
2120        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2121        be32_add_cpu(&agf->agf_flcount, -1);
2122        xfs_trans_agflist_delta(tp, -1);
2123        pag->pagf_flcount--;
2124
2125        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2126        if (btreeblk) {
2127                be32_add_cpu(&agf->agf_btreeblks, 1);
2128                pag->pagf_btreeblks++;
2129                logflags |= XFS_AGF_BTREEBLKS;
2130        }
2131        xfs_perag_put(pag);
2132
2133        xfs_alloc_log_agf(tp, agbp, logflags);
2134        *bnop = bno;
2135
2136        return 0;
2137}
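
/*
 * A minimal sketch of the circular AGFL indexing above: the free list is a
 * fixed-size array of block numbers consumed at agf_flfirst and refilled
 * at agf_fllast (see xfs_alloc_put_freelist() below), with both indices
 * wrapping at the array size.  The struct, the size of 16 and the helper
 * name are hypothetical; endianness conversion, buffer management and
 * logging are omitted.
 */
struct example_agfl {
        unsigned int    bno[16];        /* stands in for XFS_AGFL_SIZE slots */
        unsigned int    flfirst;        /* next slot to take from */
        unsigned int    fllast;         /* last slot filled */
        unsigned int    flcount;        /* occupied slots */
};

static int
example_agfl_get(struct example_agfl *fl, unsigned int *bnop)
{
        if (!fl->flcount)
                return 0;               /* empty: caller sees NULLAGBLOCK */
        *bnop = fl->bno[fl->flfirst];
        if (++fl->flfirst == 16)
                fl->flfirst = 0;        /* wrap, as with XFS_AGFL_SIZE(mp) */
        fl->flcount--;
        return 1;
}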
2138
2139/*
2140 * Log the given fields from the agf structure.
2141 */
2142void
2143xfs_alloc_log_agf(
2144        xfs_trans_t     *tp,    /* transaction pointer */
2145        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2146        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2147{
2148        int     first;          /* first byte offset */
2149        int     last;           /* last byte offset */
2150        static const short      offsets[] = {
2151                offsetof(xfs_agf_t, agf_magicnum),
2152                offsetof(xfs_agf_t, agf_versionnum),
2153                offsetof(xfs_agf_t, agf_seqno),
2154                offsetof(xfs_agf_t, agf_length),
2155                offsetof(xfs_agf_t, agf_roots[0]),
2156                offsetof(xfs_agf_t, agf_levels[0]),
2157                offsetof(xfs_agf_t, agf_flfirst),
2158                offsetof(xfs_agf_t, agf_fllast),
2159                offsetof(xfs_agf_t, agf_flcount),
2160                offsetof(xfs_agf_t, agf_freeblks),
2161                offsetof(xfs_agf_t, agf_longest),
2162                offsetof(xfs_agf_t, agf_btreeblks),
2163                offsetof(xfs_agf_t, agf_uuid),
2164                sizeof(xfs_agf_t)
2165        };
2166
2167        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2168
2169        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2170
2171        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2172        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2173}
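
/*
 * A minimal sketch of how a field mask plus the offsets[] table above turn
 * into one contiguous byte range to log (the real work is done by
 * xfs_btree_offsets()): the lowest set bit selects the first byte of the
 * corresponding field, and the field after the highest set bit bounds the
 * last byte, which is why the table ends with a sizeof(xfs_agf_t) sentinel.
 * The helper below is illustrative only.
 */
static void
example_fields_to_byte_range(
        unsigned int    fields,         /* bit i set => field i is dirty */
        const short     *offsets,       /* byte offset per field + sentinel */
        int             nbits,          /* number of real fields */
        int             *first,         /* first byte to log */
        int             *last)          /* last byte to log */
{
        int             i;

        *first = 0;
        *last = 0;
        for (i = 0; i < nbits; i++) {
                if (fields & (1U << i)) {
                        *first = offsets[i];
                        break;
                }
        }
        for (i = nbits - 1; i >= 0; i--) {
                if (fields & (1U << i)) {
                        *last = offsets[i + 1] - 1;
                        break;
                }
        }
}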
2174
2175/*
2176 * Interface for inode allocation to force the pag data to be initialized.
2177 */
2178int                                     /* error */
2179xfs_alloc_pagf_init(
2180        xfs_mount_t             *mp,    /* file system mount structure */
2181        xfs_trans_t             *tp,    /* transaction pointer */
2182        xfs_agnumber_t          agno,   /* allocation group number */
2183        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2184{
2185        xfs_buf_t               *bp;
2186        int                     error;
2187
2188        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2189                return error;
2190        if (bp)
2191                xfs_trans_brelse(tp, bp);
2192        return 0;
2193}
2194
2195/*
2196 * Put the block on the freelist for the allocation group.
2197 */
2198int                                     /* error */
2199xfs_alloc_put_freelist(
2200        xfs_trans_t             *tp,    /* transaction pointer */
2201        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2202        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2203        xfs_agblock_t           bno,    /* block being freed */
2204        int                     btreeblk) /* block came from an AGF btree */
2205{
2206        xfs_agf_t               *agf;   /* a.g. freespace structure */
2207        __be32                  *blockp;/* pointer to array entry */
2208        int                     error;
2209        int                     logflags;
2210        xfs_mount_t             *mp;    /* mount structure */
2211        xfs_perag_t             *pag;   /* per allocation group data */
2212        __be32                  *agfl_bno;
2213        int                     startoff;
2214
2215        agf = XFS_BUF_TO_AGF(agbp);
2216        mp = tp->t_mountp;
2217
2218        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2219                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2220                return error;
2221        be32_add_cpu(&agf->agf_fllast, 1);
2222        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2223                agf->agf_fllast = 0;
2224
2225        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2226        be32_add_cpu(&agf->agf_flcount, 1);
2227        xfs_trans_agflist_delta(tp, 1);
2228        pag->pagf_flcount++;
2229
2230        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2231        if (btreeblk) {
2232                be32_add_cpu(&agf->agf_btreeblks, -1);
2233                pag->pagf_btreeblks--;
2234                logflags |= XFS_AGF_BTREEBLKS;
2235        }
2236        xfs_perag_put(pag);
2237
2238        xfs_alloc_log_agf(tp, agbp, logflags);
2239
2240        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2241
2242        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2243        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2244        *blockp = cpu_to_be32(bno);
2245        startoff = (char *)blockp - (char *)agflbp->b_addr;
2246
2247        xfs_alloc_log_agf(tp, agbp, logflags);
2248
2249        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2250        xfs_trans_log_buf(tp, agflbp, startoff,
2251                          startoff + sizeof(xfs_agblock_t) - 1);
2252        return 0;
2253}
2254
2255static bool
2256xfs_agf_verify(
2257        struct xfs_mount *mp,
2258        struct xfs_buf  *bp)
2259{
2260        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2261
2262        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2263            !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2264                return false;
2265
2266        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2267              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2268              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2269              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2270              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2271              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2272                return false;
2273
2274        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2275            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2276                return false;
2277
2278        /*
2279         * during growfs operations, the perag is not fully initialised,
2280         * so we can't use it for any useful checking. growfs ensures we can't
2281         * use it by using uncached buffers that don't have the perag attached
2282         * so we can detect and avoid this problem.
2283         */
2284        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2285                return false;
2286
2287        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2288            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2289                return false;
2290
2291        return true;
2292
2293}
2294
2295static void
2296xfs_agf_read_verify(
2297        struct xfs_buf  *bp)
2298{
2299        struct xfs_mount *mp = bp->b_target->bt_mount;
2300
2301        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2302            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2303                xfs_buf_ioerror(bp, -EFSBADCRC);
2304        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2305                                XFS_ERRTAG_ALLOC_READ_AGF,
2306                                XFS_RANDOM_ALLOC_READ_AGF))
2307                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2308
2309        if (bp->b_error)
2310                xfs_verifier_error(bp);
2311}
2312
2313static void
2314xfs_agf_write_verify(
2315        struct xfs_buf  *bp)
2316{
2317        struct xfs_mount *mp = bp->b_target->bt_mount;
2318        struct xfs_buf_log_item *bip = bp->b_fspriv;
2319
2320        if (!xfs_agf_verify(mp, bp)) {
2321                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2322                xfs_verifier_error(bp);
2323                return;
2324        }
2325
2326        if (!xfs_sb_version_hascrc(&mp->m_sb))
2327                return;
2328
2329        if (bip)
2330                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2331
2332        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2333}
2334
2335const struct xfs_buf_ops xfs_agf_buf_ops = {
2336        .verify_read = xfs_agf_read_verify,
2337        .verify_write = xfs_agf_write_verify,
2338};
2339
2340/*
2341 * Read in the allocation group header (free/alloc section).
2342 */
2343int                                     /* error */
2344xfs_read_agf(
2345        struct xfs_mount        *mp,    /* mount point structure */
2346        struct xfs_trans        *tp,    /* transaction pointer */
2347        xfs_agnumber_t          agno,   /* allocation group number */
2348        int                     flags,  /* XFS_BUF_ */
2349        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2350{
2351        int             error;
2352
2353        trace_xfs_read_agf(mp, agno);
2354
2355        ASSERT(agno != NULLAGNUMBER);
2356        error = xfs_trans_read_buf(
2357                        mp, tp, mp->m_ddev_targp,
2358                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2359                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2360        if (error)
2361                return error;
2362        if (!*bpp)
2363                return 0;
2364
2365        ASSERT(!(*bpp)->b_error);
2366        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2367        return 0;
2368}
2369
2370/*
2371 * Read in the allocation group header (free/alloc section).
2372 */
2373int                                     /* error */
2374xfs_alloc_read_agf(
2375        struct xfs_mount        *mp,    /* mount point structure */
2376        struct xfs_trans        *tp,    /* transaction pointer */
2377        xfs_agnumber_t          agno,   /* allocation group number */
2378        int                     flags,  /* XFS_ALLOC_FLAG_... */
2379        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2380{
2381        struct xfs_agf          *agf;           /* ag freelist header */
2382        struct xfs_perag        *pag;           /* per allocation group data */
2383        int                     error;
2384
2385        trace_xfs_alloc_read_agf(mp, agno);
2386
2387        ASSERT(agno != NULLAGNUMBER);
2388        error = xfs_read_agf(mp, tp, agno,
2389                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2390                        bpp);
2391        if (error)
2392                return error;
2393        if (!*bpp)
2394                return 0;
2395        ASSERT(!(*bpp)->b_error);
2396
2397        agf = XFS_BUF_TO_AGF(*bpp);
2398        pag = xfs_perag_get(mp, agno);
2399        if (!pag->pagf_init) {
2400                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2401                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2402                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2403                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2404                pag->pagf_levels[XFS_BTNUM_BNOi] =
2405                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2406                pag->pagf_levels[XFS_BTNUM_CNTi] =
2407                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2408                spin_lock_init(&pag->pagb_lock);
2409                pag->pagb_count = 0;
2410                pag->pagb_tree = RB_ROOT;
2411                pag->pagf_init = 1;
2412        }
2413#ifdef DEBUG
2414        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2415                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2416                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2417                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2418                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2419                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2420                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2421                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2422                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2423        }
2424#endif
2425        xfs_perag_put(pag);
2426        return 0;
2427}
2428
2429/*
2430 * Allocate an extent (variable-size).
2431 * Depending on the allocation type, we either look in a single allocation
2432 * group or loop over the allocation groups to find the result.
2433 */
2434int                             /* error */
2435xfs_alloc_vextent(
2436        xfs_alloc_arg_t *args)  /* allocation argument structure */
2437{
2438        xfs_agblock_t   agsize; /* allocation group size */
2439        int             error;
2440        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2441        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2442        xfs_mount_t     *mp;    /* mount structure pointer */
2443        xfs_agnumber_t  sagno;  /* starting allocation group number */
2444        xfs_alloctype_t type;   /* input allocation type */
2445        int             bump_rotor = 0;
2446        int             no_min = 0;
2447        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2448
2449        mp = args->mp;
2450        type = args->otype = args->type;
2451        args->agbno = NULLAGBLOCK;
2452        /*
2453         * Just fix this up, for the case where the last a.g. is shorter
2454         * (or there's only one a.g.) and the caller couldn't easily figure
2455         * that out (xfs_bmap_alloc).
2456         */
2457        agsize = mp->m_sb.sb_agblocks;
2458        if (args->maxlen > agsize)
2459                args->maxlen = agsize;
2460        if (args->alignment == 0)
2461                args->alignment = 1;
2462        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2463        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2464        ASSERT(args->minlen <= args->maxlen);
2465        ASSERT(args->minlen <= agsize);
2466        ASSERT(args->mod < args->prod);
2467        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2468            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2469            args->minlen > args->maxlen || args->minlen > agsize ||
2470            args->mod >= args->prod) {
2471                args->fsbno = NULLFSBLOCK;
2472                trace_xfs_alloc_vextent_badargs(args);
2473                return 0;
2474        }
2475        minleft = args->minleft;
2476
2477        switch (type) {
2478        case XFS_ALLOCTYPE_THIS_AG:
2479        case XFS_ALLOCTYPE_NEAR_BNO:
2480        case XFS_ALLOCTYPE_THIS_BNO:
2481                /*
2482                 * These three force us into a single a.g.
2483                 */
2484                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2485                args->pag = xfs_perag_get(mp, args->agno);
2486                args->minleft = 0;
2487                error = xfs_alloc_fix_freelist(args, 0);
2488                args->minleft = minleft;
2489                if (error) {
2490                        trace_xfs_alloc_vextent_nofix(args);
2491                        goto error0;
2492                }
2493                if (!args->agbp) {
2494                        trace_xfs_alloc_vextent_noagbp(args);
2495                        break;
2496                }
2497                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2498                if ((error = xfs_alloc_ag_vextent(args)))
2499                        goto error0;
2500                break;
2501        case XFS_ALLOCTYPE_START_BNO:
2502                /*
2503                 * Try near allocation first, then anywhere-in-ag after
2504                 * the first a.g. fails.
2505                 */
2506                if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
2507                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2508                        args->fsbno = XFS_AGB_TO_FSB(mp,
2509                                        ((mp->m_agfrotor / rotorstep) %
2510                                        mp->m_sb.sb_agcount), 0);
2511                        bump_rotor = 1;
2512                }
2513                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2514                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2515                /* FALLTHROUGH */
2516        case XFS_ALLOCTYPE_ANY_AG:
2517        case XFS_ALLOCTYPE_START_AG:
2518        case XFS_ALLOCTYPE_FIRST_AG:
2519                /*
2520                 * Rotate through the allocation groups looking for a winner.
2521                 */
2522                if (type == XFS_ALLOCTYPE_ANY_AG) {
2523                        /*
2524                         * Start with the last place we left off.
2525                         */
2526                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2527                                        mp->m_sb.sb_agcount;
2528                        args->type = XFS_ALLOCTYPE_THIS_AG;
2529                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2530                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2531                        /*
2532                         * Start with allocation group given by bno.
2533                         */
2534                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2535                        args->type = XFS_ALLOCTYPE_THIS_AG;
2536                        sagno = 0;
2537                        flags = 0;
2538                } else {
2539                        if (type == XFS_ALLOCTYPE_START_AG)
2540                                args->type = XFS_ALLOCTYPE_THIS_AG;
2541                        /*
2542                         * Start with the given allocation group.
2543                         */
2544                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2545                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2546                }
2547                /*
2548                 * Loop over allocation groups twice; first time with
2549                 * trylock set, second time without.
2550                 */
2551                for (;;) {
2552                        args->pag = xfs_perag_get(mp, args->agno);
2553                        if (no_min) args->minleft = 0;
2554                        error = xfs_alloc_fix_freelist(args, flags);
2555                        args->minleft = minleft;
2556                        if (error) {
2557                                trace_xfs_alloc_vextent_nofix(args);
2558                                goto error0;
2559                        }
2560                        /*
2561                         * If we get a buffer back then the allocation will fly.
2562                         */
2563                        if (args->agbp) {
2564                                if ((error = xfs_alloc_ag_vextent(args)))
2565                                        goto error0;
2566                                break;
2567                        }
2568
2569                        trace_xfs_alloc_vextent_loopfailed(args);
2570
2571                        /*
2572                         * Didn't work, figure out the next iteration.
2573                         */
2574                        if (args->agno == sagno &&
2575                            type == XFS_ALLOCTYPE_START_BNO)
2576                                args->type = XFS_ALLOCTYPE_THIS_AG;
2577                        /*
2578                         * For the first allocation, we can try any AG to get
2579                         * space.  However, if we already have allocated a
2580                         * block, we don't want to try AGs whose number is below
2581                         * sagno. Otherwise, we may end up with out-of-order
2582                         * locking of AGF, which might cause deadlock.
2583                         */
2584                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2585                                if (args->firstblock != NULLFSBLOCK)
2586                                        args->agno = sagno;
2587                                else
2588                                        args->agno = 0;
2589                        }
2590                        /*
2591                         * Reached the starting a.g., must either be done
2592                         * or switch to non-trylock mode.
2593                         */
2594                        if (args->agno == sagno) {
2595                                if (no_min == 1) {
2596                                        args->agbno = NULLAGBLOCK;
2597                                        trace_xfs_alloc_vextent_allfailed(args);
2598                                        break;
2599                                }
2600                                if (flags == 0) {
2601                                        no_min = 1;
2602                                } else {
2603                                        flags = 0;
2604                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2605                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2606                                                        args->fsbno);
2607                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2608                                        }
2609                                }
2610                        }
2611                        xfs_perag_put(args->pag);
2612                }
2613                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2614                        if (args->agno == sagno)
2615                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2616                                        (mp->m_sb.sb_agcount * rotorstep);
2617                        else
2618                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2619                                        (mp->m_sb.sb_agcount * rotorstep);
2620                }
2621                break;
2622        default:
2623                ASSERT(0);
2624                /* NOTREACHED */
2625        }
2626        if (args->agbno == NULLAGBLOCK)
2627                args->fsbno = NULLFSBLOCK;
2628        else {
2629                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2630#ifdef DEBUG
2631                ASSERT(args->len >= args->minlen);
2632                ASSERT(args->len <= args->maxlen);
2633                ASSERT(args->agbno % args->alignment == 0);
2634                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2635                        args->len);
2636#endif
2637        }
2638        xfs_perag_put(args->pag);
2639        return 0;
2640error0:
2641        xfs_perag_put(args->pag);
2642        return error;
2643}
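
/*
 * A minimal sketch of the AG rotation above for the XFS_ALLOCTYPE_ANY_AG
 * case: the starting AG comes from the rotor, and the loop walks agno
 * upwards, wrapping to 0 at agcount and stopping when it arrives back at
 * the start.  Freelist fixing, trylock semantics and the minleft retry are
 * omitted; the callback stands in for "did the allocation in this AG
 * succeed" and is purely illustrative.
 */
static int
example_rotate_ags(
        unsigned int    agcount,        /* mp->m_sb.sb_agcount */
        unsigned int    agfrotor,       /* mp->m_agfrotor */
        unsigned int    rotorstep,      /* xfs_rotorstep */
        int             (*try_ag)(unsigned int agno, void *priv),
        void            *priv,
        unsigned int    *winner)
{
        unsigned int    sagno = (agfrotor / rotorstep) % agcount;
        unsigned int    agno = sagno;

        do {
                if (try_ag(agno, priv)) {
                        *winner = agno;
                        return 1;
                }
                if (++agno == agcount)
                        agno = 0;
        } while (agno != sagno);
        return 0;
}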
2644
2645/*
2646 * Free an extent.
2647 * Just break up the extent address and hand off to xfs_free_ag_extent
2648 * after fixing up the freelist.
2649 */
2650int                             /* error */
2651xfs_free_extent(
2652        xfs_trans_t     *tp,    /* transaction pointer */
2653        xfs_fsblock_t   bno,    /* starting block number of extent */
2654        xfs_extlen_t    len)    /* length of extent */
2655{
2656        xfs_alloc_arg_t args;
2657        int             error;
2658
2659        ASSERT(len != 0);
2660        memset(&args, 0, sizeof(xfs_alloc_arg_t));
2661        args.tp = tp;
2662        args.mp = tp->t_mountp;
2663
2664        /*
2665         * validate that the block number is legal - this enables us to detect
2666         * and handle silent filesystem corruption rather than crashing.
2667         */
2668        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2669        if (args.agno >= args.mp->m_sb.sb_agcount)
2670                return -EFSCORRUPTED;
2671
2672        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2673        if (args.agbno >= args.mp->m_sb.sb_agblocks)
2674                return -EFSCORRUPTED;
2675
2676        args.pag = xfs_perag_get(args.mp, args.agno);
2677        ASSERT(args.pag);
2678
2679        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2680        if (error)
2681                goto error0;
2682
2683        /* validate the extent size is legal now that we have the agf locked */
2684        if (args.agbno + len >
2685                        be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2686                error = -EFSCORRUPTED;
2687                goto error0;
2688        }
2689
2690        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2691        if (!error)
2692                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2693error0:
2694        xfs_perag_put(args.pag);
2695        return error;
2696}
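
/*
 * A minimal sketch of the fsbno validation above: an XFS filesystem block
 * number packs the AG number into the high bits and the AG-relative block
 * into the low sb_agblklog bits, so both halves can be range-checked
 * against the superblock before any buffers are touched.  The macros
 * XFS_FSB_TO_AGNO()/XFS_FSB_TO_AGBNO() perform this split in the real
 * code; the constants below are made up for illustration.
 */
static int
example_validate_fsbno(
        unsigned long long      fsbno,
        unsigned int            agblklog,       /* e.g. 20 */
        unsigned int            agcount,        /* sb_agcount */
        unsigned int            agblocks)       /* sb_agblocks */
{
        unsigned int    agno = (unsigned int)(fsbno >> agblklog);
        unsigned int    agbno = (unsigned int)(fsbno & ((1ULL << agblklog) - 1));

        return agno < agcount && agbno < agblocks;      /* 0 => -EFSCORRUPTED above */
}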
2697