linux/fs/xfs/libxfs/xfs_alloc.c
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_inode.h"
  28#include "xfs_btree.h"
  29#include "xfs_alloc_btree.h"
  30#include "xfs_alloc.h"
  31#include "xfs_extent_busy.h"
  32#include "xfs_error.h"
  33#include "xfs_cksum.h"
  34#include "xfs_trace.h"
  35#include "xfs_trans.h"
  36#include "xfs_buf_item.h"
  37#include "xfs_log.h"
  38
  39struct workqueue_struct *xfs_alloc_wq;
  40
  41#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  42
  43#define XFSA_FIXUP_BNO_OK       1
  44#define XFSA_FIXUP_CNT_OK       2
  45
  46STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  47STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  48STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  49STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  50                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  51
  52/*
  53 * Lookup the record equal to [bno, len] in the btree given by cur.
  54 */
  55STATIC int                              /* error */
  56xfs_alloc_lookup_eq(
  57        struct xfs_btree_cur    *cur,   /* btree cursor */
  58        xfs_agblock_t           bno,    /* starting block of extent */
  59        xfs_extlen_t            len,    /* length of extent */
  60        int                     *stat)  /* success/failure */
  61{
  62        cur->bc_rec.a.ar_startblock = bno;
  63        cur->bc_rec.a.ar_blockcount = len;
  64        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  65}
  66
  67/*
  68 * Lookup the first record greater than or equal to [bno, len]
  69 * in the btree given by cur.
  70 */
  71int                             /* error */
  72xfs_alloc_lookup_ge(
  73        struct xfs_btree_cur    *cur,   /* btree cursor */
  74        xfs_agblock_t           bno,    /* starting block of extent */
  75        xfs_extlen_t            len,    /* length of extent */
  76        int                     *stat)  /* success/failure */
  77{
  78        cur->bc_rec.a.ar_startblock = bno;
  79        cur->bc_rec.a.ar_blockcount = len;
  80        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  81}
  82
  83/*
  84 * Lookup the first record less than or equal to [bno, len]
  85 * in the btree given by cur.
  86 */
  87int                                     /* error */
  88xfs_alloc_lookup_le(
  89        struct xfs_btree_cur    *cur,   /* btree cursor */
  90        xfs_agblock_t           bno,    /* starting block of extent */
  91        xfs_extlen_t            len,    /* length of extent */
  92        int                     *stat)  /* success/failure */
  93{
  94        cur->bc_rec.a.ar_startblock = bno;
  95        cur->bc_rec.a.ar_blockcount = len;
  96        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  97}
  98
  99/*
 100 * Update the record referred to by cur to the value given
 101 * by [bno, len].
 102 * This either works (return 0) or gets an EFSCORRUPTED error.
 103 */
 104STATIC int                              /* error */
 105xfs_alloc_update(
 106        struct xfs_btree_cur    *cur,   /* btree cursor */
 107        xfs_agblock_t           bno,    /* starting block of extent */
 108        xfs_extlen_t            len)    /* length of extent */
 109{
 110        union xfs_btree_rec     rec;
 111
 112        rec.alloc.ar_startblock = cpu_to_be32(bno);
 113        rec.alloc.ar_blockcount = cpu_to_be32(len);
 114        return xfs_btree_update(cur, &rec);
 115}
 116
 117/*
 118 * Get the data from the pointed-to record.
 119 */
 120int                                     /* error */
 121xfs_alloc_get_rec(
 122        struct xfs_btree_cur    *cur,   /* btree cursor */
 123        xfs_agblock_t           *bno,   /* output: starting block of extent */
 124        xfs_extlen_t            *len,   /* output: length of extent */
 125        int                     *stat)  /* output: success/failure */
 126{
 127        union xfs_btree_rec     *rec;
 128        int                     error;
 129
 130        error = xfs_btree_get_rec(cur, &rec, stat);
 131        if (!error && *stat == 1) {
 132                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 133                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 134        }
 135        return error;
 136}
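/*
 * The lookup and get_rec helpers above are normally used as a pair: a
 * lookup positions the cursor and reports in *stat whether a matching
 * record exists, then xfs_alloc_get_rec() reads the record back out.  A
 * minimal sketch of that pattern (as used by the allocators below, with
 * error labels and cursor teardown omitted):
 *
 *	error = xfs_alloc_lookup_le(bno_cur, agbno, minlen, &i);
 *	if (error)
 *		goto error0;
 *	if (i) {
 *		error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 *		if (error)
 *			goto error0;
 *	}
 */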
 137
 138/*
 139 * Compute aligned version of the found extent.
 140 * Takes alignment and min length into account.
 141 */
 142STATIC void
 143xfs_alloc_compute_aligned(
 144        xfs_alloc_arg_t *args,          /* allocation argument structure */
 145        xfs_agblock_t   foundbno,       /* starting block in found extent */
 146        xfs_extlen_t    foundlen,       /* length in found extent */
 147        xfs_agblock_t   *resbno,        /* result block number */
 148        xfs_extlen_t    *reslen)        /* result length */
 149{
 150        xfs_agblock_t   bno;
 151        xfs_extlen_t    len;
 152        xfs_extlen_t    diff;
 153
 154        /* Trim busy sections out of found extent */
 155        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 156
 157        /*
 158         * If we have a largish extent that happens to start before min_agbno,
 159         * see if we can shift it into range...
 160         */
 161        if (bno < args->min_agbno && bno + len > args->min_agbno) {
 162                diff = args->min_agbno - bno;
 163                if (len > diff) {
 164                        bno += diff;
 165                        len -= diff;
 166                }
 167        }
 168
 169        if (args->alignment > 1 && len >= args->minlen) {
 170                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 171
 172                diff = aligned_bno - bno;
 173
 174                *resbno = aligned_bno;
 175                *reslen = diff >= len ? 0 : len - diff;
 176        } else {
 177                *resbno = bno;
 178                *reslen = len;
 179        }
 180}
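/*
 * A worked example of the alignment math above (hypothetical numbers,
 * assuming nothing is trimmed as busy and bno is not below
 * args->min_agbno): for a found extent [bno = 13, len = 20] and
 * args->alignment = 8, aligned_bno = roundup(13, 8) = 16, diff = 3, and
 * the result is [*resbno = 16, *reslen = 17].  If diff were to consume
 * the whole extent, *reslen would be 0 and the caller would treat the
 * extent as unusable.
 */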
 181
 182/*
 183 * Compute best start block and diff for "near" allocations.
 184 * freelen >= wantlen already checked by caller.
 185 */
 186STATIC xfs_extlen_t                     /* difference value (absolute) */
 187xfs_alloc_compute_diff(
 188        xfs_agblock_t   wantbno,        /* target starting block */
 189        xfs_extlen_t    wantlen,        /* target length */
 190        xfs_extlen_t    alignment,      /* target alignment */
 191        char            userdata,       /* are we allocating data? */
 192        xfs_agblock_t   freebno,        /* freespace's starting block */
 193        xfs_extlen_t    freelen,        /* freespace's length */
 194        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 195{
 196        xfs_agblock_t   freeend;        /* end of freespace extent */
 197        xfs_agblock_t   newbno1;        /* return block number */
 198        xfs_agblock_t   newbno2;        /* other new block number */
 199        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 200        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 201        xfs_agblock_t   wantend;        /* end of target extent */
 202
 203        ASSERT(freelen >= wantlen);
 204        freeend = freebno + freelen;
 205        wantend = wantbno + wantlen;
 206        /*
 207         * We want to allocate from the start of a free extent if it is past
 208         * the desired block or if we are allocating user data and the free
 209         * extent is before desired block. The second case is there to allow
 210         * for contiguous allocation from the remaining free space if the file
 211         * grows in the short term.
 212         */
 213        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 214                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 215                        newbno1 = NULLAGBLOCK;
 216        } else if (freeend >= wantend && alignment > 1) {
 217                newbno1 = roundup(wantbno, alignment);
 218                newbno2 = newbno1 - alignment;
 219                if (newbno1 >= freeend)
 220                        newbno1 = NULLAGBLOCK;
 221                else
 222                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 223                if (newbno2 < freebno)
 224                        newbno2 = NULLAGBLOCK;
 225                else
 226                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 227                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 228                        if (newlen1 < newlen2 ||
 229                            (newlen1 == newlen2 &&
 230                             XFS_ABSDIFF(newbno1, wantbno) >
 231                             XFS_ABSDIFF(newbno2, wantbno)))
 232                                newbno1 = newbno2;
 233                } else if (newbno2 != NULLAGBLOCK)
 234                        newbno1 = newbno2;
 235        } else if (freeend >= wantend) {
 236                newbno1 = wantbno;
 237        } else if (alignment > 1) {
 238                newbno1 = roundup(freeend - wantlen, alignment);
 239                if (newbno1 > freeend - wantlen &&
 240                    newbno1 - alignment >= freebno)
 241                        newbno1 -= alignment;
 242                else if (newbno1 >= freeend)
 243                        newbno1 = NULLAGBLOCK;
 244        } else
 245                newbno1 = freeend - wantlen;
 246        *newbnop = newbno1;
 247        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 248}
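/*
 * Two worked examples of the selection above (hypothetical numbers,
 * userdata == 0):
 *
 * 1) wantbno = 50, wantlen = 10, alignment = 4, freebno = 53, freelen = 20.
 *    The free extent starts at or past the desired block, so we take its
 *    aligned start: newbno1 = roundup(53, 4) = 56, and the returned diff
 *    is XFS_ABSDIFF(56, 50) = 6.
 *
 * 2) wantbno = 103, wantlen = 10, alignment = 8, freebno = 90, freelen = 40.
 *    The free extent straddles the desired block, so both candidates are
 *    considered: newbno1 = roundup(103, 8) = 104 and newbno2 = 96.  Both
 *    can hold the full wantlen, so the one closer to wantbno wins and the
 *    returned diff is 1.
 */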
 249
 250/*
 251 * Fix up the length, based on mod and prod.
 252 * len should be k * prod + mod for some k.
 253 * If len is too small it is returned unchanged.
 254 * If len hits maxlen it is left alone.
 255 */
 256STATIC void
 257xfs_alloc_fix_len(
 258        xfs_alloc_arg_t *args)          /* allocation argument structure */
 259{
 260        xfs_extlen_t    k;
 261        xfs_extlen_t    rlen;
 262
 263        ASSERT(args->mod < args->prod);
 264        rlen = args->len;
 265        ASSERT(rlen >= args->minlen);
 266        ASSERT(rlen <= args->maxlen);
 267        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 268            (args->mod == 0 && rlen < args->prod))
 269                return;
 270        k = rlen % args->prod;
 271        if (k == args->mod)
 272                return;
 273        if (k > args->mod)
 274                rlen = rlen - (k - args->mod);
 275        else
 276                rlen = rlen - args->prod + (args->mod - k);
 277        /* casts to (int) catch length underflows */
 278        if ((int)rlen < (int)args->minlen)
 279                return;
 280        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 281        ASSERT(rlen % args->prod == args->mod);
 282        args->len = rlen;
 283}
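/*
 * A worked example of the rounding above (hypothetical values): with
 * args->prod = 4, args->mod = 1, minlen = 5, maxlen = 40 and an incoming
 * len of 18, k = 18 % 4 = 2 > mod, so len is trimmed to 18 - (2 - 1) = 17,
 * which satisfies 17 % 4 == 1.  With args->mod = 3 and an incoming len of
 * 16, k = 0 < mod, so len becomes 16 - 4 + (3 - 0) = 15.  In both cases a
 * result below minlen would leave args->len untouched instead.
 */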
 284
 285/*
 286 * Fix up length if there is too little space left in the a.g.
 287 * Return 1 if ok, 0 if too little, should give up.
 288 */
 289STATIC int
 290xfs_alloc_fix_minleft(
 291        xfs_alloc_arg_t *args)          /* allocation argument structure */
 292{
 293        xfs_agf_t       *agf;           /* a.g. freelist header */
 294        int             diff;           /* free space difference */
 295
 296        if (args->minleft == 0)
 297                return 1;
 298        agf = XFS_BUF_TO_AGF(args->agbp);
 299        diff = be32_to_cpu(agf->agf_freeblks)
 300                - args->len - args->minleft;
 301        if (diff >= 0)
 302                return 1;
 303        args->len += diff;              /* shrink the allocated space */
 304        /* casts to (int) catch length underflows */
 305        if ((int)args->len >= (int)args->minlen)
 306                return 1;
 307        args->agbno = NULLAGBLOCK;
 308        return 0;
 309}
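/*
 * For example (hypothetical numbers): with 100 free blocks recorded in
 * the AGF, args->len = 60 and args->minleft = 50, diff = 100 - 60 - 50 =
 * -10, so the allocation is shrunk to 50 blocks.  Only if the shrunken
 * length dropped below args->minlen would the allocation be abandoned
 * (args->agbno set to NULLAGBLOCK, return 0).
 */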
 310
 311/*
 312 * Update the two btrees, logically removing from freespace the extent
 313 * starting at rbno, rlen blocks.  The extent is contained within the
 314 * actual (current) free extent fbno for flen blocks.
 315 * Flags are passed in indicating whether the cursors are set to the
 316 * relevant records.
 317 */
 318STATIC int                              /* error code */
 319xfs_alloc_fixup_trees(
 320        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 321        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 322        xfs_agblock_t   fbno,           /* starting block of free extent */
 323        xfs_extlen_t    flen,           /* length of free extent */
 324        xfs_agblock_t   rbno,           /* starting block of returned extent */
 325        xfs_extlen_t    rlen,           /* length of returned extent */
 326        int             flags)          /* flags, XFSA_FIXUP_... */
 327{
 328        int             error;          /* error code */
 329        int             i;              /* operation results */
 330        xfs_agblock_t   nfbno1;         /* first new free startblock */
 331        xfs_agblock_t   nfbno2;         /* second new free startblock */
 332        xfs_extlen_t    nflen1=0;       /* first new free length */
 333        xfs_extlen_t    nflen2=0;       /* second new free length */
 334        struct xfs_mount *mp;
 335
 336        mp = cnt_cur->bc_mp;
 337
 338        /*
 339         * Look up the record in the by-size tree if necessary.
 340         */
 341        if (flags & XFSA_FIXUP_CNT_OK) {
 342#ifdef DEBUG
 343                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 344                        return error;
 345                XFS_WANT_CORRUPTED_RETURN(mp,
 346                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 347#endif
 348        } else {
 349                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 350                        return error;
 351                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 352        }
 353        /*
 354         * Look up the record in the by-block tree if necessary.
 355         */
 356        if (flags & XFSA_FIXUP_BNO_OK) {
 357#ifdef DEBUG
 358                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 359                        return error;
 360                XFS_WANT_CORRUPTED_RETURN(mp,
 361                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 362#endif
 363        } else {
 364                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 365                        return error;
 366                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 367        }
 368
 369#ifdef DEBUG
 370        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 371                struct xfs_btree_block  *bnoblock;
 372                struct xfs_btree_block  *cntblock;
 373
 374                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 375                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 376
 377                XFS_WANT_CORRUPTED_RETURN(mp,
 378                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 379        }
 380#endif
 381
 382        /*
 383         * Deal with all four cases: the allocated record is contained
 384         * within the freespace record, so we can have new freespace
 385         * at either (or both) end, or no freespace remaining.
 386         */
 387        if (rbno == fbno && rlen == flen)
 388                nfbno1 = nfbno2 = NULLAGBLOCK;
 389        else if (rbno == fbno) {
 390                nfbno1 = rbno + rlen;
 391                nflen1 = flen - rlen;
 392                nfbno2 = NULLAGBLOCK;
 393        } else if (rbno + rlen == fbno + flen) {
 394                nfbno1 = fbno;
 395                nflen1 = flen - rlen;
 396                nfbno2 = NULLAGBLOCK;
 397        } else {
 398                nfbno1 = fbno;
 399                nflen1 = rbno - fbno;
 400                nfbno2 = rbno + rlen;
 401                nflen2 = (fbno + flen) - nfbno2;
 402        }
 403        /*
 404         * Delete the entry from the by-size btree.
 405         */
 406        if ((error = xfs_btree_delete(cnt_cur, &i)))
 407                return error;
 408        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 409        /*
 410         * Add new by-size btree entry(s).
 411         */
 412        if (nfbno1 != NULLAGBLOCK) {
 413                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 414                        return error;
 415                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 416                if ((error = xfs_btree_insert(cnt_cur, &i)))
 417                        return error;
 418                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 419        }
 420        if (nfbno2 != NULLAGBLOCK) {
 421                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 422                        return error;
 423                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 424                if ((error = xfs_btree_insert(cnt_cur, &i)))
 425                        return error;
 426                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 427        }
 428        /*
 429         * Fix up the by-block btree entry(s).
 430         */
 431        if (nfbno1 == NULLAGBLOCK) {
 432                /*
 433                 * No remaining freespace, just delete the by-block tree entry.
 434                 */
 435                if ((error = xfs_btree_delete(bno_cur, &i)))
 436                        return error;
 437                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 438        } else {
 439                /*
  440                 * Update the by-block entry to start later or be shorter.
 441                 */
 442                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 443                        return error;
 444        }
 445        if (nfbno2 != NULLAGBLOCK) {
 446                /*
 447                 * 2 resulting free entries, need to add one.
 448                 */
 449                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 450                        return error;
 451                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 452                if ((error = xfs_btree_insert(bno_cur, &i)))
 453                        return error;
 454                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 455        }
 456        return 0;
 457}
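/*
 * As a concrete illustration of the four cases handled above
 * (hypothetical numbers): allocating [rbno = 110, rlen = 20] out of the
 * free extent [fbno = 100, flen = 50] is the middle case and leaves two
 * new free extents, [100, 10] and [130, 20]; allocating [100, 20] leaves
 * only [120, 30]; allocating [130, 20] leaves only [100, 30]; and
 * allocating the whole extent leaves nothing, so the records are simply
 * deleted from both btrees.
 */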
 458
 459static bool
 460xfs_agfl_verify(
 461        struct xfs_buf  *bp)
 462{
 463        struct xfs_mount *mp = bp->b_target->bt_mount;
 464        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 465        int             i;
 466
 467        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
 468                return false;
 469        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 470                return false;
 471        /*
 472         * during growfs operations, the perag is not fully initialised,
 473         * so we can't use it for any useful checking. growfs ensures we can't
 474         * use it by using uncached buffers that don't have the perag attached
 475         * so we can detect and avoid this problem.
 476         */
 477        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 478                return false;
 479
 480        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 481                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 482                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 483                        return false;
 484        }
 485
 486        return xfs_log_check_lsn(mp,
 487                                 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
 488}
 489
 490static void
 491xfs_agfl_read_verify(
 492        struct xfs_buf  *bp)
 493{
 494        struct xfs_mount *mp = bp->b_target->bt_mount;
 495
 496        /*
 497         * There is no verification of non-crc AGFLs because mkfs does not
 498         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 499         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 500         * can't verify just those entries are valid.
 501         */
 502        if (!xfs_sb_version_hascrc(&mp->m_sb))
 503                return;
 504
 505        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 506                xfs_buf_ioerror(bp, -EFSBADCRC);
 507        else if (!xfs_agfl_verify(bp))
 508                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 509
 510        if (bp->b_error)
 511                xfs_verifier_error(bp);
 512}
 513
 514static void
 515xfs_agfl_write_verify(
 516        struct xfs_buf  *bp)
 517{
 518        struct xfs_mount *mp = bp->b_target->bt_mount;
 519        struct xfs_buf_log_item *bip = bp->b_fspriv;
 520
 521        /* no verification of non-crc AGFLs */
 522        if (!xfs_sb_version_hascrc(&mp->m_sb))
 523                return;
 524
 525        if (!xfs_agfl_verify(bp)) {
 526                xfs_buf_ioerror(bp, -EFSCORRUPTED);
 527                xfs_verifier_error(bp);
 528                return;
 529        }
 530
 531        if (bip)
 532                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 533
 534        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 535}
 536
 537const struct xfs_buf_ops xfs_agfl_buf_ops = {
 538        .name = "xfs_agfl",
 539        .verify_read = xfs_agfl_read_verify,
 540        .verify_write = xfs_agfl_write_verify,
 541};
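/*
 * The ops vector above is wired up by passing &xfs_agfl_buf_ops to
 * xfs_trans_read_buf() (see xfs_alloc_read_agfl() below): the read
 * verifier runs when the buffer comes off disk, and the write verifier
 * runs before the buffer goes back to disk, stamping the LSN and
 * recomputing the CRC first.
 */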
 542
 543/*
 544 * Read in the allocation group free block array.
 545 */
 546STATIC int                              /* error */
 547xfs_alloc_read_agfl(
 548        xfs_mount_t     *mp,            /* mount point structure */
 549        xfs_trans_t     *tp,            /* transaction pointer */
 550        xfs_agnumber_t  agno,           /* allocation group number */
 551        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 552{
 553        xfs_buf_t       *bp;            /* return value */
 554        int             error;
 555
 556        ASSERT(agno != NULLAGNUMBER);
 557        error = xfs_trans_read_buf(
 558                        mp, tp, mp->m_ddev_targp,
 559                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 560                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 561        if (error)
 562                return error;
 563        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 564        *bpp = bp;
 565        return 0;
 566}
 567
 568STATIC int
 569xfs_alloc_update_counters(
 570        struct xfs_trans        *tp,
 571        struct xfs_perag        *pag,
 572        struct xfs_buf          *agbp,
 573        long                    len)
 574{
 575        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 576
 577        pag->pagf_freeblks += len;
 578        be32_add_cpu(&agf->agf_freeblks, len);
 579
 580        xfs_trans_agblocks_delta(tp, len);
 581        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 582                     be32_to_cpu(agf->agf_length)))
 583                return -EFSCORRUPTED;
 584
 585        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 586        return 0;
 587}
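/*
 * Note that len is signed: xfs_alloc_ag_vextent() below passes
 * -(long)args->len when blocks are allocated, while the extent-freeing
 * path passes a positive length, so the same helper moves the per-AG and
 * on-disk AGF free block counts in either direction.
 */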
 588
 589/*
 590 * Allocation group level functions.
 591 */
 592
 593/*
 594 * Allocate a variable extent in the allocation group agno.
 595 * Type and bno are used to determine where in the allocation group the
 596 * extent will start.
 597 * Extent's length (returned in *len) will be between minlen and maxlen,
 598 * and of the form k * prod + mod unless there's nothing that large.
 599 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 600 */
 601STATIC int                      /* error */
 602xfs_alloc_ag_vextent(
 603        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 604{
 605        int             error=0;
 606
 607        ASSERT(args->minlen > 0);
 608        ASSERT(args->maxlen > 0);
 609        ASSERT(args->minlen <= args->maxlen);
 610        ASSERT(args->mod < args->prod);
 611        ASSERT(args->alignment > 0);
 612        /*
 613         * Branch to correct routine based on the type.
 614         */
 615        args->wasfromfl = 0;
 616        switch (args->type) {
 617        case XFS_ALLOCTYPE_THIS_AG:
 618                error = xfs_alloc_ag_vextent_size(args);
 619                break;
 620        case XFS_ALLOCTYPE_NEAR_BNO:
 621                error = xfs_alloc_ag_vextent_near(args);
 622                break;
 623        case XFS_ALLOCTYPE_THIS_BNO:
 624                error = xfs_alloc_ag_vextent_exact(args);
 625                break;
 626        default:
 627                ASSERT(0);
 628                /* NOTREACHED */
 629        }
 630
 631        if (error || args->agbno == NULLAGBLOCK)
 632                return error;
 633
 634        ASSERT(args->len >= args->minlen);
 635        ASSERT(args->len <= args->maxlen);
 636        ASSERT(!args->wasfromfl || !args->isfl);
 637        ASSERT(args->agbno % args->alignment == 0);
 638
 639        if (!args->wasfromfl) {
 640                error = xfs_alloc_update_counters(args->tp, args->pag,
 641                                                  args->agbp,
 642                                                  -((long)(args->len)));
 643                if (error)
 644                        return error;
 645
 646                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 647                                              args->agbno, args->len));
 648        }
 649
 650        if (!args->isfl) {
 651                xfs_trans_mod_sb(args->tp, args->wasdel ?
 652                                 XFS_TRANS_SB_RES_FDBLOCKS :
 653                                 XFS_TRANS_SB_FDBLOCKS,
 654                                 -((long)(args->len)));
 655        }
 656
 657        XFS_STATS_INC(args->mp, xs_allocx);
 658        XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 659        return error;
 660}
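/*
 * A minimal sketch of how the dispatcher above is driven (hypothetical
 * caller; in this file the higher-level xfs_alloc_vextent() does the AG
 * selection, AGF locking and freelist fixup before getting here).  Field
 * names are taken from the uses above:
 *
 *	memset(&args, 0, sizeof(args));
 *	args.tp = tp;
 *	args.mp = mp;
 *	args.agbp = agbp;
 *	args.agno = agno;
 *	args.pag = pag;
 *	args.type = XFS_ALLOCTYPE_NEAR_BNO;
 *	args.agbno = target_agbno;
 *	args.minlen = 1;
 *	args.maxlen = want;
 *	args.prod = 1;
 *	args.alignment = 1;
 *	error = xfs_alloc_ag_vextent(&args);
 */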
 661
 662/*
 663 * Allocate a variable extent at exactly agno/bno.
 664 * Extent's length (returned in *len) will be between minlen and maxlen,
 665 * and of the form k * prod + mod unless there's nothing that large.
 666 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 667 */
 668STATIC int                      /* error */
 669xfs_alloc_ag_vextent_exact(
 670        xfs_alloc_arg_t *args)  /* allocation argument structure */
 671{
 672        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 673        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 674        int             error;
 675        xfs_agblock_t   fbno;   /* start block of found extent */
 676        xfs_extlen_t    flen;   /* length of found extent */
 677        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 678        xfs_extlen_t    tlen;   /* length of trimmed extent */
 679        xfs_agblock_t   tend;   /* end block of trimmed extent */
 680        int             i;      /* success/failure of operation */
 681
 682        ASSERT(args->alignment == 1);
 683
 684        /*
 685         * Allocate/initialize a cursor for the by-number freespace btree.
 686         */
 687        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 688                                          args->agno, XFS_BTNUM_BNO);
 689
 690        /*
 691         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 692         * Look for the closest free block <= bno, it must contain bno
 693         * if any free block does.
 694         */
 695        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 696        if (error)
 697                goto error0;
 698        if (!i)
 699                goto not_found;
 700
 701        /*
 702         * Grab the freespace record.
 703         */
 704        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 705        if (error)
 706                goto error0;
 707        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 708        ASSERT(fbno <= args->agbno);
 709
 710        /*
 711         * Check for overlapping busy extents.
 712         */
 713        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 714
 715        /*
 716         * Give up if the start of the extent is busy, or the freespace isn't
 717         * long enough for the minimum request.
 718         */
 719        if (tbno > args->agbno)
 720                goto not_found;
 721        if (tlen < args->minlen)
 722                goto not_found;
 723        tend = tbno + tlen;
 724        if (tend < args->agbno + args->minlen)
 725                goto not_found;
 726
 727        /*
 728         * End of extent will be smaller of the freespace end and the
 729         * maximal requested end.
 730         *
 731         * Fix the length according to mod and prod if given.
 732         */
 733        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 734                                                - args->agbno;
 735        xfs_alloc_fix_len(args);
 736        if (!xfs_alloc_fix_minleft(args))
 737                goto not_found;
 738
 739        ASSERT(args->agbno + args->len <= tend);
 740
 741        /*
 742         * We are allocating agbno for args->len
 743         * Allocate/initialize a cursor for the by-size btree.
 744         */
 745        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 746                args->agno, XFS_BTNUM_CNT);
 747        ASSERT(args->agbno + args->len <=
 748                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 749        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 750                                      args->len, XFSA_FIXUP_BNO_OK);
 751        if (error) {
 752                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 753                goto error0;
 754        }
 755
 756        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 757        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 758
 759        args->wasfromfl = 0;
 760        trace_xfs_alloc_exact_done(args);
 761        return 0;
 762
 763not_found:
 764        /* Didn't find it, return null. */
 765        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 766        args->agbno = NULLAGBLOCK;
 767        trace_xfs_alloc_exact_notfound(args);
 768        return 0;
 769
 770error0:
 771        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 772        trace_xfs_alloc_exact_error(args);
 773        return error;
 774}
 775
 776/*
 777 * Search the btree in a given direction via the search cursor and compare
 778 * the records found against the good extent we've already found.
 779 */
 780STATIC int
 781xfs_alloc_find_best_extent(
 782        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 783        struct xfs_btree_cur    **gcur, /* good cursor */
 784        struct xfs_btree_cur    **scur, /* searching cursor */
 785        xfs_agblock_t           gdiff,  /* difference for search comparison */
 786        xfs_agblock_t           *sbno,  /* extent found by search */
 787        xfs_extlen_t            *slen,  /* extent length */
 788        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 789        xfs_extlen_t            *slena, /* aligned extent length */
 790        int                     dir)    /* 0 = search right, 1 = search left */
 791{
 792        xfs_agblock_t           new;
 793        xfs_agblock_t           sdiff;
 794        int                     error;
 795        int                     i;
 796
  797        /* The good extent is perfect, no need to search. */
 798        if (!gdiff)
 799                goto out_use_good;
 800
 801        /*
 802         * Look until we find a better one, run out of space or run off the end.
 803         */
 804        do {
 805                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 806                if (error)
 807                        goto error0;
 808                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 809                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 810
 811                /*
 812                 * The good extent is closer than this one.
 813                 */
 814                if (!dir) {
 815                        if (*sbnoa > args->max_agbno)
 816                                goto out_use_good;
 817                        if (*sbnoa >= args->agbno + gdiff)
 818                                goto out_use_good;
 819                } else {
 820                        if (*sbnoa < args->min_agbno)
 821                                goto out_use_good;
 822                        if (*sbnoa <= args->agbno - gdiff)
 823                                goto out_use_good;
 824                }
 825
 826                /*
 827                 * Same distance, compare length and pick the best.
 828                 */
 829                if (*slena >= args->minlen) {
 830                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 831                        xfs_alloc_fix_len(args);
 832
 833                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 834                                                       args->alignment,
 835                                                       args->userdata, *sbnoa,
 836                                                       *slena, &new);
 837
 838                        /*
 839                         * Choose closer size and invalidate other cursor.
 840                         */
 841                        if (sdiff < gdiff)
 842                                goto out_use_search;
 843                        goto out_use_good;
 844                }
 845
 846                if (!dir)
 847                        error = xfs_btree_increment(*scur, 0, &i);
 848                else
 849                        error = xfs_btree_decrement(*scur, 0, &i);
 850                if (error)
 851                        goto error0;
 852        } while (i);
 853
 854out_use_good:
 855        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 856        *scur = NULL;
 857        return 0;
 858
 859out_use_search:
 860        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 861        *gcur = NULL;
 862        return 0;
 863
 864error0:
 865        /* caller invalidates cursors */
 866        return error;
 867}
 868
 869/*
 870 * Allocate a variable extent near bno in the allocation group agno.
 871 * Extent's length (returned in len) will be between minlen and maxlen,
 872 * and of the form k * prod + mod unless there's nothing that large.
 873 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 874 */
 875STATIC int                              /* error */
 876xfs_alloc_ag_vextent_near(
 877        xfs_alloc_arg_t *args)          /* allocation argument structure */
 878{
 879        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 880        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 881        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 882        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 883        xfs_agblock_t   gtbnoa;         /* aligned ... */
 884        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 885        xfs_extlen_t    gtlen;          /* length of right side entry */
 886        xfs_extlen_t    gtlena;         /* aligned ... */
 887        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 888        int             error;          /* error code */
 889        int             i;              /* result code, temporary */
 890        int             j;              /* result code, temporary */
 891        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 892        xfs_agblock_t   ltbnoa;         /* aligned ... */
 893        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 894        xfs_extlen_t    ltlen;          /* length of left side entry */
 895        xfs_extlen_t    ltlena;         /* aligned ... */
 896        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 897        xfs_extlen_t    rlen;           /* length of returned extent */
 898        int             forced = 0;
 899#ifdef DEBUG
 900        /*
 901         * Randomly don't execute the first algorithm.
 902         */
 903        int             dofirst;        /* set to do first algorithm */
 904
 905        dofirst = prandom_u32() & 1;
 906#endif
 907
  908        /* handle uninitialized agbno range so caller doesn't have to */
 909        if (!args->min_agbno && !args->max_agbno)
 910                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
 911        ASSERT(args->min_agbno <= args->max_agbno);
 912
 913        /* clamp agbno to the range if it's outside */
 914        if (args->agbno < args->min_agbno)
 915                args->agbno = args->min_agbno;
 916        if (args->agbno > args->max_agbno)
 917                args->agbno = args->max_agbno;
 918
 919restart:
 920        bno_cur_lt = NULL;
 921        bno_cur_gt = NULL;
 922        ltlen = 0;
 923        gtlena = 0;
 924        ltlena = 0;
 925
 926        /*
 927         * Get a cursor for the by-size btree.
 928         */
 929        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 930                args->agno, XFS_BTNUM_CNT);
 931
 932        /*
 933         * See if there are any free extents as big as maxlen.
 934         */
 935        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 936                goto error0;
 937        /*
 938         * If none, then pick up the last entry in the tree unless the
 939         * tree is empty.
 940         */
 941        if (!i) {
 942                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
 943                                &ltlen, &i)))
 944                        goto error0;
 945                if (i == 0 || ltlen == 0) {
 946                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 947                        trace_xfs_alloc_near_noentry(args);
 948                        return 0;
 949                }
 950                ASSERT(i == 1);
 951        }
 952        args->wasfromfl = 0;
 953
 954        /*
 955         * First algorithm.
 956         * If the requested extent is large wrt the freespaces available
 957         * in this a.g., then the cursor will be pointing to a btree entry
 958         * near the right edge of the tree.  If it's in the last btree leaf
 959         * block, then we just examine all the entries in that block
 960         * that are big enough, and pick the best one.
 961         * This is written as a while loop so we can break out of it,
 962         * but we never loop back to the top.
 963         */
 964        while (xfs_btree_islastblock(cnt_cur, 0)) {
 965                xfs_extlen_t    bdiff;
 966                int             besti=0;
 967                xfs_extlen_t    blen=0;
 968                xfs_agblock_t   bnew=0;
 969
 970#ifdef DEBUG
 971                if (dofirst)
 972                        break;
 973#endif
 974                /*
 975                 * Start from the entry that lookup found, sequence through
 976                 * all larger free blocks.  If we're actually pointing at a
 977                 * record smaller than maxlen, go to the start of this block,
 978                 * and skip all those smaller than minlen.
 979                 */
 980                if (ltlen || args->alignment > 1) {
 981                        cnt_cur->bc_ptrs[0] = 1;
 982                        do {
 983                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 984                                                &ltlen, &i)))
 985                                        goto error0;
 986                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 987                                if (ltlen >= args->minlen)
 988                                        break;
 989                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 990                                        goto error0;
 991                        } while (i);
 992                        ASSERT(ltlen >= args->minlen);
 993                        if (!i)
 994                                break;
 995                }
 996                i = cnt_cur->bc_ptrs[0];
 997                for (j = 1, blen = 0, bdiff = 0;
 998                     !error && j && (blen < args->maxlen || bdiff > 0);
 999                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
1000                        /*
1001                         * For each entry, decide if it's better than
1002                         * the previous best entry.
1003                         */
1004                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1005                                goto error0;
1006                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1007                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1008                                                  &ltbnoa, &ltlena);
1009                        if (ltlena < args->minlen)
1010                                continue;
1011                        if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1012                                continue;
1013                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1014                        xfs_alloc_fix_len(args);
1015                        ASSERT(args->len >= args->minlen);
1016                        if (args->len < blen)
1017                                continue;
1018                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1019                                args->alignment, args->userdata, ltbnoa,
1020                                ltlena, &ltnew);
1021                        if (ltnew != NULLAGBLOCK &&
1022                            (args->len > blen || ltdiff < bdiff)) {
1023                                bdiff = ltdiff;
1024                                bnew = ltnew;
1025                                blen = args->len;
1026                                besti = cnt_cur->bc_ptrs[0];
1027                        }
1028                }
1029                /*
1030                 * It didn't work.  We COULD be in a case where
1031                 * there's a good record somewhere, so try again.
1032                 */
1033                if (blen == 0)
1034                        break;
1035                /*
1036                 * Point at the best entry, and retrieve it again.
1037                 */
1038                cnt_cur->bc_ptrs[0] = besti;
1039                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1040                        goto error0;
1041                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1042                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1043                args->len = blen;
1044                if (!xfs_alloc_fix_minleft(args)) {
1045                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1046                        trace_xfs_alloc_near_nominleft(args);
1047                        return 0;
1048                }
1049                blen = args->len;
1050                /*
1051                 * We are allocating starting at bnew for blen blocks.
1052                 */
1053                args->agbno = bnew;
1054                ASSERT(bnew >= ltbno);
1055                ASSERT(bnew + blen <= ltbno + ltlen);
1056                /*
1057                 * Set up a cursor for the by-bno tree.
1058                 */
1059                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1060                        args->agbp, args->agno, XFS_BTNUM_BNO);
1061                /*
1062                 * Fix up the btree entries.
1063                 */
1064                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1065                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1066                        goto error0;
1067                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1068                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1069
1070                trace_xfs_alloc_near_first(args);
1071                return 0;
1072        }
1073        /*
1074         * Second algorithm.
1075         * Search in the by-bno tree to the left and to the right
1076         * simultaneously, until in each case we find a space big enough,
1077         * or run into the edge of the tree.  When we run into the edge,
1078         * we deallocate that cursor.
1079         * If both searches succeed, we compare the two spaces and pick
1080         * the better one.
1081         * With alignment, it's possible for both to fail; the upper
1082         * level algorithm that picks allocation groups for allocations
1083         * is not supposed to do this.
1084         */
1085        /*
1086         * Allocate and initialize the cursor for the leftward search.
1087         */
1088        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1089                args->agno, XFS_BTNUM_BNO);
1090        /*
1091         * Lookup <= bno to find the leftward search's starting point.
1092         */
1093        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1094                goto error0;
1095        if (!i) {
1096                /*
1097                 * Didn't find anything; use this cursor for the rightward
1098                 * search.
1099                 */
1100                bno_cur_gt = bno_cur_lt;
1101                bno_cur_lt = NULL;
1102        }
1103        /*
1104         * Found something.  Duplicate the cursor for the rightward search.
1105         */
1106        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1107                goto error0;
1108        /*
1109         * Increment the cursor, so we will point at the entry just right
1110         * of the leftward entry if any, or to the leftmost entry.
1111         */
1112        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1113                goto error0;
1114        if (!i) {
1115                /*
1116                 * It failed, there are no rightward entries.
1117                 */
1118                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1119                bno_cur_gt = NULL;
1120        }
1121        /*
1122         * Loop going left with the leftward cursor, right with the
1123         * rightward cursor, until either both directions give up or
1124         * we find an entry at least as big as minlen.
1125         */
1126        do {
1127                if (bno_cur_lt) {
1128                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1129                                goto error0;
1130                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1131                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1132                                                  &ltbnoa, &ltlena);
1133                        if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1134                                break;
1135                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1136                                goto error0;
1137                        if (!i || ltbnoa < args->min_agbno) {
1138                                xfs_btree_del_cursor(bno_cur_lt,
1139                                                     XFS_BTREE_NOERROR);
1140                                bno_cur_lt = NULL;
1141                        }
1142                }
1143                if (bno_cur_gt) {
1144                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1145                                goto error0;
1146                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1147                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1148                                                  &gtbnoa, &gtlena);
1149                        if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1150                                break;
1151                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1152                                goto error0;
1153                        if (!i || gtbnoa > args->max_agbno) {
1154                                xfs_btree_del_cursor(bno_cur_gt,
1155                                                     XFS_BTREE_NOERROR);
1156                                bno_cur_gt = NULL;
1157                        }
1158                }
1159        } while (bno_cur_lt || bno_cur_gt);
1160
1161        /*
1162         * Got both cursors still active, need to find better entry.
1163         */
1164        if (bno_cur_lt && bno_cur_gt) {
1165                if (ltlena >= args->minlen) {
1166                        /*
1167                         * Left side is good, look for a right side entry.
1168                         */
1169                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1170                        xfs_alloc_fix_len(args);
1171                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1172                                args->alignment, args->userdata, ltbnoa,
1173                                ltlena, &ltnew);
1174
1175                        error = xfs_alloc_find_best_extent(args,
1176                                                &bno_cur_lt, &bno_cur_gt,
1177                                                ltdiff, &gtbno, &gtlen,
1178                                                &gtbnoa, &gtlena,
1179                                                0 /* search right */);
1180                } else {
1181                        ASSERT(gtlena >= args->minlen);
1182
1183                        /*
1184                         * Right side is good, look for a left side entry.
1185                         */
1186                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1187                        xfs_alloc_fix_len(args);
1188                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1189                                args->alignment, args->userdata, gtbnoa,
1190                                gtlena, &gtnew);
1191
1192                        error = xfs_alloc_find_best_extent(args,
1193                                                &bno_cur_gt, &bno_cur_lt,
1194                                                gtdiff, &ltbno, &ltlen,
1195                                                &ltbnoa, &ltlena,
1196                                                1 /* search left */);
1197                }
1198
1199                if (error)
1200                        goto error0;
1201        }
1202
1203        /*
1204         * If we couldn't get anything, give up.
1205         */
1206        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1207                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1208
1209                if (!forced++) {
1210                        trace_xfs_alloc_near_busy(args);
1211                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1212                        goto restart;
1213                }
1214                trace_xfs_alloc_size_neither(args);
1215                args->agbno = NULLAGBLOCK;
1216                return 0;
1217        }
1218
1219        /*
1220         * At this point we have selected a freespace entry, either to the
1221         * left or to the right.  If it's on the right, copy all the
1222         * useful variables to the "left" set so we only have one
1223         * copy of this code.
1224         */
1225        if (bno_cur_gt) {
1226                bno_cur_lt = bno_cur_gt;
1227                bno_cur_gt = NULL;
1228                ltbno = gtbno;
1229                ltbnoa = gtbnoa;
1230                ltlen = gtlen;
1231                ltlena = gtlena;
1232                j = 1;
1233        } else
1234                j = 0;
1235
1236        /*
1237         * Fix up the length and compute the useful address.
1238         */
1239        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1240        xfs_alloc_fix_len(args);
1241        if (!xfs_alloc_fix_minleft(args)) {
1242                trace_xfs_alloc_near_nominleft(args);
1243                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1244                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1245                return 0;
1246        }
1247        rlen = args->len;
1248        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1249                                     args->userdata, ltbnoa, ltlena, &ltnew);
1250        ASSERT(ltnew >= ltbno);
1251        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1252        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1253        ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1254        args->agbno = ltnew;
1255
1256        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1257                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1258                goto error0;
1259
1260        if (j)
1261                trace_xfs_alloc_near_greater(args);
1262        else
1263                trace_xfs_alloc_near_lesser(args);
1264
1265        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1266        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1267        return 0;
1268
1269 error0:
1270        trace_xfs_alloc_near_error(args);
1271        if (cnt_cur != NULL)
1272                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1273        if (bno_cur_lt != NULL)
1274                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1275        if (bno_cur_gt != NULL)
1276                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1277        return error;
1278}
1279
1280/*
1281 * Allocate a variable extent anywhere in the allocation group agno.
1282 * Extent's length (returned in len) will be between minlen and maxlen,
1283 * and of the form k * prod + mod unless there's nothing that large.
1284 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1285 */
1286STATIC int                              /* error */
1287xfs_alloc_ag_vextent_size(
1288        xfs_alloc_arg_t *args)          /* allocation argument structure */
1289{
1290        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1291        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1292        int             error;          /* error result */
1293        xfs_agblock_t   fbno;           /* start of found freespace */
1294        xfs_extlen_t    flen;           /* length of found freespace */
1295        int             i;              /* temp status variable */
1296        xfs_agblock_t   rbno;           /* returned block number */
1297        xfs_extlen_t    rlen;           /* length of returned extent */
1298        int             forced = 0;
1299
1300restart:
1301        /*
1302         * Allocate and initialize a cursor for the by-size btree.
1303         */
1304        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1305                args->agno, XFS_BTNUM_CNT);
1306        bno_cur = NULL;
1307
1308        /*
1309         * Look for an entry >= maxlen+alignment-1 blocks.
1310         */
1311        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1312                        args->maxlen + args->alignment - 1, &i)))
1313                goto error0;
1314
1315        /*
1316         * If none or we have busy extents that we cannot allocate from, then
1317         * we have to settle for a smaller extent. In the case that there are
1318         * no large extents, this will return the last entry in the tree unless
1319         * the tree is empty. In the case that there are only busy large
1320         * extents, this will return the largest small extent unless there
1321         * are no smaller extents available.
1322         */
1323        if (!i || forced > 1) {
1324                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1325                                                   &fbno, &flen, &i);
1326                if (error)
1327                        goto error0;
1328                if (i == 0 || flen == 0) {
1329                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1330                        trace_xfs_alloc_size_noentry(args);
1331                        return 0;
1332                }
1333                ASSERT(i == 1);
1334                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1335        } else {
1336                /*
1337                 * Search for a non-busy extent that is large enough.
1338                 * If we are at low space, don't check, or if we fall off
1339                 * the end of the btree, turn off the busy check and
1340                 * restart.
1341                 */
1342                for (;;) {
1343                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1344                        if (error)
1345                                goto error0;
1346                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1347
1348                        xfs_alloc_compute_aligned(args, fbno, flen,
1349                                                  &rbno, &rlen);
1350
1351                        if (rlen >= args->maxlen)
1352                                break;
1353
1354                        error = xfs_btree_increment(cnt_cur, 0, &i);
1355                        if (error)
1356                                goto error0;
1357                        if (i == 0) {
1358                                /*
1359                                 * Our only valid extents must have been busy.
1360                                 * Make it unbusy by forcing the log out and
1361                                 * retrying. If we've been here before, forcing
1362                                 * the log isn't making the extents available,
1363                                 * which means they have probably been freed in
1364                                 * this transaction.  In that case, we have to
1365                                 * give up on them and we'll attempt a minlen
1366                                 * allocation the next time around.
1367                                 */
1368                                xfs_btree_del_cursor(cnt_cur,
1369                                                     XFS_BTREE_NOERROR);
1370                                trace_xfs_alloc_size_busy(args);
1371                                if (!forced++)
1372                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1373                                goto restart;
1374                        }
1375                }
1376        }
1377
1378        /*
1379         * In the first case above, we got the last entry in the
1380         * by-size btree.  Now we check to see if the space hits maxlen
1381         * once aligned; if not, we search left for something better.
1382         * This can't happen in the second case above.
1383         */
1384        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1385        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1386                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1387        if (rlen < args->maxlen) {
1388                xfs_agblock_t   bestfbno;
1389                xfs_extlen_t    bestflen;
1390                xfs_agblock_t   bestrbno;
1391                xfs_extlen_t    bestrlen;
1392
1393                bestrlen = rlen;
1394                bestrbno = rbno;
1395                bestflen = flen;
1396                bestfbno = fbno;
1397                for (;;) {
1398                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1399                                goto error0;
1400                        if (i == 0)
1401                                break;
1402                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1403                                        &i)))
1404                                goto error0;
1405                        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1406                        if (flen < bestrlen)
1407                                break;
1408                        xfs_alloc_compute_aligned(args, fbno, flen,
1409                                                  &rbno, &rlen);
1410                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1411                        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1412                                (rlen <= flen && rbno + rlen <= fbno + flen),
1413                                error0);
1414                        if (rlen > bestrlen) {
1415                                bestrlen = rlen;
1416                                bestrbno = rbno;
1417                                bestflen = flen;
1418                                bestfbno = fbno;
1419                                if (rlen == args->maxlen)
1420                                        break;
1421                        }
1422                }
1423                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1424                                &i)))
1425                        goto error0;
1426                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1427                rlen = bestrlen;
1428                rbno = bestrbno;
1429                flen = bestflen;
1430                fbno = bestfbno;
1431        }
1432        args->wasfromfl = 0;
1433        /*
1434         * Fix up the length.
1435         */
1436        args->len = rlen;
1437        if (rlen < args->minlen) {
1438                if (!forced++) {
1439                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1440                        trace_xfs_alloc_size_busy(args);
1441                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1442                        goto restart;
1443                }
1444                goto out_nominleft;
1445        }
1446        xfs_alloc_fix_len(args);
1447
1448        if (!xfs_alloc_fix_minleft(args))
1449                goto out_nominleft;
1450        rlen = args->len;
1451        XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
1452        /*
1453         * Allocate and initialize a cursor for the by-block tree.
1454         */
1455        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1456                args->agno, XFS_BTNUM_BNO);
1457        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1458                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1459                goto error0;
1460        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1461        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1462        cnt_cur = bno_cur = NULL;
1463        args->len = rlen;
1464        args->agbno = rbno;
1465        XFS_WANT_CORRUPTED_GOTO(args->mp,
1466                args->agbno + args->len <=
1467                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1468                error0);
1469        trace_xfs_alloc_size_done(args);
1470        return 0;
1471
1472error0:
1473        trace_xfs_alloc_size_error(args);
1474        if (cnt_cur)
1475                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1476        if (bno_cur)
1477                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1478        return error;
1479
1480out_nominleft:
1481        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1482        trace_xfs_alloc_size_nominleft(args);
1483        args->agbno = NULLAGBLOCK;
1484        return 0;
1485}
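
    /*
     * Note on the retry ladder above: the first pass (forced == 0) only takes
     * extents whose busy-trimmed, aligned length reaches maxlen.  If
     * everything large enough is busy, the log is forced and the search
     * restarts (forced == 1); if that still fails, the following restart
     * (forced > 1) falls back to xfs_alloc_ag_vextent_small() and accepts
     * whatever the by-size tree or the AGFL can still provide.
     */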
1486
1487/*
1488 * Deal with the case where only small freespaces remain.
1489 * Either return the contents of the last freespace record,
1490 * or allocate space from the freelist if there is nothing in the tree.
1491 */
1492STATIC int                      /* error */
1493xfs_alloc_ag_vextent_small(
1494        xfs_alloc_arg_t *args,  /* allocation argument structure */
1495        xfs_btree_cur_t *ccur,  /* by-size cursor */
1496        xfs_agblock_t   *fbnop, /* result block number */
1497        xfs_extlen_t    *flenp, /* result length */
1498        int             *stat)  /* status: 0-freelist, 1-normal/none */
1499{
1500        int             error;
1501        xfs_agblock_t   fbno;
1502        xfs_extlen_t    flen;
1503        int             i;
1504
1505        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1506                goto error0;
1507        if (i) {
1508                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1509                        goto error0;
1510                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1511        }
1512        /*
1513         * Nothing in the btree, try the freelist.  Make sure
1514         * to respect minleft even when pulling from the
1515         * freelist.
1516         */
1517        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1518                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1519                  > args->minleft)) {
1520                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1521                if (error)
1522                        goto error0;
1523                if (fbno != NULLAGBLOCK) {
1524                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1525                                             args->userdata);
1526
1527                        if (args->userdata) {
1528                                xfs_buf_t       *bp;
1529
1530                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1531                                        args->agno, fbno, 0);
1532                                xfs_trans_binval(args->tp, bp);
1533                        }
1534                        args->len = 1;
1535                        args->agbno = fbno;
1536                        XFS_WANT_CORRUPTED_GOTO(args->mp,
1537                                args->agbno + args->len <=
1538                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1539                                error0);
1540                        args->wasfromfl = 1;
1541                        trace_xfs_alloc_small_freelist(args);
1542                        *stat = 0;
1543                        return 0;
1544                }
1545                /*
1546                 * Nothing in the freelist.
1547                 */
1548                else
1549                        flen = 0;
1550        }
1551        /*
1552         * Can't allocate from the freelist for some reason.
1553         */
1554        else {
1555                fbno = NULLAGBLOCK;
1556                flen = 0;
1557        }
1558        /*
1559         * Can't do the allocation, give up.
1560         */
1561        if (flen < args->minlen) {
1562                args->agbno = NULLAGBLOCK;
1563                trace_xfs_alloc_small_notenough(args);
1564                flen = 0;
1565        }
1566        *fbnop = fbno;
1567        *flenp = flen;
1568        *stat = 1;
1569        trace_xfs_alloc_small_done(args);
1570        return 0;
1571
1572error0:
1573        trace_xfs_alloc_small_error(args);
1574        return error;
1575}
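
    /*
     * The *stat convention above: *stat == 0 means the request was satisfied
     * directly from the AGFL (args->agbno and args->len are already set and
     * there is nothing left for the caller to fix up); *stat == 1 means the
     * caller should continue with the returned fbno/flen, where flen == 0
     * indicates that nothing suitable was found.
     */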
1576
1577/*
1578 * Free the extent starting at agno/bno for length.
1579 */
1580STATIC int                      /* error */
1581xfs_free_ag_extent(
1582        xfs_trans_t     *tp,    /* transaction pointer */
1583        xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
1584        xfs_agnumber_t  agno,   /* allocation group number */
1585        xfs_agblock_t   bno,    /* starting block number */
1586        xfs_extlen_t    len,    /* length of extent */
1587        int             isfl)   /* set if is freelist blocks - no sb acctg */
1588{
1589        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1590        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1591        int             error;          /* error return value */
1592        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1593        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1594        int             haveleft;       /* have a left neighbor block */
1595        int             haveright;      /* have a right neighbor block */
1596        int             i;              /* temp, result code */
1597        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1598        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1599        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1600        xfs_agblock_t   nbno;           /* new starting block of freespace */
1601        xfs_extlen_t    nlen;           /* new length of freespace */
1602        xfs_perag_t     *pag;           /* per allocation group data */
1603
1604        mp = tp->t_mountp;
1605        /*
1606         * Allocate and initialize a cursor for the by-block btree.
1607         */
1608        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1609        cnt_cur = NULL;
1610        /*
1611         * Look for a neighboring block on the left (lower block numbers)
1612         * that is contiguous with this space.
1613         */
1614        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1615                goto error0;
1616        if (haveleft) {
1617                /*
1618                 * There is a block to our left.
1619                 */
1620                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1621                        goto error0;
1622                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1623                /*
1624                 * It's not contiguous, though.
1625                 */
1626                if (ltbno + ltlen < bno)
1627                        haveleft = 0;
1628                else {
1629                        /*
1630                         * If this failure happens the request to free this
1631                         * space was invalid, it's (partly) already free.
1632                         * Very bad.
1633                         */
1634                        XFS_WANT_CORRUPTED_GOTO(mp,
1635                                                ltbno + ltlen <= bno, error0);
1636                }
1637        }
1638        /*
1639         * Look for a neighboring block on the right (higher block numbers)
1640         * that is contiguous with this space.
1641         */
1642        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1643                goto error0;
1644        if (haveright) {
1645                /*
1646                 * There is a block to our right.
1647                 */
1648                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1649                        goto error0;
1650                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1651                /*
1652                 * It's not contiguous, though.
1653                 */
1654                if (bno + len < gtbno)
1655                        haveright = 0;
1656                else {
1657                        /*
1658                         * If this failure happens the request to free this
1659                         * space was invalid, it's (partly) already free.
1660                         * Very bad.
1661                         */
1662                        XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1663                }
1664        }
1665        /*
1666         * Now allocate and initialize a cursor for the by-size tree.
1667         */
1668        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1669        /*
1670         * Have both left and right contiguous neighbors.
1671         * Merge all three into a single free block.
1672         */
1673        if (haveleft && haveright) {
1674                /*
1675                 * Delete the old by-size entry on the left.
1676                 */
1677                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1678                        goto error0;
1679                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1680                if ((error = xfs_btree_delete(cnt_cur, &i)))
1681                        goto error0;
1682                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1683                /*
1684                 * Delete the old by-size entry on the right.
1685                 */
1686                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1687                        goto error0;
1688                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1689                if ((error = xfs_btree_delete(cnt_cur, &i)))
1690                        goto error0;
1691                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1692                /*
1693                 * Delete the old by-block entry for the right block.
1694                 */
1695                if ((error = xfs_btree_delete(bno_cur, &i)))
1696                        goto error0;
1697                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1698                /*
1699                 * Move the by-block cursor back to the left neighbor.
1700                 */
1701                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1702                        goto error0;
1703                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1704#ifdef DEBUG
1705                /*
1706                 * Check that this is the right record: delete didn't
1707                 * mangle the cursor.
1708                 */
1709                {
1710                        xfs_agblock_t   xxbno;
1711                        xfs_extlen_t    xxlen;
1712
1713                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1714                                        &i)))
1715                                goto error0;
1716                        XFS_WANT_CORRUPTED_GOTO(mp,
1717                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1718                                error0);
1719                }
1720#endif
1721                /*
1722                 * Update remaining by-block entry to the new, joined block.
1723                 */
1724                nbno = ltbno;
1725                nlen = len + ltlen + gtlen;
1726                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1727                        goto error0;
1728        }
1729        /*
1730         * Have only a left contiguous neighbor.
1731         * Merge it together with the new freespace.
1732         */
1733        else if (haveleft) {
1734                /*
1735                 * Delete the old by-size entry on the left.
1736                 */
1737                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1738                        goto error0;
1739                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1740                if ((error = xfs_btree_delete(cnt_cur, &i)))
1741                        goto error0;
1742                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1743                /*
1744                 * Back up the by-block cursor to the left neighbor, and
1745                 * update its length.
1746                 */
1747                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1748                        goto error0;
1749                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1750                nbno = ltbno;
1751                nlen = len + ltlen;
1752                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1753                        goto error0;
1754        }
1755        /*
1756         * Have only a right contiguous neighbor.
1757         * Merge it together with the new freespace.
1758         */
1759        else if (haveright) {
1760                /*
1761                 * Delete the old by-size entry on the right.
1762                 */
1763                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1764                        goto error0;
1765                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1766                if ((error = xfs_btree_delete(cnt_cur, &i)))
1767                        goto error0;
1768                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1769                /*
1770                 * Update the starting block and length of the right
1771                 * neighbor in the by-block tree.
1772                 */
1773                nbno = bno;
1774                nlen = len + gtlen;
1775                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1776                        goto error0;
1777        }
1778        /*
1779         * No contiguous neighbors.
1780         * Insert the new freespace into the by-block tree.
1781         */
1782        else {
1783                nbno = bno;
1784                nlen = len;
1785                if ((error = xfs_btree_insert(bno_cur, &i)))
1786                        goto error0;
1787                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1788        }
1789        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1790        bno_cur = NULL;
1791        /*
1792         * In all cases we need to insert the new freespace in the by-size tree.
1793         */
1794        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1795                goto error0;
1796        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
1797        if ((error = xfs_btree_insert(cnt_cur, &i)))
1798                goto error0;
1799        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1800        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1801        cnt_cur = NULL;
1802
1803        /*
1804         * Update the freespace totals in the ag and superblock.
1805         */
1806        pag = xfs_perag_get(mp, agno);
1807        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1808        xfs_perag_put(pag);
1809        if (error)
1810                goto error0;
1811
1812        if (!isfl)
1813                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1814        XFS_STATS_INC(mp, xs_freex);
1815        XFS_STATS_ADD(mp, xs_freeb, len);
1816
1817        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1818
1819        return 0;
1820
1821 error0:
1822        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1823        if (bno_cur)
1824                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1825        if (cnt_cur)
1826                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1827        return error;
1828}
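
    /*
     * Coalescing cases handled above when freeing the extent [bno, len]:
     *
     *   left and right contiguous: one record at ltbno, length ltlen + len + gtlen
     *   left only:                 one record at ltbno, length ltlen + len
     *   right only:                one record at bno, length len + gtlen
     *   neither:                   a new record at bno, length len
     *
     * In every case the resulting extent is then inserted into the by-size
     * tree and the AGF/superblock free space counters are updated.
     */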
1829
1830/*
1831 * Visible (exported) allocation/free functions.
1832 * Some of these are used just by xfs_alloc_btree.c and this file.
1833 */
1834
1835/*
1836 * Compute and fill in value of m_ag_maxlevels.
1837 */
1838void
1839xfs_alloc_compute_maxlevels(
1840        xfs_mount_t     *mp)    /* file system mount structure */
1841{
1842        int             level;
1843        uint            maxblocks;
1844        uint            maxleafents;
1845        int             minleafrecs;
1846        int             minnoderecs;
1847
1848        maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1849        minleafrecs = mp->m_alloc_mnr[0];
1850        minnoderecs = mp->m_alloc_mnr[1];
1851        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1852        for (level = 1; maxblocks > 1; level++)
1853                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1854        mp->m_ag_maxlevels = level;
1855}
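
    /*
     * Worked example: with sb_agblocks = 1048576 the worst case is
     * maxleafents = 524288 single-block free extents.  Assuming, purely for
     * illustration, minleafrecs = 200 and minnoderecs = 100, the loop above
     * computes 2622 -> 27 -> 1 blocks per level, giving m_ag_maxlevels = 3.
     */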
1856
1857/*
1858 * Find the length of the longest extent in an AG.
1859 */
1860xfs_extlen_t
1861xfs_alloc_longest_free_extent(
1862        struct xfs_mount        *mp,
1863        struct xfs_perag        *pag,
1864        xfs_extlen_t            need)
1865{
1866        xfs_extlen_t            delta = 0;
1867
1868        if (need > pag->pagf_flcount)
1869                delta = need - pag->pagf_flcount;
1870
1871        if (pag->pagf_longest > delta)
1872                return pag->pagf_longest - delta;
1873        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1874}
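
    /*
     * "need" is the number of blocks the freelist still requires, so the
     * longest usable extent is reduced by that deficit.  The final return
     * statement evaluates to 1 when some free space exists but not enough to
     * cover the deficit, and to 0 when the AG has no free space at all.
     */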
1875
1876unsigned int
1877xfs_alloc_min_freelist(
1878        struct xfs_mount        *mp,
1879        struct xfs_perag        *pag)
1880{
1881        unsigned int            min_free;
1882
1883        /* space needed by-bno freespace btree */
1884        min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1885                                       mp->m_ag_maxlevels);
1886        /* space needed by-size freespace btree */
1887        min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1888                                       mp->m_ag_maxlevels);
1889
1890        return min_free;
1891}
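
    /*
     * The minimum freelist length is the worst-case number of blocks a single
     * allocation might need to grow the free space btrees: roughly one block
     * per existing level plus one for a possible new root, for each of the
     * by-bno and by-size trees, capped at the maximum possible tree height.
     */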
1892
1893/*
1894 * Check if the operation we are fixing up the freelist for should go ahead or
1895 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1896 * is dependent on whether the size and shape of free space available will
1897 * permit the requested allocation to take place.
1898 */
1899static bool
1900xfs_alloc_space_available(
1901        struct xfs_alloc_arg    *args,
1902        xfs_extlen_t            min_free,
1903        int                     flags)
1904{
1905        struct xfs_perag        *pag = args->pag;
1906        xfs_extlen_t            longest;
1907        int                     available;
1908
1909        if (flags & XFS_ALLOC_FLAG_FREEING)
1910                return true;
1911
1912        /* do we have enough contiguous free space for the allocation? */
1913        longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1914        if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
1915                return false;
1916
1917        /* do we have enough free space remaining for the allocation? */
1918        available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
1919                          min_free - args->total);
1920        if (available < (int)args->minleft)
1921                return false;
1922
1923        return true;
1924}
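
    /*
     * Two independent checks are applied above: the longest free extent,
     * after reserving min_free blocks for the freelist, must cover minlen
     * plus any alignment slop; and the free blocks plus freelist blocks,
     * minus the freelist reservation and the blocks this allocation needs
     * (args->total), must still leave at least args->minleft in the AG.
     */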
1925
1926/*
1927 * Decide whether to use this allocation group for this allocation.
1928 * If so, fix up the btree freelist's size.
1929 */
1930int                     /* error */
1931xfs_alloc_fix_freelist(
1932        struct xfs_alloc_arg    *args,  /* allocation argument structure */
1933        int                     flags)  /* XFS_ALLOC_FLAG_... */
1934{
1935        struct xfs_mount        *mp = args->mp;
1936        struct xfs_perag        *pag = args->pag;
1937        struct xfs_trans        *tp = args->tp;
1938        struct xfs_buf          *agbp = NULL;
1939        struct xfs_buf          *agflbp = NULL;
1940        struct xfs_alloc_arg    targs;  /* local allocation arguments */
1941        xfs_agblock_t           bno;    /* freelist block */
1942        xfs_extlen_t            need;   /* total blocks needed in freelist */
1943        int                     error = 0;
1944
1945        if (!pag->pagf_init) {
1946                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1947                if (error)
1948                        goto out_no_agbp;
1949                if (!pag->pagf_init) {
1950                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1951                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1952                        goto out_agbp_relse;
1953                }
1954        }
1955
1956        /*
1957         * If this is a metadata preferred pag and we are allocating user
1958         * data, then try somewhere else if we are not being asked to try
1959         * harder at this point.
1960         */
1961        if (pag->pagf_metadata && args->userdata &&
1962            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1963                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1964                goto out_agbp_relse;
1965        }
1966
1967        need = xfs_alloc_min_freelist(mp, pag);
1968        if (!xfs_alloc_space_available(args, need, flags))
1969                goto out_agbp_relse;
1970
1971        /*
1972         * Get the a.g. freespace buffer.
1973         * Can fail if we're not blocking on locks, and it's held.
1974         */
1975        if (!agbp) {
1976                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1977                if (error)
1978                        goto out_no_agbp;
1979                if (!agbp) {
1980                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1981                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1982                        goto out_no_agbp;
1983                }
1984        }
1985
1986        /* Reject the AG if there isn't enough total or contiguous free space. */
1987        need = xfs_alloc_min_freelist(mp, pag);
1988        if (!xfs_alloc_space_available(args, need, flags))
1989                goto out_agbp_relse;
1990
1991        /*
1992         * Make the freelist shorter if it's too long.
1993         *
1994         * Note that from this point onwards, we will always release the agf and
1995         * agfl buffers on error. This handles the case where we error out and
1996         * the buffers are clean or may not have been joined to the transaction
1997         * and hence need to be released manually. If they have been joined to
1998         * the transaction, then xfs_trans_brelse() will handle them
1999         * appropriately based on the recursion count and dirty state of the
2000         * buffer.
2001         *
2002         * XXX (dgc): When we have lots of free space, does this buy us
2003         * anything other than extra overhead when we need to put more blocks
2004         * back on the free list? Maybe we should only do this when space is
2005         * getting low or the AGFL is more than half full?
2006         */
2007        while (pag->pagf_flcount > need) {
2008                struct xfs_buf  *bp;
2009
2010                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2011                if (error)
2012                        goto out_agbp_relse;
2013                error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
2014                if (error)
2015                        goto out_agbp_relse;
2016                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2017                xfs_trans_binval(tp, bp);
2018        }
2019
2020        memset(&targs, 0, sizeof(targs));
2021        targs.tp = tp;
2022        targs.mp = mp;
2023        targs.agbp = agbp;
2024        targs.agno = args->agno;
2025        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
2026        targs.type = XFS_ALLOCTYPE_THIS_AG;
2027        targs.pag = pag;
2028        error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2029        if (error)
2030                goto out_agbp_relse;
2031
2032        /* Make the freelist longer if it's too short. */
2033        while (pag->pagf_flcount < need) {
2034                targs.agbno = 0;
2035                targs.maxlen = need - pag->pagf_flcount;
2036
2037                /* Allocate as many blocks as possible at once. */
2038                error = xfs_alloc_ag_vextent(&targs);
2039                if (error)
2040                        goto out_agflbp_relse;
2041
2042                /*
2043                 * Stop if we run out.  Won't happen if callers are obeying
2044                 * the restrictions correctly.  Can happen for free calls
2045                 * on a completely full ag.
2046                 */
2047                if (targs.agbno == NULLAGBLOCK) {
2048                        if (flags & XFS_ALLOC_FLAG_FREEING)
2049                                break;
2050                        goto out_agflbp_relse;
2051                }
2052                /*
2053                 * Put each allocated block on the list.
2054                 */
2055                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2056                        error = xfs_alloc_put_freelist(tp, agbp,
2057                                                        agflbp, bno, 0);
2058                        if (error)
2059                                goto out_agflbp_relse;
2060                }
2061        }
2062        xfs_trans_brelse(tp, agflbp);
2063        args->agbp = agbp;
2064        return 0;
2065
2066out_agflbp_relse:
2067        xfs_trans_brelse(tp, agflbp);
2068out_agbp_relse:
2069        if (agbp)
2070                xfs_trans_brelse(tp, agbp);
2071out_no_agbp:
2072        args->agbp = NULL;
2073        return error;
2074}
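
    /*
     * Calling convention note: a zero return with args->agbp == NULL means
     * this AG was rejected (not enough space, or a trylock failure), not that
     * an error occurred; callers are expected to test args->agbp before using
     * the AGF.  A non-zero return value is a real error.
     */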
2075
2076/*
2077 * Get a block from the freelist.
2078 * Returns the block address in *bnop, or NULLAGBLOCK if the freelist is empty.
2079 */
2080int                             /* error */
2081xfs_alloc_get_freelist(
2082        xfs_trans_t     *tp,    /* transaction pointer */
2083        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2084        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2085        int             btreeblk) /* destination is an AGF btree */
2086{
2087        xfs_agf_t       *agf;   /* a.g. freespace structure */
2088        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2089        xfs_agblock_t   bno;    /* block number returned */
2090        __be32          *agfl_bno;
2091        int             error;
2092        int             logflags;
2093        xfs_mount_t     *mp = tp->t_mountp;
2094        xfs_perag_t     *pag;   /* per allocation group data */
2095
2096        /*
2097         * Freelist is empty, give up.
2098         */
2099        agf = XFS_BUF_TO_AGF(agbp);
2100        if (!agf->agf_flcount) {
2101                *bnop = NULLAGBLOCK;
2102                return 0;
2103        }
2104        /*
2105         * Read the array of free blocks.
2106         */
2107        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2108                                    &agflbp);
2109        if (error)
2110                return error;
2111
2112
2113        /*
2114         * Get the block number and update the data structures.
2115         */
2116        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2117        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2118        be32_add_cpu(&agf->agf_flfirst, 1);
2119        xfs_trans_brelse(tp, agflbp);
2120        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2121                agf->agf_flfirst = 0;
2122
2123        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2124        be32_add_cpu(&agf->agf_flcount, -1);
2125        xfs_trans_agflist_delta(tp, -1);
2126        pag->pagf_flcount--;
2127
2128        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2129        if (btreeblk) {
2130                be32_add_cpu(&agf->agf_btreeblks, 1);
2131                pag->pagf_btreeblks++;
2132                logflags |= XFS_AGF_BTREEBLKS;
2133        }
2134        xfs_perag_put(pag);
2135
2136        xfs_alloc_log_agf(tp, agbp, logflags);
2137        *bnop = bno;
2138
2139        return 0;
2140}
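
    /*
     * The AGFL is a fixed-size circular buffer of block numbers: agf_flfirst
     * indexes the next block to hand out (advanced here) and agf_fllast the
     * most recently added block (advanced in xfs_alloc_put_freelist()), both
     * wrapping at XFS_AGFL_SIZE(mp), while agf_flcount tracks how many
     * entries are in use.
     */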
2141
2142/*
2143 * Log the given fields from the agf structure.
2144 */
2145void
2146xfs_alloc_log_agf(
2147        xfs_trans_t     *tp,    /* transaction pointer */
2148        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2149        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2150{
2151        int     first;          /* first byte offset */
2152        int     last;           /* last byte offset */
2153        static const short      offsets[] = {
2154                offsetof(xfs_agf_t, agf_magicnum),
2155                offsetof(xfs_agf_t, agf_versionnum),
2156                offsetof(xfs_agf_t, agf_seqno),
2157                offsetof(xfs_agf_t, agf_length),
2158                offsetof(xfs_agf_t, agf_roots[0]),
2159                offsetof(xfs_agf_t, agf_levels[0]),
2160                offsetof(xfs_agf_t, agf_flfirst),
2161                offsetof(xfs_agf_t, agf_fllast),
2162                offsetof(xfs_agf_t, agf_flcount),
2163                offsetof(xfs_agf_t, agf_freeblks),
2164                offsetof(xfs_agf_t, agf_longest),
2165                offsetof(xfs_agf_t, agf_btreeblks),
2166                offsetof(xfs_agf_t, agf_uuid),
2167                sizeof(xfs_agf_t)
2168        };
2169
2170        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2171
2172        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2173
2174        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2175        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2176}
2177
2178/*
2179 * Interface for inode allocation to force the pag data to be initialized.
2180 */
2181int                                     /* error */
2182xfs_alloc_pagf_init(
2183        xfs_mount_t             *mp,    /* file system mount structure */
2184        xfs_trans_t             *tp,    /* transaction pointer */
2185        xfs_agnumber_t          agno,   /* allocation group number */
2186        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2187{
2188        xfs_buf_t               *bp;
2189        int                     error;
2190
2191        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2192                return error;
2193        if (bp)
2194                xfs_trans_brelse(tp, bp);
2195        return 0;
2196}
2197
2198/*
2199 * Put the block on the freelist for the allocation group.
2200 */
2201int                                     /* error */
2202xfs_alloc_put_freelist(
2203        xfs_trans_t             *tp,    /* transaction pointer */
2204        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2205        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2206        xfs_agblock_t           bno,    /* block being freed */
2207        int                     btreeblk) /* block came from an AGF btree */
2208{
2209        xfs_agf_t               *agf;   /* a.g. freespace structure */
2210        __be32                  *blockp;/* pointer to array entry */
2211        int                     error;
2212        int                     logflags;
2213        xfs_mount_t             *mp;    /* mount structure */
2214        xfs_perag_t             *pag;   /* per allocation group data */
2215        __be32                  *agfl_bno;
2216        int                     startoff;
2217
2218        agf = XFS_BUF_TO_AGF(agbp);
2219        mp = tp->t_mountp;
2220
2221        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2222                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2223                return error;
2224        be32_add_cpu(&agf->agf_fllast, 1);
2225        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2226                agf->agf_fllast = 0;
2227
2228        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2229        be32_add_cpu(&agf->agf_flcount, 1);
2230        xfs_trans_agflist_delta(tp, 1);
2231        pag->pagf_flcount++;
2232
2233        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2234        if (btreeblk) {
2235                be32_add_cpu(&agf->agf_btreeblks, -1);
2236                pag->pagf_btreeblks--;
2237                logflags |= XFS_AGF_BTREEBLKS;
2238        }
2239        xfs_perag_put(pag);
2240
2241        xfs_alloc_log_agf(tp, agbp, logflags);
2242
2243        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2244
2245        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2246        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2247        *blockp = cpu_to_be32(bno);
2248        startoff = (char *)blockp - (char *)agflbp->b_addr;
2249
2250        xfs_alloc_log_agf(tp, agbp, logflags);
2251
2252        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2253        xfs_trans_log_buf(tp, agflbp, startoff,
2254                          startoff + sizeof(xfs_agblock_t) - 1);
2255        return 0;
2256}
2257
2258static bool
2259xfs_agf_verify(
2260        struct xfs_mount *mp,
2261        struct xfs_buf  *bp)
2262{
2263        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2264
2265        if (xfs_sb_version_hascrc(&mp->m_sb)) {
2266                if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2267                        return false;
2268                if (!xfs_log_check_lsn(mp,
2269                                be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2270                        return false;
2271        }
2272
2273        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2274              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2275              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2276              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2277              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2278              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2279                return false;
2280
2281        if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2282            be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2283                return false;
2284
2285        /*
2286         * during growfs operations, the perag is not fully initialised,
2287         * so we can't use it for any useful checking. growfs ensures we can't
2288         * use it by using uncached buffers that don't have the perag attached
2289         * so we can detect and avoid this problem.
2290         */
2291        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2292                return false;
2293
2294        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2295            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2296                return false;
2297
2298        return true;
2299
2300}
2301
2302static void
2303xfs_agf_read_verify(
2304        struct xfs_buf  *bp)
2305{
2306        struct xfs_mount *mp = bp->b_target->bt_mount;
2307
2308        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2309            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2310                xfs_buf_ioerror(bp, -EFSBADCRC);
2311        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2312                                XFS_ERRTAG_ALLOC_READ_AGF,
2313                                XFS_RANDOM_ALLOC_READ_AGF))
2314                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2315
2316        if (bp->b_error)
2317                xfs_verifier_error(bp);
2318}
2319
2320static void
2321xfs_agf_write_verify(
2322        struct xfs_buf  *bp)
2323{
2324        struct xfs_mount *mp = bp->b_target->bt_mount;
2325        struct xfs_buf_log_item *bip = bp->b_fspriv;
2326
2327        if (!xfs_agf_verify(mp, bp)) {
2328                xfs_buf_ioerror(bp, -EFSCORRUPTED);
2329                xfs_verifier_error(bp);
2330                return;
2331        }
2332
2333        if (!xfs_sb_version_hascrc(&mp->m_sb))
2334                return;
2335
2336        if (bip)
2337                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2338
2339        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2340}
2341
2342const struct xfs_buf_ops xfs_agf_buf_ops = {
2343        .name = "xfs_agf",
2344        .verify_read = xfs_agf_read_verify,
2345        .verify_write = xfs_agf_write_verify,
2346};
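
    /*
     * These verifiers are wired up through b_ops when the AGF buffer is read
     * (see xfs_read_agf() below).  The read verifier checks the CRC and the
     * structural invariants after I/O completes; the write verifier re-checks
     * the structure and, on CRC-enabled filesystems, stamps the LSN and
     * recomputes the checksum before the buffer is written back.
     */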
2347
2348/*
2349 * Read in the allocation group header (free/alloc section).
2350 */
2351int                                     /* error */
2352xfs_read_agf(
2353        struct xfs_mount        *mp,    /* mount point structure */
2354        struct xfs_trans        *tp,    /* transaction pointer */
2355        xfs_agnumber_t          agno,   /* allocation group number */
2356        int                     flags,  /* XFS_BUF_ */
2357        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2358{
2359        int             error;
2360
2361        trace_xfs_read_agf(mp, agno);
2362
2363        ASSERT(agno != NULLAGNUMBER);
2364        error = xfs_trans_read_buf(
2365                        mp, tp, mp->m_ddev_targp,
2366                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2367                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2368        if (error)
2369                return error;
2370        if (!*bpp)
2371                return 0;
2372
2373        ASSERT(!(*bpp)->b_error);
2374        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2375        return 0;
2376}
2377
2378/*
2379 * Read in the allocation group header (free/alloc section).
2380 */
2381int                                     /* error */
2382xfs_alloc_read_agf(
2383        struct xfs_mount        *mp,    /* mount point structure */
2384        struct xfs_trans        *tp,    /* transaction pointer */
2385        xfs_agnumber_t          agno,   /* allocation group number */
2386        int                     flags,  /* XFS_ALLOC_FLAG_... */
2387        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2388{
2389        struct xfs_agf          *agf;           /* ag freelist header */
2390        struct xfs_perag        *pag;           /* per allocation group data */
2391        int                     error;
2392
2393        trace_xfs_alloc_read_agf(mp, agno);
2394
2395        ASSERT(agno != NULLAGNUMBER);
2396        error = xfs_read_agf(mp, tp, agno,
2397                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2398                        bpp);
2399        if (error)
2400                return error;
2401        if (!*bpp)
2402                return 0;
2403        ASSERT(!(*bpp)->b_error);
2404
2405        agf = XFS_BUF_TO_AGF(*bpp);
2406        pag = xfs_perag_get(mp, agno);
2407        if (!pag->pagf_init) {
2408                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2409                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2410                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2411                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2412                pag->pagf_levels[XFS_BTNUM_BNOi] =
2413                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2414                pag->pagf_levels[XFS_BTNUM_CNTi] =
2415                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2416                spin_lock_init(&pag->pagb_lock);
2417                pag->pagb_count = 0;
2418                pag->pagb_tree = RB_ROOT;
2419                pag->pagf_init = 1;
2420        }
2421#ifdef DEBUG
2422        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2423                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2424                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2425                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2426                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2427                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2428                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2429                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2430                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2431        }
2432#endif
2433        xfs_perag_put(pag);
2434        return 0;
2435}
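
    /*
     * The first successful read initialises the in-memory per-AG mirror of
     * the AGF (pagf_freeblks, pagf_flcount, pagf_longest and the btree
     * levels); on DEBUG kernels later reads re-assert that the cached values
     * still match the on-disk structure.
     */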
2436
2437/*
2438 * Allocate an extent (variable-size).
2439 * Depending on the allocation type, we either look in a single allocation
2440 * group or loop over the allocation groups to find the result.
2441 */
2442int                             /* error */
2443xfs_alloc_vextent(
2444        xfs_alloc_arg_t *args)  /* allocation argument structure */
2445{
2446        xfs_agblock_t   agsize; /* allocation group size */
2447        int             error;
2448        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2449        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2450        xfs_mount_t     *mp;    /* mount structure pointer */
2451        xfs_agnumber_t  sagno;  /* starting allocation group number */
2452        xfs_alloctype_t type;   /* input allocation type */
2453        int             bump_rotor = 0;
2454        int             no_min = 0;
2455        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2456
2457        mp = args->mp;
2458        type = args->otype = args->type;
2459        args->agbno = NULLAGBLOCK;
2460        /*
2461         * Just fix this up, for the case where the last a.g. is shorter
2462         * (or there's only one a.g.) and the caller couldn't easily figure
2463         * that out (xfs_bmap_alloc).
2464         */
2465        agsize = mp->m_sb.sb_agblocks;
2466        if (args->maxlen > agsize)
2467                args->maxlen = agsize;
2468        if (args->alignment == 0)
2469                args->alignment = 1;
2470        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2471        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2472        ASSERT(args->minlen <= args->maxlen);
2473        ASSERT(args->minlen <= agsize);
2474        ASSERT(args->mod < args->prod);
2475        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2476            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2477            args->minlen > args->maxlen || args->minlen > agsize ||
2478            args->mod >= args->prod) {
2479                args->fsbno = NULLFSBLOCK;
2480                trace_xfs_alloc_vextent_badargs(args);
2481                return 0;
2482        }
2483        minleft = args->minleft;
2484
2485        switch (type) {
2486        case XFS_ALLOCTYPE_THIS_AG:
2487        case XFS_ALLOCTYPE_NEAR_BNO:
2488        case XFS_ALLOCTYPE_THIS_BNO:
2489                /*
2490                 * These three force us into a single a.g.
2491                 */
2492                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2493                args->pag = xfs_perag_get(mp, args->agno);
2494                args->minleft = 0;
2495                error = xfs_alloc_fix_freelist(args, 0);
2496                args->minleft = minleft;
2497                if (error) {
2498                        trace_xfs_alloc_vextent_nofix(args);
2499                        goto error0;
2500                }
2501                if (!args->agbp) {
2502                        trace_xfs_alloc_vextent_noagbp(args);
2503                        break;
2504                }
2505                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2506                if ((error = xfs_alloc_ag_vextent(args)))
2507                        goto error0;
2508                break;
2509        case XFS_ALLOCTYPE_START_BNO:
2510                /*
2511                 * Try near allocation first, then anywhere-in-ag after
2512                 * the first a.g. fails.
2513                 */
2514                if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
2515                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2516                        args->fsbno = XFS_AGB_TO_FSB(mp,
2517                                        ((mp->m_agfrotor / rotorstep) %
2518                                        mp->m_sb.sb_agcount), 0);
2519                        bump_rotor = 1;
2520                }
2521                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2522                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2523                /* FALLTHROUGH */
2524        case XFS_ALLOCTYPE_ANY_AG:
2525        case XFS_ALLOCTYPE_START_AG:
2526        case XFS_ALLOCTYPE_FIRST_AG:
2527                /*
2528                 * Rotate through the allocation groups looking for a winner.
2529                 */
2530                if (type == XFS_ALLOCTYPE_ANY_AG) {
2531                        /*
2532                         * Start with the last place we left off.
2533                         */
2534                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2535                                        mp->m_sb.sb_agcount;
2536                        args->type = XFS_ALLOCTYPE_THIS_AG;
2537                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2538                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2539                        /*
2540                         * Start with allocation group given by bno.
2541                         */
2542                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2543                        args->type = XFS_ALLOCTYPE_THIS_AG;
2544                        sagno = 0;
2545                        flags = 0;
2546                } else {
2547                        if (type == XFS_ALLOCTYPE_START_AG)
2548                                args->type = XFS_ALLOCTYPE_THIS_AG;
2549                        /*
2550                         * Start with the given allocation group.
2551                         */
2552                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2553                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2554                }
2555                /*
2556                 * Loop over allocation groups twice; first time with
2557                 * trylock set, second time without.
2558                 */
2559                for (;;) {
2560                        args->pag = xfs_perag_get(mp, args->agno);
2561                        if (no_min) args->minleft = 0;
2562                        error = xfs_alloc_fix_freelist(args, flags);
2563                        args->minleft = minleft;
2564                        if (error) {
2565                                trace_xfs_alloc_vextent_nofix(args);
2566                                goto error0;
2567                        }
2568                        /*
2569                         * If we get a buffer back then the allocation will fly.
2570                         */
2571                        if (args->agbp) {
2572                                if ((error = xfs_alloc_ag_vextent(args)))
2573                                        goto error0;
2574                                break;
2575                        }
2576
2577                        trace_xfs_alloc_vextent_loopfailed(args);
2578
2579                        /*
2580                         * Didn't work, figure out the next iteration.
2581                         */
2582                        if (args->agno == sagno &&
2583                            type == XFS_ALLOCTYPE_START_BNO)
2584                                args->type = XFS_ALLOCTYPE_THIS_AG;
2585                        /*
2586                         * For the first allocation, we can try any AG to get
2587                         * space.  However, if we have already allocated a
2588                         * block, we don't want to try AGs whose number is below
2589                         * sagno.  Otherwise, we may end up with out-of-order
2590                         * locking of AGF, which might cause deadlock.
2591                         */
2592                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2593                                if (args->firstblock != NULLFSBLOCK)
2594                                        args->agno = sagno;
2595                                else
2596                                        args->agno = 0;
2597                        }
2598                        /*
2599                         * Reached the starting a.g., must either be done
2600                         * or switch to non-trylock mode.
2601                         */
2602                        if (args->agno == sagno) {
2603                                if (no_min == 1) {
2604                                        args->agbno = NULLAGBLOCK;
2605                                        trace_xfs_alloc_vextent_allfailed(args);
2606                                        break;
2607                                }
2608                                if (flags == 0) {
2609                                        no_min = 1;
2610                                } else {
2611                                        flags = 0;
2612                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2613                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2614                                                        args->fsbno);
2615                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2616                                        }
2617                                }
2618                        }
2619                        xfs_perag_put(args->pag);
2620                }
2621                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2622                        if (args->agno == sagno)
2623                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2624                                        (mp->m_sb.sb_agcount * rotorstep);
2625                        else
2626                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2627                                        (mp->m_sb.sb_agcount * rotorstep);
2628                }
2629                break;
2630        default:
2631                ASSERT(0);
2632                /* NOTREACHED */
2633        }
2634        if (args->agbno == NULLAGBLOCK)
2635                args->fsbno = NULLFSBLOCK;
2636        else {
2637                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2638#ifdef DEBUG
2639                ASSERT(args->len >= args->minlen);
2640                ASSERT(args->len <= args->maxlen);
2641                ASSERT(args->agbno % args->alignment == 0);
2642                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2643                        args->len);
2644#endif
2645
2646                /* Zero the extent if we were asked to do so */
2647                if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2648                        error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2649                        if (error)
2650                                goto error0;
2651                }
2652
2653        }
2654        xfs_perag_put(args->pag);
2655        return 0;
2656error0:
2657        xfs_perag_put(args->pag);
2658        return error;
2659}
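
    /*
     * The allocation type selects the strategy above: THIS_AG, NEAR_BNO and
     * THIS_BNO stay within a single AG; START_BNO tries a near allocation and
     * then falls into the rotor loop; ANY_AG and START_AG walk the AGs with
     * trylock first and then blocking, while FIRST_AG scans from the given AG
     * without trylock.
     *
     * Illustrative caller setup (names and values here are examples only,
     * error handling elided):
     *
     *     memset(&args, 0, sizeof(args));
     *     args.tp = tp;
     *     args.mp = mp;
     *     args.fsbno = target_fsbno;
     *     args.type = XFS_ALLOCTYPE_START_BNO;
     *     args.minlen = 1;
     *     args.maxlen = blen;
     *     args.prod = 1;
     *     error = xfs_alloc_vextent(&args);
     *     if (!error && args.fsbno != NULLFSBLOCK)
     *             the extent starts at args.fsbno and is args.len blocks long
     */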
2660
2661/*
2662 * Free an extent.
2663 * Just break up the extent address and hand off to xfs_free_ag_extent
2664 * after fixing up the freelist.
2665 */
2666int                             /* error */
2667xfs_free_extent(
2668        xfs_trans_t     *tp,    /* transaction pointer */
2669        xfs_fsblock_t   bno,    /* starting block number of extent */
2670        xfs_extlen_t    len)    /* length of extent */
2671{
2672        xfs_alloc_arg_t args;
2673        int             error;
2674
2675        ASSERT(len != 0);
2676        memset(&args, 0, sizeof(xfs_alloc_arg_t));
2677        args.tp = tp;
2678        args.mp = tp->t_mountp;
2679
2680        /*
2681         * validate that the block number is legal - this enables us to detect
2682         * and handle silent filesystem corruption rather than crashing.
2683         */
2684        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2685        if (args.agno >= args.mp->m_sb.sb_agcount)
2686                return -EFSCORRUPTED;
2687
2688        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2689        if (args.agbno >= args.mp->m_sb.sb_agblocks)
2690                return -EFSCORRUPTED;
2691
2692        args.pag = xfs_perag_get(args.mp, args.agno);
2693        ASSERT(args.pag);
2694
2695        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2696        if (error)
2697                goto error0;
2698
2699        /* validate the extent size is legal now that we have the agf locked */
2700        if (args.agbno + len >
2701                        be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2702                error = -EFSCORRUPTED;
2703                goto error0;
2704        }
2705
2706        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2707        if (!error)
2708                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2709error0:
2710        xfs_perag_put(args.pag);
2711        return error;
2712}
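
    /*
     * Note that the freed extent is not made immediately reusable: on
     * success it is recorded in the busy extent tree, which keeps it
     * off-limits for reallocation until the freeing transaction has been
     * committed to the log.
     */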
2713