linux/fs/xfs/xfs_alloc.c
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_format.h"
  21#include "xfs_log_format.h"
  22#include "xfs_shared.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_mount.h"
  28#include "xfs_inode.h"
  29#include "xfs_btree.h"
  30#include "xfs_alloc_btree.h"
  31#include "xfs_alloc.h"
  32#include "xfs_extent_busy.h"
  33#include "xfs_error.h"
  34#include "xfs_cksum.h"
  35#include "xfs_trace.h"
  36#include "xfs_trans.h"
  37#include "xfs_buf_item.h"
  38#include "xfs_log.h"
  39
  40struct workqueue_struct *xfs_alloc_wq;
  41
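     /*
      * Absolute difference of two block numbers, written as a conditional
      * so the subtraction can never wrap an unsigned value.
      */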
  42#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  43
  44#define XFSA_FIXUP_BNO_OK       1
  45#define XFSA_FIXUP_CNT_OK       2
  46
  47STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  48STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  49STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  50STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  51                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  52
  53/*
  54 * Lookup the record equal to [bno, len] in the btree given by cur.
  55 */
  56STATIC int                              /* error */
  57xfs_alloc_lookup_eq(
  58        struct xfs_btree_cur    *cur,   /* btree cursor */
  59        xfs_agblock_t           bno,    /* starting block of extent */
  60        xfs_extlen_t            len,    /* length of extent */
  61        int                     *stat)  /* success/failure */
  62{
  63        cur->bc_rec.a.ar_startblock = bno;
  64        cur->bc_rec.a.ar_blockcount = len;
  65        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  66}
  67
  68/*
  69 * Lookup the first record greater than or equal to [bno, len]
  70 * in the btree given by cur.
  71 */
  72int                             /* error */
  73xfs_alloc_lookup_ge(
  74        struct xfs_btree_cur    *cur,   /* btree cursor */
  75        xfs_agblock_t           bno,    /* starting block of extent */
  76        xfs_extlen_t            len,    /* length of extent */
  77        int                     *stat)  /* success/failure */
  78{
  79        cur->bc_rec.a.ar_startblock = bno;
  80        cur->bc_rec.a.ar_blockcount = len;
  81        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  82}
  83
  84/*
  85 * Lookup the first record less than or equal to [bno, len]
  86 * in the btree given by cur.
  87 */
  88int                                     /* error */
  89xfs_alloc_lookup_le(
  90        struct xfs_btree_cur    *cur,   /* btree cursor */
  91        xfs_agblock_t           bno,    /* starting block of extent */
  92        xfs_extlen_t            len,    /* length of extent */
  93        int                     *stat)  /* success/failure */
  94{
  95        cur->bc_rec.a.ar_startblock = bno;
  96        cur->bc_rec.a.ar_blockcount = len;
  97        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  98}
  99
 100/*
 101 * Update the record referred to by cur to the value given
 102 * by [bno, len].
 103 * This either works (return 0) or gets an EFSCORRUPTED error.
 104 */
 105STATIC int                              /* error */
 106xfs_alloc_update(
 107        struct xfs_btree_cur    *cur,   /* btree cursor */
 108        xfs_agblock_t           bno,    /* starting block of extent */
 109        xfs_extlen_t            len)    /* length of extent */
 110{
 111        union xfs_btree_rec     rec;
 112
 113        rec.alloc.ar_startblock = cpu_to_be32(bno);
 114        rec.alloc.ar_blockcount = cpu_to_be32(len);
 115        return xfs_btree_update(cur, &rec);
 116}
 117
 118/*
 119 * Get the data from the pointed-to record.
 120 */
 121int                                     /* error */
 122xfs_alloc_get_rec(
 123        struct xfs_btree_cur    *cur,   /* btree cursor */
 124        xfs_agblock_t           *bno,   /* output: starting block of extent */
 125        xfs_extlen_t            *len,   /* output: length of extent */
 126        int                     *stat)  /* output: success/failure */
 127{
 128        union xfs_btree_rec     *rec;
 129        int                     error;
 130
 131        error = xfs_btree_get_rec(cur, &rec, stat);
 132        if (!error && *stat == 1) {
 133                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 134                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 135        }
 136        return error;
 137}
 138
 139/*
 140 * Compute aligned version of the found extent.
 141 * Takes alignment and min length into account.
 142 */
 143STATIC void
 144xfs_alloc_compute_aligned(
 145        xfs_alloc_arg_t *args,          /* allocation argument structure */
 146        xfs_agblock_t   foundbno,       /* starting block in found extent */
 147        xfs_extlen_t    foundlen,       /* length in found extent */
 148        xfs_agblock_t   *resbno,        /* result block number */
 149        xfs_extlen_t    *reslen)        /* result length */
 150{
 151        xfs_agblock_t   bno;
 152        xfs_extlen_t    len;
 153
 154        /* Trim busy sections out of found extent */
 155        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 156
 157        if (args->alignment > 1 && len >= args->minlen) {
 158                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 159                xfs_extlen_t    diff = aligned_bno - bno;
 160
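                     /*
                      * Rounding bno up to the alignment consumes 'diff' blocks
                      * from the front of the trimmed extent; if that swallows
                      * the whole extent the usable length drops to zero.
                      * E.g. alignment 4, bno 5, len 10 -> resbno 8, reslen 7.
                      */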
 161                *resbno = aligned_bno;
 162                *reslen = diff >= len ? 0 : len - diff;
 163        } else {
 164                *resbno = bno;
 165                *reslen = len;
 166        }
 167}
 168
 169/*
 170 * Compute best start block and diff for "near" allocations.
 171 * freelen >= wantlen already checked by caller.
 172 */
 173STATIC xfs_extlen_t                     /* difference value (absolute) */
 174xfs_alloc_compute_diff(
 175        xfs_agblock_t   wantbno,        /* target starting block */
 176        xfs_extlen_t    wantlen,        /* target length */
 177        xfs_extlen_t    alignment,      /* target alignment */
 178        char            userdata,       /* are we allocating data? */
 179        xfs_agblock_t   freebno,        /* freespace's starting block */
 180        xfs_extlen_t    freelen,        /* freespace's length */
 181        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 182{
 183        xfs_agblock_t   freeend;        /* end of freespace extent */
 184        xfs_agblock_t   newbno1;        /* return block number */
 185        xfs_agblock_t   newbno2;        /* other new block number */
 186        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 187        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 188        xfs_agblock_t   wantend;        /* end of target extent */
 189
 190        ASSERT(freelen >= wantlen);
 191        freeend = freebno + freelen;
 192        wantend = wantbno + wantlen;
 193        /*
 194         * We want to allocate from the start of a free extent if it is past
 195         * the desired block or if we are allocating user data and the free
 196         * extent is before desired block. The second case is there to allow
 197         * for contiguous allocation from the remaining free space if the file
 198         * grows in the short term.
 199         */
 200        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 201                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 202                        newbno1 = NULLAGBLOCK;
 203        } else if (freeend >= wantend && alignment > 1) {
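                     /*
                      * The free extent starts before and covers the target
                      * range: try the aligned block at or above wantbno
                      * (newbno1) and the aligned block just below it (newbno2),
                      * then keep whichever yields the longer usable length,
                      * breaking ties by distance from wantbno.
                      */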
 204                newbno1 = roundup(wantbno, alignment);
 205                newbno2 = newbno1 - alignment;
 206                if (newbno1 >= freeend)
 207                        newbno1 = NULLAGBLOCK;
 208                else
 209                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 210                if (newbno2 < freebno)
 211                        newbno2 = NULLAGBLOCK;
 212                else
 213                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 214                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 215                        if (newlen1 < newlen2 ||
 216                            (newlen1 == newlen2 &&
 217                             XFS_ABSDIFF(newbno1, wantbno) >
 218                             XFS_ABSDIFF(newbno2, wantbno)))
 219                                newbno1 = newbno2;
 220                } else if (newbno2 != NULLAGBLOCK)
 221                        newbno1 = newbno2;
 222        } else if (freeend >= wantend) {
 223                newbno1 = wantbno;
 224        } else if (alignment > 1) {
 225                newbno1 = roundup(freeend - wantlen, alignment);
 226                if (newbno1 > freeend - wantlen &&
 227                    newbno1 - alignment >= freebno)
 228                        newbno1 -= alignment;
 229                else if (newbno1 >= freeend)
 230                        newbno1 = NULLAGBLOCK;
 231        } else
 232                newbno1 = freeend - wantlen;
 233        *newbnop = newbno1;
 234        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 235}
 236
 237/*
 238 * Fix up the length, based on mod and prod.
 239 * len should be k * prod + mod for some k.
 240 * If len is too small it is returned unchanged.
 241 * If len hits maxlen it is left alone.
 242 */
 243STATIC void
 244xfs_alloc_fix_len(
 245        xfs_alloc_arg_t *args)          /* allocation argument structure */
 246{
 247        xfs_extlen_t    k;
 248        xfs_extlen_t    rlen;
 249
 250        ASSERT(args->mod < args->prod);
 251        rlen = args->len;
 252        ASSERT(rlen >= args->minlen);
 253        ASSERT(rlen <= args->maxlen);
 254        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 255            (args->mod == 0 && rlen < args->prod))
 256                return;
 257        k = rlen % args->prod;
 258        if (k == args->mod)
 259                return;
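             /*
              * Trim rlen to the largest value not exceeding it that leaves
              * rlen % prod == mod, e.g. prod = 4, mod = 1 turns rlen = 10
              * into 9.
              */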
 260        if (k > args->mod)
 261                rlen = rlen - (k - args->mod);
 262        else
 263                rlen = rlen - args->prod + (args->mod - k);
 264        if ((int)rlen < (int)args->minlen)
 265                return;
 266        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
 267        ASSERT(rlen % args->prod == args->mod);
 268        args->len = rlen;
 269}
 270
 271/*
 272 * Fix up length if there is too little space left in the a.g.
  273 * Return 1 if ok, 0 if there's too little space and the caller should give up.
 274 */
 275STATIC int
 276xfs_alloc_fix_minleft(
 277        xfs_alloc_arg_t *args)          /* allocation argument structure */
 278{
 279        xfs_agf_t       *agf;           /* a.g. freelist header */
 280        int             diff;           /* free space difference */
 281
 282        if (args->minleft == 0)
 283                return 1;
 284        agf = XFS_BUF_TO_AGF(args->agbp);
 285        diff = be32_to_cpu(agf->agf_freeblks)
 286                - args->len - args->minleft;
 287        if (diff >= 0)
 288                return 1;
 289        args->len += diff;              /* shrink the allocated space */
 290        if (args->len >= args->minlen)
 291                return 1;
 292        args->agbno = NULLAGBLOCK;
 293        return 0;
 294}
 295
 296/*
 297 * Update the two btrees, logically removing from freespace the extent
 298 * starting at rbno, rlen blocks.  The extent is contained within the
 299 * actual (current) free extent fbno for flen blocks.
 300 * Flags are passed in indicating whether the cursors are set to the
 301 * relevant records.
 302 */
 303STATIC int                              /* error code */
 304xfs_alloc_fixup_trees(
 305        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 306        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 307        xfs_agblock_t   fbno,           /* starting block of free extent */
 308        xfs_extlen_t    flen,           /* length of free extent */
 309        xfs_agblock_t   rbno,           /* starting block of returned extent */
 310        xfs_extlen_t    rlen,           /* length of returned extent */
 311        int             flags)          /* flags, XFSA_FIXUP_... */
 312{
 313        int             error;          /* error code */
 314        int             i;              /* operation results */
 315        xfs_agblock_t   nfbno1;         /* first new free startblock */
 316        xfs_agblock_t   nfbno2;         /* second new free startblock */
 317        xfs_extlen_t    nflen1=0;       /* first new free length */
 318        xfs_extlen_t    nflen2=0;       /* second new free length */
 319
 320        /*
 321         * Look up the record in the by-size tree if necessary.
 322         */
 323        if (flags & XFSA_FIXUP_CNT_OK) {
 324#ifdef DEBUG
 325                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 326                        return error;
 327                XFS_WANT_CORRUPTED_RETURN(
 328                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 329#endif
 330        } else {
 331                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 332                        return error;
 333                XFS_WANT_CORRUPTED_RETURN(i == 1);
 334        }
 335        /*
 336         * Look up the record in the by-block tree if necessary.
 337         */
 338        if (flags & XFSA_FIXUP_BNO_OK) {
 339#ifdef DEBUG
 340                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 341                        return error;
 342                XFS_WANT_CORRUPTED_RETURN(
 343                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 344#endif
 345        } else {
 346                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 347                        return error;
 348                XFS_WANT_CORRUPTED_RETURN(i == 1);
 349        }
 350
 351#ifdef DEBUG
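             /*
              * When both trees consist of a single root leaf block they index
              * exactly the same set of free extents, so their record counts
              * must match.
              */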
 352        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 353                struct xfs_btree_block  *bnoblock;
 354                struct xfs_btree_block  *cntblock;
 355
 356                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 357                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 358
 359                XFS_WANT_CORRUPTED_RETURN(
 360                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 361        }
 362#endif
 363
 364        /*
 365         * Deal with all four cases: the allocated record is contained
 366         * within the freespace record, so we can have new freespace
 367         * at either (or both) end, or no freespace remaining.
 368         */
 369        if (rbno == fbno && rlen == flen)
 370                nfbno1 = nfbno2 = NULLAGBLOCK;
 371        else if (rbno == fbno) {
 372                nfbno1 = rbno + rlen;
 373                nflen1 = flen - rlen;
 374                nfbno2 = NULLAGBLOCK;
 375        } else if (rbno + rlen == fbno + flen) {
 376                nfbno1 = fbno;
 377                nflen1 = flen - rlen;
 378                nfbno2 = NULLAGBLOCK;
 379        } else {
 380                nfbno1 = fbno;
 381                nflen1 = rbno - fbno;
 382                nfbno2 = rbno + rlen;
 383                nflen2 = (fbno + flen) - nfbno2;
 384        }
 385        /*
 386         * Delete the entry from the by-size btree.
 387         */
 388        if ((error = xfs_btree_delete(cnt_cur, &i)))
 389                return error;
 390        XFS_WANT_CORRUPTED_RETURN(i == 1);
 391        /*
 392         * Add new by-size btree entry(s).
 393         */
 394        if (nfbno1 != NULLAGBLOCK) {
 395                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 396                        return error;
 397                XFS_WANT_CORRUPTED_RETURN(i == 0);
 398                if ((error = xfs_btree_insert(cnt_cur, &i)))
 399                        return error;
 400                XFS_WANT_CORRUPTED_RETURN(i == 1);
 401        }
 402        if (nfbno2 != NULLAGBLOCK) {
 403                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 404                        return error;
 405                XFS_WANT_CORRUPTED_RETURN(i == 0);
 406                if ((error = xfs_btree_insert(cnt_cur, &i)))
 407                        return error;
 408                XFS_WANT_CORRUPTED_RETURN(i == 1);
 409        }
 410        /*
 411         * Fix up the by-block btree entry(s).
 412         */
 413        if (nfbno1 == NULLAGBLOCK) {
 414                /*
 415                 * No remaining freespace, just delete the by-block tree entry.
 416                 */
 417                if ((error = xfs_btree_delete(bno_cur, &i)))
 418                        return error;
 419                XFS_WANT_CORRUPTED_RETURN(i == 1);
 420        } else {
 421                /*
  422                 * Update the by-block entry to start later and/or be shorter.
 423                 */
 424                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 425                        return error;
 426        }
 427        if (nfbno2 != NULLAGBLOCK) {
 428                /*
 429                 * 2 resulting free entries, need to add one.
 430                 */
 431                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 432                        return error;
 433                XFS_WANT_CORRUPTED_RETURN(i == 0);
 434                if ((error = xfs_btree_insert(bno_cur, &i)))
 435                        return error;
 436                XFS_WANT_CORRUPTED_RETURN(i == 1);
 437        }
 438        return 0;
 439}
 440
 441static bool
 442xfs_agfl_verify(
 443        struct xfs_buf  *bp)
 444{
 445        struct xfs_mount *mp = bp->b_target->bt_mount;
 446        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 447        int             i;
 448
 449        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
 450                return false;
 451        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 452                return false;
 453        /*
 454         * during growfs operations, the perag is not fully initialised,
 455         * so we can't use it for any useful checking. growfs ensures we can't
 456         * use it by using uncached buffers that don't have the perag attached
 457         * so we can detect and avoid this problem.
 458         */
 459        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 460                return false;
 461
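             /*
              * Each free list slot must either be empty (NULLAGBLOCK) or hold
              * a block number below the AG size (sb_agblocks).
              */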
 462        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 463                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 464                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 465                        return false;
 466        }
 467        return true;
 468}
 469
 470static void
 471xfs_agfl_read_verify(
 472        struct xfs_buf  *bp)
 473{
 474        struct xfs_mount *mp = bp->b_target->bt_mount;
 475
 476        /*
 477         * There is no verification of non-crc AGFLs because mkfs does not
 478         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 479         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 480         * can't verify just those entries are valid.
 481         */
 482        if (!xfs_sb_version_hascrc(&mp->m_sb))
 483                return;
 484
 485        if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 486                xfs_buf_ioerror(bp, EFSBADCRC);
 487        else if (!xfs_agfl_verify(bp))
 488                xfs_buf_ioerror(bp, EFSCORRUPTED);
 489
 490        if (bp->b_error)
 491                xfs_verifier_error(bp);
 492}
 493
 494static void
 495xfs_agfl_write_verify(
 496        struct xfs_buf  *bp)
 497{
 498        struct xfs_mount *mp = bp->b_target->bt_mount;
 499        struct xfs_buf_log_item *bip = bp->b_fspriv;
 500
 501        /* no verification of non-crc AGFLs */
 502        if (!xfs_sb_version_hascrc(&mp->m_sb))
 503                return;
 504
 505        if (!xfs_agfl_verify(bp)) {
 506                xfs_buf_ioerror(bp, EFSCORRUPTED);
 507                xfs_verifier_error(bp);
 508                return;
 509        }
 510
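             /* Record the last-modification LSN before recomputing the CRC. */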
 511        if (bip)
 512                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 513
 514        xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 515}
 516
 517const struct xfs_buf_ops xfs_agfl_buf_ops = {
 518        .verify_read = xfs_agfl_read_verify,
 519        .verify_write = xfs_agfl_write_verify,
 520};
 521
 522/*
 523 * Read in the allocation group free block array.
 524 */
 525STATIC int                              /* error */
 526xfs_alloc_read_agfl(
 527        xfs_mount_t     *mp,            /* mount point structure */
 528        xfs_trans_t     *tp,            /* transaction pointer */
 529        xfs_agnumber_t  agno,           /* allocation group number */
 530        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 531{
 532        xfs_buf_t       *bp;            /* return value */
 533        int             error;
 534
 535        ASSERT(agno != NULLAGNUMBER);
 536        error = xfs_trans_read_buf(
 537                        mp, tp, mp->m_ddev_targp,
 538                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 539                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 540        if (error)
 541                return error;
 542        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 543        *bpp = bp;
 544        return 0;
 545}
 546
 547STATIC int
 548xfs_alloc_update_counters(
 549        struct xfs_trans        *tp,
 550        struct xfs_perag        *pag,
 551        struct xfs_buf          *agbp,
 552        long                    len)
 553{
 554        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 555
 556        pag->pagf_freeblks += len;
 557        be32_add_cpu(&agf->agf_freeblks, len);
 558
 559        xfs_trans_agblocks_delta(tp, len);
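             /* A free block count larger than the AG itself means corruption. */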
 560        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 561                     be32_to_cpu(agf->agf_length)))
 562                return EFSCORRUPTED;
 563
 564        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 565        return 0;
 566}
 567
 568/*
 569 * Allocation group level functions.
 570 */
 571
 572/*
 573 * Allocate a variable extent in the allocation group agno.
 574 * Type and bno are used to determine where in the allocation group the
 575 * extent will start.
 576 * Extent's length (returned in *len) will be between minlen and maxlen,
 577 * and of the form k * prod + mod unless there's nothing that large.
 578 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 579 */
 580STATIC int                      /* error */
 581xfs_alloc_ag_vextent(
 582        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 583{
 584        int             error=0;
 585
 586        ASSERT(args->minlen > 0);
 587        ASSERT(args->maxlen > 0);
 588        ASSERT(args->minlen <= args->maxlen);
 589        ASSERT(args->mod < args->prod);
 590        ASSERT(args->alignment > 0);
 591        /*
 592         * Branch to correct routine based on the type.
 593         */
 594        args->wasfromfl = 0;
 595        switch (args->type) {
 596        case XFS_ALLOCTYPE_THIS_AG:
 597                error = xfs_alloc_ag_vextent_size(args);
 598                break;
 599        case XFS_ALLOCTYPE_NEAR_BNO:
 600                error = xfs_alloc_ag_vextent_near(args);
 601                break;
 602        case XFS_ALLOCTYPE_THIS_BNO:
 603                error = xfs_alloc_ag_vextent_exact(args);
 604                break;
 605        default:
 606                ASSERT(0);
 607                /* NOTREACHED */
 608        }
 609
 610        if (error || args->agbno == NULLAGBLOCK)
 611                return error;
 612
 613        ASSERT(args->len >= args->minlen);
 614        ASSERT(args->len <= args->maxlen);
 615        ASSERT(!args->wasfromfl || !args->isfl);
 616        ASSERT(args->agbno % args->alignment == 0);
 617
 618        if (!args->wasfromfl) {
 619                error = xfs_alloc_update_counters(args->tp, args->pag,
 620                                                  args->agbp,
 621                                                  -((long)(args->len)));
 622                if (error)
 623                        return error;
 624
 625                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 626                                              args->agbno, args->len));
 627        }
 628
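             /*
              * Adjust the in-core free space counters unless this allocation
              * is replenishing the AG free list; delayed-allocation extents
              * (wasdel) draw on the blocks already reserved for them.
              */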
 629        if (!args->isfl) {
 630                xfs_trans_mod_sb(args->tp, args->wasdel ?
 631                                 XFS_TRANS_SB_RES_FDBLOCKS :
 632                                 XFS_TRANS_SB_FDBLOCKS,
 633                                 -((long)(args->len)));
 634        }
 635
 636        XFS_STATS_INC(xs_allocx);
 637        XFS_STATS_ADD(xs_allocb, args->len);
 638        return error;
 639}
 640
 641/*
 642 * Allocate a variable extent at exactly agno/bno.
 643 * Extent's length (returned in *len) will be between minlen and maxlen,
 644 * and of the form k * prod + mod unless there's nothing that large.
 645 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 646 */
 647STATIC int                      /* error */
 648xfs_alloc_ag_vextent_exact(
 649        xfs_alloc_arg_t *args)  /* allocation argument structure */
 650{
 651        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 652        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 653        int             error;
 654        xfs_agblock_t   fbno;   /* start block of found extent */
 655        xfs_extlen_t    flen;   /* length of found extent */
 656        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 657        xfs_extlen_t    tlen;   /* length of trimmed extent */
 658        xfs_agblock_t   tend;   /* end block of trimmed extent */
 659        int             i;      /* success/failure of operation */
 660
 661        ASSERT(args->alignment == 1);
 662
 663        /*
 664         * Allocate/initialize a cursor for the by-number freespace btree.
 665         */
 666        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 667                                          args->agno, XFS_BTNUM_BNO);
 668
 669        /*
 670         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 671         * Look for the closest free block <= bno, it must contain bno
 672         * if any free block does.
 673         */
 674        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 675        if (error)
 676                goto error0;
 677        if (!i)
 678                goto not_found;
 679
 680        /*
 681         * Grab the freespace record.
 682         */
 683        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 684        if (error)
 685                goto error0;
 686        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 687        ASSERT(fbno <= args->agbno);
 688
 689        /*
 690         * Check for overlapping busy extents.
 691         */
 692        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 693
 694        /*
 695         * Give up if the start of the extent is busy, or the freespace isn't
 696         * long enough for the minimum request.
 697         */
 698        if (tbno > args->agbno)
 699                goto not_found;
 700        if (tlen < args->minlen)
 701                goto not_found;
 702        tend = tbno + tlen;
 703        if (tend < args->agbno + args->minlen)
 704                goto not_found;
 705
 706        /*
 707         * End of extent will be smaller of the freespace end and the
 708         * maximal requested end.
 709         *
 710         * Fix the length according to mod and prod if given.
 711         */
 712        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 713                                                - args->agbno;
 714        xfs_alloc_fix_len(args);
 715        if (!xfs_alloc_fix_minleft(args))
 716                goto not_found;
 717
 718        ASSERT(args->agbno + args->len <= tend);
 719
 720        /*
 721         * We are allocating agbno for args->len
 722         * Allocate/initialize a cursor for the by-size btree.
 723         */
 724        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 725                args->agno, XFS_BTNUM_CNT);
 726        ASSERT(args->agbno + args->len <=
 727                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 728        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 729                                      args->len, XFSA_FIXUP_BNO_OK);
 730        if (error) {
 731                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 732                goto error0;
 733        }
 734
 735        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 736        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 737
 738        args->wasfromfl = 0;
 739        trace_xfs_alloc_exact_done(args);
 740        return 0;
 741
 742not_found:
 743        /* Didn't find it, return null. */
 744        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 745        args->agbno = NULLAGBLOCK;
 746        trace_xfs_alloc_exact_notfound(args);
 747        return 0;
 748
 749error0:
 750        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 751        trace_xfs_alloc_exact_error(args);
 752        return error;
 753}
 754
 755/*
 756 * Search the btree in a given direction via the search cursor and compare
 757 * the records found against the good extent we've already found.
 758 */
 759STATIC int
 760xfs_alloc_find_best_extent(
 761        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 762        struct xfs_btree_cur    **gcur, /* good cursor */
 763        struct xfs_btree_cur    **scur, /* searching cursor */
 764        xfs_agblock_t           gdiff,  /* difference for search comparison */
 765        xfs_agblock_t           *sbno,  /* extent found by search */
 766        xfs_extlen_t            *slen,  /* extent length */
 767        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 768        xfs_extlen_t            *slena, /* aligned extent length */
 769        int                     dir)    /* 0 = search right, 1 = search left */
 770{
 771        xfs_agblock_t           new;
 772        xfs_agblock_t           sdiff;
 773        int                     error;
 774        int                     i;
 775
  776        /* The good extent is perfect, no need to search. */
 777        if (!gdiff)
 778                goto out_use_good;
 779
 780        /*
 781         * Look until we find a better one, run out of space or run off the end.
 782         */
 783        do {
 784                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 785                if (error)
 786                        goto error0;
 787                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 788                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 789
 790                /*
 791                 * The good extent is closer than this one.
 792                 */
 793                if (!dir) {
 794                        if (*sbnoa >= args->agbno + gdiff)
 795                                goto out_use_good;
 796                } else {
 797                        if (*sbnoa <= args->agbno - gdiff)
 798                                goto out_use_good;
 799                }
 800
 801                /*
 802                 * Same distance, compare length and pick the best.
 803                 */
 804                if (*slena >= args->minlen) {
 805                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 806                        xfs_alloc_fix_len(args);
 807
 808                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 809                                                       args->alignment,
 810                                                       args->userdata, *sbnoa,
 811                                                       *slena, &new);
 812
 813                        /*
 814                         * Choose closer size and invalidate other cursor.
 815                         */
 816                        if (sdiff < gdiff)
 817                                goto out_use_search;
 818                        goto out_use_good;
 819                }
 820
 821                if (!dir)
 822                        error = xfs_btree_increment(*scur, 0, &i);
 823                else
 824                        error = xfs_btree_decrement(*scur, 0, &i);
 825                if (error)
 826                        goto error0;
 827        } while (i);
 828
 829out_use_good:
 830        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 831        *scur = NULL;
 832        return 0;
 833
 834out_use_search:
 835        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 836        *gcur = NULL;
 837        return 0;
 838
 839error0:
 840        /* caller invalidates cursors */
 841        return error;
 842}
 843
 844/*
 845 * Allocate a variable extent near bno in the allocation group agno.
 846 * Extent's length (returned in len) will be between minlen and maxlen,
 847 * and of the form k * prod + mod unless there's nothing that large.
 848 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 849 */
 850STATIC int                              /* error */
 851xfs_alloc_ag_vextent_near(
 852        xfs_alloc_arg_t *args)          /* allocation argument structure */
 853{
 854        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 855        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 856        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 857        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 858        xfs_agblock_t   gtbnoa;         /* aligned ... */
 859        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 860        xfs_extlen_t    gtlen;          /* length of right side entry */
 861        xfs_extlen_t    gtlena;         /* aligned ... */
 862        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 863        int             error;          /* error code */
 864        int             i;              /* result code, temporary */
 865        int             j;              /* result code, temporary */
 866        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 867        xfs_agblock_t   ltbnoa;         /* aligned ... */
 868        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 869        xfs_extlen_t    ltlen;          /* length of left side entry */
 870        xfs_extlen_t    ltlena;         /* aligned ... */
 871        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 872        xfs_extlen_t    rlen;           /* length of returned extent */
  873        int             forced = 0;     /* busy extent retry count */
 874#ifdef DEBUG
 875        /*
 876         * Randomly don't execute the first algorithm.
 877         */
 878        int             dofirst;        /* set to do first algorithm */
 879
 880        dofirst = prandom_u32() & 1;
 881#endif
 882
 883restart:
 884        bno_cur_lt = NULL;
 885        bno_cur_gt = NULL;
 886        ltlen = 0;
 887        gtlena = 0;
 888        ltlena = 0;
 889
 890        /*
 891         * Get a cursor for the by-size btree.
 892         */
 893        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 894                args->agno, XFS_BTNUM_CNT);
 895
 896        /*
 897         * See if there are any free extents as big as maxlen.
 898         */
 899        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 900                goto error0;
 901        /*
 902         * If none, then pick up the last entry in the tree unless the
 903         * tree is empty.
 904         */
 905        if (!i) {
 906                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
 907                                &ltlen, &i)))
 908                        goto error0;
 909                if (i == 0 || ltlen == 0) {
 910                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 911                        trace_xfs_alloc_near_noentry(args);
 912                        return 0;
 913                }
 914                ASSERT(i == 1);
 915        }
 916        args->wasfromfl = 0;
 917
 918        /*
 919         * First algorithm.
 920         * If the requested extent is large wrt the freespaces available
 921         * in this a.g., then the cursor will be pointing to a btree entry
 922         * near the right edge of the tree.  If it's in the last btree leaf
 923         * block, then we just examine all the entries in that block
 924         * that are big enough, and pick the best one.
 925         * This is written as a while loop so we can break out of it,
 926         * but we never loop back to the top.
 927         */
 928        while (xfs_btree_islastblock(cnt_cur, 0)) {
 929                xfs_extlen_t    bdiff;
 930                int             besti=0;
 931                xfs_extlen_t    blen=0;
 932                xfs_agblock_t   bnew=0;
 933
 934#ifdef DEBUG
 935                if (dofirst)
 936                        break;
 937#endif
 938                /*
 939                 * Start from the entry that lookup found, sequence through
 940                 * all larger free blocks.  If we're actually pointing at a
 941                 * record smaller than maxlen, go to the start of this block,
 942                 * and skip all those smaller than minlen.
 943                 */
 944                if (ltlen || args->alignment > 1) {
 945                        cnt_cur->bc_ptrs[0] = 1;
 946                        do {
 947                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 948                                                &ltlen, &i)))
 949                                        goto error0;
 950                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 951                                if (ltlen >= args->minlen)
 952                                        break;
 953                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 954                                        goto error0;
 955                        } while (i);
 956                        ASSERT(ltlen >= args->minlen);
 957                        if (!i)
 958                                break;
 959                }
 960                i = cnt_cur->bc_ptrs[0];
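                     /*
                      * Walk the remaining records in this leaf block, keeping
                      * track of the best candidate (bnew/blen/bdiff); stop
                      * early once a candidate reaches maxlen with a zero
                      * locality difference.
                      */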
 961                for (j = 1, blen = 0, bdiff = 0;
 962                     !error && j && (blen < args->maxlen || bdiff > 0);
 963                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
 964                        /*
 965                         * For each entry, decide if it's better than
 966                         * the previous best entry.
 967                         */
 968                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 969                                goto error0;
 970                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 971                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
 972                                                  &ltbnoa, &ltlena);
 973                        if (ltlena < args->minlen)
 974                                continue;
 975                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 976                        xfs_alloc_fix_len(args);
 977                        ASSERT(args->len >= args->minlen);
 978                        if (args->len < blen)
 979                                continue;
 980                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 981                                args->alignment, args->userdata, ltbnoa,
 982                                ltlena, &ltnew);
 983                        if (ltnew != NULLAGBLOCK &&
 984                            (args->len > blen || ltdiff < bdiff)) {
 985                                bdiff = ltdiff;
 986                                bnew = ltnew;
 987                                blen = args->len;
 988                                besti = cnt_cur->bc_ptrs[0];
 989                        }
 990                }
 991                /*
 992                 * It didn't work.  We COULD be in a case where
 993                 * there's a good record somewhere, so try again.
 994                 */
 995                if (blen == 0)
 996                        break;
 997                /*
 998                 * Point at the best entry, and retrieve it again.
 999                 */
1000                cnt_cur->bc_ptrs[0] = besti;
1001                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1002                        goto error0;
1003                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1004                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1005                args->len = blen;
1006                if (!xfs_alloc_fix_minleft(args)) {
1007                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1008                        trace_xfs_alloc_near_nominleft(args);
1009                        return 0;
1010                }
1011                blen = args->len;
1012                /*
1013                 * We are allocating starting at bnew for blen blocks.
1014                 */
1015                args->agbno = bnew;
1016                ASSERT(bnew >= ltbno);
1017                ASSERT(bnew + blen <= ltbno + ltlen);
1018                /*
1019                 * Set up a cursor for the by-bno tree.
1020                 */
1021                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1022                        args->agbp, args->agno, XFS_BTNUM_BNO);
1023                /*
1024                 * Fix up the btree entries.
1025                 */
1026                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1027                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1028                        goto error0;
1029                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1030                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1031
1032                trace_xfs_alloc_near_first(args);
1033                return 0;
1034        }
1035        /*
1036         * Second algorithm.
1037         * Search in the by-bno tree to the left and to the right
1038         * simultaneously, until in each case we find a space big enough,
1039         * or run into the edge of the tree.  When we run into the edge,
1040         * we deallocate that cursor.
1041         * If both searches succeed, we compare the two spaces and pick
1042         * the better one.
1043         * With alignment, it's possible for both to fail; the upper
1044         * level algorithm that picks allocation groups for allocations
1045         * is not supposed to do this.
1046         */
1047        /*
1048         * Allocate and initialize the cursor for the leftward search.
1049         */
1050        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1051                args->agno, XFS_BTNUM_BNO);
1052        /*
1053         * Lookup <= bno to find the leftward search's starting point.
1054         */
1055        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1056                goto error0;
1057        if (!i) {
1058                /*
1059                 * Didn't find anything; use this cursor for the rightward
1060                 * search.
1061                 */
1062                bno_cur_gt = bno_cur_lt;
1063                bno_cur_lt = NULL;
1064        }
1065        /*
1066         * Found something.  Duplicate the cursor for the rightward search.
1067         */
1068        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1069                goto error0;
1070        /*
1071         * Increment the cursor, so we will point at the entry just right
1072         * of the leftward entry if any, or to the leftmost entry.
1073         */
1074        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1075                goto error0;
1076        if (!i) {
1077                /*
1078                 * It failed, there are no rightward entries.
1079                 */
1080                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1081                bno_cur_gt = NULL;
1082        }
1083        /*
1084         * Loop going left with the leftward cursor, right with the
1085         * rightward cursor, until either both directions give up or
1086         * we find an entry at least as big as minlen.
1087         */
1088        do {
1089                if (bno_cur_lt) {
1090                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1091                                goto error0;
1092                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1093                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1094                                                  &ltbnoa, &ltlena);
1095                        if (ltlena >= args->minlen)
1096                                break;
1097                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1098                                goto error0;
1099                        if (!i) {
1100                                xfs_btree_del_cursor(bno_cur_lt,
1101                                                     XFS_BTREE_NOERROR);
1102                                bno_cur_lt = NULL;
1103                        }
1104                }
1105                if (bno_cur_gt) {
1106                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1107                                goto error0;
1108                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1109                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1110                                                  &gtbnoa, &gtlena);
1111                        if (gtlena >= args->minlen)
1112                                break;
1113                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1114                                goto error0;
1115                        if (!i) {
1116                                xfs_btree_del_cursor(bno_cur_gt,
1117                                                     XFS_BTREE_NOERROR);
1118                                bno_cur_gt = NULL;
1119                        }
1120                }
1121        } while (bno_cur_lt || bno_cur_gt);
1122
1123        /*
1124         * Got both cursors still active, need to find better entry.
1125         */
1126        if (bno_cur_lt && bno_cur_gt) {
1127                if (ltlena >= args->minlen) {
1128                        /*
1129                         * Left side is good, look for a right side entry.
1130                         */
1131                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1132                        xfs_alloc_fix_len(args);
1133                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1134                                args->alignment, args->userdata, ltbnoa,
1135                                ltlena, &ltnew);
1136
1137                        error = xfs_alloc_find_best_extent(args,
1138                                                &bno_cur_lt, &bno_cur_gt,
1139                                                ltdiff, &gtbno, &gtlen,
1140                                                &gtbnoa, &gtlena,
1141                                                0 /* search right */);
1142                } else {
1143                        ASSERT(gtlena >= args->minlen);
1144
1145                        /*
1146                         * Right side is good, look for a left side entry.
1147                         */
1148                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1149                        xfs_alloc_fix_len(args);
1150                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1151                                args->alignment, args->userdata, gtbnoa,
1152                                gtlena, &gtnew);
1153
1154                        error = xfs_alloc_find_best_extent(args,
1155                                                &bno_cur_gt, &bno_cur_lt,
1156                                                gtdiff, &ltbno, &ltlen,
1157                                                &ltbnoa, &ltlena,
1158                                                1 /* search left */);
1159                }
1160
1161                if (error)
1162                        goto error0;
1163        }
1164
1165        /*
1166         * If we couldn't get anything, give up.
1167         */
1168        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1169                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1170
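                     /*
                      * The candidates may all have been busy: force the log
                      * once so the busy extents can clear, then retry the
                      * whole search before giving up.
                      */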
1171                if (!forced++) {
1172                        trace_xfs_alloc_near_busy(args);
1173                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1174                        goto restart;
1175                }
1176                trace_xfs_alloc_size_neither(args);
1177                args->agbno = NULLAGBLOCK;
1178                return 0;
1179        }
1180
1181        /*
1182         * At this point we have selected a freespace entry, either to the
1183         * left or to the right.  If it's on the right, copy all the
1184         * useful variables to the "left" set so we only have one
1185         * copy of this code.
1186         */
1187        if (bno_cur_gt) {
1188                bno_cur_lt = bno_cur_gt;
1189                bno_cur_gt = NULL;
1190                ltbno = gtbno;
1191                ltbnoa = gtbnoa;
1192                ltlen = gtlen;
1193                ltlena = gtlena;
1194                j = 1;
1195        } else
1196                j = 0;
1197
1198        /*
1199         * Fix up the length and compute the useful address.
1200         */
1201        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1202        xfs_alloc_fix_len(args);
1203        if (!xfs_alloc_fix_minleft(args)) {
1204                trace_xfs_alloc_near_nominleft(args);
1205                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1206                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1207                return 0;
1208        }
1209        rlen = args->len;
1210        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1211                                     args->userdata, ltbnoa, ltlena, &ltnew);
1212        ASSERT(ltnew >= ltbno);
1213        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1214        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1215        args->agbno = ltnew;
1216
1217        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1218                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1219                goto error0;
1220
1221        if (j)
1222                trace_xfs_alloc_near_greater(args);
1223        else
1224                trace_xfs_alloc_near_lesser(args);
1225
1226        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1227        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1228        return 0;
1229
1230 error0:
1231        trace_xfs_alloc_near_error(args);
1232        if (cnt_cur != NULL)
1233                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1234        if (bno_cur_lt != NULL)
1235                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1236        if (bno_cur_gt != NULL)
1237                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1238        return error;
1239}
1240
1241/*
1242 * Allocate a variable extent anywhere in the allocation group agno.
1243 * Extent's length (returned in len) will be between minlen and maxlen,
1244 * and of the form k * prod + mod unless there's nothing that large.
1245 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1246 */
1247STATIC int                              /* error */
1248xfs_alloc_ag_vextent_size(
1249        xfs_alloc_arg_t *args)          /* allocation argument structure */
1250{
1251        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1252        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1253        int             error;          /* error result */
1254        xfs_agblock_t   fbno;           /* start of found freespace */
1255        xfs_extlen_t    flen;           /* length of found freespace */
1256        int             i;              /* temp status variable */
1257        xfs_agblock_t   rbno;           /* returned block number */
1258        xfs_extlen_t    rlen;           /* length of returned extent */
 1259        int             forced = 0;     /* busy extent retry count */
1260
1261restart:
1262        /*
1263         * Allocate and initialize a cursor for the by-size btree.
1264         */
1265        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1266                args->agno, XFS_BTNUM_CNT);
1267        bno_cur = NULL;
1268
1269        /*
1270         * Look for an entry >= maxlen+alignment-1 blocks.
1271         */
1272        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1273                        args->maxlen + args->alignment - 1, &i)))
1274                goto error0;
1275
1276        /*
1277         * If none or we have busy extents that we cannot allocate from, then
1278         * we have to settle for a smaller extent. In the case that there are
1279         * no large extents, this will return the last entry in the tree unless
1280         * the tree is empty. In the case that there are only busy large
1281         * extents, this will return the largest small extent unless there
1282         * are no smaller extents available.
1283         */
1284        if (!i || forced > 1) {
1285                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1286                                                   &fbno, &flen, &i);
1287                if (error)
1288                        goto error0;
1289                if (i == 0 || flen == 0) {
1290                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1291                        trace_xfs_alloc_size_noentry(args);
1292                        return 0;
1293                }
1294                ASSERT(i == 1);
1295                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1296        } else {
1297                /*
1298                 * Search for a non-busy extent that is large enough.
 1299                 * If we are at low space, don't check, or if we fall off
1300                 * the end of the btree, turn off the busy check and
1301                 * restart.
1302                 */
1303                for (;;) {
1304                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1305                        if (error)
1306                                goto error0;
1307                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1308
1309                        xfs_alloc_compute_aligned(args, fbno, flen,
1310                                                  &rbno, &rlen);
1311
1312                        if (rlen >= args->maxlen)
1313                                break;
1314
1315                        error = xfs_btree_increment(cnt_cur, 0, &i);
1316                        if (error)
1317                                goto error0;
1318                        if (i == 0) {
1319                                /*
1320                                 * Our only valid extents must have been busy.
1321                                 * Make it unbusy by forcing the log out and
1322                                 * retrying. If we've been here before, forcing
1323                                 * the log isn't making the extents available,
1324                                 * which means they have probably been freed in
1325                                 * this transaction.  In that case, we have to
1326                                 * give up on them and we'll attempt a minlen
1327                                 * allocation the next time around.
1328                                 */
1329                                xfs_btree_del_cursor(cnt_cur,
1330                                                     XFS_BTREE_NOERROR);
1331                                trace_xfs_alloc_size_busy(args);
1332                                if (!forced++)
1333                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1334                                goto restart;
1335                        }
1336                }
1337        }
1338
1339        /*
1340         * In the first case above, we got the last entry in the
1341         * by-size btree.  Now we check to see if the space hits maxlen
1342         * once aligned; if not, we search left for something better.
1343         * This can't happen in the second case above.
1344         */
1345        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1346        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1347                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1348        if (rlen < args->maxlen) {
1349                xfs_agblock_t   bestfbno;
1350                xfs_extlen_t    bestflen;
1351                xfs_agblock_t   bestrbno;
1352                xfs_extlen_t    bestrlen;
1353
1354                bestrlen = rlen;
1355                bestrbno = rbno;
1356                bestflen = flen;
1357                bestfbno = fbno;
1358                for (;;) {
1359                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1360                                goto error0;
1361                        if (i == 0)
1362                                break;
1363                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1364                                        &i)))
1365                                goto error0;
1366                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1367                        if (flen < bestrlen)
1368                                break;
1369                        xfs_alloc_compute_aligned(args, fbno, flen,
1370                                                  &rbno, &rlen);
1371                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1372                        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1373                                (rlen <= flen && rbno + rlen <= fbno + flen),
1374                                error0);
1375                        if (rlen > bestrlen) {
1376                                bestrlen = rlen;
1377                                bestrbno = rbno;
1378                                bestflen = flen;
1379                                bestfbno = fbno;
1380                                if (rlen == args->maxlen)
1381                                        break;
1382                        }
1383                }
1384                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1385                                &i)))
1386                        goto error0;
1387                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1388                rlen = bestrlen;
1389                rbno = bestrbno;
1390                flen = bestflen;
1391                fbno = bestfbno;
1392        }
1393        args->wasfromfl = 0;
1394        /*
1395         * Fix up the length.
1396         */
1397        args->len = rlen;
1398        if (rlen < args->minlen) {
1399                if (!forced++) {
1400                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1401                        trace_xfs_alloc_size_busy(args);
1402                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1403                        goto restart;
1404                }
1405                goto out_nominleft;
1406        }
1407        xfs_alloc_fix_len(args);
1408
1409        if (!xfs_alloc_fix_minleft(args))
1410                goto out_nominleft;
1411        rlen = args->len;
1412        XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1413        /*
1414         * Allocate and initialize a cursor for the by-block tree.
1415         */
1416        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1417                args->agno, XFS_BTNUM_BNO);
1418        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1419                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1420                goto error0;
1421        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1422        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1423        cnt_cur = bno_cur = NULL;
1424        args->len = rlen;
1425        args->agbno = rbno;
1426        XFS_WANT_CORRUPTED_GOTO(
1427                args->agbno + args->len <=
1428                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1429                error0);
1430        trace_xfs_alloc_size_done(args);
1431        return 0;
1432
1433error0:
1434        trace_xfs_alloc_size_error(args);
1435        if (cnt_cur)
1436                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1437        if (bno_cur)
1438                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1439        return error;
1440
1441out_nominleft:
1442        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1443        trace_xfs_alloc_size_nominleft(args);
1444        args->agbno = NULLAGBLOCK;
1445        return 0;
1446}
1447
1448/*
1449 * Deal with the case where only small freespaces remain.
1450 * Either return the contents of the last freespace record,
1451 * or allocate space from the freelist if there is nothing in the tree.
1452 */
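/*
 * On return, *stat is 1 when a record (possibly of zero length) from the
 * by-size btree is handed back through fbnop/flenp, and 0 when a single
 * block was taken straight from the AGFL instead; in the latter case
 * args->agbno, args->len and args->wasfromfl have already been filled in.
 */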
1453STATIC int                      /* error */
1454xfs_alloc_ag_vextent_small(
1455        xfs_alloc_arg_t *args,  /* allocation argument structure */
1456        xfs_btree_cur_t *ccur,  /* by-size cursor */
1457        xfs_agblock_t   *fbnop, /* result block number */
1458        xfs_extlen_t    *flenp, /* result length */
1459        int             *stat)  /* status: 0-freelist, 1-normal/none */
1460{
1461        int             error;
1462        xfs_agblock_t   fbno;
1463        xfs_extlen_t    flen;
1464        int             i;
1465
1466        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1467                goto error0;
1468        if (i) {
1469                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1470                        goto error0;
1471                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1472        }
1473        /*
1474         * Nothing in the btree, try the freelist.  Make sure
1475         * to respect minleft even when pulling from the
1476         * freelist.
1477         */
1478        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1479                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1480                  > args->minleft)) {
1481                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1482                if (error)
1483                        goto error0;
1484                if (fbno != NULLAGBLOCK) {
1485                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1486                                             args->userdata);
1487
1488                        if (args->userdata) {
1489                                xfs_buf_t       *bp;
1490
1491                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1492                                        args->agno, fbno, 0);
1493                                xfs_trans_binval(args->tp, bp);
1494                        }
1495                        args->len = 1;
1496                        args->agbno = fbno;
1497                        XFS_WANT_CORRUPTED_GOTO(
1498                                args->agbno + args->len <=
1499                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1500                                error0);
1501                        args->wasfromfl = 1;
1502                        trace_xfs_alloc_small_freelist(args);
1503                        *stat = 0;
1504                        return 0;
1505                }
1506                /*
1507                 * Nothing in the freelist.
1508                 */
1509                else
1510                        flen = 0;
1511        }
1512        /*
1513         * Can't allocate from the freelist for some reason.
1514         */
1515        else {
1516                fbno = NULLAGBLOCK;
1517                flen = 0;
1518        }
1519        /*
1520         * Can't do the allocation, give up.
1521         */
1522        if (flen < args->minlen) {
1523                args->agbno = NULLAGBLOCK;
1524                trace_xfs_alloc_small_notenough(args);
1525                flen = 0;
1526        }
1527        *fbnop = fbno;
1528        *flenp = flen;
1529        *stat = 1;
1530        trace_xfs_alloc_small_done(args);
1531        return 0;
1532
1533error0:
1534        trace_xfs_alloc_small_error(args);
1535        return error;
1536}
1537
1538/*
1539 * Free the extent starting at agno/bno for length.
1540 */
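/*
 * The freed extent is coalesced with any contiguous free space on either
 * side, giving the four cases handled below: both neighbors, left only,
 * right only, or neither.  The by-block (bno) tree entry is updated or
 * inserted accordingly, and the by-size (cnt) tree entry for the merged
 * extent is inserted after any stale entries have been deleted.
 */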
1541STATIC int                      /* error */
1542xfs_free_ag_extent(
1543        xfs_trans_t     *tp,    /* transaction pointer */
1544        xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
1545        xfs_agnumber_t  agno,   /* allocation group number */
1546        xfs_agblock_t   bno,    /* starting block number */
1547        xfs_extlen_t    len,    /* length of extent */
1548        int             isfl)   /* set if is freelist blocks - no sb acctg */
1549{
1550        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1551        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1552        int             error;          /* error return value */
1553        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1554        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1555        int             haveleft;       /* have a left neighbor block */
1556        int             haveright;      /* have a right neighbor block */
1557        int             i;              /* temp, result code */
1558        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1559        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1560        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1561        xfs_agblock_t   nbno;           /* new starting block of freespace */
1562        xfs_extlen_t    nlen;           /* new length of freespace */
1563        xfs_perag_t     *pag;           /* per allocation group data */
1564
1565        mp = tp->t_mountp;
1566        /*
1567         * Allocate and initialize a cursor for the by-block btree.
1568         */
1569        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1570        cnt_cur = NULL;
1571        /*
1572         * Look for a neighboring block on the left (lower block numbers)
1573         * that is contiguous with this space.
1574         */
1575        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1576                goto error0;
1577        if (haveleft) {
1578                /*
1579                 * There is a block to our left.
1580                 */
1581                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1582                        goto error0;
1583                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1584                /*
1585                 * It's not contiguous, though.
1586                 */
1587                if (ltbno + ltlen < bno)
1588                        haveleft = 0;
1589                else {
1590                        /*
1591                         * If this failure happens the request to free this
1592                         * space was invalid, it's (partly) already free.
1593                         * Very bad.
1594                         */
1595                        XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1596                }
1597        }
1598        /*
1599         * Look for a neighboring block on the right (higher block numbers)
1600         * that is contiguous with this space.
1601         */
1602        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1603                goto error0;
1604        if (haveright) {
1605                /*
1606                 * There is a block to our right.
1607                 */
1608                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1609                        goto error0;
1610                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1611                /*
1612                 * It's not contiguous, though.
1613                 */
1614                if (bno + len < gtbno)
1615                        haveright = 0;
1616                else {
1617                        /*
1618                         * If this failure happens the request to free this
1619                         * space was invalid, it's (partly) already free.
1620                         * Very bad.
1621                         */
1622                        XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1623                }
1624        }
1625        /*
1626         * Now allocate and initialize a cursor for the by-size tree.
1627         */
1628        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1629        /*
1630         * Have both left and right contiguous neighbors.
1631         * Merge all three into a single free block.
1632         */
1633        if (haveleft && haveright) {
1634                /*
1635                 * Delete the old by-size entry on the left.
1636                 */
1637                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1638                        goto error0;
1639                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1640                if ((error = xfs_btree_delete(cnt_cur, &i)))
1641                        goto error0;
1642                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1643                /*
1644                 * Delete the old by-size entry on the right.
1645                 */
1646                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1647                        goto error0;
1648                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1649                if ((error = xfs_btree_delete(cnt_cur, &i)))
1650                        goto error0;
1651                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1652                /*
1653                 * Delete the old by-block entry for the right block.
1654                 */
1655                if ((error = xfs_btree_delete(bno_cur, &i)))
1656                        goto error0;
1657                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1658                /*
1659                 * Move the by-block cursor back to the left neighbor.
1660                 */
1661                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1662                        goto error0;
1663                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1664#ifdef DEBUG
1665                /*
1666                 * Check that this is the right record: delete didn't
1667                 * mangle the cursor.
1668                 */
1669                {
1670                        xfs_agblock_t   xxbno;
1671                        xfs_extlen_t    xxlen;
1672
1673                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1674                                        &i)))
1675                                goto error0;
1676                        XFS_WANT_CORRUPTED_GOTO(
1677                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1678                                error0);
1679                }
1680#endif
1681                /*
1682                 * Update remaining by-block entry to the new, joined block.
1683                 */
1684                nbno = ltbno;
1685                nlen = len + ltlen + gtlen;
1686                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1687                        goto error0;
1688        }
1689        /*
1690         * Have only a left contiguous neighbor.
1691         * Merge it together with the new freespace.
1692         */
1693        else if (haveleft) {
1694                /*
1695                 * Delete the old by-size entry on the left.
1696                 */
1697                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1698                        goto error0;
1699                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1700                if ((error = xfs_btree_delete(cnt_cur, &i)))
1701                        goto error0;
1702                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1703                /*
1704                 * Back up the by-block cursor to the left neighbor, and
1705                 * update its length.
1706                 */
1707                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1708                        goto error0;
1709                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1710                nbno = ltbno;
1711                nlen = len + ltlen;
1712                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1713                        goto error0;
1714        }
1715        /*
1716         * Have only a right contiguous neighbor.
1717         * Merge it together with the new freespace.
1718         */
1719        else if (haveright) {
1720                /*
1721                 * Delete the old by-size entry on the right.
1722                 */
1723                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1724                        goto error0;
1725                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1726                if ((error = xfs_btree_delete(cnt_cur, &i)))
1727                        goto error0;
1728                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1729                /*
1730                 * Update the starting block and length of the right
1731                 * neighbor in the by-block tree.
1732                 */
1733                nbno = bno;
1734                nlen = len + gtlen;
1735                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1736                        goto error0;
1737        }
1738        /*
1739         * No contiguous neighbors.
1740         * Insert the new freespace into the by-block tree.
1741         */
1742        else {
1743                nbno = bno;
1744                nlen = len;
1745                if ((error = xfs_btree_insert(bno_cur, &i)))
1746                        goto error0;
1747                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1748        }
1749        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1750        bno_cur = NULL;
1751        /*
1752         * In all cases we need to insert the new freespace in the by-size tree.
1753         */
1754        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1755                goto error0;
1756        XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1757        if ((error = xfs_btree_insert(cnt_cur, &i)))
1758                goto error0;
1759        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1760        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1761        cnt_cur = NULL;
1762
1763        /*
1764         * Update the freespace totals in the ag and superblock.
1765         */
1766        pag = xfs_perag_get(mp, agno);
1767        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1768        xfs_perag_put(pag);
1769        if (error)
1770                goto error0;
1771
1772        if (!isfl)
1773                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1774        XFS_STATS_INC(xs_freex);
1775        XFS_STATS_ADD(xs_freeb, len);
1776
1777        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1778
1779        return 0;
1780
1781 error0:
1782        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1783        if (bno_cur)
1784                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1785        if (cnt_cur)
1786                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1787        return error;
1788}
1789
1790/*
1791 * Visible (exported) allocation/free functions.
1792 * Some of these are used just by xfs_alloc_btree.c and this file.
1793 */
1794
1795/*
1796 * Compute and fill in value of m_ag_maxlevels.
1797 */
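/*
 * Worked example with purely illustrative numbers (not taken from any
 * particular filesystem geometry): for an AG of 1048576 blocks with
 * m_alloc_mnr[0] = 255 and m_alloc_mnr[1] = 170, maxleafents is 524288,
 * which needs 2057 leaf blocks, then 13 level-1 node blocks, then a
 * single root, so m_ag_maxlevels ends up as 3.
 */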
1798void
1799xfs_alloc_compute_maxlevels(
1800        xfs_mount_t     *mp)    /* file system mount structure */
1801{
1802        int             level;
1803        uint            maxblocks;
1804        uint            maxleafents;
1805        int             minleafrecs;
1806        int             minnoderecs;
1807
1808        maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1809        minleafrecs = mp->m_alloc_mnr[0];
1810        minnoderecs = mp->m_alloc_mnr[1];
1811        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1812        for (level = 1; maxblocks > 1; level++)
1813                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1814        mp->m_ag_maxlevels = level;
1815}
1816
1817/*
1818 * Find the length of the longest extent in an AG.
1819 */
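/*
 * The longest extent reported to callers is reduced by the number of
 * blocks the AG may still need to refill its freelist (delta), since
 * those blocks are not really available.  If only freelist blocks or a
 * smaller remainder is left, we return 1 rather than 0 so the AG is not
 * treated as completely empty.
 */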
1820xfs_extlen_t
1821xfs_alloc_longest_free_extent(
1822        struct xfs_mount        *mp,
1823        struct xfs_perag        *pag)
1824{
1825        xfs_extlen_t            need, delta = 0;
1826
1827        need = XFS_MIN_FREELIST_PAG(pag, mp);
1828        if (need > pag->pagf_flcount)
1829                delta = need - pag->pagf_flcount;
1830
1831        if (pag->pagf_longest > delta)
1832                return pag->pagf_longest - delta;
1833        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1834}
1835
1836/*
1837 * Decide whether to use this allocation group for this allocation.
1838 * If so, fix up the btree freelist's size.
1839 */
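/*
 * In outline: read the AGF if the perag is not yet initialised, reject
 * the AG early if it clearly cannot satisfy the request, then shrink the
 * freelist by freeing blocks back to the AG if it is longer than
 * XFS_MIN_FREELIST requires, or grow it by allocating blocks within the
 * AG (XFS_ALLOCTYPE_THIS_AG) and putting them on the AGFL one at a time
 * if it is too short.
 */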
1840STATIC int                      /* error */
1841xfs_alloc_fix_freelist(
1842        xfs_alloc_arg_t *args,  /* allocation argument structure */
1843        int             flags)  /* XFS_ALLOC_FLAG_... */
1844{
1845        xfs_buf_t       *agbp;  /* agf buffer pointer */
1846        xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
1847        xfs_buf_t       *agflbp;/* agfl buffer pointer */
1848        xfs_agblock_t   bno;    /* freelist block */
1849        xfs_extlen_t    delta;  /* new blocks needed in freelist */
1850        int             error;  /* error result code */
1851        xfs_extlen_t    longest;/* longest extent in allocation group */
1852        xfs_mount_t     *mp;    /* file system mount point structure */
1853        xfs_extlen_t    need;   /* total blocks needed in freelist */
1854        xfs_perag_t     *pag;   /* per-ag information structure */
1855        xfs_alloc_arg_t targs;  /* local allocation arguments */
1856        xfs_trans_t     *tp;    /* transaction pointer */
1857
1858        mp = args->mp;
1859
1860        pag = args->pag;
1861        tp = args->tp;
1862        if (!pag->pagf_init) {
1863                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1864                                &agbp)))
1865                        return error;
1866                if (!pag->pagf_init) {
1867                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1868                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1869                        args->agbp = NULL;
1870                        return 0;
1871                }
1872        } else
1873                agbp = NULL;
1874
1875        /*
1876         * If this is a metadata preferred pag and we are user data
1877         * then try somewhere else if we are not being asked to
1878         * try harder at this point
1879         */
1880        if (pag->pagf_metadata && args->userdata &&
1881            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1882                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1883                args->agbp = NULL;
1884                return 0;
1885        }
1886
1887        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1888                /*
1889                 * If it looks like there isn't a long enough extent, or enough
1890                 * total blocks, reject it.
1891                 */
1892                need = XFS_MIN_FREELIST_PAG(pag, mp);
1893                longest = xfs_alloc_longest_free_extent(mp, pag);
1894                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1895                                longest ||
1896                    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1897                           need - args->total) < (int)args->minleft)) {
1898                        if (agbp)
1899                                xfs_trans_brelse(tp, agbp);
1900                        args->agbp = NULL;
1901                        return 0;
1902                }
1903        }
1904
1905        /*
1906         * Get the a.g. freespace buffer.
1907         * Can fail if we're not blocking on locks, and it's held.
1908         */
1909        if (agbp == NULL) {
1910                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1911                                &agbp)))
1912                        return error;
1913                if (agbp == NULL) {
1914                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1915                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1916                        args->agbp = NULL;
1917                        return 0;
1918                }
1919        }
1920        /*
1921         * Figure out how many blocks we should have in the freelist.
1922         */
1923        agf = XFS_BUF_TO_AGF(agbp);
1924        need = XFS_MIN_FREELIST(agf, mp);
1925        /*
1926         * If there isn't enough total space or a long enough extent, reject it.
1927         */
1928        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1929                delta = need > be32_to_cpu(agf->agf_flcount) ?
1930                        (need - be32_to_cpu(agf->agf_flcount)) : 0;
1931                longest = be32_to_cpu(agf->agf_longest);
1932                longest = (longest > delta) ? (longest - delta) :
1933                        (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1934                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1935                                longest ||
1936                    ((int)(be32_to_cpu(agf->agf_freeblks) +
1937                     be32_to_cpu(agf->agf_flcount) - need - args->total) <
1938                                (int)args->minleft)) {
1939                        xfs_trans_brelse(tp, agbp);
1940                        args->agbp = NULL;
1941                        return 0;
1942                }
1943        }
1944        /*
1945         * Make the freelist shorter if it's too long.
1946         */
1947        while (be32_to_cpu(agf->agf_flcount) > need) {
1948                xfs_buf_t       *bp;
1949
1950                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1951                if (error)
1952                        return error;
1953                if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1954                        return error;
1955                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1956                xfs_trans_binval(tp, bp);
1957        }
1958        /*
1959         * Initialize the args structure.
1960         */
1961        memset(&targs, 0, sizeof(targs));
1962        targs.tp = tp;
1963        targs.mp = mp;
1964        targs.agbp = agbp;
1965        targs.agno = args->agno;
1966        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1967        targs.type = XFS_ALLOCTYPE_THIS_AG;
1968        targs.pag = pag;
1969        if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1970                return error;
1971        /*
1972         * Make the freelist longer if it's too short.
1973         */
1974        while (be32_to_cpu(agf->agf_flcount) < need) {
1975                targs.agbno = 0;
1976                targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1977                /*
1978                 * Allocate as many blocks as possible at once.
1979                 */
1980                if ((error = xfs_alloc_ag_vextent(&targs))) {
1981                        xfs_trans_brelse(tp, agflbp);
1982                        return error;
1983                }
1984                /*
1985                 * Stop if we run out.  Won't happen if callers are obeying
1986                 * the restrictions correctly.  Can happen for free calls
1987                 * on a completely full ag.
1988                 */
1989                if (targs.agbno == NULLAGBLOCK) {
1990                        if (flags & XFS_ALLOC_FLAG_FREEING)
1991                                break;
1992                        xfs_trans_brelse(tp, agflbp);
1993                        args->agbp = NULL;
1994                        return 0;
1995                }
1996                /*
1997                 * Put each allocated block on the list.
1998                 */
1999                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2000                        error = xfs_alloc_put_freelist(tp, agbp,
2001                                                        agflbp, bno, 0);
2002                        if (error)
2003                                return error;
2004                }
2005        }
2006        xfs_trans_brelse(tp, agflbp);
2007        args->agbp = agbp;
2008        return 0;
2009}
2010
2011/*
2012 * Get a block from the freelist.
2013 * Returns with the buffer for the block gotten.
2014 */
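/*
 * The AGFL is a circular array of block numbers: agf_flfirst indexes the
 * next block to hand out and wraps back to zero once it reaches
 * XFS_AGFL_SIZE, while agf_flcount tracks how many entries are currently
 * on the list.
 */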
2015int                             /* error */
2016xfs_alloc_get_freelist(
2017        xfs_trans_t     *tp,    /* transaction pointer */
2018        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2019        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2020        int             btreeblk) /* destination is an AGF btree */
2021{
2022        xfs_agf_t       *agf;   /* a.g. freespace structure */
2023        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2024        xfs_agblock_t   bno;    /* block number returned */
2025        __be32          *agfl_bno;
2026        int             error;
2027        int             logflags;
2028        xfs_mount_t     *mp = tp->t_mountp;
2029        xfs_perag_t     *pag;   /* per allocation group data */
2030
2031        /*
2032         * Freelist is empty, give up.
2033         */
2034        agf = XFS_BUF_TO_AGF(agbp);
2035        if (!agf->agf_flcount) {
2036                *bnop = NULLAGBLOCK;
2037                return 0;
2038        }
2039        /*
2040         * Read the array of free blocks.
2041         */
2042        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2043                                    &agflbp);
2044        if (error)
2045                return error;
2046
2048        /*
2049         * Get the block number and update the data structures.
2050         */
2051        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2052        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2053        be32_add_cpu(&agf->agf_flfirst, 1);
2054        xfs_trans_brelse(tp, agflbp);
2055        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2056                agf->agf_flfirst = 0;
2057
2058        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2059        be32_add_cpu(&agf->agf_flcount, -1);
2060        xfs_trans_agflist_delta(tp, -1);
2061        pag->pagf_flcount--;
2062
2063        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2064        if (btreeblk) {
2065                be32_add_cpu(&agf->agf_btreeblks, 1);
2066                pag->pagf_btreeblks++;
2067                logflags |= XFS_AGF_BTREEBLKS;
2068        }
2069        xfs_perag_put(pag);
2070
2071        xfs_alloc_log_agf(tp, agbp, logflags);
2072        *bnop = bno;
2073
2074        return 0;
2075}
2076
2077/*
2078 * Log the given fields from the agf structure.
2079 */
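/*
 * The offsets[] table below maps each XFS_AGF_* field bit to its byte
 * offset in the on-disk AGF; xfs_btree_offsets() turns the requested
 * field mask into the first/last byte range covering all of the dirtied
 * fields, which is what gets logged.
 */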
2080void
2081xfs_alloc_log_agf(
2082        xfs_trans_t     *tp,    /* transaction pointer */
2083        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2084        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2085{
2086        int     first;          /* first byte offset */
2087        int     last;           /* last byte offset */
2088        static const short      offsets[] = {
2089                offsetof(xfs_agf_t, agf_magicnum),
2090                offsetof(xfs_agf_t, agf_versionnum),
2091                offsetof(xfs_agf_t, agf_seqno),
2092                offsetof(xfs_agf_t, agf_length),
2093                offsetof(xfs_agf_t, agf_roots[0]),
2094                offsetof(xfs_agf_t, agf_levels[0]),
2095                offsetof(xfs_agf_t, agf_flfirst),
2096                offsetof(xfs_agf_t, agf_fllast),
2097                offsetof(xfs_agf_t, agf_flcount),
2098                offsetof(xfs_agf_t, agf_freeblks),
2099                offsetof(xfs_agf_t, agf_longest),
2100                offsetof(xfs_agf_t, agf_btreeblks),
2101                offsetof(xfs_agf_t, agf_uuid),
2102                sizeof(xfs_agf_t)
2103        };
2104
2105        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2106
2107        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2108
2109        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2110        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2111}
2112
2113/*
2114 * Interface for inode allocation to force the pag data to be initialized.
2115 */
2116int                                     /* error */
2117xfs_alloc_pagf_init(
2118        xfs_mount_t             *mp,    /* file system mount structure */
2119        xfs_trans_t             *tp,    /* transaction pointer */
2120        xfs_agnumber_t          agno,   /* allocation group number */
2121        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2122{
2123        xfs_buf_t               *bp;
2124        int                     error;
2125
2126        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2127                return error;
2128        if (bp)
2129                xfs_trans_brelse(tp, bp);
2130        return 0;
2131}
2132
2133/*
2134 * Put the block on the freelist for the allocation group.
2135 */
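/*
 * Counterpart of xfs_alloc_get_freelist(): agf_fllast is advanced
 * (wrapping at XFS_AGFL_SIZE) and the freed block number is written into
 * that slot of the AGFL, with both the AGF and AGFL buffers logged.
 */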
2136int                                     /* error */
2137xfs_alloc_put_freelist(
2138        xfs_trans_t             *tp,    /* transaction pointer */
2139        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2140        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2141        xfs_agblock_t           bno,    /* block being freed */
2142        int                     btreeblk) /* block came from an AGF btree */
2143{
2144        xfs_agf_t               *agf;   /* a.g. freespace structure */
2145        __be32                  *blockp;/* pointer to array entry */
2146        int                     error;
2147        int                     logflags;
2148        xfs_mount_t             *mp;    /* mount structure */
2149        xfs_perag_t             *pag;   /* per allocation group data */
2150        __be32                  *agfl_bno;
2151        int                     startoff;
2152
2153        agf = XFS_BUF_TO_AGF(agbp);
2154        mp = tp->t_mountp;
2155
2156        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2157                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2158                return error;
2159        be32_add_cpu(&agf->agf_fllast, 1);
2160        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2161                agf->agf_fllast = 0;
2162
2163        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2164        be32_add_cpu(&agf->agf_flcount, 1);
2165        xfs_trans_agflist_delta(tp, 1);
2166        pag->pagf_flcount++;
2167
2168        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2169        if (btreeblk) {
2170                be32_add_cpu(&agf->agf_btreeblks, -1);
2171                pag->pagf_btreeblks--;
2172                logflags |= XFS_AGF_BTREEBLKS;
2173        }
2174        xfs_perag_put(pag);
2175
2176        xfs_alloc_log_agf(tp, agbp, logflags);
2177
2178        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2179
2180        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2181        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2182        *blockp = cpu_to_be32(bno);
2183        startoff = (char *)blockp - (char *)agflbp->b_addr;
2184
2185        xfs_alloc_log_agf(tp, agbp, logflags);
2186
2187        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2188        xfs_trans_log_buf(tp, agflbp, startoff,
2189                          startoff + sizeof(xfs_agblock_t) - 1);
2190        return 0;
2191}
2192
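/*
 * Sanity-check an AGF before it is used or written back: the UUID (on
 * CRC-enabled filesystems), magic number, version, freelist indices and
 * counters must all be within bounds, and the sequence number must match
 * the per-AG structure when one is attached to the buffer.
 */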
2193static bool
2194xfs_agf_verify(
2195        struct xfs_mount *mp,
2196        struct xfs_buf  *bp)
2197{
2198        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2199
2200        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2201            !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2202                return false;
2203
2204        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2205              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2206              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2207              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2208              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2209              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2210                return false;
2211
2212        /*
2213         * During growfs operations the perag is not fully initialised, so we
2214         * can't use it for any useful checking.  growfs ensures this by using
2215         * uncached buffers that don't have a perag attached, which lets us
2216         * detect and avoid the problem here.
2217         */
2218        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2219                return false;
2220
2221        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2222            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2223                return false;
2224
2225        return true;
2226
2227}
2228
2229static void
2230xfs_agf_read_verify(
2231        struct xfs_buf  *bp)
2232{
2233        struct xfs_mount *mp = bp->b_target->bt_mount;
2234
2235        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2236            !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2237                xfs_buf_ioerror(bp, EFSBADCRC);
2238        else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2239                                XFS_ERRTAG_ALLOC_READ_AGF,
2240                                XFS_RANDOM_ALLOC_READ_AGF))
2241                xfs_buf_ioerror(bp, EFSCORRUPTED);
2242
2243        if (bp->b_error)
2244                xfs_verifier_error(bp);
2245}
2246
2247static void
2248xfs_agf_write_verify(
2249        struct xfs_buf  *bp)
2250{
2251        struct xfs_mount *mp = bp->b_target->bt_mount;
2252        struct xfs_buf_log_item *bip = bp->b_fspriv;
2253
2254        if (!xfs_agf_verify(mp, bp)) {
2255                xfs_buf_ioerror(bp, EFSCORRUPTED);
2256                xfs_verifier_error(bp);
2257                return;
2258        }
2259
2260        if (!xfs_sb_version_hascrc(&mp->m_sb))
2261                return;
2262
2263        if (bip)
2264                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2265
2266        xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2267}
2268
2269const struct xfs_buf_ops xfs_agf_buf_ops = {
2270        .verify_read = xfs_agf_read_verify,
2271        .verify_write = xfs_agf_write_verify,
2272};
2273
2274/*
2275 * Read in the allocation group header (free/alloc section).
2276 */
2277int                                     /* error */
2278xfs_read_agf(
2279        struct xfs_mount        *mp,    /* mount point structure */
2280        struct xfs_trans        *tp,    /* transaction pointer */
2281        xfs_agnumber_t          agno,   /* allocation group number */
2282        int                     flags,  /* XFS_BUF_ */
2283        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2284{
2285        int             error;
2286
2287        trace_xfs_read_agf(mp, agno);
2288
2289        ASSERT(agno != NULLAGNUMBER);
2290        error = xfs_trans_read_buf(
2291                        mp, tp, mp->m_ddev_targp,
2292                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2293                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2294        if (error)
2295                return error;
2296        if (!*bpp)
2297                return 0;
2298
2299        ASSERT(!(*bpp)->b_error);
2300        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2301        return 0;
2302}
2303
2304/*
2305 * Read in the allocation group header (free/alloc section).
2306 */
2307int                                     /* error */
2308xfs_alloc_read_agf(
2309        struct xfs_mount        *mp,    /* mount point structure */
2310        struct xfs_trans        *tp,    /* transaction pointer */
2311        xfs_agnumber_t          agno,   /* allocation group number */
2312        int                     flags,  /* XFS_ALLOC_FLAG_... */
2313        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2314{
2315        struct xfs_agf          *agf;           /* ag freelist header */
2316        struct xfs_perag        *pag;           /* per allocation group data */
2317        int                     error;
2318
2319        trace_xfs_alloc_read_agf(mp, agno);
2320
2321        ASSERT(agno != NULLAGNUMBER);
2322        error = xfs_read_agf(mp, tp, agno,
2323                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2324                        bpp);
2325        if (error)
2326                return error;
2327        if (!*bpp)
2328                return 0;
2329        ASSERT(!(*bpp)->b_error);
2330
2331        agf = XFS_BUF_TO_AGF(*bpp);
2332        pag = xfs_perag_get(mp, agno);
2333        if (!pag->pagf_init) {
2334                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2335                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2336                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2337                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2338                pag->pagf_levels[XFS_BTNUM_BNOi] =
2339                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2340                pag->pagf_levels[XFS_BTNUM_CNTi] =
2341                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2342                spin_lock_init(&pag->pagb_lock);
2343                pag->pagb_count = 0;
2344                pag->pagb_tree = RB_ROOT;
2345                pag->pagf_init = 1;
2346        }
2347#ifdef DEBUG
2348        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2349                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2350                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2351                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2352                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2353                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2354                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2355                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2356                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2357        }
2358#endif
2359        xfs_perag_put(pag);
2360        return 0;
2361}
2362
2363/*
2364 * Allocate an extent (variable-size).
2365 * Depending on the allocation type, we either look in a single allocation
2366 * group or loop over the allocation groups to find the result.
2367 */
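/*
 * A rough sketch of how a caller might fill the argument structure, using
 * hypothetical values (real callers such as the bmap code also set fields
 * like total and minleft):
 *
 *        memset(&args, 0, sizeof(args));
 *        args.tp = tp;
 *        args.mp = mp;
 *        args.fsbno = target_fsbno;              (hypothetical target block)
 *        args.type = XFS_ALLOCTYPE_START_BNO;
 *        args.minlen = 1;
 *        args.maxlen = wanted_len;               (hypothetical length)
 *        args.prod = 1;
 *        args.alignment = 1;
 *        error = xfs_alloc_vextent(&args);
 *
 * On return args.fsbno is either the start of the allocated extent or
 * NULLFSBLOCK, and args.len holds the length actually allocated.
 */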
2368int                             /* error */
2369xfs_alloc_vextent(
2370        xfs_alloc_arg_t *args)  /* allocation argument structure */
2371{
2372        xfs_agblock_t   agsize; /* allocation group size */
2373        int             error;
2374        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2375        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2376        xfs_mount_t     *mp;    /* mount structure pointer */
2377        xfs_agnumber_t  sagno;  /* starting allocation group number */
2378        xfs_alloctype_t type;   /* input allocation type */
2379        int             bump_rotor = 0;
2380        int             no_min = 0;
2381        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2382
2383        mp = args->mp;
2384        type = args->otype = args->type;
2385        args->agbno = NULLAGBLOCK;
2386        /*
2387         * Just fix this up, for the case where the last a.g. is shorter
2388         * (or there's only one a.g.) and the caller couldn't easily figure
2389         * that out (xfs_bmap_alloc).
2390         */
2391        agsize = mp->m_sb.sb_agblocks;
2392        if (args->maxlen > agsize)
2393                args->maxlen = agsize;
2394        if (args->alignment == 0)
2395                args->alignment = 1;
2396        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2397        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2398        ASSERT(args->minlen <= args->maxlen);
2399        ASSERT(args->minlen <= agsize);
2400        ASSERT(args->mod < args->prod);
2401        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2402            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2403            args->minlen > args->maxlen || args->minlen > agsize ||
2404            args->mod >= args->prod) {
2405                args->fsbno = NULLFSBLOCK;
2406                trace_xfs_alloc_vextent_badargs(args);
2407                return 0;
2408        }
2409        minleft = args->minleft;
2410
2411        switch (type) {
2412        case XFS_ALLOCTYPE_THIS_AG:
2413        case XFS_ALLOCTYPE_NEAR_BNO:
2414        case XFS_ALLOCTYPE_THIS_BNO:
2415                /*
2416                 * These three force us into a single a.g.
2417                 */
2418                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2419                args->pag = xfs_perag_get(mp, args->agno);
2420                args->minleft = 0;
2421                error = xfs_alloc_fix_freelist(args, 0);
2422                args->minleft = minleft;
2423                if (error) {
2424                        trace_xfs_alloc_vextent_nofix(args);
2425                        goto error0;
2426                }
2427                if (!args->agbp) {
2428                        trace_xfs_alloc_vextent_noagbp(args);
2429                        break;
2430                }
2431                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2432                if ((error = xfs_alloc_ag_vextent(args)))
2433                        goto error0;
2434                break;
2435        case XFS_ALLOCTYPE_START_BNO:
2436                /*
2437                 * Try near allocation first, then anywhere-in-ag after
2438                 * the first a.g. fails.
2439                 */
2440                if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2441                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2442                        args->fsbno = XFS_AGB_TO_FSB(mp,
2443                                        ((mp->m_agfrotor / rotorstep) %
2444                                        mp->m_sb.sb_agcount), 0);
2445                        bump_rotor = 1;
2446                }
2447                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2448                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2449                /* FALLTHROUGH */
2450        case XFS_ALLOCTYPE_ANY_AG:
2451        case XFS_ALLOCTYPE_START_AG:
2452        case XFS_ALLOCTYPE_FIRST_AG:
2453                /*
2454                 * Rotate through the allocation groups looking for a winner.
2455                 */
2456                if (type == XFS_ALLOCTYPE_ANY_AG) {
2457                        /*
2458                         * Start with the last place we left off.
2459                         */
2460                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2461                                        mp->m_sb.sb_agcount;
2462                        args->type = XFS_ALLOCTYPE_THIS_AG;
2463                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2464                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2465                        /*
2466                         * Start with allocation group given by bno.
2467                         */
2468                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2469                        args->type = XFS_ALLOCTYPE_THIS_AG;
2470                        sagno = 0;
2471                        flags = 0;
2472                } else {
2473                        if (type == XFS_ALLOCTYPE_START_AG)
2474                                args->type = XFS_ALLOCTYPE_THIS_AG;
2475                        /*
2476                         * Start with the given allocation group.
2477                         */
2478                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2479                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2480                }
2481                /*
2482                 * Loop over allocation groups twice; first time with
2483                 * trylock set, second time without.
2484                 */
2485                for (;;) {
2486                        args->pag = xfs_perag_get(mp, args->agno);
2487                        if (no_min) args->minleft = 0;
2488                        error = xfs_alloc_fix_freelist(args, flags);
2489                        args->minleft = minleft;
2490                        if (error) {
2491                                trace_xfs_alloc_vextent_nofix(args);
2492                                goto error0;
2493                        }
2494                        /*
2495                         * If we get a buffer back then the allocation will fly.
2496                         */
2497                        if (args->agbp) {
2498                                if ((error = xfs_alloc_ag_vextent(args)))
2499                                        goto error0;
2500                                break;
2501                        }
2502
2503                        trace_xfs_alloc_vextent_loopfailed(args);
2504
2505                        /*
2506                         * Didn't work, figure out the next iteration.
2507                         */
2508                        if (args->agno == sagno &&
2509                            type == XFS_ALLOCTYPE_START_BNO)
2510                                args->type = XFS_ALLOCTYPE_THIS_AG;
2511                        /*
2512                         * For the first allocation, we can try any AG to get
2513                         * space.  However, if we already have allocated a
2514                         * block, we don't want to try AGs whose number is below
2515                         * sagno. Otherwise, we may end up with out-of-order
2516                         * locking of AGF, which might cause deadlock.
2517                         */
2518                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2519                                if (args->firstblock != NULLFSBLOCK)
2520                                        args->agno = sagno;
2521                                else
2522                                        args->agno = 0;
2523                        }
2524                        /*
2525                         * Reached the starting a.g., must either be done
2526                         * or switch to non-trylock mode.
2527                         */
2528                        if (args->agno == sagno) {
2529                                if (no_min == 1) {
2530                                        args->agbno = NULLAGBLOCK;
2531                                        trace_xfs_alloc_vextent_allfailed(args);
2532                                        break;
2533                                }
2534                                if (flags == 0) {
2535                                        no_min = 1;
2536                                } else {
2537                                        flags = 0;
2538                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2539                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2540                                                        args->fsbno);
2541                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2542                                        }
2543                                }
2544                        }
2545                        xfs_perag_put(args->pag);
2546                }
2547                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2548                        if (args->agno == sagno)
2549                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2550                                        (mp->m_sb.sb_agcount * rotorstep);
2551                        else
2552                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2553                                        (mp->m_sb.sb_agcount * rotorstep);
2554                }
2555                break;
2556        default:
2557                ASSERT(0);
2558                /* NOTREACHED */
2559        }
2560        if (args->agbno == NULLAGBLOCK)
2561                args->fsbno = NULLFSBLOCK;
2562        else {
2563                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2564#ifdef DEBUG
2565                ASSERT(args->len >= args->minlen);
2566                ASSERT(args->len <= args->maxlen);
2567                ASSERT(args->agbno % args->alignment == 0);
2568                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2569                        args->len);
2570#endif
2571        }
2572        xfs_perag_put(args->pag);
2573        return 0;
2574error0:
2575        xfs_perag_put(args->pag);
2576        return error;
2577}
2578
2579/*
2580 * Free an extent.
2581 * Just break up the extent address and hand off to xfs_free_ag_extent
2582 * after fixing up the freelist.
2583 */
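/*
 * Note that on success the extent is also recorded as busy via
 * xfs_extent_busy_insert(), so it will not be reallocated and overwritten
 * before the free has been committed to the log.
 */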
2584int                             /* error */
2585xfs_free_extent(
2586        xfs_trans_t     *tp,    /* transaction pointer */
2587        xfs_fsblock_t   bno,    /* starting block number of extent */
2588        xfs_extlen_t    len)    /* length of extent */
2589{
2590        xfs_alloc_arg_t args;
2591        int             error;
2592
2593        ASSERT(len != 0);
2594        memset(&args, 0, sizeof(xfs_alloc_arg_t));
2595        args.tp = tp;
2596        args.mp = tp->t_mountp;
2597
2598        /*
2599         * validate that the block number is legal - this enables us to detect
2600         * and handle silent filesystem corruption rather than crashing.
2601         */
2602        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2603        if (args.agno >= args.mp->m_sb.sb_agcount)
2604                return EFSCORRUPTED;
2605
2606        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2607        if (args.agbno >= args.mp->m_sb.sb_agblocks)
2608                return EFSCORRUPTED;
2609
2610        args.pag = xfs_perag_get(args.mp, args.agno);
2611        ASSERT(args.pag);
2612
2613        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2614        if (error)
2615                goto error0;
2616
2617        /* validate the extent size is legal now that we have the agf locked */
2618        if (args.agbno + len >
2619                        be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2620                error = EFSCORRUPTED;
2621                goto error0;
2622        }
2623
2624        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2625        if (!error)
2626                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2627error0:
2628        xfs_perag_put(args.pag);
2629        return error;
2630}
2631