linux/fs/xfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_mount.h"
  27#include "xfs_bmap_btree.h"
  28#include "xfs_alloc_btree.h"
  29#include "xfs_ialloc_btree.h"
  30#include "xfs_dinode.h"
  31#include "xfs_inode.h"
  32#include "xfs_btree.h"
  33#include "xfs_alloc.h"
  34#include "xfs_extent_busy.h"
  35#include "xfs_error.h"
  36#include "xfs_cksum.h"
  37#include "xfs_trace.h"
  38#include "xfs_buf_item.h"
  39
  40struct workqueue_struct *xfs_alloc_wq;
  41
  42#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  43
  44#define XFSA_FIXUP_BNO_OK       1
  45#define XFSA_FIXUP_CNT_OK       2
  46
  47STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  48STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  49STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  50STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  51                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  52
  53/*
  54 * Lookup the record equal to [bno, len] in the btree given by cur.
  55 */
  56STATIC int                              /* error */
  57xfs_alloc_lookup_eq(
  58        struct xfs_btree_cur    *cur,   /* btree cursor */
  59        xfs_agblock_t           bno,    /* starting block of extent */
  60        xfs_extlen_t            len,    /* length of extent */
  61        int                     *stat)  /* success/failure */
  62{
  63        cur->bc_rec.a.ar_startblock = bno;
  64        cur->bc_rec.a.ar_blockcount = len;
  65        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  66}
  67
  68/*
  69 * Lookup the first record greater than or equal to [bno, len]
  70 * in the btree given by cur.
  71 */
  72int                             /* error */
  73xfs_alloc_lookup_ge(
  74        struct xfs_btree_cur    *cur,   /* btree cursor */
  75        xfs_agblock_t           bno,    /* starting block of extent */
  76        xfs_extlen_t            len,    /* length of extent */
  77        int                     *stat)  /* success/failure */
  78{
  79        cur->bc_rec.a.ar_startblock = bno;
  80        cur->bc_rec.a.ar_blockcount = len;
  81        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  82}
  83
  84/*
  85 * Lookup the first record less than or equal to [bno, len]
  86 * in the btree given by cur.
  87 */
  88int                                     /* error */
  89xfs_alloc_lookup_le(
  90        struct xfs_btree_cur    *cur,   /* btree cursor */
  91        xfs_agblock_t           bno,    /* starting block of extent */
  92        xfs_extlen_t            len,    /* length of extent */
  93        int                     *stat)  /* success/failure */
  94{
  95        cur->bc_rec.a.ar_startblock = bno;
  96        cur->bc_rec.a.ar_blockcount = len;
  97        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  98}
  99
 100/*
 101 * Update the record referred to by cur to the value given
 102 * by [bno, len].
 103 * This either works (return 0) or gets an EFSCORRUPTED error.
 104 */
 105STATIC int                              /* error */
 106xfs_alloc_update(
 107        struct xfs_btree_cur    *cur,   /* btree cursor */
 108        xfs_agblock_t           bno,    /* starting block of extent */
 109        xfs_extlen_t            len)    /* length of extent */
 110{
 111        union xfs_btree_rec     rec;
 112
 113        rec.alloc.ar_startblock = cpu_to_be32(bno);
 114        rec.alloc.ar_blockcount = cpu_to_be32(len);
 115        return xfs_btree_update(cur, &rec);
 116}
 117
 118/*
 119 * Get the data from the pointed-to record.
 120 */
 121int                                     /* error */
 122xfs_alloc_get_rec(
 123        struct xfs_btree_cur    *cur,   /* btree cursor */
 124        xfs_agblock_t           *bno,   /* output: starting block of extent */
 125        xfs_extlen_t            *len,   /* output: length of extent */
 126        int                     *stat)  /* output: success/failure */
 127{
 128        union xfs_btree_rec     *rec;
 129        int                     error;
 130
 131        error = xfs_btree_get_rec(cur, &rec, stat);
 132        if (!error && *stat == 1) {
 133                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 134                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 135        }
 136        return error;
 137}
 138
 139/*
 140 * Compute aligned version of the found extent.
 141 * Takes alignment and min length into account.
 142 */
 143STATIC void
 144xfs_alloc_compute_aligned(
 145        xfs_alloc_arg_t *args,          /* allocation argument structure */
 146        xfs_agblock_t   foundbno,       /* starting block in found extent */
 147        xfs_extlen_t    foundlen,       /* length in found extent */
 148        xfs_agblock_t   *resbno,        /* result block number */
 149        xfs_extlen_t    *reslen)        /* result length */
 150{
 151        xfs_agblock_t   bno;
 152        xfs_extlen_t    len;
 153
 154        /* Trim busy sections out of found extent */
 155        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 156
 157        if (args->alignment > 1 && len >= args->minlen) {
 158                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 159                xfs_extlen_t    diff = aligned_bno - bno;
 160
 161                *resbno = aligned_bno;
 162                *reslen = diff >= len ? 0 : len - diff;
 163        } else {
 164                *resbno = bno;
 165                *reslen = len;
 166        }
 167}
 168
 169/*
 170 * Compute best start block and diff for "near" allocations.
 171 * freelen >= wantlen already checked by caller.
 172 */
 173STATIC xfs_extlen_t                     /* difference value (absolute) */
 174xfs_alloc_compute_diff(
 175        xfs_agblock_t   wantbno,        /* target starting block */
 176        xfs_extlen_t    wantlen,        /* target length */
 177        xfs_extlen_t    alignment,      /* target alignment */
 178        char            userdata,       /* are we allocating data? */
 179        xfs_agblock_t   freebno,        /* freespace's starting block */
 180        xfs_extlen_t    freelen,        /* freespace's length */
 181        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 182{
 183        xfs_agblock_t   freeend;        /* end of freespace extent */
 184        xfs_agblock_t   newbno1;        /* return block number */
 185        xfs_agblock_t   newbno2;        /* other new block number */
 186        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 187        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 188        xfs_agblock_t   wantend;        /* end of target extent */
 189
 190        ASSERT(freelen >= wantlen);
 191        freeend = freebno + freelen;
 192        wantend = wantbno + wantlen;
 193        /*
 194         * We want to allocate from the start of a free extent if it is past
 195         * the desired block or if we are allocating user data and the free
 196         * extent is before desired block. The second case is there to allow
 197         * for contiguous allocation from the remaining free space if the file
 198         * grows in the short term.
 199         */
 200        if (freebno >= wantbno || (userdata && freeend < wantend)) {
 201                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 202                        newbno1 = NULLAGBLOCK;
 203        } else if (freeend >= wantend && alignment > 1) {
 204                newbno1 = roundup(wantbno, alignment);
 205                newbno2 = newbno1 - alignment;
 206                if (newbno1 >= freeend)
 207                        newbno1 = NULLAGBLOCK;
 208                else
 209                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 210                if (newbno2 < freebno)
 211                        newbno2 = NULLAGBLOCK;
 212                else
 213                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 214                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 215                        if (newlen1 < newlen2 ||
 216                            (newlen1 == newlen2 &&
 217                             XFS_ABSDIFF(newbno1, wantbno) >
 218                             XFS_ABSDIFF(newbno2, wantbno)))
 219                                newbno1 = newbno2;
 220                } else if (newbno2 != NULLAGBLOCK)
 221                        newbno1 = newbno2;
 222        } else if (freeend >= wantend) {
 223                newbno1 = wantbno;
 224        } else if (alignment > 1) {
 225                newbno1 = roundup(freeend - wantlen, alignment);
 226                if (newbno1 > freeend - wantlen &&
 227                    newbno1 - alignment >= freebno)
 228                        newbno1 -= alignment;
 229                else if (newbno1 >= freeend)
 230                        newbno1 = NULLAGBLOCK;
 231        } else
 232                newbno1 = freeend - wantlen;
 233        *newbnop = newbno1;
 234        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 235}
 236
 237/*
 238 * Fix up the length, based on mod and prod.
 239 * len should be k * prod + mod for some k.
 240 * If len is too small it is returned unchanged.
 241 * If len hits maxlen it is left alone.
 242 */
 243STATIC void
 244xfs_alloc_fix_len(
 245        xfs_alloc_arg_t *args)          /* allocation argument structure */
 246{
 247        xfs_extlen_t    k;
 248        xfs_extlen_t    rlen;
 249
 250        ASSERT(args->mod < args->prod);
 251        rlen = args->len;
 252        ASSERT(rlen >= args->minlen);
 253        ASSERT(rlen <= args->maxlen);
 254        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 255            (args->mod == 0 && rlen < args->prod))
 256                return;
 257        k = rlen % args->prod;
 258        if (k == args->mod)
 259                return;
 260        if (k > args->mod) {
 261                if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
 262                        return;
 263        } else {
 264                if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
 265                    (int)args->minlen)
 266                        return;
 267        }
 268        ASSERT(rlen >= args->minlen);
 269        ASSERT(rlen <= args->maxlen);
 270        args->len = rlen;
 271}
 272
 273/*
 274 * Fix up length if there is too little space left in the a.g.
 275 * Return 1 if ok, 0 if too little, should give up.
 276 */
 277STATIC int
 278xfs_alloc_fix_minleft(
 279        xfs_alloc_arg_t *args)          /* allocation argument structure */
 280{
 281        xfs_agf_t       *agf;           /* a.g. freelist header */
 282        int             diff;           /* free space difference */
 283
 284        if (args->minleft == 0)
 285                return 1;
 286        agf = XFS_BUF_TO_AGF(args->agbp);
 287        diff = be32_to_cpu(agf->agf_freeblks)
 288                - args->len - args->minleft;
 289        if (diff >= 0)
 290                return 1;
 291        args->len += diff;              /* shrink the allocated space */
 292        if (args->len >= args->minlen)
 293                return 1;
 294        args->agbno = NULLAGBLOCK;
 295        return 0;
 296}
 297
 298/*
 299 * Update the two btrees, logically removing from freespace the extent
 300 * starting at rbno, rlen blocks.  The extent is contained within the
 301 * actual (current) free extent fbno for flen blocks.
 302 * Flags are passed in indicating whether the cursors are set to the
 303 * relevant records.
 304 */
 305STATIC int                              /* error code */
 306xfs_alloc_fixup_trees(
 307        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 308        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 309        xfs_agblock_t   fbno,           /* starting block of free extent */
 310        xfs_extlen_t    flen,           /* length of free extent */
 311        xfs_agblock_t   rbno,           /* starting block of returned extent */
 312        xfs_extlen_t    rlen,           /* length of returned extent */
 313        int             flags)          /* flags, XFSA_FIXUP_... */
 314{
 315        int             error;          /* error code */
 316        int             i;              /* operation results */
 317        xfs_agblock_t   nfbno1;         /* first new free startblock */
 318        xfs_agblock_t   nfbno2;         /* second new free startblock */
 319        xfs_extlen_t    nflen1=0;       /* first new free length */
 320        xfs_extlen_t    nflen2=0;       /* second new free length */
 321
 322        /*
 323         * Look up the record in the by-size tree if necessary.
 324         */
 325        if (flags & XFSA_FIXUP_CNT_OK) {
 326#ifdef DEBUG
 327                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 328                        return error;
 329                XFS_WANT_CORRUPTED_RETURN(
 330                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 331#endif
 332        } else {
 333                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 334                        return error;
 335                XFS_WANT_CORRUPTED_RETURN(i == 1);
 336        }
 337        /*
 338         * Look up the record in the by-block tree if necessary.
 339         */
 340        if (flags & XFSA_FIXUP_BNO_OK) {
 341#ifdef DEBUG
 342                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 343                        return error;
 344                XFS_WANT_CORRUPTED_RETURN(
 345                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 346#endif
 347        } else {
 348                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 349                        return error;
 350                XFS_WANT_CORRUPTED_RETURN(i == 1);
 351        }
 352
 353#ifdef DEBUG
 354        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 355                struct xfs_btree_block  *bnoblock;
 356                struct xfs_btree_block  *cntblock;
 357
 358                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 359                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 360
 361                XFS_WANT_CORRUPTED_RETURN(
 362                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 363        }
 364#endif
 365
 366        /*
 367         * Deal with all four cases: the allocated record is contained
 368         * within the freespace record, so we can have new freespace
 369         * at either (or both) end, or no freespace remaining.
 370         */
 371        if (rbno == fbno && rlen == flen)
 372                nfbno1 = nfbno2 = NULLAGBLOCK;
 373        else if (rbno == fbno) {
 374                nfbno1 = rbno + rlen;
 375                nflen1 = flen - rlen;
 376                nfbno2 = NULLAGBLOCK;
 377        } else if (rbno + rlen == fbno + flen) {
 378                nfbno1 = fbno;
 379                nflen1 = flen - rlen;
 380                nfbno2 = NULLAGBLOCK;
 381        } else {
 382                nfbno1 = fbno;
 383                nflen1 = rbno - fbno;
 384                nfbno2 = rbno + rlen;
 385                nflen2 = (fbno + flen) - nfbno2;
 386        }
 387        /*
 388         * Delete the entry from the by-size btree.
 389         */
 390        if ((error = xfs_btree_delete(cnt_cur, &i)))
 391                return error;
 392        XFS_WANT_CORRUPTED_RETURN(i == 1);
 393        /*
 394         * Add new by-size btree entry(s).
 395         */
 396        if (nfbno1 != NULLAGBLOCK) {
 397                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 398                        return error;
 399                XFS_WANT_CORRUPTED_RETURN(i == 0);
 400                if ((error = xfs_btree_insert(cnt_cur, &i)))
 401                        return error;
 402                XFS_WANT_CORRUPTED_RETURN(i == 1);
 403        }
 404        if (nfbno2 != NULLAGBLOCK) {
 405                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 406                        return error;
 407                XFS_WANT_CORRUPTED_RETURN(i == 0);
 408                if ((error = xfs_btree_insert(cnt_cur, &i)))
 409                        return error;
 410                XFS_WANT_CORRUPTED_RETURN(i == 1);
 411        }
 412        /*
 413         * Fix up the by-block btree entry(s).
 414         */
 415        if (nfbno1 == NULLAGBLOCK) {
 416                /*
 417                 * No remaining freespace, just delete the by-block tree entry.
 418                 */
 419                if ((error = xfs_btree_delete(bno_cur, &i)))
 420                        return error;
 421                XFS_WANT_CORRUPTED_RETURN(i == 1);
 422        } else {
 423                /*
 424                 * Update the by-block entry to start later|be shorter.
 425                 */
 426                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 427                        return error;
 428        }
 429        if (nfbno2 != NULLAGBLOCK) {
 430                /*
 431                 * 2 resulting free entries, need to add one.
 432                 */
 433                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 434                        return error;
 435                XFS_WANT_CORRUPTED_RETURN(i == 0);
 436                if ((error = xfs_btree_insert(bno_cur, &i)))
 437                        return error;
 438                XFS_WANT_CORRUPTED_RETURN(i == 1);
 439        }
 440        return 0;
 441}
 442
 443static bool
 444xfs_agfl_verify(
 445        struct xfs_buf  *bp)
 446{
 447        struct xfs_mount *mp = bp->b_target->bt_mount;
 448        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 449        int             i;
 450
 451        if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
 452                return false;
 453        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
 454                return false;
 455        /*
 456         * during growfs operations, the perag is not fully initialised,
 457         * so we can't use it for any useful checking. growfs ensures we can't
 458         * use it by using uncached buffers that don't have the perag attached
 459         * so we can detect and avoid this problem.
 460         */
 461        if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 462                return false;
 463
 464        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 465                if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 466                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 467                        return false;
 468        }
 469        return true;
 470}
 471
 472static void
 473xfs_agfl_read_verify(
 474        struct xfs_buf  *bp)
 475{
 476        struct xfs_mount *mp = bp->b_target->bt_mount;
 477        int             agfl_ok = 1;
 478
 479        /*
 480         * There is no verification of non-crc AGFLs because mkfs does not
 481         * initialise the AGFL to zero or NULL. Hence the only valid part of the
 482         * AGFL is what the AGF says is active. We can't get to the AGF, so we
 483         * can't verify just those entries are valid.
 484         */
 485        if (!xfs_sb_version_hascrc(&mp->m_sb))
 486                return;
 487
 488        agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
 489                                   offsetof(struct xfs_agfl, agfl_crc));
 490
 491        agfl_ok = agfl_ok && xfs_agfl_verify(bp);
 492
 493        if (!agfl_ok) {
 494                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
 495                xfs_buf_ioerror(bp, EFSCORRUPTED);
 496        }
 497}
 498
 499static void
 500xfs_agfl_write_verify(
 501        struct xfs_buf  *bp)
 502{
 503        struct xfs_mount *mp = bp->b_target->bt_mount;
 504        struct xfs_buf_log_item *bip = bp->b_fspriv;
 505
 506        /* no verification of non-crc AGFLs */
 507        if (!xfs_sb_version_hascrc(&mp->m_sb))
 508                return;
 509
 510        if (!xfs_agfl_verify(bp)) {
 511                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
 512                xfs_buf_ioerror(bp, EFSCORRUPTED);
 513                return;
 514        }
 515
 516        if (bip)
 517                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 518
 519        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
 520                         offsetof(struct xfs_agfl, agfl_crc));
 521}
 522
 523const struct xfs_buf_ops xfs_agfl_buf_ops = {
 524        .verify_read = xfs_agfl_read_verify,
 525        .verify_write = xfs_agfl_write_verify,
 526};
 527
 528/*
 529 * Read in the allocation group free block array.
 530 */
 531STATIC int                              /* error */
 532xfs_alloc_read_agfl(
 533        xfs_mount_t     *mp,            /* mount point structure */
 534        xfs_trans_t     *tp,            /* transaction pointer */
 535        xfs_agnumber_t  agno,           /* allocation group number */
 536        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 537{
 538        xfs_buf_t       *bp;            /* return value */
 539        int             error;
 540
 541        ASSERT(agno != NULLAGNUMBER);
 542        error = xfs_trans_read_buf(
 543                        mp, tp, mp->m_ddev_targp,
 544                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 545                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 546        if (error)
 547                return error;
 548        ASSERT(!xfs_buf_geterror(bp));
 549        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 550        *bpp = bp;
 551        return 0;
 552}
 553
 554STATIC int
 555xfs_alloc_update_counters(
 556        struct xfs_trans        *tp,
 557        struct xfs_perag        *pag,
 558        struct xfs_buf          *agbp,
 559        long                    len)
 560{
 561        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 562
 563        pag->pagf_freeblks += len;
 564        be32_add_cpu(&agf->agf_freeblks, len);
 565
 566        xfs_trans_agblocks_delta(tp, len);
 567        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 568                     be32_to_cpu(agf->agf_length)))
 569                return EFSCORRUPTED;
 570
 571        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 572        return 0;
 573}
 574
 575/*
 576 * Allocation group level functions.
 577 */
 578
 579/*
 580 * Allocate a variable extent in the allocation group agno.
 581 * Type and bno are used to determine where in the allocation group the
 582 * extent will start.
 583 * Extent's length (returned in *len) will be between minlen and maxlen,
 584 * and of the form k * prod + mod unless there's nothing that large.
 585 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 586 */
 587STATIC int                      /* error */
 588xfs_alloc_ag_vextent(
 589        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 590{
 591        int             error=0;
 592
 593        ASSERT(args->minlen > 0);
 594        ASSERT(args->maxlen > 0);
 595        ASSERT(args->minlen <= args->maxlen);
 596        ASSERT(args->mod < args->prod);
 597        ASSERT(args->alignment > 0);
 598        /*
 599         * Branch to correct routine based on the type.
 600         */
 601        args->wasfromfl = 0;
 602        switch (args->type) {
 603        case XFS_ALLOCTYPE_THIS_AG:
 604                error = xfs_alloc_ag_vextent_size(args);
 605                break;
 606        case XFS_ALLOCTYPE_NEAR_BNO:
 607                error = xfs_alloc_ag_vextent_near(args);
 608                break;
 609        case XFS_ALLOCTYPE_THIS_BNO:
 610                error = xfs_alloc_ag_vextent_exact(args);
 611                break;
 612        default:
 613                ASSERT(0);
 614                /* NOTREACHED */
 615        }
 616
 617        if (error || args->agbno == NULLAGBLOCK)
 618                return error;
 619
 620        ASSERT(args->len >= args->minlen);
 621        ASSERT(args->len <= args->maxlen);
 622        ASSERT(!args->wasfromfl || !args->isfl);
 623        ASSERT(args->agbno % args->alignment == 0);
 624
 625        if (!args->wasfromfl) {
 626                error = xfs_alloc_update_counters(args->tp, args->pag,
 627                                                  args->agbp,
 628                                                  -((long)(args->len)));
 629                if (error)
 630                        return error;
 631
 632                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 633                                              args->agbno, args->len));
 634        }
 635
 636        if (!args->isfl) {
 637                xfs_trans_mod_sb(args->tp, args->wasdel ?
 638                                 XFS_TRANS_SB_RES_FDBLOCKS :
 639                                 XFS_TRANS_SB_FDBLOCKS,
 640                                 -((long)(args->len)));
 641        }
 642
 643        XFS_STATS_INC(xs_allocx);
 644        XFS_STATS_ADD(xs_allocb, args->len);
 645        return error;
 646}
 647
 648/*
 649 * Allocate a variable extent at exactly agno/bno.
 650 * Extent's length (returned in *len) will be between minlen and maxlen,
 651 * and of the form k * prod + mod unless there's nothing that large.
 652 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 653 */
 654STATIC int                      /* error */
 655xfs_alloc_ag_vextent_exact(
 656        xfs_alloc_arg_t *args)  /* allocation argument structure */
 657{
 658        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 659        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 660        int             error;
 661        xfs_agblock_t   fbno;   /* start block of found extent */
 662        xfs_extlen_t    flen;   /* length of found extent */
 663        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 664        xfs_extlen_t    tlen;   /* length of trimmed extent */
 665        xfs_agblock_t   tend;   /* end block of trimmed extent */
 666        int             i;      /* success/failure of operation */
 667
 668        ASSERT(args->alignment == 1);
 669
 670        /*
 671         * Allocate/initialize a cursor for the by-number freespace btree.
 672         */
 673        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 674                                          args->agno, XFS_BTNUM_BNO);
 675
 676        /*
 677         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 678         * Look for the closest free block <= bno, it must contain bno
 679         * if any free block does.
 680         */
 681        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 682        if (error)
 683                goto error0;
 684        if (!i)
 685                goto not_found;
 686
 687        /*
 688         * Grab the freespace record.
 689         */
 690        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 691        if (error)
 692                goto error0;
 693        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 694        ASSERT(fbno <= args->agbno);
 695
 696        /*
 697         * Check for overlapping busy extents.
 698         */
 699        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 700
 701        /*
 702         * Give up if the start of the extent is busy, or the freespace isn't
 703         * long enough for the minimum request.
 704         */
 705        if (tbno > args->agbno)
 706                goto not_found;
 707        if (tlen < args->minlen)
 708                goto not_found;
 709        tend = tbno + tlen;
 710        if (tend < args->agbno + args->minlen)
 711                goto not_found;
 712
 713        /*
 714         * End of extent will be smaller of the freespace end and the
 715         * maximal requested end.
 716         *
 717         * Fix the length according to mod and prod if given.
 718         */
 719        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 720                                                - args->agbno;
 721        xfs_alloc_fix_len(args);
 722        if (!xfs_alloc_fix_minleft(args))
 723                goto not_found;
 724
 725        ASSERT(args->agbno + args->len <= tend);
 726
 727        /*
 728         * We are allocating agbno for args->len
 729         * Allocate/initialize a cursor for the by-size btree.
 730         */
 731        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 732                args->agno, XFS_BTNUM_CNT);
 733        ASSERT(args->agbno + args->len <=
 734                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 735        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 736                                      args->len, XFSA_FIXUP_BNO_OK);
 737        if (error) {
 738                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 739                goto error0;
 740        }
 741
 742        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 743        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 744
 745        args->wasfromfl = 0;
 746        trace_xfs_alloc_exact_done(args);
 747        return 0;
 748
 749not_found:
 750        /* Didn't find it, return null. */
 751        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 752        args->agbno = NULLAGBLOCK;
 753        trace_xfs_alloc_exact_notfound(args);
 754        return 0;
 755
 756error0:
 757        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 758        trace_xfs_alloc_exact_error(args);
 759        return error;
 760}
 761
 762/*
 763 * Search the btree in a given direction via the search cursor and compare
 764 * the records found against the good extent we've already found.
 765 */
 766STATIC int
 767xfs_alloc_find_best_extent(
 768        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 769        struct xfs_btree_cur    **gcur, /* good cursor */
 770        struct xfs_btree_cur    **scur, /* searching cursor */
 771        xfs_agblock_t           gdiff,  /* difference for search comparison */
 772        xfs_agblock_t           *sbno,  /* extent found by search */
 773        xfs_extlen_t            *slen,  /* extent length */
 774        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 775        xfs_extlen_t            *slena, /* aligned extent length */
 776        int                     dir)    /* 0 = search right, 1 = search left */
 777{
 778        xfs_agblock_t           new;
 779        xfs_agblock_t           sdiff;
 780        int                     error;
 781        int                     i;
 782
 783        /* The good extent is perfect, no need to  search. */
 784        if (!gdiff)
 785                goto out_use_good;
 786
 787        /*
 788         * Look until we find a better one, run out of space or run off the end.
 789         */
 790        do {
 791                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 792                if (error)
 793                        goto error0;
 794                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 795                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 796
 797                /*
 798                 * The good extent is closer than this one.
 799                 */
 800                if (!dir) {
 801                        if (*sbnoa >= args->agbno + gdiff)
 802                                goto out_use_good;
 803                } else {
 804                        if (*sbnoa <= args->agbno - gdiff)
 805                                goto out_use_good;
 806                }
 807
 808                /*
 809                 * Same distance, compare length and pick the best.
 810                 */
 811                if (*slena >= args->minlen) {
 812                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 813                        xfs_alloc_fix_len(args);
 814
 815                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 816                                                       args->alignment,
 817                                                       args->userdata, *sbnoa,
 818                                                       *slena, &new);
 819
 820                        /*
 821                         * Choose closer size and invalidate other cursor.
 822                         */
 823                        if (sdiff < gdiff)
 824                                goto out_use_search;
 825                        goto out_use_good;
 826                }
 827
 828                if (!dir)
 829                        error = xfs_btree_increment(*scur, 0, &i);
 830                else
 831                        error = xfs_btree_decrement(*scur, 0, &i);
 832                if (error)
 833                        goto error0;
 834        } while (i);
 835
 836out_use_good:
 837        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 838        *scur = NULL;
 839        return 0;
 840
 841out_use_search:
 842        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 843        *gcur = NULL;
 844        return 0;
 845
 846error0:
 847        /* caller invalidates cursors */
 848        return error;
 849}
 850
 851/*
 852 * Allocate a variable extent near bno in the allocation group agno.
 853 * Extent's length (returned in len) will be between minlen and maxlen,
 854 * and of the form k * prod + mod unless there's nothing that large.
 855 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 856 */
 857STATIC int                              /* error */
 858xfs_alloc_ag_vextent_near(
 859        xfs_alloc_arg_t *args)          /* allocation argument structure */
 860{
 861        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 862        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 863        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 864        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 865        xfs_agblock_t   gtbnoa;         /* aligned ... */
 866        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 867        xfs_extlen_t    gtlen;          /* length of right side entry */
 868        xfs_extlen_t    gtlena;         /* aligned ... */
 869        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 870        int             error;          /* error code */
 871        int             i;              /* result code, temporary */
 872        int             j;              /* result code, temporary */
 873        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 874        xfs_agblock_t   ltbnoa;         /* aligned ... */
 875        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 876        xfs_extlen_t    ltlen;          /* length of left side entry */
 877        xfs_extlen_t    ltlena;         /* aligned ... */
 878        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 879        xfs_extlen_t    rlen;           /* length of returned extent */
 880        int             forced = 0;
 881#if defined(DEBUG) && defined(__KERNEL__)
 882        /*
 883         * Randomly don't execute the first algorithm.
 884         */
 885        int             dofirst;        /* set to do first algorithm */
 886
 887        dofirst = prandom_u32() & 1;
 888#endif
 889
 890restart:
 891        bno_cur_lt = NULL;
 892        bno_cur_gt = NULL;
 893        ltlen = 0;
 894        gtlena = 0;
 895        ltlena = 0;
 896
 897        /*
 898         * Get a cursor for the by-size btree.
 899         */
 900        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 901                args->agno, XFS_BTNUM_CNT);
 902
 903        /*
 904         * See if there are any free extents as big as maxlen.
 905         */
 906        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 907                goto error0;
 908        /*
 909         * If none, then pick up the last entry in the tree unless the
 910         * tree is empty.
 911         */
 912        if (!i) {
 913                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
 914                                &ltlen, &i)))
 915                        goto error0;
 916                if (i == 0 || ltlen == 0) {
 917                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 918                        trace_xfs_alloc_near_noentry(args);
 919                        return 0;
 920                }
 921                ASSERT(i == 1);
 922        }
 923        args->wasfromfl = 0;
 924
 925        /*
 926         * First algorithm.
 927         * If the requested extent is large wrt the freespaces available
 928         * in this a.g., then the cursor will be pointing to a btree entry
 929         * near the right edge of the tree.  If it's in the last btree leaf
 930         * block, then we just examine all the entries in that block
 931         * that are big enough, and pick the best one.
 932         * This is written as a while loop so we can break out of it,
 933         * but we never loop back to the top.
 934         */
 935        while (xfs_btree_islastblock(cnt_cur, 0)) {
 936                xfs_extlen_t    bdiff;
 937                int             besti=0;
 938                xfs_extlen_t    blen=0;
 939                xfs_agblock_t   bnew=0;
 940
 941#if defined(DEBUG) && defined(__KERNEL__)
 942                if (!dofirst)
 943                        break;
 944#endif
 945                /*
 946                 * Start from the entry that lookup found, sequence through
 947                 * all larger free blocks.  If we're actually pointing at a
 948                 * record smaller than maxlen, go to the start of this block,
 949                 * and skip all those smaller than minlen.
 950                 */
 951                if (ltlen || args->alignment > 1) {
 952                        cnt_cur->bc_ptrs[0] = 1;
 953                        do {
 954                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 955                                                &ltlen, &i)))
 956                                        goto error0;
 957                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 958                                if (ltlen >= args->minlen)
 959                                        break;
 960                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 961                                        goto error0;
 962                        } while (i);
 963                        ASSERT(ltlen >= args->minlen);
 964                        if (!i)
 965                                break;
 966                }
 967                i = cnt_cur->bc_ptrs[0];
 968                for (j = 1, blen = 0, bdiff = 0;
 969                     !error && j && (blen < args->maxlen || bdiff > 0);
 970                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
 971                        /*
 972                         * For each entry, decide if it's better than
 973                         * the previous best entry.
 974                         */
 975                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 976                                goto error0;
 977                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 978                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
 979                                                  &ltbnoa, &ltlena);
 980                        if (ltlena < args->minlen)
 981                                continue;
 982                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 983                        xfs_alloc_fix_len(args);
 984                        ASSERT(args->len >= args->minlen);
 985                        if (args->len < blen)
 986                                continue;
 987                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 988                                args->alignment, args->userdata, ltbnoa,
 989                                ltlena, &ltnew);
 990                        if (ltnew != NULLAGBLOCK &&
 991                            (args->len > blen || ltdiff < bdiff)) {
 992                                bdiff = ltdiff;
 993                                bnew = ltnew;
 994                                blen = args->len;
 995                                besti = cnt_cur->bc_ptrs[0];
 996                        }
 997                }
 998                /*
 999                 * It didn't work.  We COULD be in a case where
1000                 * there's a good record somewhere, so try again.
1001                 */
1002                if (blen == 0)
1003                        break;
1004                /*
1005                 * Point at the best entry, and retrieve it again.
1006                 */
1007                cnt_cur->bc_ptrs[0] = besti;
1008                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1009                        goto error0;
1010                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1011                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1012                args->len = blen;
1013                if (!xfs_alloc_fix_minleft(args)) {
1014                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1015                        trace_xfs_alloc_near_nominleft(args);
1016                        return 0;
1017                }
1018                blen = args->len;
1019                /*
1020                 * We are allocating starting at bnew for blen blocks.
1021                 */
1022                args->agbno = bnew;
1023                ASSERT(bnew >= ltbno);
1024                ASSERT(bnew + blen <= ltbno + ltlen);
1025                /*
1026                 * Set up a cursor for the by-bno tree.
1027                 */
1028                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1029                        args->agbp, args->agno, XFS_BTNUM_BNO);
1030                /*
1031                 * Fix up the btree entries.
1032                 */
1033                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1034                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1035                        goto error0;
1036                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1037                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1038
1039                trace_xfs_alloc_near_first(args);
1040                return 0;
1041        }
1042        /*
1043         * Second algorithm.
1044         * Search in the by-bno tree to the left and to the right
1045         * simultaneously, until in each case we find a space big enough,
1046         * or run into the edge of the tree.  When we run into the edge,
1047         * we deallocate that cursor.
1048         * If both searches succeed, we compare the two spaces and pick
1049         * the better one.
1050         * With alignment, it's possible for both to fail; the upper
1051         * level algorithm that picks allocation groups for allocations
1052         * is not supposed to do this.
1053         */
1054        /*
1055         * Allocate and initialize the cursor for the leftward search.
1056         */
1057        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1058                args->agno, XFS_BTNUM_BNO);
1059        /*
1060         * Lookup <= bno to find the leftward search's starting point.
1061         */
1062        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1063                goto error0;
1064        if (!i) {
1065                /*
1066                 * Didn't find anything; use this cursor for the rightward
1067                 * search.
1068                 */
1069                bno_cur_gt = bno_cur_lt;
1070                bno_cur_lt = NULL;
1071        }
1072        /*
1073         * Found something.  Duplicate the cursor for the rightward search.
1074         */
1075        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1076                goto error0;
1077        /*
1078         * Increment the cursor, so we will point at the entry just right
1079         * of the leftward entry if any, or to the leftmost entry.
1080         */
1081        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1082                goto error0;
1083        if (!i) {
1084                /*
1085                 * It failed, there are no rightward entries.
1086                 */
1087                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1088                bno_cur_gt = NULL;
1089        }
1090        /*
1091         * Loop going left with the leftward cursor, right with the
1092         * rightward cursor, until either both directions give up or
1093         * we find an entry at least as big as minlen.
1094         */
1095        do {
1096                if (bno_cur_lt) {
1097                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1098                                goto error0;
1099                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1100                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1101                                                  &ltbnoa, &ltlena);
1102                        if (ltlena >= args->minlen)
1103                                break;
1104                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1105                                goto error0;
1106                        if (!i) {
1107                                xfs_btree_del_cursor(bno_cur_lt,
1108                                                     XFS_BTREE_NOERROR);
1109                                bno_cur_lt = NULL;
1110                        }
1111                }
1112                if (bno_cur_gt) {
1113                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1114                                goto error0;
1115                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1116                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1117                                                  &gtbnoa, &gtlena);
1118                        if (gtlena >= args->minlen)
1119                                break;
1120                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1121                                goto error0;
1122                        if (!i) {
1123                                xfs_btree_del_cursor(bno_cur_gt,
1124                                                     XFS_BTREE_NOERROR);
1125                                bno_cur_gt = NULL;
1126                        }
1127                }
1128        } while (bno_cur_lt || bno_cur_gt);
1129
1130        /*
1131         * Got both cursors still active, need to find better entry.
1132         */
1133        if (bno_cur_lt && bno_cur_gt) {
1134                if (ltlena >= args->minlen) {
1135                        /*
1136                         * Left side is good, look for a right side entry.
1137                         */
1138                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1139                        xfs_alloc_fix_len(args);
1140                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1141                                args->alignment, args->userdata, ltbnoa,
1142                                ltlena, &ltnew);
1143
1144                        error = xfs_alloc_find_best_extent(args,
1145                                                &bno_cur_lt, &bno_cur_gt,
1146                                                ltdiff, &gtbno, &gtlen,
1147                                                &gtbnoa, &gtlena,
1148                                                0 /* search right */);
1149                } else {
1150                        ASSERT(gtlena >= args->minlen);
1151
1152                        /*
1153                         * Right side is good, look for a left side entry.
1154                         */
1155                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1156                        xfs_alloc_fix_len(args);
1157                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1158                                args->alignment, args->userdata, gtbnoa,
1159                                gtlena, &gtnew);
1160
1161                        error = xfs_alloc_find_best_extent(args,
1162                                                &bno_cur_gt, &bno_cur_lt,
1163                                                gtdiff, &ltbno, &ltlen,
1164                                                &ltbnoa, &ltlena,
1165                                                1 /* search left */);
1166                }
1167
1168                if (error)
1169                        goto error0;
1170        }
1171
1172        /*
1173         * If we couldn't get anything, give up.
1174         */
1175        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1176                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1177
1178                if (!forced++) {
1179                        trace_xfs_alloc_near_busy(args);
1180                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1181                        goto restart;
1182                }
1183                trace_xfs_alloc_size_neither(args);
1184                args->agbno = NULLAGBLOCK;
1185                return 0;
1186        }
1187
1188        /*
1189         * At this point we have selected a freespace entry, either to the
1190         * left or to the right.  If it's on the right, copy all the
1191         * useful variables to the "left" set so we only have one
1192         * copy of this code.
1193         */
1194        if (bno_cur_gt) {
1195                bno_cur_lt = bno_cur_gt;
1196                bno_cur_gt = NULL;
1197                ltbno = gtbno;
1198                ltbnoa = gtbnoa;
1199                ltlen = gtlen;
1200                ltlena = gtlena;
1201                j = 1;
1202        } else
1203                j = 0;
1204
1205        /*
1206         * Fix up the length and compute the useful address.
1207         */
1208        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1209        xfs_alloc_fix_len(args);
1210        if (!xfs_alloc_fix_minleft(args)) {
1211                trace_xfs_alloc_near_nominleft(args);
1212                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1213                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1214                return 0;
1215        }
1216        rlen = args->len;
1217        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1218                                     args->userdata, ltbnoa, ltlena, &ltnew);
1219        ASSERT(ltnew >= ltbno);
1220        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1221        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1222        args->agbno = ltnew;
1223
1224        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1225                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1226                goto error0;
1227
1228        if (j)
1229                trace_xfs_alloc_near_greater(args);
1230        else
1231                trace_xfs_alloc_near_lesser(args);
1232
1233        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1234        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1235        return 0;
1236
1237 error0:
1238        trace_xfs_alloc_near_error(args);
1239        if (cnt_cur != NULL)
1240                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1241        if (bno_cur_lt != NULL)
1242                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1243        if (bno_cur_gt != NULL)
1244                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1245        return error;
1246}
1247
1248/*
1249 * Allocate a variable extent anywhere in the allocation group agno.
1250 * Extent's length (returned in len) will be between minlen and maxlen,
1251 * and of the form k * prod + mod unless there's nothing that large.
1252 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1253 */
1254STATIC int                              /* error */
1255xfs_alloc_ag_vextent_size(
1256        xfs_alloc_arg_t *args)          /* allocation argument structure */
1257{
1258        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1259        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1260        int             error;          /* error result */
1261        xfs_agblock_t   fbno;           /* start of found freespace */
1262        xfs_extlen_t    flen;           /* length of found freespace */
1263        int             i;              /* temp status variable */
1264        xfs_agblock_t   rbno;           /* returned block number */
1265        xfs_extlen_t    rlen;           /* length of returned extent */
1266        int             forced = 0;
1267
1268restart:
1269        /*
1270         * Allocate and initialize a cursor for the by-size btree.
1271         */
1272        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1273                args->agno, XFS_BTNUM_CNT);
1274        bno_cur = NULL;
1275
1276        /*
1277         * Look for an entry >= maxlen+alignment-1 blocks.
1278         */
1279        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1280                        args->maxlen + args->alignment - 1, &i)))
1281                goto error0;
1282
1283        /*
1284         * If none or we have busy extents that we cannot allocate from, then
1285         * we have to settle for a smaller extent. In the case that there are
1286         * no large extents, this will return the last entry in the tree unless
1287         * the tree is empty. In the case that there are only busy large
1288         * extents, this will return the largest small extent unless there
1289         * are no smaller extents available.
1290         */
1291        if (!i || forced > 1) {
1292                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1293                                                   &fbno, &flen, &i);
1294                if (error)
1295                        goto error0;
1296                if (i == 0 || flen == 0) {
1297                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1298                        trace_xfs_alloc_size_noentry(args);
1299                        return 0;
1300                }
1301                ASSERT(i == 1);
1302                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1303        } else {
1304                /*
1305                 * Search for a non-busy extent that is large enough.
1306                 * If we are at low space, don't check, or if we fall of
1307                 * the end of the btree, turn off the busy check and
1308                 * restart.
1309                 */
1310                for (;;) {
1311                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1312                        if (error)
1313                                goto error0;
1314                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1315
1316                        xfs_alloc_compute_aligned(args, fbno, flen,
1317                                                  &rbno, &rlen);
1318
1319                        if (rlen >= args->maxlen)
1320                                break;
1321
1322                        error = xfs_btree_increment(cnt_cur, 0, &i);
1323                        if (error)
1324                                goto error0;
1325                        if (i == 0) {
1326                                /*
1327                                 * Our only valid extents must have been busy.
1328                                 * Make it unbusy by forcing the log out and
1329                                 * retrying. If we've been here before, forcing
1330                                 * the log isn't making the extents available,
1331                                 * which means they have probably been freed in
1332                                 * this transaction.  In that case, we have to
1333                                 * give up on them and we'll attempt a minlen
1334                                 * allocation the next time around.
1335                                 */
1336                                xfs_btree_del_cursor(cnt_cur,
1337                                                     XFS_BTREE_NOERROR);
1338                                trace_xfs_alloc_size_busy(args);
1339                                if (!forced++)
1340                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1341                                goto restart;
1342                        }
1343                }
1344        }
1345
1346        /*
1347         * In the first case above, we got the last entry in the
1348         * by-size btree.  Now we check to see if the space hits maxlen
1349         * once aligned; if not, we search left for something better.
1350         * This can't happen in the second case above.
1351         */
1352        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1353        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1354                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1355        if (rlen < args->maxlen) {
1356                xfs_agblock_t   bestfbno;
1357                xfs_extlen_t    bestflen;
1358                xfs_agblock_t   bestrbno;
1359                xfs_extlen_t    bestrlen;
1360
1361                bestrlen = rlen;
1362                bestrbno = rbno;
1363                bestflen = flen;
1364                bestfbno = fbno;
1365                for (;;) {
1366                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1367                                goto error0;
1368                        if (i == 0)
1369                                break;
1370                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1371                                        &i)))
1372                                goto error0;
1373                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1374                        if (flen < bestrlen)
1375                                break;
1376                        xfs_alloc_compute_aligned(args, fbno, flen,
1377                                                  &rbno, &rlen);
1378                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1379                        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1380                                (rlen <= flen && rbno + rlen <= fbno + flen),
1381                                error0);
1382                        if (rlen > bestrlen) {
1383                                bestrlen = rlen;
1384                                bestrbno = rbno;
1385                                bestflen = flen;
1386                                bestfbno = fbno;
1387                                if (rlen == args->maxlen)
1388                                        break;
1389                        }
1390                }
1391                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1392                                &i)))
1393                        goto error0;
1394                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1395                rlen = bestrlen;
1396                rbno = bestrbno;
1397                flen = bestflen;
1398                fbno = bestfbno;
1399        }
1400        args->wasfromfl = 0;
1401        /*
1402         * Fix up the length.
1403         */
1404        args->len = rlen;
1405        if (rlen < args->minlen) {
1406                if (!forced++) {
1407                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1408                        trace_xfs_alloc_size_busy(args);
1409                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1410                        goto restart;
1411                }
1412                goto out_nominleft;
1413        }
1414        xfs_alloc_fix_len(args);
1415
1416        if (!xfs_alloc_fix_minleft(args))
1417                goto out_nominleft;
1418        rlen = args->len;
1419        XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1420        /*
1421         * Allocate and initialize a cursor for the by-block tree.
1422         */
1423        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1424                args->agno, XFS_BTNUM_BNO);
1425        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1426                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1427                goto error0;
1428        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1429        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1430        cnt_cur = bno_cur = NULL;
1431        args->len = rlen;
1432        args->agbno = rbno;
1433        XFS_WANT_CORRUPTED_GOTO(
1434                args->agbno + args->len <=
1435                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1436                error0);
1437        trace_xfs_alloc_size_done(args);
1438        return 0;
1439
1440error0:
1441        trace_xfs_alloc_size_error(args);
1442        if (cnt_cur)
1443                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1444        if (bno_cur)
1445                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1446        return error;
1447
1448out_nominleft:
1449        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1450        trace_xfs_alloc_size_nominleft(args);
1451        args->agbno = NULLAGBLOCK;
1452        return 0;
1453}
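
/*
 * A note on the "forced" retry above, plus a minimal caller-side sketch.
 * "forced" escalates in three steps: on the first pass every large enough
 * extent may be busy, so the log is forced and the search restarts; if the
 * second pass still finds only busy extents they were probably freed in this
 * transaction, so the third pass gives up on maxlen and falls back to the
 * small/minlen path.  The sketch below only illustrates which xfs_alloc_arg
 * fields matter here; the values are examples, not taken from any real
 * caller, and real callers reach this code through xfs_alloc_ag_vextent():
 *
 *	memset(&args, 0, sizeof(args));
 *	args.tp = tp;			locked transaction
 *	args.mp = mp;
 *	args.agbp = agbp;		AGF buffer, already read and locked
 *	args.agno = agno;
 *	args.pag = pag;
 *	args.type = XFS_ALLOCTYPE_THIS_AG;
 *	args.minlen = 1;		accept anything down to one block
 *	args.maxlen = 16;		but prefer a 16 block extent
 *	args.alignment = 1;
 *	args.prod = 1;
 *	error = xfs_alloc_ag_vextent(&args);
 *
 * On return args.agbno/args.len describe the extent, or args.agbno is
 * NULLAGBLOCK if nothing could be allocated.
 */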
1454
1455/*
1456 * Deal with the case where only small freespaces remain.
1457 * Either return the contents of the last freespace record,
1458 * or allocate space from the freelist if there is nothing in the tree.
1459 */
1460STATIC int                      /* error */
1461xfs_alloc_ag_vextent_small(
1462        xfs_alloc_arg_t *args,  /* allocation argument structure */
1463        xfs_btree_cur_t *ccur,  /* by-size cursor */
1464        xfs_agblock_t   *fbnop, /* result block number */
1465        xfs_extlen_t    *flenp, /* result length */
1466        int             *stat)  /* status: 0-freelist, 1-normal/none */
1467{
1468        int             error;
1469        xfs_agblock_t   fbno;
1470        xfs_extlen_t    flen;
1471        int             i;
1472
1473        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1474                goto error0;
1475        if (i) {
1476                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1477                        goto error0;
1478                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1479        }
1480        /*
1481         * Nothing in the btree, try the freelist.  Make sure
1482         * to respect minleft even when pulling from the
1483         * freelist.
1484         */
1485        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1486                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1487                  > args->minleft)) {
1488                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1489                if (error)
1490                        goto error0;
1491                if (fbno != NULLAGBLOCK) {
1492                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1493                                             args->userdata);
1494
1495                        if (args->userdata) {
1496                                xfs_buf_t       *bp;
1497
1498                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1499                                        args->agno, fbno, 0);
1500                                xfs_trans_binval(args->tp, bp);
1501                        }
1502                        args->len = 1;
1503                        args->agbno = fbno;
1504                        XFS_WANT_CORRUPTED_GOTO(
1505                                args->agbno + args->len <=
1506                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1507                                error0);
1508                        args->wasfromfl = 1;
1509                        trace_xfs_alloc_small_freelist(args);
1510                        *stat = 0;
1511                        return 0;
1512                }
1513                /*
1514                 * Nothing in the freelist.
1515                 */
1516                else
1517                        flen = 0;
1518        }
1519        /*
1520         * Can't allocate from the freelist for some reason.
1521         */
1522        else {
1523                fbno = NULLAGBLOCK;
1524                flen = 0;
1525        }
1526        /*
1527         * Can't do the allocation, give up.
1528         */
1529        if (flen < args->minlen) {
1530                args->agbno = NULLAGBLOCK;
1531                trace_xfs_alloc_small_notenough(args);
1532                flen = 0;
1533        }
1534        *fbnop = fbno;
1535        *flenp = flen;
1536        *stat = 1;
1537        trace_xfs_alloc_small_done(args);
1538        return 0;
1539
1540error0:
1541        trace_xfs_alloc_small_error(args);
1542        return error;
1543}
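
/*
 * How callers decode the result of xfs_alloc_ag_vextent_small() above
 * (a summary of the code, not an additional contract):
 *
 *	*stat == 0			the block came from the AGFL; args->agbno
 *					and args->len (always 1) are already final
 *	*stat == 1 && *flenp == 0	nothing usable was found
 *	*stat == 1 && *flenp != 0	[*fbnop, *flenp] is a by-size btree record
 *					that the caller still has to trim and fix up
 */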
1544
1545/*
1546 * Free the extent starting at agno/bno for length.
1547 */
1548STATIC int                      /* error */
1549xfs_free_ag_extent(
1550        xfs_trans_t     *tp,    /* transaction pointer */
1551        xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
1552        xfs_agnumber_t  agno,   /* allocation group number */
1553        xfs_agblock_t   bno,    /* starting block number */
1554        xfs_extlen_t    len,    /* length of extent */
1555        int             isfl)   /* set if is freelist blocks - no sb acctg */
1556{
1557        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1558        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1559        int             error;          /* error return value */
1560        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1561        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1562        int             haveleft;       /* have a left neighbor block */
1563        int             haveright;      /* have a right neighbor block */
1564        int             i;              /* temp, result code */
1565        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1566        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1567        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1568        xfs_agblock_t   nbno;           /* new starting block of freespace */
1569        xfs_extlen_t    nlen;           /* new length of freespace */
1570        xfs_perag_t     *pag;           /* per allocation group data */
1571
1572        mp = tp->t_mountp;
1573        /*
1574         * Allocate and initialize a cursor for the by-block btree.
1575         */
1576        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1577        cnt_cur = NULL;
1578        /*
1579         * Look for a neighboring block on the left (lower block numbers)
1580         * that is contiguous with this space.
1581         */
1582        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1583                goto error0;
1584        if (haveleft) {
1585                /*
1586                 * There is a block to our left.
1587                 */
1588                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1589                        goto error0;
1590                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1591                /*
1592                 * It's not contiguous, though.
1593                 */
1594                if (ltbno + ltlen < bno)
1595                        haveleft = 0;
1596                else {
1597                        /*
1598                         * If this failure happens the request to free this
1599                         * space was invalid, it's (partly) already free.
1600                         * Very bad.
1601                         */
1602                        XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1603                }
1604        }
1605        /*
1606         * Look for a neighboring block on the right (higher block numbers)
1607         * that is contiguous with this space.
1608         */
1609        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1610                goto error0;
1611        if (haveright) {
1612                /*
1613                 * There is a block to our right.
1614                 */
1615                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1616                        goto error0;
1617                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1618                /*
1619                 * It's not contiguous, though.
1620                 */
1621                if (bno + len < gtbno)
1622                        haveright = 0;
1623                else {
1624                        /*
1625                         * If this failure happens the request to free this
1626                         * space was invalid, it's (partly) already free.
1627                         * Very bad.
1628                         */
1629                        XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1630                }
1631        }
1632        /*
1633         * Now allocate and initialize a cursor for the by-size tree.
1634         */
1635        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1636        /*
1637         * Have both left and right contiguous neighbors.
1638         * Merge all three into a single free block.
1639         */
1640        if (haveleft && haveright) {
1641                /*
1642                 * Delete the old by-size entry on the left.
1643                 */
1644                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1645                        goto error0;
1646                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1647                if ((error = xfs_btree_delete(cnt_cur, &i)))
1648                        goto error0;
1649                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1650                /*
1651                 * Delete the old by-size entry on the right.
1652                 */
1653                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1654                        goto error0;
1655                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1656                if ((error = xfs_btree_delete(cnt_cur, &i)))
1657                        goto error0;
1658                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1659                /*
1660                 * Delete the old by-block entry for the right block.
1661                 */
1662                if ((error = xfs_btree_delete(bno_cur, &i)))
1663                        goto error0;
1664                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1665                /*
1666                 * Move the by-block cursor back to the left neighbor.
1667                 */
1668                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1669                        goto error0;
1670                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1671#ifdef DEBUG
1672                /*
1673                 * Check that this is the right record: delete didn't
1674                 * mangle the cursor.
1675                 */
1676                {
1677                        xfs_agblock_t   xxbno;
1678                        xfs_extlen_t    xxlen;
1679
1680                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1681                                        &i)))
1682                                goto error0;
1683                        XFS_WANT_CORRUPTED_GOTO(
1684                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1685                                error0);
1686                }
1687#endif
1688                /*
1689                 * Update remaining by-block entry to the new, joined block.
1690                 */
1691                nbno = ltbno;
1692                nlen = len + ltlen + gtlen;
1693                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1694                        goto error0;
1695        }
1696        /*
1697         * Have only a left contiguous neighbor.
1698         * Merge it together with the new freespace.
1699         */
1700        else if (haveleft) {
1701                /*
1702                 * Delete the old by-size entry on the left.
1703                 */
1704                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1705                        goto error0;
1706                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1707                if ((error = xfs_btree_delete(cnt_cur, &i)))
1708                        goto error0;
1709                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1710                /*
1711                 * Back up the by-block cursor to the left neighbor, and
1712                 * update its length.
1713                 */
1714                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1715                        goto error0;
1716                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1717                nbno = ltbno;
1718                nlen = len + ltlen;
1719                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1720                        goto error0;
1721        }
1722        /*
1723         * Have only a right contiguous neighbor.
1724         * Merge it together with the new freespace.
1725         */
1726        else if (haveright) {
1727                /*
1728                 * Delete the old by-size entry on the right.
1729                 */
1730                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1731                        goto error0;
1732                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1733                if ((error = xfs_btree_delete(cnt_cur, &i)))
1734                        goto error0;
1735                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1736                /*
1737                 * Update the starting block and length of the right
1738                 * neighbor in the by-block tree.
1739                 */
1740                nbno = bno;
1741                nlen = len + gtlen;
1742                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1743                        goto error0;
1744        }
1745        /*
1746         * No contiguous neighbors.
1747         * Insert the new freespace into the by-block tree.
1748         */
1749        else {
1750                nbno = bno;
1751                nlen = len;
1752                if ((error = xfs_btree_insert(bno_cur, &i)))
1753                        goto error0;
1754                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1755        }
1756        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1757        bno_cur = NULL;
1758        /*
1759         * In all cases we need to insert the new freespace in the by-size tree.
1760         */
1761        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1762                goto error0;
1763        XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1764        if ((error = xfs_btree_insert(cnt_cur, &i)))
1765                goto error0;
1766        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1767        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1768        cnt_cur = NULL;
1769
1770        /*
1771         * Update the freespace totals in the ag and superblock.
1772         */
1773        pag = xfs_perag_get(mp, agno);
1774        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1775        xfs_perag_put(pag);
1776        if (error)
1777                goto error0;
1778
1779        if (!isfl)
1780                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1781        XFS_STATS_INC(xs_freex);
1782        XFS_STATS_ADD(xs_freeb, len);
1783
1784        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1785
1786        return 0;
1787
1788 error0:
1789        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1790        if (bno_cur)
1791                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1792        if (cnt_cur)
1793                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1794        return error;
1795}
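
/*
 * Worked example of the merge cases above (block numbers are arbitrary):
 * freeing [bno = 100, len = 10] when [90, 10] to the left and [110, 5] to the
 * right are already free deletes both neighbours' by-size records plus the
 * right neighbour's by-block record, rewrites the left neighbour's by-block
 * record as [90, 25], and finally inserts the single merged record [90, 25]
 * into the by-size tree.
 */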
1796
1797/*
1798 * Visible (exported) allocation/free functions.
1799 * Some of these are used just by xfs_alloc_btree.c and this file.
1800 */
1801
1802/*
1803 * Compute and fill in value of m_ag_maxlevels.
1804 */
1805void
1806xfs_alloc_compute_maxlevels(
1807        xfs_mount_t     *mp)    /* file system mount structure */
1808{
1809        int             level;
1810        uint            maxblocks;
1811        uint            maxleafents;
1812        int             minleafrecs;
1813        int             minnoderecs;
1814
1815        maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1816        minleafrecs = mp->m_alloc_mnr[0];
1817        minnoderecs = mp->m_alloc_mnr[1];
1818        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1819        for (level = 1; maxblocks > 1; level++)
1820                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1821        mp->m_ag_maxlevels = level;
1822}
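
/*
 * Worked example of the computation above, assuming 4k blocks and the pre-CRC
 * 16 byte short btree block header, which gives minleafrecs = 255 and
 * minnoderecs = 170 (the exact values depend on the filesystem geometry).
 * With sb_agblocks = 1048576:
 *
 *	maxleafents = 1048577 / 2		= 524288
 *	maxblocks = roundup(524288 / 255)	= 2057		level 1
 *	maxblocks = roundup(2057 / 170)		= 13		level 2
 *	maxblocks = roundup(13 / 170)		= 1		level 3
 *
 * so m_ag_maxlevels ends up as 3 for this geometry.
 */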
1823
1824/*
1825 * Find the length of the longest extent in an AG.
1826 */
1827xfs_extlen_t
1828xfs_alloc_longest_free_extent(
1829        struct xfs_mount        *mp,
1830        struct xfs_perag        *pag)
1831{
1832        xfs_extlen_t            need, delta = 0;
1833
1834        need = XFS_MIN_FREELIST_PAG(pag, mp);
1835        if (need > pag->pagf_flcount)
1836                delta = need - pag->pagf_flcount;
1837
1838        if (pag->pagf_longest > delta)
1839                return pag->pagf_longest - delta;
1840        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1841}
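
/*
 * Note that when the freelist deficit swallows the longest extent, the final
 * expression above degenerates to a boolean: it returns 1 if the AG still has
 * any free space at all (on the freelist or in the btrees) and 0 only when
 * the AG is completely full.
 */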
1842
1843/*
1844 * Decide whether to use this allocation group for this allocation.
1845 * If so, fix up the btree freelist's size.
1846 */
1847STATIC int                      /* error */
1848xfs_alloc_fix_freelist(
1849        xfs_alloc_arg_t *args,  /* allocation argument structure */
1850        int             flags)  /* XFS_ALLOC_FLAG_... */
1851{
1852        xfs_buf_t       *agbp;  /* agf buffer pointer */
1853        xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
1854        xfs_buf_t       *agflbp;/* agfl buffer pointer */
1855        xfs_agblock_t   bno;    /* freelist block */
1856        xfs_extlen_t    delta;  /* new blocks needed in freelist */
1857        int             error;  /* error result code */
1858        xfs_extlen_t    longest;/* longest extent in allocation group */
1859        xfs_mount_t     *mp;    /* file system mount point structure */
1860        xfs_extlen_t    need;   /* total blocks needed in freelist */
1861        xfs_perag_t     *pag;   /* per-ag information structure */
1862        xfs_alloc_arg_t targs;  /* local allocation arguments */
1863        xfs_trans_t     *tp;    /* transaction pointer */
1864
1865        mp = args->mp;
1866
1867        pag = args->pag;
1868        tp = args->tp;
1869        if (!pag->pagf_init) {
1870                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1871                                &agbp)))
1872                        return error;
1873                if (!pag->pagf_init) {
1874                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1875                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1876                        args->agbp = NULL;
1877                        return 0;
1878                }
1879        } else
1880                agbp = NULL;
1881
1882        /*
1883         * If this is a metadata preferred pag and we are allocating user
1884         * data, then try somewhere else, unless we are being asked to try
1885         * harder at this point.
1886         */
1887        if (pag->pagf_metadata && args->userdata &&
1888            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1889                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1890                args->agbp = NULL;
1891                return 0;
1892        }
1893
1894        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1895                /*
1896                 * If it looks like there isn't a long enough extent, or enough
1897                 * total blocks, reject it.
1898                 */
1899                need = XFS_MIN_FREELIST_PAG(pag, mp);
1900                longest = xfs_alloc_longest_free_extent(mp, pag);
1901                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1902                                longest ||
1903                    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1904                           need - args->total) < (int)args->minleft)) {
1905                        if (agbp)
1906                                xfs_trans_brelse(tp, agbp);
1907                        args->agbp = NULL;
1908                        return 0;
1909                }
1910        }
1911
1912        /*
1913         * Get the a.g. freespace buffer.
1914         * Can fail if we're not blocking on locks, and it's held.
1915         */
1916        if (agbp == NULL) {
1917                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1918                                &agbp)))
1919                        return error;
1920                if (agbp == NULL) {
1921                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1922                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1923                        args->agbp = NULL;
1924                        return 0;
1925                }
1926        }
1927        /*
1928         * Figure out how many blocks we should have in the freelist.
1929         */
1930        agf = XFS_BUF_TO_AGF(agbp);
1931        need = XFS_MIN_FREELIST(agf, mp);
1932        /*
1933         * If there isn't enough total space or a single long enough extent, reject it.
1934         */
1935        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1936                delta = need > be32_to_cpu(agf->agf_flcount) ?
1937                        (need - be32_to_cpu(agf->agf_flcount)) : 0;
1938                longest = be32_to_cpu(agf->agf_longest);
1939                longest = (longest > delta) ? (longest - delta) :
1940                        (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1941                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1942                                longest ||
1943                    ((int)(be32_to_cpu(agf->agf_freeblks) +
1944                     be32_to_cpu(agf->agf_flcount) - need - args->total) <
1945                                (int)args->minleft)) {
1946                        xfs_trans_brelse(tp, agbp);
1947                        args->agbp = NULL;
1948                        return 0;
1949                }
1950        }
1951        /*
1952         * Make the freelist shorter if it's too long.
1953         */
1954        while (be32_to_cpu(agf->agf_flcount) > need) {
1955                xfs_buf_t       *bp;
1956
1957                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1958                if (error)
1959                        return error;
1960                if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1961                        return error;
1962                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1963                xfs_trans_binval(tp, bp);
1964        }
1965        /*
1966         * Initialize the args structure.
1967         */
1968        memset(&targs, 0, sizeof(targs));
1969        targs.tp = tp;
1970        targs.mp = mp;
1971        targs.agbp = agbp;
1972        targs.agno = args->agno;
1973        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1974        targs.type = XFS_ALLOCTYPE_THIS_AG;
1975        targs.pag = pag;
1976        if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1977                return error;
1978        /*
1979         * Make the freelist longer if it's too short.
1980         */
1981        while (be32_to_cpu(agf->agf_flcount) < need) {
1982                targs.agbno = 0;
1983                targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1984                /*
1985                 * Allocate as many blocks as possible at once.
1986                 */
1987                if ((error = xfs_alloc_ag_vextent(&targs))) {
1988                        xfs_trans_brelse(tp, agflbp);
1989                        return error;
1990                }
1991                /*
1992                 * Stop if we run out.  Won't happen if callers are obeying
1993                 * the restrictions correctly.  Can happen for free calls
1994                 * on a completely full ag.
1995                 */
1996                if (targs.agbno == NULLAGBLOCK) {
1997                        if (flags & XFS_ALLOC_FLAG_FREEING)
1998                                break;
1999                        xfs_trans_brelse(tp, agflbp);
2000                        args->agbp = NULL;
2001                        return 0;
2002                }
2003                /*
2004                 * Put each allocated block on the list.
2005                 */
2006                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2007                        error = xfs_alloc_put_freelist(tp, agbp,
2008                                                        agflbp, bno, 0);
2009                        if (error)
2010                                return error;
2011                }
2012        }
2013        xfs_trans_brelse(tp, agflbp);
2014        args->agbp = agbp;
2015        return 0;
2016}
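
/*
 * Summary of the flow above: xfs_alloc_fix_freelist() first decides whether
 * this AG can plausibly satisfy the request (a long enough extent and enough
 * free blocks left over for minleft), then trims the AGFL back down to
 * XFS_MIN_FREELIST blocks by freeing the excess into the btrees, or grows it
 * with THIS_AG allocations pushed onto the list one block at a time.  On
 * return, args->agbp set to NULL with a zero return value simply means
 * "try another AG"; only a nonzero return is a real error.
 */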
2017
2018/*
2019 * Get a block from the freelist.
2020 * Returns the block's AG-relative address in *bnop, or NULLAGBLOCK if the freelist is empty.
2021 */
2022int                             /* error */
2023xfs_alloc_get_freelist(
2024        xfs_trans_t     *tp,    /* transaction pointer */
2025        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
2026        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
2027        int             btreeblk) /* destination is an AGF btree */
2028{
2029        xfs_agf_t       *agf;   /* a.g. freespace structure */
2030        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
2031        xfs_agblock_t   bno;    /* block number returned */
2032        __be32          *agfl_bno;
2033        int             error;
2034        int             logflags;
2035        xfs_mount_t     *mp = tp->t_mountp;
2036        xfs_perag_t     *pag;   /* per allocation group data */
2037
2038        /*
2039         * Freelist is empty, give up.
2040         */
2041        agf = XFS_BUF_TO_AGF(agbp);
2042        if (!agf->agf_flcount) {
2043                *bnop = NULLAGBLOCK;
2044                return 0;
2045        }
2046        /*
2047         * Read the array of free blocks.
2048         */
2049        error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2050                                    &agflbp);
2051        if (error)
2052                return error;
2053
2054
2055        /*
2056         * Get the block number and update the data structures.
2057         */
2058        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2059        bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2060        be32_add_cpu(&agf->agf_flfirst, 1);
2061        xfs_trans_brelse(tp, agflbp);
2062        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2063                agf->agf_flfirst = 0;
2064
2065        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2066        be32_add_cpu(&agf->agf_flcount, -1);
2067        xfs_trans_agflist_delta(tp, -1);
2068        pag->pagf_flcount--;
2069
2070        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2071        if (btreeblk) {
2072                be32_add_cpu(&agf->agf_btreeblks, 1);
2073                pag->pagf_btreeblks++;
2074                logflags |= XFS_AGF_BTREEBLKS;
2075        }
2076        xfs_perag_put(pag);
2077
2078        xfs_alloc_log_agf(tp, agbp, logflags);
2079        *bnop = bno;
2080
2081        return 0;
2082}
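
/*
 * The AGFL itself is treated as a circular buffer: agf_flfirst indexes the
 * next block to hand out and agf_fllast the block most recently put back,
 * with agf_flcount entries in between.  For example, on a non-CRC filesystem
 * with 512 byte sectors XFS_AGFL_SIZE(mp) is 128, so taking a block when
 * agf_flfirst == 127 wraps the index back to 0 above (the actual size depends
 * on the sector size and on whether CRCs are enabled).
 */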
2083
2084/*
2085 * Log the given fields from the agf structure.
2086 */
2087void
2088xfs_alloc_log_agf(
2089        xfs_trans_t     *tp,    /* transaction pointer */
2090        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2091        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2092{
2093        int     first;          /* first byte offset */
2094        int     last;           /* last byte offset */
2095        static const short      offsets[] = {
2096                offsetof(xfs_agf_t, agf_magicnum),
2097                offsetof(xfs_agf_t, agf_versionnum),
2098                offsetof(xfs_agf_t, agf_seqno),
2099                offsetof(xfs_agf_t, agf_length),
2100                offsetof(xfs_agf_t, agf_roots[0]),
2101                offsetof(xfs_agf_t, agf_levels[0]),
2102                offsetof(xfs_agf_t, agf_flfirst),
2103                offsetof(xfs_agf_t, agf_fllast),
2104                offsetof(xfs_agf_t, agf_flcount),
2105                offsetof(xfs_agf_t, agf_freeblks),
2106                offsetof(xfs_agf_t, agf_longest),
2107                offsetof(xfs_agf_t, agf_btreeblks),
2108                offsetof(xfs_agf_t, agf_uuid),
2109                sizeof(xfs_agf_t)
2110        };
2111
2112        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2113
2114        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2115
2116        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2117        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2118}
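
/*
 * Illustration of how the offsets table is used: logging
 * XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT makes xfs_btree_offsets() pick
 * first = offsetof(xfs_agf_t, agf_flfirst) and
 * last = offsetof(xfs_agf_t, agf_freeblks) - 1, i.e. the smallest contiguous
 * byte range that covers every dirtied field (agf_fllast rides along because
 * it sits between them), and that whole range is what gets logged.
 */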
2119
2120/*
2121 * Interface for inode allocation to force the pag data to be initialized.
2122 */
2123int                                     /* error */
2124xfs_alloc_pagf_init(
2125        xfs_mount_t             *mp,    /* file system mount structure */
2126        xfs_trans_t             *tp,    /* transaction pointer */
2127        xfs_agnumber_t          agno,   /* allocation group number */
2128        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2129{
2130        xfs_buf_t               *bp;
2131        int                     error;
2132
2133        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2134                return error;
2135        if (bp)
2136                xfs_trans_brelse(tp, bp);
2137        return 0;
2138}
2139
2140/*
2141 * Put the block on the freelist for the allocation group.
2142 */
2143int                                     /* error */
2144xfs_alloc_put_freelist(
2145        xfs_trans_t             *tp,    /* transaction pointer */
2146        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2147        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2148        xfs_agblock_t           bno,    /* block being freed */
2149        int                     btreeblk) /* block came from an AGF btree */
2150{
2151        xfs_agf_t               *agf;   /* a.g. freespace structure */
2152        __be32                  *blockp;/* pointer to array entry */
2153        int                     error;
2154        int                     logflags;
2155        xfs_mount_t             *mp;    /* mount structure */
2156        xfs_perag_t             *pag;   /* per allocation group data */
2157        __be32                  *agfl_bno;
2158        int                     startoff;
2159
2160        agf = XFS_BUF_TO_AGF(agbp);
2161        mp = tp->t_mountp;
2162
2163        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2164                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2165                return error;
2166        be32_add_cpu(&agf->agf_fllast, 1);
2167        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2168                agf->agf_fllast = 0;
2169
2170        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2171        be32_add_cpu(&agf->agf_flcount, 1);
2172        xfs_trans_agflist_delta(tp, 1);
2173        pag->pagf_flcount++;
2174
2175        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2176        if (btreeblk) {
2177                be32_add_cpu(&agf->agf_btreeblks, -1);
2178                pag->pagf_btreeblks--;
2179                logflags |= XFS_AGF_BTREEBLKS;
2180        }
2181        xfs_perag_put(pag);
2182
2183        xfs_alloc_log_agf(tp, agbp, logflags);
2184
2185        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2186
2187        agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2188        blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2189        *blockp = cpu_to_be32(bno);
2190        startoff = (char *)blockp - (char *)agflbp->b_addr;
2191
2192        xfs_alloc_log_agf(tp, agbp, logflags);
2193
2194        xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2195        xfs_trans_log_buf(tp, agflbp, startoff,
2196                          startoff + sizeof(xfs_agblock_t) - 1);
2197        return 0;
2198}
2199
2200static bool
2201xfs_agf_verify(
2202        struct xfs_mount *mp,
2203        struct xfs_buf  *bp)
2204{
2205        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
2206
2207        if (xfs_sb_version_hascrc(&mp->m_sb) &&
2208            !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2209                        return false;
2210
2211        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2212              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2213              be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2214              be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2215              be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2216              be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2217                return false;
2218
2219        /*
2220         * During growfs operations the perag is not fully initialised, so it
2221         * can't be used for any useful checking. growfs uses uncached buffers,
2222         * which never have the perag attached, so this case can be detected
2223         * and skipped here.
2224         */
2225        if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2226                return false;
2227
2228        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2229            be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2230                return false;
2231
2232        return true;
2233
2234}
2235
2236static void
2237xfs_agf_read_verify(
2238        struct xfs_buf  *bp)
2239{
2240        struct xfs_mount *mp = bp->b_target->bt_mount;
2241        int             agf_ok = 1;
2242
2243        if (xfs_sb_version_hascrc(&mp->m_sb))
2244                agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2245                                          offsetof(struct xfs_agf, agf_crc));
2246
2247        agf_ok = agf_ok && xfs_agf_verify(mp, bp);
2248
2249        if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2250                        XFS_RANDOM_ALLOC_READ_AGF))) {
2251                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2252                xfs_buf_ioerror(bp, EFSCORRUPTED);
2253        }
2254}
2255
2256static void
2257xfs_agf_write_verify(
2258        struct xfs_buf  *bp)
2259{
2260        struct xfs_mount *mp = bp->b_target->bt_mount;
2261        struct xfs_buf_log_item *bip = bp->b_fspriv;
2262
2263        if (!xfs_agf_verify(mp, bp)) {
2264                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2265                xfs_buf_ioerror(bp, EFSCORRUPTED);
2266                return;
2267        }
2268
2269        if (!xfs_sb_version_hascrc(&mp->m_sb))
2270                return;
2271
2272        if (bip)
2273                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2274
2275        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
2276                         offsetof(struct xfs_agf, agf_crc));
2277}
2278
2279const struct xfs_buf_ops xfs_agf_buf_ops = {
2280        .verify_read = xfs_agf_read_verify,
2281        .verify_write = xfs_agf_write_verify,
2282};
2283
2284/*
2285 * Read in the allocation group header (free/alloc section).
2286 */
2287int                                     /* error */
2288xfs_read_agf(
2289        struct xfs_mount        *mp,    /* mount point structure */
2290        struct xfs_trans        *tp,    /* transaction pointer */
2291        xfs_agnumber_t          agno,   /* allocation group number */
2292        int                     flags,  /* XFS_BUF_ */
2293        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2294{
2295        int             error;
2296
2297        ASSERT(agno != NULLAGNUMBER);
2298        error = xfs_trans_read_buf(
2299                        mp, tp, mp->m_ddev_targp,
2300                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2301                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2302        if (error)
2303                return error;
2304        if (!*bpp)
2305                return 0;
2306
2307        ASSERT(!(*bpp)->b_error);
2308        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2309        return 0;
2310}
2311
2312/*
2313 * Read in the allocation group header (free/alloc section) and initialise the in-core per-ag data on first read.
2314 */
2315int                                     /* error */
2316xfs_alloc_read_agf(
2317        struct xfs_mount        *mp,    /* mount point structure */
2318        struct xfs_trans        *tp,    /* transaction pointer */
2319        xfs_agnumber_t          agno,   /* allocation group number */
2320        int                     flags,  /* XFS_ALLOC_FLAG_... */
2321        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2322{
2323        struct xfs_agf          *agf;           /* ag freelist header */
2324        struct xfs_perag        *pag;           /* per allocation group data */
2325        int                     error;
2326
2327        ASSERT(agno != NULLAGNUMBER);
2328
2329        error = xfs_read_agf(mp, tp, agno,
2330                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2331                        bpp);
2332        if (error)
2333                return error;
2334        if (!*bpp)
2335                return 0;
2336        ASSERT(!(*bpp)->b_error);
2337
2338        agf = XFS_BUF_TO_AGF(*bpp);
2339        pag = xfs_perag_get(mp, agno);
2340        if (!pag->pagf_init) {
2341                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2342                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2343                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2344                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2345                pag->pagf_levels[XFS_BTNUM_BNOi] =
2346                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2347                pag->pagf_levels[XFS_BTNUM_CNTi] =
2348                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2349                spin_lock_init(&pag->pagb_lock);
2350                pag->pagb_count = 0;
2351                pag->pagb_tree = RB_ROOT;
2352                pag->pagf_init = 1;
2353        }
2354#ifdef DEBUG
2355        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2356                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2357                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2358                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2359                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2360                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2361                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2362                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2363                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2364        }
2365#endif
2366        xfs_perag_put(pag);
2367        return 0;
2368}
2369
2370/*
2371 * Allocate an extent (variable-size).
2372 * Depending on the allocation type, we either look in a single allocation
2373 * group or loop over the allocation groups to find the result.
2374 */
2375int                             /* error */
2376xfs_alloc_vextent(
2377        xfs_alloc_arg_t *args)  /* allocation argument structure */
2378{
2379        xfs_agblock_t   agsize; /* allocation group size */
2380        int             error;
2381        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2382        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2383        xfs_mount_t     *mp;    /* mount structure pointer */
2384        xfs_agnumber_t  sagno;  /* starting allocation group number */
2385        xfs_alloctype_t type;   /* input allocation type */
2386        int             bump_rotor = 0;
2387        int             no_min = 0;
2388        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2389
2390        mp = args->mp;
2391        type = args->otype = args->type;
2392        args->agbno = NULLAGBLOCK;
2393        /*
2394         * Just fix this up, for the case where the last a.g. is shorter
2395         * (or there's only one a.g.) and the caller couldn't easily figure
2396         * that out (xfs_bmap_alloc).
2397         */
2398        agsize = mp->m_sb.sb_agblocks;
2399        if (args->maxlen > agsize)
2400                args->maxlen = agsize;
2401        if (args->alignment == 0)
2402                args->alignment = 1;
2403        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2404        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2405        ASSERT(args->minlen <= args->maxlen);
2406        ASSERT(args->minlen <= agsize);
2407        ASSERT(args->mod < args->prod);
2408        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2409            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2410            args->minlen > args->maxlen || args->minlen > agsize ||
2411            args->mod >= args->prod) {
2412                args->fsbno = NULLFSBLOCK;
2413                trace_xfs_alloc_vextent_badargs(args);
2414                return 0;
2415        }
2416        minleft = args->minleft;
2417
2418        switch (type) {
2419        case XFS_ALLOCTYPE_THIS_AG:
2420        case XFS_ALLOCTYPE_NEAR_BNO:
2421        case XFS_ALLOCTYPE_THIS_BNO:
2422                /*
2423                 * These three force us into a single a.g.
2424                 */
2425                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2426                args->pag = xfs_perag_get(mp, args->agno);
2427                args->minleft = 0;
2428                error = xfs_alloc_fix_freelist(args, 0);
2429                args->minleft = minleft;
2430                if (error) {
2431                        trace_xfs_alloc_vextent_nofix(args);
2432                        goto error0;
2433                }
2434                if (!args->agbp) {
2435                        trace_xfs_alloc_vextent_noagbp(args);
2436                        break;
2437                }
2438                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2439                if ((error = xfs_alloc_ag_vextent(args)))
2440                        goto error0;
2441                break;
2442        case XFS_ALLOCTYPE_START_BNO:
2443                /*
2444                 * Try near allocation first, then anywhere-in-ag after
2445                 * the first a.g. fails.
2446                 */
2447                if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
2448                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2449                        args->fsbno = XFS_AGB_TO_FSB(mp,
2450                                        ((mp->m_agfrotor / rotorstep) %
2451                                        mp->m_sb.sb_agcount), 0);
2452                        bump_rotor = 1;
2453                }
2454                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2455                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2456                /* FALLTHROUGH */
2457        case XFS_ALLOCTYPE_ANY_AG:
2458        case XFS_ALLOCTYPE_START_AG:
2459        case XFS_ALLOCTYPE_FIRST_AG:
2460                /*
2461                 * Rotate through the allocation groups looking for a winner.
2462                 */
2463                if (type == XFS_ALLOCTYPE_ANY_AG) {
2464                        /*
2465                         * Start with the last place we left off.
2466                         */
2467                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2468                                        mp->m_sb.sb_agcount;
2469                        args->type = XFS_ALLOCTYPE_THIS_AG;
2470                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2471                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2472                        /*
2473                         * Start with allocation group given by bno.
2474                         */
2475                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2476                        args->type = XFS_ALLOCTYPE_THIS_AG;
2477                        sagno = 0;
2478                        flags = 0;
2479                } else {
2480                        if (type == XFS_ALLOCTYPE_START_AG)
2481                                args->type = XFS_ALLOCTYPE_THIS_AG;
2482                        /*
2483                         * Start with the given allocation group.
2484                         */
2485                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2486                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2487                }
2488                /*
2489                 * Loop over allocation groups twice; first time with
2490                 * trylock set, second time without.
2491                 */
2492                for (;;) {
2493                        args->pag = xfs_perag_get(mp, args->agno);
2494                        if (no_min) args->minleft = 0;
2495                        error = xfs_alloc_fix_freelist(args, flags);
2496                        args->minleft = minleft;
2497                        if (error) {
2498                                trace_xfs_alloc_vextent_nofix(args);
2499                                goto error0;
2500                        }
2501                        /*
2502                         * If we get a buffer back then the allocation will fly.
2503                         */
2504                        if (args->agbp) {
2505                                if ((error = xfs_alloc_ag_vextent(args)))
2506                                        goto error0;
2507                                break;
2508                        }
2509
2510                        trace_xfs_alloc_vextent_loopfailed(args);
2511
2512                        /*
2513                         * Didn't work, figure out the next iteration.
2514                         */
2515                        if (args->agno == sagno &&
2516                            type == XFS_ALLOCTYPE_START_BNO)
2517                                args->type = XFS_ALLOCTYPE_THIS_AG;
2518                        /*
1519                         * For the first allocation, we can try any AG to get
1520                         * space.  However, if we already have allocated a
1521                         * block, we don't want to try AGs whose number is below
1522                         * sagno. Otherwise, we may end up with out-of-order
1523                         * locking of AGF, which might cause deadlock.
1524                         */
2525                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2526                                if (args->firstblock != NULLFSBLOCK)
2527                                        args->agno = sagno;
2528                                else
2529                                        args->agno = 0;
2530                        }
2531                        /*
2532                         * Reached the starting a.g., must either be done
2533                         * or switch to non-trylock mode.
2534                         */
2535                        if (args->agno == sagno) {
2536                                if (no_min == 1) {
2537                                        args->agbno = NULLAGBLOCK;
2538                                        trace_xfs_alloc_vextent_allfailed(args);
2539                                        break;
2540                                }
2541                                if (flags == 0) {
2542                                        no_min = 1;
2543                                } else {
2544                                        flags = 0;
2545                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2546                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2547                                                        args->fsbno);
2548                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2549                                        }
2550                                }
2551                        }
2552                        xfs_perag_put(args->pag);
2553                }
2554                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2555                        if (args->agno == sagno)
2556                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2557                                        (mp->m_sb.sb_agcount * rotorstep);
2558                        else
2559                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2560                                        (mp->m_sb.sb_agcount * rotorstep);
2561                }
2562                break;
2563        default:
2564                ASSERT(0);
2565                /* NOTREACHED */
2566        }
2567        if (args->agbno == NULLAGBLOCK)
2568                args->fsbno = NULLFSBLOCK;
2569        else {
2570                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2571#ifdef DEBUG
2572                ASSERT(args->len >= args->minlen);
2573                ASSERT(args->len <= args->maxlen);
2574                ASSERT(args->agbno % args->alignment == 0);
2575                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2576                        args->len);
2577#endif
2578        }
2579        xfs_perag_put(args->pag);
2580        return 0;
2581error0:
2582        xfs_perag_put(args->pag);
2583        return error;
2584}
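
/*
 * Minimal caller sketch for xfs_alloc_vextent() (the values are only an
 * example and target_fsb is a hypothetical hint block; real callers such as
 * the bmap code fill in many more fields):
 *
 *	memset(&args, 0, sizeof(args));
 *	args.tp = tp;
 *	args.mp = mp;
 *	args.fsbno = target_fsb;
 *	args.type = XFS_ALLOCTYPE_START_BNO;	near the hint, other AGs later
 *	args.minlen = 1;
 *	args.maxlen = 16;
 *	args.prod = 1;
 *	args.total = args.minlen;
 *	error = xfs_alloc_vextent(&args);
 *
 * On return args.fsbno == NULLFSBLOCK means no space was found; otherwise
 * args.fsbno/args.len describe the allocated extent.
 */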
2585
2586/*
2587 * Free an extent.
2588 * Just break up the extent address and hand off to xfs_free_ag_extent
2589 * after fixing up the freelist.
2590 */
2591int                             /* error */
2592xfs_free_extent(
2593        xfs_trans_t     *tp,    /* transaction pointer */
2594        xfs_fsblock_t   bno,    /* starting block number of extent */
2595        xfs_extlen_t    len)    /* length of extent */
2596{
2597        xfs_alloc_arg_t args;
2598        int             error;
2599
2600        ASSERT(len != 0);
2601        memset(&args, 0, sizeof(xfs_alloc_arg_t));
2602        args.tp = tp;
2603        args.mp = tp->t_mountp;
2604
2605        /*
2606         * validate that the block number is legal - this enables us to detect
2607         * and handle silent filesystem corruption rather than crashing.
2608         */
2609        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2610        if (args.agno >= args.mp->m_sb.sb_agcount)
2611                return EFSCORRUPTED;
2612
2613        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2614        if (args.agbno >= args.mp->m_sb.sb_agblocks)
2615                return EFSCORRUPTED;
2616
2617        args.pag = xfs_perag_get(args.mp, args.agno);
2618        ASSERT(args.pag);
2619
2620        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2621        if (error)
2622                goto error0;
2623
2624        /* validate the extent size is legal now that we have the agf locked */
2625        if (args.agbno + len >
2626                        be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2627                error = EFSCORRUPTED;
2628                goto error0;
2629        }
2630
2631        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2632        if (!error)
2633                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2634error0:
2635        xfs_perag_put(args.pag);
2636        return error;
2637}
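
/*
 * A minimal sketch of calling xfs_free_extent() (fsbno and nblocks are
 * hypothetical names for an extent the transaction's owner no longer needs):
 *
 *	error = xfs_free_extent(tp, fsbno, nblocks);
 *
 * The extent goes back into its AG's by-block and by-size trees and is also
 * inserted into the busy extent tree, so it will not be handed out again for
 * uses that could hit the disk before the free is on stable storage.
 */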
2638