linux/fs/xfs/xfs_alloc_btree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_inum.h"
  24#include "xfs_trans.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_dir2.h"
  28#include "xfs_dmapi.h"
  29#include "xfs_mount.h"
  30#include "xfs_bmap_btree.h"
  31#include "xfs_alloc_btree.h"
  32#include "xfs_ialloc_btree.h"
  33#include "xfs_dir2_sf.h"
  34#include "xfs_attr_sf.h"
  35#include "xfs_dinode.h"
  36#include "xfs_inode.h"
  37#include "xfs_btree.h"
  38#include "xfs_ialloc.h"
  39#include "xfs_alloc.h"
  40#include "xfs_error.h"
  41
  42/*
  43 * Prototypes for internal functions.
  44 */
  45
  46STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
  47STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
  48STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
  49STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
  50STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
  51STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
  52STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
  53STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
  54                xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
  55STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
  56
  57/*
  58 * Internal functions.
  59 */
  60
  61/*
  62 * Single level of the xfs_alloc_delete record deletion routine.
  63 * Delete record pointed to by cur/level.
  64 * Remove the record from its block then rebalance the tree.
  65 * Return 0 for error, 1 for done, 2 to go on to the next level.
  66 */
  67STATIC int                              /* error */
  68xfs_alloc_delrec(
  69        xfs_btree_cur_t         *cur,   /* btree cursor */
  70        int                     level,  /* level removing record from */
  71        int                     *stat)  /* fail/done/go-on */
  72{
  73        xfs_agf_t               *agf;   /* allocation group freelist header */
  74        xfs_alloc_block_t       *block; /* btree block record/key lives in */
  75        xfs_agblock_t           bno;    /* btree block number */
  76        xfs_buf_t               *bp;    /* buffer for block */
  77        int                     error;  /* error return value */
  78        int                     i;      /* loop index */
  79        xfs_alloc_key_t         key;    /* kp points here if block is level 0 */
  80        xfs_agblock_t           lbno;   /* left block's block number */
  81        xfs_buf_t               *lbp;   /* left block's buffer pointer */
  82        xfs_alloc_block_t       *left;  /* left btree block */
  83        xfs_alloc_key_t         *lkp=NULL;      /* left block key pointer */
  84        xfs_alloc_ptr_t         *lpp=NULL;      /* left block address pointer */
  85        int                     lrecs=0;        /* number of records in left block */
  86        xfs_alloc_rec_t         *lrp;   /* left block record pointer */
  87        xfs_mount_t             *mp;    /* mount structure */
  88        int                     ptr;    /* index in btree block for this rec */
  89        xfs_agblock_t           rbno;   /* right block's block number */
  90        xfs_buf_t               *rbp;   /* right block's buffer pointer */
  91        xfs_alloc_block_t       *right; /* right btree block */
  92        xfs_alloc_key_t         *rkp;   /* right block key pointer */
  93        xfs_alloc_ptr_t         *rpp;   /* right block address pointer */
  94        int                     rrecs=0;        /* number of records in right block */
  95        int                     numrecs;
  96        xfs_alloc_rec_t         *rrp;   /* right block record pointer */
  97        xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
  98
  99        /*
 100         * Get the index of the entry being deleted, check for nothing there.
 101         */
 102        ptr = cur->bc_ptrs[level];
 103        if (ptr == 0) {
 104                *stat = 0;
 105                return 0;
 106        }
 107        /*
 108         * Get the buffer & block containing the record or key/ptr.
 109         */
 110        bp = cur->bc_bufs[level];
 111        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 112#ifdef DEBUG
 113        if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
 114                return error;
 115#endif
 116        /*
 117         * Fail if we're off the end of the block.
 118         */
 119        numrecs = be16_to_cpu(block->bb_numrecs);
 120        if (ptr > numrecs) {
 121                *stat = 0;
 122                return 0;
 123        }
 124        XFS_STATS_INC(xs_abt_delrec);
 125        /*
 126         * It's a nonleaf.  Excise the key and ptr being deleted, by
 127         * sliding the entries past them down one.
 128         * Log the changed areas of the block.
 129         */
 130        if (level > 0) {
 131                lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
 132                lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
 133#ifdef DEBUG
 134                for (i = ptr; i < numrecs; i++) {
 135                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
 136                                return error;
 137                }
 138#endif
 139                if (ptr < numrecs) {
 140                        memmove(&lkp[ptr - 1], &lkp[ptr],
 141                                (numrecs - ptr) * sizeof(*lkp));
 142                        memmove(&lpp[ptr - 1], &lpp[ptr],
 143                                (numrecs - ptr) * sizeof(*lpp));
 144                        xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
 145                        xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
 146                }
 147        }
 148        /*
 149         * It's a leaf.  Excise the record being deleted, by sliding the
 150         * entries past it down one.  Log the changed areas of the block.
 151         */
 152        else {
 153                lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 154                if (ptr < numrecs) {
 155                        memmove(&lrp[ptr - 1], &lrp[ptr],
 156                                (numrecs - ptr) * sizeof(*lrp));
 157                        xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
 158                }
 159                /*
 160                 * If it's the first record in the block, we'll need a key
 161                 * structure to pass up to the next level (updkey).
 162                 */
 163                if (ptr == 1) {
 164                        key.ar_startblock = lrp->ar_startblock;
 165                        key.ar_blockcount = lrp->ar_blockcount;
 166                        lkp = &key;
 167                }
 168        }
 169        /*
 170         * Decrement and log the number of entries in the block.
 171         */
 172        numrecs--;
 173        block->bb_numrecs = cpu_to_be16(numrecs);
 174        xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
 175        /*
 176         * See if the longest free extent in the allocation group was
 177         * changed by this operation.  True if it's the by-size btree, and
 178         * this is the leaf level, and there is no right sibling block,
 179         * and this was the last record.
 180         */
 181        agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 182        mp = cur->bc_mp;
 183
 184        if (level == 0 &&
 185            cur->bc_btnum == XFS_BTNUM_CNT &&
 186            be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
 187            ptr > numrecs) {
 188                ASSERT(ptr == numrecs + 1);
 189                /*
 190                 * There are still records in the block.  Grab the size
 191                 * from the last one.
 192                 */
 193                if (numrecs) {
 194                        rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
 195                        agf->agf_longest = rrp->ar_blockcount;
 196                }
 197                /*
 198                 * No free extents left.
 199                 */
 200                else
 201                        agf->agf_longest = 0;
 202                mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
 203                        be32_to_cpu(agf->agf_longest);
 204                xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 205                        XFS_AGF_LONGEST);
 206        }
 207        /*
 208         * Is this the root level?  If so, we're almost done.
 209         */
 210        if (level == cur->bc_nlevels - 1) {
 211                /*
 212                 * If this is the root level,
 213                 * and there's only one entry left,
 214                 * and it's NOT the leaf level,
 215                 * then we can get rid of this level.
 216                 */
 217                if (numrecs == 1 && level > 0) {
 218                        /*
 219                         * lpp is still set to the first pointer in the block.
 220                         * Make it the new root of the btree.
 221                         */
 222                        bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
 223                        agf->agf_roots[cur->bc_btnum] = *lpp;
 224                        be32_add(&agf->agf_levels[cur->bc_btnum], -1);
 225                        mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
 226                        /*
 227                         * Put this buffer/block on the ag's freelist.
 228                         */
 229                        error = xfs_alloc_put_freelist(cur->bc_tp,
 230                                        cur->bc_private.a.agbp, NULL, bno, 1);
 231                        if (error)
 232                                return error;
 233                        /*
 234                         * Since blocks move to the free list without the
 235                         * coordination used in xfs_bmap_finish, we can't allow
 236                         * block to be available for reallocation and
 237                         * non-transaction writing (user data) until we know
 238                         * that the transaction that moved it to the free list
 239                         * is permanently on disk. We track the blocks by
 240                         * declaring these blocks as "busy"; the busy list is
 241                         * maintained on a per-ag basis and each transaction
 242                         * records which entries should be removed when the
 243                         * iclog commits to disk. If a busy block is
 244                         * allocated, the iclog is pushed up to the LSN
 245                         * that freed the block.
 246                         */
 247                        xfs_alloc_mark_busy(cur->bc_tp,
 248                                be32_to_cpu(agf->agf_seqno), bno, 1);
 249
 250                        xfs_trans_agbtree_delta(cur->bc_tp, -1);
 251                        xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 252                                XFS_AGF_ROOTS | XFS_AGF_LEVELS);
 253                        /*
 254                         * Update the cursor so there's one fewer level.
 255                         */
 256                        xfs_btree_setbuf(cur, level, NULL);
 257                        cur->bc_nlevels--;
 258                } else if (level > 0 &&
 259                           (error = xfs_alloc_decrement(cur, level, &i)))
 260                        return error;
 261                *stat = 1;
 262                return 0;
 263        }
 264        /*
 265         * If we deleted the leftmost entry in the block, update the
 266         * key values above us in the tree.
 267         */
 268        if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
 269                return error;
 270        /*
 271         * If the number of records remaining in the block is at least
 272         * the minimum, we're done.
 273         */
 274        if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 275                if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
 276                        return error;
 277                *stat = 1;
 278                return 0;
 279        }
 280        /*
 281         * Otherwise, we have to move some records around to keep the
 282         * tree balanced.  Look at the left and right sibling blocks to
 283         * see if we can re-balance by moving only one record.
 284         */
 285        rbno = be32_to_cpu(block->bb_rightsib);
 286        lbno = be32_to_cpu(block->bb_leftsib);
 287        bno = NULLAGBLOCK;
 288        ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
 289        /*
 290         * Duplicate the cursor so our btree manipulations here won't
 291         * disrupt the next level up.
 292         */
 293        if ((error = xfs_btree_dup_cursor(cur, &tcur)))
 294                return error;
 295        /*
 296         * If there's a right sibling, see if it's ok to shift an entry
 297         * out of it.
 298         */
 299        if (rbno != NULLAGBLOCK) {
 300                /*
 301                 * Move the temp cursor to the last entry in the next block.
 302                 * Actually any entry but the first would suffice.
 303                 */
 304                i = xfs_btree_lastrec(tcur, level);
 305                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 306                if ((error = xfs_alloc_increment(tcur, level, &i)))
 307                        goto error0;
 308                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 309                i = xfs_btree_lastrec(tcur, level);
 310                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 311                /*
 312                 * Grab a pointer to the block.
 313                 */
 314                rbp = tcur->bc_bufs[level];
 315                right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
 316#ifdef DEBUG
 317                if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
 318                        goto error0;
 319#endif
 320                /*
 321                 * Grab the current block number, for future use.
 322                 */
 323                bno = be32_to_cpu(right->bb_leftsib);
 324                /*
 325                 * If right block is full enough so that removing one entry
 326                 * won't make it too empty, and left-shifting an entry out
 327                 * of right to us works, we're done.
 328                 */
 329                if (be16_to_cpu(right->bb_numrecs) - 1 >=
 330                     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 331                        if ((error = xfs_alloc_lshift(tcur, level, &i)))
 332                                goto error0;
 333                        if (i) {
 334                                ASSERT(be16_to_cpu(block->bb_numrecs) >=
 335                                       XFS_ALLOC_BLOCK_MINRECS(level, cur));
 336                                xfs_btree_del_cursor(tcur,
 337                                                     XFS_BTREE_NOERROR);
 338                                if (level > 0 &&
 339                                    (error = xfs_alloc_decrement(cur, level,
 340                                            &i)))
 341                                        return error;
 342                                *stat = 1;
 343                                return 0;
 344                        }
 345                }
 346                /*
 347                 * Otherwise, grab the number of records in right for
 348                 * future reference, and fix up the temp cursor to point
 349                 * to our block again (last record).
 350                 */
 351                rrecs = be16_to_cpu(right->bb_numrecs);
 352                if (lbno != NULLAGBLOCK) {
 353                        i = xfs_btree_firstrec(tcur, level);
 354                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 355                        if ((error = xfs_alloc_decrement(tcur, level, &i)))
 356                                goto error0;
 357                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 358                }
 359        }
 360        /*
 361         * If there's a left sibling, see if it's ok to shift an entry
 362         * out of it.
 363         */
 364        if (lbno != NULLAGBLOCK) {
 365                /*
 366                 * Move the temp cursor to the first entry in the
 367                 * previous block.
 368                 */
 369                i = xfs_btree_firstrec(tcur, level);
 370                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 371                if ((error = xfs_alloc_decrement(tcur, level, &i)))
 372                        goto error0;
 373                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 374                xfs_btree_firstrec(tcur, level);
 375                /*
 376                 * Grab a pointer to the block.
 377                 */
 378                lbp = tcur->bc_bufs[level];
 379                left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
 380#ifdef DEBUG
 381                if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
 382                        goto error0;
 383#endif
 384                /*
 385                 * Grab the current block number, for future use.
 386                 */
 387                bno = be32_to_cpu(left->bb_rightsib);
 388                /*
 389                 * If left block is full enough so that removing one entry
 390                 * won't make it too empty, and right-shifting an entry out
 391                 * of left to us works, we're done.
 392                 */
 393                if (be16_to_cpu(left->bb_numrecs) - 1 >=
 394                     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
 395                        if ((error = xfs_alloc_rshift(tcur, level, &i)))
 396                                goto error0;
 397                        if (i) {
 398                                ASSERT(be16_to_cpu(block->bb_numrecs) >=
 399                                       XFS_ALLOC_BLOCK_MINRECS(level, cur));
 400                                xfs_btree_del_cursor(tcur,
 401                                                     XFS_BTREE_NOERROR);
 402                                if (level == 0)
 403                                        cur->bc_ptrs[0]++;
 404                                *stat = 1;
 405                                return 0;
 406                        }
 407                }
 408                /*
 409                 * Otherwise, grab the number of records in right for
 410                 * future reference.
 411                 */
 412                lrecs = be16_to_cpu(left->bb_numrecs);
 413        }
 414        /*
 415         * Delete the temp cursor, we're done with it.
 416         */
 417        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 418        /*
 419         * If here, we need to do a join to keep the tree balanced.
 420         */
 421        ASSERT(bno != NULLAGBLOCK);
 422        /*
 423         * See if we can join with the left neighbor block.
 424         */
 425        if (lbno != NULLAGBLOCK &&
 426            lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 427                /*
 428                 * Set "right" to be the starting block,
 429                 * "left" to be the left neighbor.
 430                 */
 431                rbno = bno;
 432                right = block;
 433                rrecs = be16_to_cpu(right->bb_numrecs);
 434                rbp = bp;
 435                if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 436                                cur->bc_private.a.agno, lbno, 0, &lbp,
 437                                XFS_ALLOC_BTREE_REF)))
 438                        return error;
 439                left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
 440                lrecs = be16_to_cpu(left->bb_numrecs);
 441                if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
 442                        return error;
 443        }
 444        /*
 445         * If that won't work, see if we can join with the right neighbor block.
 446         */
 447        else if (rbno != NULLAGBLOCK &&
 448                 rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 449                /*
 450                 * Set "left" to be the starting block,
 451                 * "right" to be the right neighbor.
 452                 */
 453                lbno = bno;
 454                left = block;
 455                lrecs = be16_to_cpu(left->bb_numrecs);
 456                lbp = bp;
 457                if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 458                                cur->bc_private.a.agno, rbno, 0, &rbp,
 459                                XFS_ALLOC_BTREE_REF)))
 460                        return error;
 461                right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
 462                rrecs = be16_to_cpu(right->bb_numrecs);
 463                if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
 464                        return error;
 465        }
 466        /*
 467         * Otherwise, we can't fix the imbalance.
 468         * Just return.  This is probably a logic error, but it's not fatal.
 469         */
 470        else {
 471                if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
 472                        return error;
 473                *stat = 1;
 474                return 0;
 475        }
 476        /*
 477         * We're now going to join "left" and "right" by moving all the stuff
 478         * in "right" to "left" and deleting "right".
 479         */
 480        if (level > 0) {
 481                /*
 482                 * It's a non-leaf.  Move keys and pointers.
 483                 */
 484                lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
 485                lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
 486                rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
 487                rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
 488#ifdef DEBUG
 489                for (i = 0; i < rrecs; i++) {
 490                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
 491                                return error;
 492                }
 493#endif
 494                memcpy(lkp, rkp, rrecs * sizeof(*lkp));
 495                memcpy(lpp, rpp, rrecs * sizeof(*lpp));
 496                xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
 497                xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
 498        } else {
 499                /*
 500                 * It's a leaf.  Move records.
 501                 */
 502                lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
 503                rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
 504                memcpy(lrp, rrp, rrecs * sizeof(*lrp));
 505                xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
 506        }
 507        /*
 508         * If we joined with the left neighbor, set the buffer in the
 509         * cursor to the left block, and fix up the index.
 510         */
 511        if (bp != lbp) {
 512                xfs_btree_setbuf(cur, level, lbp);
 513                cur->bc_ptrs[level] += lrecs;
 514        }
 515        /*
 516         * If we joined with the right neighbor and there's a level above
 517         * us, increment the cursor at that level.
 518         */
 519        else if (level + 1 < cur->bc_nlevels &&
 520                 (error = xfs_alloc_increment(cur, level + 1, &i)))
 521                return error;
 522        /*
 523         * Fix up the number of records in the surviving block.
 524         */
 525        lrecs += rrecs;
 526        left->bb_numrecs = cpu_to_be16(lrecs);
 527        /*
 528         * Fix up the right block pointer in the surviving block, and log it.
 529         */
 530        left->bb_rightsib = right->bb_rightsib;
 531        xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 532        /*
 533         * If there is a right sibling now, make it point to the
 534         * remaining block.
 535         */
 536        if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
 537                xfs_alloc_block_t       *rrblock;
 538                xfs_buf_t               *rrbp;
 539
 540                if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
 541                                cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
 542                                &rrbp, XFS_ALLOC_BTREE_REF)))
 543                        return error;
 544                rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
 545                if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
 546                        return error;
 547                rrblock->bb_leftsib = cpu_to_be32(lbno);
 548                xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
 549        }
 550        /*
 551         * Free the deleting block by putting it on the freelist.
 552         */
 553        error = xfs_alloc_put_freelist(cur->bc_tp,
 554                                         cur->bc_private.a.agbp, NULL, rbno, 1);
 555        if (error)
 556                return error;
 557        /*
 558         * Since blocks move to the free list without the coordination
 559         * used in xfs_bmap_finish, we can't allow block to be available
 560         * for reallocation and non-transaction writing (user data)
 561         * until we know that the transaction that moved it to the free
 562         * list is permanently on disk. We track the blocks by declaring
 563         * these blocks as "busy"; the busy list is maintained on a
 564         * per-ag basis and each transaction records which entries
 565         * should be removed when the iclog commits to disk. If a
 566         * busy block is allocated, the iclog is pushed up to the
 567         * LSN that freed the block.
 568         */
 569        xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
 570        xfs_trans_agbtree_delta(cur->bc_tp, -1);
 571
 572        /*
 573         * Adjust the current level's cursor so that we're left referring
 574         * to the right node, after we're done.
 575         * If this leaves the ptr value 0 our caller will fix it up.
 576         */
 577        if (level > 0)
 578                cur->bc_ptrs[level]--;
 579        /*
 580         * Return value means the next level up has something to do.
 581         */
 582        *stat = 2;
 583        return 0;
 584
 585error0:
 586        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 587        return error;
 588}
 589
 590/*
 591 * Insert one record/level.  Return information to the caller
 592 * allowing the next level up to proceed if necessary.
 593 */
 594STATIC int                              /* error */
 595xfs_alloc_insrec(
 596        xfs_btree_cur_t         *cur,   /* btree cursor */
 597        int                     level,  /* level to insert record at */
 598        xfs_agblock_t           *bnop,  /* i/o: block number inserted */
 599        xfs_alloc_rec_t         *recp,  /* i/o: record data inserted */
 600        xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
 601        int                     *stat)  /* output: success/failure */
 602{
 603        xfs_agf_t               *agf;   /* allocation group freelist header */
 604        xfs_alloc_block_t       *block; /* btree block record/key lives in */
 605        xfs_buf_t               *bp;    /* buffer for block */
 606        int                     error;  /* error return value */
 607        int                     i;      /* loop index */
 608        xfs_alloc_key_t         key;    /* key value being inserted */
 609        xfs_alloc_key_t         *kp;    /* pointer to btree keys */
 610        xfs_agblock_t           nbno;   /* block number of allocated block */
 611        xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
 612        xfs_alloc_key_t         nkey;   /* new key value, from split */
 613        xfs_alloc_rec_t         nrec;   /* new record value, for caller */
 614        int                     numrecs;
 615        int                     optr;   /* old ptr value */
 616        xfs_alloc_ptr_t         *pp;    /* pointer to btree addresses */
 617        int                     ptr;    /* index in btree block for this rec */
 618        xfs_alloc_rec_t         *rp;    /* pointer to btree records */
 619
 620        ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
 621
 622        /*
 623         * GCC doesn't understand the (arguably complex) control flow in
 624         * this function and complains about uninitialized structure fields
 625         * without this.
 626         */
 627        memset(&nrec, 0, sizeof(nrec));
 628
 629        /*
 630         * If we made it to the root level, allocate a new root block
 631         * and we're done.
 632         */
 633        if (level >= cur->bc_nlevels) {
 634                XFS_STATS_INC(xs_abt_insrec);
 635                if ((error = xfs_alloc_newroot(cur, &i)))
 636                        return error;
 637                *bnop = NULLAGBLOCK;
 638                *stat = i;
 639                return 0;
 640        }
 641        /*
 642         * Make a key out of the record data to be inserted, and save it.
 643         */
 644        key.ar_startblock = recp->ar_startblock;
 645        key.ar_blockcount = recp->ar_blockcount;
 646        optr = ptr = cur->bc_ptrs[level];
 647        /*
 648         * If we're off the left edge, return failure.
 649         */
 650        if (ptr == 0) {
 651                *stat = 0;
 652                return 0;
 653        }
 654        XFS_STATS_INC(xs_abt_insrec);
 655        /*
 656         * Get pointers to the btree buffer and block.
 657         */
 658        bp = cur->bc_bufs[level];
 659        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 660        numrecs = be16_to_cpu(block->bb_numrecs);
 661#ifdef DEBUG
 662        if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
 663                return error;
 664        /*
 665         * Check that the new entry is being inserted in the right place.
 666         */
 667        if (ptr <= numrecs) {
 668                if (level == 0) {
 669                        rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
 670                        xfs_btree_check_rec(cur->bc_btnum, recp, rp);
 671                } else {
 672                        kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
 673                        xfs_btree_check_key(cur->bc_btnum, &key, kp);
 674                }
 675        }
 676#endif
 677        nbno = NULLAGBLOCK;
 678        ncur = NULL;
 679        /*
 680         * If the block is full, we can't insert the new entry until we
 681         * make the block un-full.
 682         */
 683        if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
 684                /*
 685                 * First, try shifting an entry to the right neighbor.
 686                 */
 687                if ((error = xfs_alloc_rshift(cur, level, &i)))
 688                        return error;
 689                if (i) {
 690                        /* nothing */
 691                }
 692                /*
 693                 * Next, try shifting an entry to the left neighbor.
 694                 */
 695                else {
 696                        if ((error = xfs_alloc_lshift(cur, level, &i)))
 697                                return error;
 698                        if (i)
 699                                optr = ptr = cur->bc_ptrs[level];
 700                        else {
 701                                /*
 702                                 * Next, try splitting the current block in
 703                                 * half. If this works we have to re-set our
 704                                 * variables because we could be in a
 705                                 * different block now.
 706                                 */
 707                                if ((error = xfs_alloc_split(cur, level, &nbno,
 708                                                &nkey, &ncur, &i)))
 709                                        return error;
 710                                if (i) {
 711                                        bp = cur->bc_bufs[level];
 712                                        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 713#ifdef DEBUG
 714                                        if ((error =
 715                                                xfs_btree_check_sblock(cur,
 716                                                        block, level, bp)))
 717                                                return error;
 718#endif
 719                                        ptr = cur->bc_ptrs[level];
 720                                        nrec.ar_startblock = nkey.ar_startblock;
 721                                        nrec.ar_blockcount = nkey.ar_blockcount;
 722                                }
 723                                /*
 724                                 * Otherwise the insert fails.
 725                                 */
 726                                else {
 727                                        *stat = 0;
 728                                        return 0;
 729                                }
 730                        }
 731                }
 732        }
 733        /*
 734         * At this point we know there's room for our new entry in the block
 735         * we're pointing at.
 736         */
 737        numrecs = be16_to_cpu(block->bb_numrecs);
 738        if (level > 0) {
 739                /*
 740                 * It's a non-leaf entry.  Make a hole for the new data
 741                 * in the key and ptr regions of the block.
 742                 */
 743                kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
 744                pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
 745#ifdef DEBUG
 746                for (i = numrecs; i >= ptr; i--) {
 747                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
 748                                return error;
 749                }
 750#endif
 751                memmove(&kp[ptr], &kp[ptr - 1],
 752                        (numrecs - ptr + 1) * sizeof(*kp));
 753                memmove(&pp[ptr], &pp[ptr - 1],
 754                        (numrecs - ptr + 1) * sizeof(*pp));
 755#ifdef DEBUG
 756                if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
 757                        return error;
 758#endif
 759                /*
 760                 * Now stuff the new data in, bump numrecs and log the new data.
 761                 */
 762                kp[ptr - 1] = key;
 763                pp[ptr - 1] = cpu_to_be32(*bnop);
 764                numrecs++;
 765                block->bb_numrecs = cpu_to_be16(numrecs);
 766                xfs_alloc_log_keys(cur, bp, ptr, numrecs);
 767                xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
 768#ifdef DEBUG
 769                if (ptr < numrecs)
 770                        xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
 771                                kp + ptr);
 772#endif
 773        } else {
 774                /*
 775                 * It's a leaf entry.  Make a hole for the new record.
 776                 */
 777                rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 778                memmove(&rp[ptr], &rp[ptr - 1],
 779                        (numrecs - ptr + 1) * sizeof(*rp));
 780                /*
 781                 * Now stuff the new record in, bump numrecs
 782                 * and log the new data.
 783                 */
 784                rp[ptr - 1] = *recp;
 785                numrecs++;
 786                block->bb_numrecs = cpu_to_be16(numrecs);
 787                xfs_alloc_log_recs(cur, bp, ptr, numrecs);
 788#ifdef DEBUG
 789                if (ptr < numrecs)
 790                        xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
 791                                rp + ptr);
 792#endif
 793        }
 794        /*
 795         * Log the new number of records in the btree header.
 796         */
 797        xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
 798        /*
 799         * If we inserted at the start of a block, update the parents' keys.
 800         */
 801        if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
 802                return error;
 803        /*
 804         * Look to see if the longest extent in the allocation group
 805         * needs to be updated.
 806         */
 807
 808        agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 809        if (level == 0 &&
 810            cur->bc_btnum == XFS_BTNUM_CNT &&
 811            be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
 812            be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
 813                /*
 814                 * If this is a leaf in the by-size btree and there
 815                 * is no right sibling block and this block is bigger
 816                 * than the previous longest block, update it.
 817                 */
 818                agf->agf_longest = recp->ar_blockcount;
 819                cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
 820                        = be32_to_cpu(recp->ar_blockcount);
 821                xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
 822                        XFS_AGF_LONGEST);
 823        }
 824        /*
 825         * Return the new block number, if any.
 826         * If there is one, give back a record value and a cursor too.
 827         */
 828        *bnop = nbno;
 829        if (nbno != NULLAGBLOCK) {
 830                *recp = nrec;
 831                *curp = ncur;
 832        }
 833        *stat = 1;
 834        return 0;
 835}
 836
 837/*
 838 * Log header fields from a btree block.
 839 */
 840STATIC void
 841xfs_alloc_log_block(
 842        xfs_trans_t             *tp,    /* transaction pointer */
 843        xfs_buf_t               *bp,    /* buffer containing btree block */
 844        int                     fields) /* mask of fields: XFS_BB_... */
 845{
 846        int                     first;  /* first byte offset logged */
 847        int                     last;   /* last byte offset logged */
 848        static const short      offsets[] = {   /* table of offsets */
 849                offsetof(xfs_alloc_block_t, bb_magic),
 850                offsetof(xfs_alloc_block_t, bb_level),
 851                offsetof(xfs_alloc_block_t, bb_numrecs),
 852                offsetof(xfs_alloc_block_t, bb_leftsib),
 853                offsetof(xfs_alloc_block_t, bb_rightsib),
 854                sizeof(xfs_alloc_block_t)
 855        };
 856
 857        xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
 858        xfs_trans_log_buf(tp, bp, first, last);
 859}
 860
 861/*
 862 * Log keys from a btree block (nonleaf).
 863 */
 864STATIC void
 865xfs_alloc_log_keys(
 866        xfs_btree_cur_t         *cur,   /* btree cursor */
 867        xfs_buf_t               *bp,    /* buffer containing btree block */
 868        int                     kfirst, /* index of first key to log */
 869        int                     klast)  /* index of last key to log */
 870{
 871        xfs_alloc_block_t       *block; /* btree block to log from */
 872        int                     first;  /* first byte offset logged */
 873        xfs_alloc_key_t         *kp;    /* key pointer in btree block */
 874        int                     last;   /* last byte offset logged */
 875
 876        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 877        kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
 878        first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
 879        last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
 880        xfs_trans_log_buf(cur->bc_tp, bp, first, last);
 881}
 882
 883/*
 884 * Log block pointer fields from a btree block (nonleaf).
 885 */
 886STATIC void
 887xfs_alloc_log_ptrs(
 888        xfs_btree_cur_t         *cur,   /* btree cursor */
 889        xfs_buf_t               *bp,    /* buffer containing btree block */
 890        int                     pfirst, /* index of first pointer to log */
 891        int                     plast)  /* index of last pointer to log */
 892{
 893        xfs_alloc_block_t       *block; /* btree block to log from */
 894        int                     first;  /* first byte offset logged */
 895        int                     last;   /* last byte offset logged */
 896        xfs_alloc_ptr_t         *pp;    /* block-pointer pointer in btree blk */
 897
 898        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 899        pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
 900        first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
 901        last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
 902        xfs_trans_log_buf(cur->bc_tp, bp, first, last);
 903}
 904
 905/*
 906 * Log records from a btree block (leaf).
 907 */
 908STATIC void
 909xfs_alloc_log_recs(
 910        xfs_btree_cur_t         *cur,   /* btree cursor */
 911        xfs_buf_t               *bp,    /* buffer containing btree block */
 912        int                     rfirst, /* index of first record to log */
 913        int                     rlast)  /* index of last record to log */
 914{
 915        xfs_alloc_block_t       *block; /* btree block to log from */
 916        int                     first;  /* first byte offset logged */
 917        int                     last;   /* last byte offset logged */
 918        xfs_alloc_rec_t         *rp;    /* record pointer for btree block */
 919
 920
 921        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
 922        rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
 923#ifdef DEBUG
 924        {
 925                xfs_agf_t       *agf;
 926                xfs_alloc_rec_t *p;
 927
 928                agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 929                for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
 930                        ASSERT(be32_to_cpu(p->ar_startblock) +
 931                               be32_to_cpu(p->ar_blockcount) <=
 932                               be32_to_cpu(agf->agf_length));
 933        }
 934#endif
 935        first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
 936        last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
 937        xfs_trans_log_buf(cur->bc_tp, bp, first, last);
 938}
 939
 940/*
 941 * Lookup the record.  The cursor is made to point to it, based on dir.
 942 * Return 0 if can't find any such record, 1 for success.
 943 */
 944STATIC int                              /* error */
 945xfs_alloc_lookup(
 946        xfs_btree_cur_t         *cur,   /* btree cursor */
 947        xfs_lookup_t            dir,    /* <=, ==, or >= */
 948        int                     *stat)  /* success/failure */
 949{
 950        xfs_agblock_t           agbno;  /* a.g. relative btree block number */
 951        xfs_agnumber_t          agno;   /* allocation group number */
 952        xfs_alloc_block_t       *block=NULL;    /* current btree block */
 953        int                     diff;   /* difference for the current key */
 954        int                     error;  /* error return value */
 955        int                     keyno=0;        /* current key number */
 956        int                     level;  /* level in the btree */
 957        xfs_mount_t             *mp;    /* file system mount point */
 958
 959        XFS_STATS_INC(xs_abt_lookup);
 960        /*
 961         * Get the allocation group header, and the root block number.
 962         */
 963        mp = cur->bc_mp;
 964
 965        {
 966                xfs_agf_t       *agf;   /* a.g. freespace header */
 967
 968                agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 969                agno = be32_to_cpu(agf->agf_seqno);
 970                agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
 971        }
 972        /*
 973         * Iterate over each level in the btree, starting at the root.
 974         * For each level above the leaves, find the key we need, based
 975         * on the lookup record, then follow the corresponding block
 976         * pointer down to the next level.
 977         */
 978        for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
 979                xfs_buf_t       *bp;    /* buffer pointer for btree block */
 980                xfs_daddr_t     d;      /* disk address of btree block */
 981
 982                /*
 983                 * Get the disk address we're looking for.
 984                 */
 985                d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 986                /*
 987                 * If the old buffer at this level is for a different block,
 988                 * throw it away, otherwise just use it.
 989                 */
 990                bp = cur->bc_bufs[level];
 991                if (bp && XFS_BUF_ADDR(bp) != d)
 992                        bp = NULL;
 993                if (!bp) {
 994                        /*
 995                         * Need to get a new buffer.  Read it, then
 996                         * set it in the cursor, releasing the old one.
 997                         */
 998                        if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
 999                                        agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
1000                                return error;
1001                        xfs_btree_setbuf(cur, level, bp);
1002                        /*
1003                         * Point to the btree block, now that we have the buffer
1004                         */
1005                        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1006                        if ((error = xfs_btree_check_sblock(cur, block, level,
1007                                        bp)))
1008                                return error;
1009                } else
1010                        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1011                /*
1012                 * If we already had a key match at a higher level, we know
1013                 * we need to use the first entry in this block.
1014                 */
1015                if (diff == 0)
1016                        keyno = 1;
1017                /*
1018                 * Otherwise we need to search this block.  Do a binary search.
1019                 */
1020                else {
1021                        int             high;   /* high entry number */
1022                        xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
1023                        xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
1024                        int             low;    /* low entry number */
1025
1026                        /*
1027                         * Get a pointer to keys or records.
1028                         */
1029                        if (level > 0)
1030                                kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
1031                        else
1032                                krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
1033                        /*
1034                         * Set low and high entry numbers, 1-based.
1035                         */
1036                        low = 1;
1037                        if (!(high = be16_to_cpu(block->bb_numrecs))) {
1038                                /*
1039                                 * If the block is empty, the tree must
1040                                 * be an empty leaf.
1041                                 */
1042                                ASSERT(level == 0 && cur->bc_nlevels == 1);
1043                                cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1044                                *stat = 0;
1045                                return 0;
1046                        }
1047                        /*
1048                         * Binary search the block.
1049                         */
1050                        while (low <= high) {
1051                                xfs_extlen_t    blockcount;     /* key value */
1052                                xfs_agblock_t   startblock;     /* key value */
1053
1054                                XFS_STATS_INC(xs_abt_compare);
1055                                /*
1056                                 * keyno is average of low and high.
1057                                 */
1058                                keyno = (low + high) >> 1;
1059                                /*
1060                                 * Get startblock & blockcount.
1061                                 */
1062                                if (level > 0) {
1063                                        xfs_alloc_key_t *kkp;
1064
1065                                        kkp = kkbase + keyno - 1;
1066                                        startblock = be32_to_cpu(kkp->ar_startblock);
1067                                        blockcount = be32_to_cpu(kkp->ar_blockcount);
1068                                } else {
1069                                        xfs_alloc_rec_t *krp;
1070
1071                                        krp = krbase + keyno - 1;
1072                                        startblock = be32_to_cpu(krp->ar_startblock);
1073                                        blockcount = be32_to_cpu(krp->ar_blockcount);
1074                                }
1075                                /*
1076                                 * Compute difference to get next direction.
1077                                 */
1078                                if (cur->bc_btnum == XFS_BTNUM_BNO)
1079                                        diff = (int)startblock -
1080                                               (int)cur->bc_rec.a.ar_startblock;
1081                                else if (!(diff = (int)blockcount -
1082                                            (int)cur->bc_rec.a.ar_blockcount))
1083                                        diff = (int)startblock -
1084                                            (int)cur->bc_rec.a.ar_startblock;
1085                                /*
1086                                 * Less than, move right.
1087                                 */
1088                                if (diff < 0)
1089                                        low = keyno + 1;
1090                                /*
1091                                 * Greater than, move left.
1092                                 */
1093                                else if (diff > 0)
1094                                        high = keyno - 1;
1095                                /*
1096                                 * Equal, we're done.
1097                                 */
1098                                else
1099                                        break;
1100                        }
1101                }
1102                /*
1103                 * If there are more levels, set up for the next level
1104                 * by getting the block number and filling in the cursor.
1105                 */
1106                if (level > 0) {
1107                        /*
1108                         * If we moved left, need the previous key number,
1109                         * unless there isn't one.
1110                         */
1111                        if (diff > 0 && --keyno < 1)
1112                                keyno = 1;
1113                        agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
1114#ifdef DEBUG
1115                        if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1116                                return error;
1117#endif
1118                        cur->bc_ptrs[level] = keyno;
1119                }
1120        }
1121        /*
1122         * Done with the search.
1123         * See if we need to adjust the results.
1124         */
1125        if (dir != XFS_LOOKUP_LE && diff < 0) {
1126                keyno++;
1127                /*
1128                 * If ge search and we went off the end of the block, but it's
1129                 * not the last block, we're in the wrong block.
1130                 */
1131                if (dir == XFS_LOOKUP_GE &&
1132                    keyno > be16_to_cpu(block->bb_numrecs) &&
1133                    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1134                        int     i;
1135
1136                        cur->bc_ptrs[0] = keyno;
1137                        if ((error = xfs_alloc_increment(cur, 0, &i)))
1138                                return error;
1139                        XFS_WANT_CORRUPTED_RETURN(i == 1);
1140                        *stat = 1;
1141                        return 0;
1142                }
1143        }
1144        else if (dir == XFS_LOOKUP_LE && diff > 0)
1145                keyno--;
1146        cur->bc_ptrs[0] = keyno;
1147        /*
1148         * Return if we succeeded or not.
1149         */
1150        if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1151                *stat = 0;
1152        else
1153                *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1154        return 0;
1155}
1156
1157/*
1158 * Move 1 record left from cur/level if possible.
1159 * Update cur to reflect the new path.
1160 */
1161STATIC int                              /* error */
1162xfs_alloc_lshift(
1163        xfs_btree_cur_t         *cur,   /* btree cursor */
1164        int                     level,  /* level to shift record on */
1165        int                     *stat)  /* success/failure */
1166{
1167        int                     error;  /* error return value */
1168#ifdef DEBUG
1169        int                     i;      /* loop index */
1170#endif
1171        xfs_alloc_key_t         key;    /* key value for leaf level upward */
1172        xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1173        xfs_alloc_block_t       *left;  /* left neighbor btree block */
1174        int                     nrec;   /* new number of left block entries */
1175        xfs_buf_t               *rbp;   /* buffer for right (current) block */
1176        xfs_alloc_block_t       *right; /* right (current) btree block */
1177        xfs_alloc_key_t         *rkp=NULL;      /* key pointer for right block */
1178        xfs_alloc_ptr_t         *rpp=NULL;      /* address pointer for right block */
1179        xfs_alloc_rec_t         *rrp=NULL;      /* record pointer for right block */
1180
1181        /*
1182         * Set up variables for this block as "right".
1183         */
1184        rbp = cur->bc_bufs[level];
1185        right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1186#ifdef DEBUG
1187        if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1188                return error;
1189#endif
1190        /*
1191         * If we've got no left sibling then we can't shift an entry left.
1192         */
1193        if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1194                *stat = 0;
1195                return 0;
1196        }
1197        /*
1198         * If the cursor entry is the one that would be moved, don't
1199         * do it... it's too complicated.
1200         */
1201        if (cur->bc_ptrs[level] <= 1) {
1202                *stat = 0;
1203                return 0;
1204        }
1205        /*
1206         * Set up the left neighbor as "left".
1207         */
1208        if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1209                        cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1210                        0, &lbp, XFS_ALLOC_BTREE_REF)))
1211                return error;
1212        left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1213        if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1214                return error;
1215        /*
1216         * If it's full, it can't take another entry.
1217         */
1218        if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1219                *stat = 0;
1220                return 0;
1221        }
1222        nrec = be16_to_cpu(left->bb_numrecs) + 1;
1223        /*
1224         * If non-leaf, copy a key and a ptr to the left block.
1225         */
1226        if (level > 0) {
1227                xfs_alloc_key_t *lkp;   /* key pointer for left block */
1228                xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1229
1230                lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
1231                rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1232                *lkp = *rkp;
1233                xfs_alloc_log_keys(cur, lbp, nrec, nrec);
1234                lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
1235                rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1236#ifdef DEBUG
1237                if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1238                        return error;
1239#endif
1240                *lpp = *rpp;
1241                xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1242                xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1243        }
1244        /*
1245         * If leaf, copy a record to the left block.
1246         */
1247        else {
1248                xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1249
1250                lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
1251                rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1252                *lrp = *rrp;
1253                xfs_alloc_log_recs(cur, lbp, nrec, nrec);
1254                xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1255        }
1256        /*
1257         * Bump and log left's numrecs, decrement and log right's numrecs.
1258         */
1259        be16_add(&left->bb_numrecs, 1);
1260        xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1261        be16_add(&right->bb_numrecs, -1);
1262        xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1263        /*
1264         * Slide the contents of right down one entry.
1265         */
1266        if (level > 0) {
1267#ifdef DEBUG
1268                for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1269                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1270                                        level)))
1271                                return error;
1272                }
1273#endif
1274                memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1275                memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1276                xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1277                xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1278        } else {
1279                memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1280                xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1281                key.ar_startblock = rrp->ar_startblock;
1282                key.ar_blockcount = rrp->ar_blockcount;
1283                rkp = &key;
1284        }
1285        /*
1286         * Update the parent key values of right.
1287         */
1288        if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
1289                return error;
1290        /*
1291         * Slide the cursor value left one.
1292         */
1293        cur->bc_ptrs[level]--;
1294        *stat = 1;
1295        return 0;
1296}
1297
1298/*
1299 * Allocate a new root block, fill it in.
1300 */
1301STATIC int                              /* error */
1302xfs_alloc_newroot(
1303        xfs_btree_cur_t         *cur,   /* btree cursor */
1304        int                     *stat)  /* success/failure */
1305{
1306        int                     error;  /* error return value */
1307        xfs_agblock_t           lbno;   /* left block number */
1308        xfs_buf_t               *lbp;   /* left btree buffer */
1309        xfs_alloc_block_t       *left;  /* left btree block */
1310        xfs_mount_t             *mp;    /* mount structure */
1311        xfs_agblock_t           nbno;   /* new block number */
1312        xfs_buf_t               *nbp;   /* new (root) buffer */
1313        xfs_alloc_block_t       *new;   /* new (root) btree block */
1314        int                     nptr;   /* new value for key index, 1 or 2 */
1315        xfs_agblock_t           rbno;   /* right block number */
1316        xfs_buf_t               *rbp;   /* right btree buffer */
1317        xfs_alloc_block_t       *right; /* right btree block */
1318
1319        mp = cur->bc_mp;
1320
1321        ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
1322        /*
1323         * Get a buffer from the freelist blocks, for the new root.
1324         */
1325        error = xfs_alloc_get_freelist(cur->bc_tp,
1326                                        cur->bc_private.a.agbp, &nbno, 1);
1327        if (error)
1328                return error;
1329        /*
1330         * None available, we fail.
1331         */
1332        if (nbno == NULLAGBLOCK) {
1333                *stat = 0;
1334                return 0;
1335        }
1336        xfs_trans_agbtree_delta(cur->bc_tp, 1);
1337        nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
1338                0);
1339        new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
1340        /*
1341         * Set the root data in the a.g. freespace structure.
1342         */
1343        {
1344                xfs_agf_t       *agf;   /* a.g. freespace header */
1345                xfs_agnumber_t  seqno;
1346
1347                agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
1348                agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
1349                be32_add(&agf->agf_levels[cur->bc_btnum], 1);
1350                seqno = be32_to_cpu(agf->agf_seqno);
1351                mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
1352                xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
1353                        XFS_AGF_ROOTS | XFS_AGF_LEVELS);
1354        }
1355        /*
1356         * At the previous root level there are now two blocks: the old
1357         * root, and the new block generated when it was split.
1358         * We don't know which one the cursor is pointing at, so we
1359         * set up variables "left" and "right" for each case.
1360         */
1361        lbp = cur->bc_bufs[cur->bc_nlevels - 1];
1362        left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1363#ifdef DEBUG
1364        if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
1365                return error;
1366#endif
1367        if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
1368                /*
1369                 * Our block is left, pick up the right block.
1370                 */
1371                lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
1372                rbno = be32_to_cpu(left->bb_rightsib);
1373                if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1374                                cur->bc_private.a.agno, rbno, 0, &rbp,
1375                                XFS_ALLOC_BTREE_REF)))
1376                        return error;
1377                right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1378                if ((error = xfs_btree_check_sblock(cur, right,
1379                                cur->bc_nlevels - 1, rbp)))
1380                        return error;
1381                nptr = 1;
1382        } else {
1383                /*
1384                 * Our block is right, pick up the left block.
1385                 */
1386                rbp = lbp;
1387                right = left;
1388                rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
1389                lbno = be32_to_cpu(right->bb_leftsib);
1390                if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1391                                cur->bc_private.a.agno, lbno, 0, &lbp,
1392                                XFS_ALLOC_BTREE_REF)))
1393                        return error;
1394                left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1395                if ((error = xfs_btree_check_sblock(cur, left,
1396                                cur->bc_nlevels - 1, lbp)))
1397                        return error;
1398                nptr = 2;
1399        }
1400        /*
1401         * Fill in the new block's btree header and log it.
1402         */
1403        new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1404        new->bb_level = cpu_to_be16(cur->bc_nlevels);
1405        new->bb_numrecs = cpu_to_be16(2);
1406        new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1407        new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1408        xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
1409        ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1410        /*
1411         * Fill in the key data in the new root.
1412         */
1413        {
1414                xfs_alloc_key_t         *kp;    /* btree key pointer */
1415
1416                kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
1417                if (be16_to_cpu(left->bb_level) > 0) {
1418                        kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
1419                        kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
1420                } else {
1421                        xfs_alloc_rec_t *rp;    /* btree record pointer */
1422
1423                        rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
1424                        kp[0].ar_startblock = rp->ar_startblock;
1425                        kp[0].ar_blockcount = rp->ar_blockcount;
1426                        rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1427                        kp[1].ar_startblock = rp->ar_startblock;
1428                        kp[1].ar_blockcount = rp->ar_blockcount;
1429                }
1430        }
1431        xfs_alloc_log_keys(cur, nbp, 1, 2);
1432        /*
1433         * Fill in the pointer data in the new root.
1434         */
1435        {
1436                xfs_alloc_ptr_t         *pp;    /* btree address pointer */
1437
1438                pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
1439                pp[0] = cpu_to_be32(lbno);
1440                pp[1] = cpu_to_be32(rbno);
1441        }
1442        xfs_alloc_log_ptrs(cur, nbp, 1, 2);
1443        /*
1444         * Fix up the cursor.
1445         */
1446        xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1447        cur->bc_ptrs[cur->bc_nlevels] = nptr;
1448        cur->bc_nlevels++;
1449        *stat = 1;
1450        return 0;
1451}
1452
1453/*
1454 * Move 1 record right from cur/level if possible.
1455 * Update cur to reflect the new path.
1456 */
1457STATIC int                              /* error */
1458xfs_alloc_rshift(
1459        xfs_btree_cur_t         *cur,   /* btree cursor */
1460        int                     level,  /* level to shift record on */
1461        int                     *stat)  /* success/failure */
1462{
1463        int                     error;  /* error return value */
1464        int                     i;      /* loop index */
1465        xfs_alloc_key_t         key;    /* key value for leaf level upward */
1466        xfs_buf_t               *lbp;   /* buffer for left (current) block */
1467        xfs_alloc_block_t       *left;  /* left (current) btree block */
1468        xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1469        xfs_alloc_block_t       *right; /* right neighbor btree block */
1470        xfs_alloc_key_t         *rkp;   /* key pointer for right block */
1471        xfs_btree_cur_t         *tcur;  /* temporary cursor */
1472
1473        /*
1474         * Set up variables for this block as "left".
1475         */
1476        lbp = cur->bc_bufs[level];
1477        left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1478#ifdef DEBUG
1479        if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1480                return error;
1481#endif
1482        /*
1483         * If we've got no right sibling then we can't shift an entry right.
1484         */
1485        if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1486                *stat = 0;
1487                return 0;
1488        }
1489        /*
1490         * If the cursor entry is the one that would be moved, don't
1491         * do it... it's too complicated.
1492         */
1493        if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1494                *stat = 0;
1495                return 0;
1496        }
1497        /*
1498         * Set up the right neighbor as "right".
1499         */
1500        if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1501                        cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1502                        0, &rbp, XFS_ALLOC_BTREE_REF)))
1503                return error;
1504        right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1505        if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1506                return error;
1507        /*
1508         * If it's full, it can't take another entry.
1509         */
1510        if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1511                *stat = 0;
1512                return 0;
1513        }
1514        /*
1515         * Make a hole at the start of the right neighbor block, then
1516         * copy the last left block entry to the hole.
1517         */
1518        if (level > 0) {
1519                xfs_alloc_key_t *lkp;   /* key pointer for left block */
1520                xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1521                xfs_alloc_ptr_t *rpp;   /* address pointer for right block */
1522
1523                lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1524                lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1525                rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1526                rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1527#ifdef DEBUG
1528                for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1529                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1530                                return error;
1531                }
1532#endif
1533                memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1534                memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1535#ifdef DEBUG
1536                if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1537                        return error;
1538#endif
1539                *rkp = *lkp;
1540                *rpp = *lpp;
1541                xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1542                xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1543                xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1544        } else {
1545                xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1546                xfs_alloc_rec_t *rrp;   /* record pointer for right block */
1547
1548                lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1549                rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1550                memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1551                *rrp = *lrp;
1552                xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1553                key.ar_startblock = rrp->ar_startblock;
1554                key.ar_blockcount = rrp->ar_blockcount;
1555                rkp = &key;
1556                xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1557        }
1558        /*
1559         * Decrement and log left's numrecs, bump and log right's numrecs.
1560         */
1561        be16_add(&left->bb_numrecs, -1);
1562        xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1563        be16_add(&right->bb_numrecs, 1);
1564        xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1565        /*
1566         * Using a temporary cursor, update the parent key values of the
1567         * block on the right.
1568         */
1569        if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1570                return error;
1571        i = xfs_btree_lastrec(tcur, level);
1572        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1573        if ((error = xfs_alloc_increment(tcur, level, &i)) ||
1574            (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
1575                goto error0;
1576        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1577        *stat = 1;
1578        return 0;
1579error0:
1580        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1581        return error;
1582}
1583
1584/*
1585 * Split cur/level block in half.
1586 * Return new block number and its first record (to be inserted into parent).
1587 */
1588STATIC int                              /* error */
1589xfs_alloc_split(
1590        xfs_btree_cur_t         *cur,   /* btree cursor */
1591        int                     level,  /* level to split */
1592        xfs_agblock_t           *bnop,  /* output: block number allocated */
1593        xfs_alloc_key_t         *keyp,  /* output: first key of new block */
1594        xfs_btree_cur_t         **curp, /* output: new cursor */
1595        int                     *stat)  /* success/failure */
1596{
1597        int                     error;  /* error return value */
1598        int                     i;      /* loop index/record number */
1599        xfs_agblock_t           lbno;   /* left (current) block number */
1600        xfs_buf_t               *lbp;   /* buffer for left block */
1601        xfs_alloc_block_t       *left;  /* left (current) btree block */
1602        xfs_agblock_t           rbno;   /* right (new) block number */
1603        xfs_buf_t               *rbp;   /* buffer for right block */
1604        xfs_alloc_block_t       *right; /* right (new) btree block */
1605
1606        /*
1607         * Allocate the new block from the freelist.
1608         * If we can't do it, we're toast.  Give up.
1609         */
1610        error = xfs_alloc_get_freelist(cur->bc_tp,
1611                                         cur->bc_private.a.agbp, &rbno, 1);
1612        if (error)
1613                return error;
1614        if (rbno == NULLAGBLOCK) {
1615                *stat = 0;
1616                return 0;
1617        }
1618        xfs_trans_agbtree_delta(cur->bc_tp, 1);
1619        rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
1620                rbno, 0);
1621        /*
1622         * Set up the new block as "right".
1623         */
1624        right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1625        /*
1626         * "Left" is the current (according to the cursor) block.
1627         */
1628        lbp = cur->bc_bufs[level];
1629        left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1630#ifdef DEBUG
1631        if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1632                return error;
1633#endif
1634        /*
1635         * Fill in the btree header for the new block.
1636         */
1637        right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1638        right->bb_level = left->bb_level;
1639        right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1640        /*
1641         * Make sure that if there's an odd number of entries now, that
1642         * each new block will have the same number of entries.
1643         */
1644        if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1645            cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1646                be16_add(&right->bb_numrecs, 1);
1647        i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1648        /*
1649         * For non-leaf blocks, copy keys and addresses over to the new block.
1650         */
1651        if (level > 0) {
1652                xfs_alloc_key_t *lkp;   /* left btree key pointer */
1653                xfs_alloc_ptr_t *lpp;   /* left btree address pointer */
1654                xfs_alloc_key_t *rkp;   /* right btree key pointer */
1655                xfs_alloc_ptr_t *rpp;   /* right btree address pointer */
1656
1657                lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
1658                lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
1659                rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1660                rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1661#ifdef DEBUG
1662                for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1663                        if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1664                                return error;
1665                }
1666#endif
1667                memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1668                memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1669                xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1670                xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1671                *keyp = *rkp;
1672        }
1673        /*
1674         * For leaf blocks, copy records over to the new block.
1675         */
1676        else {
1677                xfs_alloc_rec_t *lrp;   /* left btree record pointer */
1678                xfs_alloc_rec_t *rrp;   /* right btree record pointer */
1679
1680                lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
1681                rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1682                memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1683                xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1684                keyp->ar_startblock = rrp->ar_startblock;
1685                keyp->ar_blockcount = rrp->ar_blockcount;
1686        }
1687        /*
1688         * Find the left block number by looking in the buffer.
1689         * Adjust numrecs, sibling pointers.
1690         */
1691        lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
1692        be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1693        right->bb_rightsib = left->bb_rightsib;
1694        left->bb_rightsib = cpu_to_be32(rbno);
1695        right->bb_leftsib = cpu_to_be32(lbno);
1696        xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
1697        xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1698        /*
1699         * If there's a block to the new block's right, make that block
1700         * point back to right instead of to left.
1701         */
1702        if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1703                xfs_alloc_block_t       *rrblock;       /* rr btree block */
1704                xfs_buf_t               *rrbp;          /* buffer for rrblock */
1705
1706                if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1707                                cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
1708                                &rrbp, XFS_ALLOC_BTREE_REF)))
1709                        return error;
1710                rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
1711                if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1712                        return error;
1713                rrblock->bb_leftsib = cpu_to_be32(rbno);
1714                xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
1715        }
1716        /*
1717         * If the cursor is really in the right block, move it there.
1718         * If it's just pointing past the last entry in left, then we'll
1719         * insert there, so don't change anything in that case.
1720         */
1721        if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1722                xfs_btree_setbuf(cur, level, rbp);
1723                cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1724        }
1725        /*
1726         * If there are more levels, we'll need another cursor which refers to
1727         * the right block, no matter where this cursor was.
1728         */
1729        if (level + 1 < cur->bc_nlevels) {
1730                if ((error = xfs_btree_dup_cursor(cur, curp)))
1731                        return error;
1732                (*curp)->bc_ptrs[level + 1]++;
1733        }
1734        *bnop = rbno;
1735        *stat = 1;
1736        return 0;
1737}
1738
1739/*
1740 * Update keys at all levels from here to the root along the cursor's path.
1741 */
1742STATIC int                              /* error */
1743xfs_alloc_updkey(
1744        xfs_btree_cur_t         *cur,   /* btree cursor */
1745        xfs_alloc_key_t         *keyp,  /* new key value to update to */
1746        int                     level)  /* starting level for update */
1747{
1748        int                     ptr;    /* index of key in block */
1749
1750        /*
1751         * Go up the tree from this level toward the root.
1752         * At each level, update the key value to the value input.
1753         * Stop when we reach a level where the cursor isn't pointing
1754         * at the first entry in the block.
1755         */
1756        for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1757                xfs_alloc_block_t       *block; /* btree block */
1758                xfs_buf_t               *bp;    /* buffer for block */
1759#ifdef DEBUG
1760                int                     error;  /* error return value */
1761#endif
1762                xfs_alloc_key_t         *kp;    /* ptr to btree block keys */
1763
1764                bp = cur->bc_bufs[level];
1765                block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1766#ifdef DEBUG
1767                if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1768                        return error;
1769#endif
1770                ptr = cur->bc_ptrs[level];
1771                kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
1772                *kp = *keyp;
1773                xfs_alloc_log_keys(cur, bp, ptr, ptr);
1774        }
1775        return 0;
1776}
1777
1778/*
1779 * Externally visible routines.
1780 */
1781
1782/*
1783 * Decrement cursor by one record at the level.
1784 * For nonzero levels the leaf-ward information is untouched.
1785 */
1786int                                     /* error */
1787xfs_alloc_decrement(
1788        xfs_btree_cur_t         *cur,   /* btree cursor */
1789        int                     level,  /* level in btree, 0 is leaf */
1790        int                     *stat)  /* success/failure */
1791{
1792        xfs_alloc_block_t       *block; /* btree block */
1793        int                     error;  /* error return value */
1794        int                     lev;    /* btree level */
1795
1796        ASSERT(level < cur->bc_nlevels);
1797        /*
1798         * Read-ahead to the left at this level.
1799         */
1800        xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1801        /*
1802         * Decrement the ptr at this level.  If we're still in the block
1803         * then we're done.
1804         */
1805        if (--cur->bc_ptrs[level] > 0) {
1806                *stat = 1;
1807                return 0;
1808        }
1809        /*
1810         * Get a pointer to the btree block.
1811         */
1812        block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
1813#ifdef DEBUG
1814        if ((error = xfs_btree_check_sblock(cur, block, level,
1815                        cur->bc_bufs[level])))
1816                return error;
1817#endif
1818        /*
1819         * If we just went off the left edge of the tree, return failure.
1820         */
1821        if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1822                *stat = 0;
1823                return 0;
1824        }
1825        /*
1826         * March up the tree decrementing pointers.
1827         * Stop when we don't go off the left edge of a block.
1828         */
1829        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1830                if (--cur->bc_ptrs[lev] > 0)
1831                        break;
1832                /*
1833                 * Read-ahead the left block, we're going to read it
1834                 * in the next loop.
1835                 */
1836                xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1837        }
1838        /*
1839         * If we went off the root then we are seriously confused.
1840         */
1841        ASSERT(lev < cur->bc_nlevels);
1842        /*
1843         * Now walk back down the tree, fixing up the cursor's buffer
1844         * pointers and key numbers.
1845         */
1846        for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1847                xfs_agblock_t   agbno;  /* block number of btree block */
1848                xfs_buf_t       *bp;    /* buffer pointer for block */
1849
1850                agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1851                if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1852                                cur->bc_private.a.agno, agbno, 0, &bp,
1853                                XFS_ALLOC_BTREE_REF)))
1854                        return error;
1855                lev--;
1856                xfs_btree_setbuf(cur, lev, bp);
1857                block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1858                if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1859                        return error;
1860                cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1861        }
1862        *stat = 1;
1863        return 0;
1864}
1865
1866/*
1867 * Delete the record pointed to by cur.
1868 * The cursor refers to the place where the record was (could be inserted)
1869 * when the operation returns.
1870 */
1871int                                     /* error */
1872xfs_alloc_delete(
1873        xfs_btree_cur_t *cur,           /* btree cursor */
1874        int             *stat)          /* success/failure */
1875{
1876        int             error;          /* error return value */
1877        int             i;              /* result code */
1878        int             level;          /* btree level */
1879
1880        /*
1881         * Go up the tree, starting at leaf level.
1882         * If 2 is returned then a join was done; go to the next level.
1883         * Otherwise we are done.
1884         */
1885        for (level = 0, i = 2; i == 2; level++) {
1886                if ((error = xfs_alloc_delrec(cur, level, &i)))
1887                        return error;
1888        }
1889        if (i == 0) {
1890                for (level = 1; level < cur->bc_nlevels; level++) {
1891                        if (cur->bc_ptrs[level] == 0) {
1892                                if ((error = xfs_alloc_decrement(cur, level, &i)))
1893                                        return error;
1894                                break;
1895                        }
1896                }
1897        }
1898        *stat = i;
1899        return 0;
1900}
1901
1902/*
1903 * Get the data from the pointed-to record.
1904 */
1905int                                     /* error */
1906xfs_alloc_get_rec(
1907        xfs_btree_cur_t         *cur,   /* btree cursor */
1908        xfs_agblock_t           *bno,   /* output: starting block of extent */
1909        xfs_extlen_t            *len,   /* output: length of extent */
1910        int                     *stat)  /* output: success/failure */
1911{
1912        xfs_alloc_block_t       *block; /* btree block */
1913#ifdef DEBUG
1914        int                     error;  /* error return value */
1915#endif
1916        int                     ptr;    /* record number */
1917
1918        ptr = cur->bc_ptrs[0];
1919        block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
1920#ifdef DEBUG
1921        if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
1922                return error;
1923#endif
1924        /*
1925         * Off the right end or left end, return failure.
1926         */
1927        if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1928                *stat = 0;
1929                return 0;
1930        }
1931        /*
1932         * Point to the record and extract its data.
1933         */
1934        {
1935                xfs_alloc_rec_t         *rec;   /* record data */
1936
1937                rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
1938                *bno = be32_to_cpu(rec->ar_startblock);
1939                *len = be32_to_cpu(rec->ar_blockcount);
1940        }
1941        *stat = 1;
1942        return 0;
1943}
1944
1945/*
1946 * Increment cursor by one record at the level.
1947 * For nonzero levels the leaf-ward information is untouched.
1948 */
1949int                                     /* error */
1950xfs_alloc_increment(
1951        xfs_btree_cur_t         *cur,   /* btree cursor */
1952        int                     level,  /* level in btree, 0 is leaf */
1953        int                     *stat)  /* success/failure */
1954{
1955        xfs_alloc_block_t       *block; /* btree block */
1956        xfs_buf_t               *bp;    /* tree block buffer */
1957        int                     error;  /* error return value */
1958        int                     lev;    /* btree level */
1959
1960        ASSERT(level < cur->bc_nlevels);
1961        /*
1962         * Read-ahead to the right at this level.
1963         */
1964        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1965        /*
1966         * Get a pointer to the btree block.
1967         */
1968        bp = cur->bc_bufs[level];
1969        block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1970#ifdef DEBUG
1971        if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1972                return error;
1973#endif
1974        /*
1975         * Increment the ptr at this level.  If we're still in the block
1976         * then we're done.
1977         */
1978        if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1979                *stat = 1;
1980                return 0;
1981        }
1982        /*
1983         * If we just went off the right edge of the tree, return failure.
1984         */
1985        if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1986                *stat = 0;
1987                return 0;
1988        }
1989        /*
1990         * March up the tree incrementing pointers.
1991         * Stop when we don't go off the right edge of a block.
1992         */
1993        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1994                bp = cur->bc_bufs[lev];
1995                block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1996#ifdef DEBUG
1997                if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1998                        return error;
1999#endif
2000                if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
2001                        break;
2002                /*
2003                 * Read-ahead the right block, we're going to read it
2004                 * in the next loop.
2005                 */
2006                xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
2007        }
2008        /*
2009         * If we went off the root then we are seriously confused.
2010         */
2011        ASSERT(lev < cur->bc_nlevels);
2012        /*
2013         * Now walk back down the tree, fixing up the cursor's buffer
2014         * pointers and key numbers.
2015         */
2016        for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2017             lev > level; ) {
2018                xfs_agblock_t   agbno;  /* block number of btree block */
2019
2020                agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
2021                if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
2022                                cur->bc_private.a.agno, agbno, 0, &bp,
2023                                XFS_ALLOC_BTREE_REF)))
2024                        return error;
2025                lev--;
2026                xfs_btree_setbuf(cur, lev, bp);
2027                block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2028                if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
2029                        return error;
2030                cur->bc_ptrs[lev] = 1;
2031        }
2032        *stat = 1;
2033        return 0;
2034}
2035
2036/*
2037 * Insert the current record at the point referenced by cur.
2038 * The cursor may be inconsistent on return if splits have been done.
2039 */
2040int                                     /* error */
2041xfs_alloc_insert(
2042        xfs_btree_cur_t *cur,           /* btree cursor */
2043        int             *stat)          /* success/failure */
2044{
2045        int             error;          /* error return value */
2046        int             i;              /* result value, 0 for failure */
2047        int             level;          /* current level number in btree */
2048        xfs_agblock_t   nbno;           /* new block number (split result) */
2049        xfs_btree_cur_t *ncur;          /* new cursor (split result) */
2050        xfs_alloc_rec_t nrec;           /* record being inserted this level */
2051        xfs_btree_cur_t *pcur;          /* previous level's cursor */
2052
2053        level = 0;
2054        nbno = NULLAGBLOCK;
2055        nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
2056        nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
2057        ncur = NULL;
2058        pcur = cur;
2059        /*
2060         * Loop going up the tree, starting at the leaf level.
2061         * Stop when we don't get a split block, that must mean that
2062         * the insert is finished with this level.
2063         */
2064        do {
2065                /*
2066                 * Insert nrec/nbno into this level of the tree.
2067                 * Note if we fail, nbno will be null.
2068                 */
2069                if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
2070                                &i))) {
2071                        if (pcur != cur)
2072                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2073                        return error;
2074                }
2075                /*
2076                 * See if the cursor we just used is trash.
2077                 * Can't trash the caller's cursor, but otherwise we should
2078                 * if ncur is a new cursor or we're about to be done.
2079                 */
2080                if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
2081                        cur->bc_nlevels = pcur->bc_nlevels;
2082                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2083                }
2084                /*
2085                 * If we got a new cursor, switch to it.
2086                 */
2087                if (ncur) {
2088                        pcur = ncur;
2089                        ncur = NULL;
2090                }
2091        } while (nbno != NULLAGBLOCK);
2092        *stat = i;
2093        return 0;
2094}
2095
2096/*
2097 * Lookup the record equal to [bno, len] in the btree given by cur.
2098 */
2099int                                     /* error */
2100xfs_alloc_lookup_eq(
2101        xfs_btree_cur_t *cur,           /* btree cursor */
2102        xfs_agblock_t   bno,            /* starting block of extent */
2103        xfs_extlen_t    len,            /* length of extent */
2104        int             *stat)          /* success/failure */
2105{
2106        cur->bc_rec.a.ar_startblock = bno;
2107        cur->bc_rec.a.ar_blockcount = len;
2108        return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
2109}
2110
2111/*
2112 * Lookup the first record greater than or equal to [bno, len]
2113 * in the btree given by cur.
2114 */
2115int                                     /* error */
2116xfs_alloc_lookup_ge(
2117        xfs_btree_cur_t *cur,           /* btree cursor */
2118        xfs_agblock_t   bno,            /* starting block of extent */
2119        xfs_extlen_t    len,            /* length of extent */
2120        int             *stat)          /* success/failure */
2121{
2122        cur->bc_rec.a.ar_startblock = bno;
2123        cur->bc_rec.a.ar_blockcount = len;
2124        return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
2125}
2126
2127/*
2128 * Lookup the first record less than or equal to [bno, len]
2129 * in the btree given by cur.
2130 */
2131int                                     /* error */
2132xfs_alloc_lookup_le(
2133        xfs_btree_cur_t *cur,           /* btree cursor */
2134        xfs_agblock_t   bno,            /* starting block of extent */
2135        xfs_extlen_t    len,            /* length of extent */
2136        int             *stat)          /* success/failure */
2137{
2138        cur->bc_rec.a.ar_startblock = bno;
2139        cur->bc_rec.a.ar_blockcount = len;
2140        return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
2141}
2142
2143/*
2144 * Update the record referred to by cur, to the value given by [bno, len].
2145 * This either works (return 0) or gets an EFSCORRUPTED error.
2146 */
2147int                                     /* error */
2148xfs_alloc_update(
2149        xfs_btree_cur_t         *cur,   /* btree cursor */
2150        xfs_agblock_t           bno,    /* starting block of extent */
2151        xfs_extlen_t            len)    /* length of extent */
2152{
2153        xfs_alloc_block_t       *block; /* btree block to update */
2154        int                     error;  /* error return value */
2155        int                     ptr;    /* current record number (updating) */
2156
2157        ASSERT(len > 0);
2158        /*
2159         * Pick up the a.g. freelist struct and the current block.
2160         */
2161        block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2162#ifdef DEBUG
2163        if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2164                return error;
2165#endif
2166        /*
2167         * Get the address of the rec to be updated.
2168         */
2169        ptr = cur->bc_ptrs[0];
2170        {
2171                xfs_alloc_rec_t         *rp;    /* pointer to updated record */
2172
2173                rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2174                /*
2175                 * Fill in the new contents and log them.
2176                 */
2177                rp->ar_startblock = cpu_to_be32(bno);
2178                rp->ar_blockcount = cpu_to_be32(len);
2179                xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2180        }
2181        /*
2182         * If it's the by-size btree and it's the last leaf block and
2183         * it's the last record... then update the size of the longest
2184         * extent in the a.g., which we cache in the a.g. freelist header.
2185         */
2186        if (cur->bc_btnum == XFS_BTNUM_CNT &&
2187            be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2188            ptr == be16_to_cpu(block->bb_numrecs)) {
2189                xfs_agf_t       *agf;   /* a.g. freespace header */
2190                xfs_agnumber_t  seqno;
2191
2192                agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2193                seqno = be32_to_cpu(agf->agf_seqno);
2194                cur->bc_mp->m_perag[seqno].pagf_longest = len;
2195                agf->agf_longest = cpu_to_be32(len);
2196                xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2197                        XFS_AGF_LONGEST);
2198        }
2199        /*
2200         * Updating first record in leaf. Pass new key value up to our parent.
2201         */
2202        if (ptr == 1) {
2203                xfs_alloc_key_t key;    /* key containing [bno, len] */
2204
2205                key.ar_startblock = cpu_to_be32(bno);
2206                key.ar_blockcount = cpu_to_be32(len);
2207                if ((error = xfs_alloc_updkey(cur, &key, 1)))
2208                        return error;
2209        }
2210        return 0;
2211}
2212