linux/fs/xfs/xfs_btree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_mount.h"
  27#include "xfs_bmap_btree.h"
  28#include "xfs_alloc_btree.h"
  29#include "xfs_ialloc_btree.h"
  30#include "xfs_dinode.h"
  31#include "xfs_inode.h"
  32#include "xfs_inode_item.h"
  33#include "xfs_btree.h"
  34#include "xfs_error.h"
  35#include "xfs_trace.h"
  36
  37/*
  38 * Cursor allocation zone.
  39 */
  40kmem_zone_t     *xfs_btree_cur_zone;
  41
  42/*
  43 * Btree magic numbers.
  44 */
  45const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
  46        XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
  47};
  48
  49
  50STATIC int                              /* error (0 or EFSCORRUPTED) */
  51xfs_btree_check_lblock(
  52        struct xfs_btree_cur    *cur,   /* btree cursor */
  53        struct xfs_btree_block  *block, /* btree long form block pointer */
  54        int                     level,  /* level of the btree block */
  55        struct xfs_buf          *bp)    /* buffer for block, if any */
  56{
  57        int                     lblock_ok; /* block passes checks */
  58        struct xfs_mount        *mp;    /* file system mount point */
  59
  60        mp = cur->bc_mp;
  61        lblock_ok =
  62                be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
  63                be16_to_cpu(block->bb_level) == level &&
  64                be16_to_cpu(block->bb_numrecs) <=
  65                        cur->bc_ops->get_maxrecs(cur, level) &&
  66                block->bb_u.l.bb_leftsib &&
  67                (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
  68                 XFS_FSB_SANITY_CHECK(mp,
  69                        be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
  70                block->bb_u.l.bb_rightsib &&
  71                (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
  72                 XFS_FSB_SANITY_CHECK(mp,
  73                        be64_to_cpu(block->bb_u.l.bb_rightsib)));
  74        if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
  75                        XFS_ERRTAG_BTREE_CHECK_LBLOCK,
  76                        XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
  77                if (bp)
  78                        trace_xfs_btree_corrupt(bp, _RET_IP_);
  79                XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
  80                                 mp);
  81                return XFS_ERROR(EFSCORRUPTED);
  82        }
  83        return 0;
  84}
  85
  86STATIC int                              /* error (0 or EFSCORRUPTED) */
  87xfs_btree_check_sblock(
  88        struct xfs_btree_cur    *cur,   /* btree cursor */
  89        struct xfs_btree_block  *block, /* btree short form block pointer */
  90        int                     level,  /* level of the btree block */
  91        struct xfs_buf          *bp)    /* buffer containing block */
  92{
  93        struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
  94        struct xfs_agf          *agf;   /* ag. freespace structure */
  95        xfs_agblock_t           agflen; /* native ag. freespace length */
  96        int                     sblock_ok; /* block passes checks */
  97
  98        agbp = cur->bc_private.a.agbp;
  99        agf = XFS_BUF_TO_AGF(agbp);
 100        agflen = be32_to_cpu(agf->agf_length);
 101        sblock_ok =
 102                be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
 103                be16_to_cpu(block->bb_level) == level &&
 104                be16_to_cpu(block->bb_numrecs) <=
 105                        cur->bc_ops->get_maxrecs(cur, level) &&
 106                (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
 107                 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
 108                block->bb_u.s.bb_leftsib &&
 109                (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
 110                 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
 111                block->bb_u.s.bb_rightsib;
 112        if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
 113                        XFS_ERRTAG_BTREE_CHECK_SBLOCK,
 114                        XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
 115                if (bp)
 116                        trace_xfs_btree_corrupt(bp, _RET_IP_);
 117                XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
 118                        XFS_ERRLEVEL_LOW, cur->bc_mp, block);
 119                return XFS_ERROR(EFSCORRUPTED);
 120        }
 121        return 0;
 122}
 123
 124/*
 125 * Debug routine: check that block header is ok.
 126 */
 127int
 128xfs_btree_check_block(
 129        struct xfs_btree_cur    *cur,   /* btree cursor */
 130        struct xfs_btree_block  *block, /* generic btree block pointer */
 131        int                     level,  /* level of the btree block */
 132        struct xfs_buf          *bp)    /* buffer containing block, if any */
 133{
 134        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 135                return xfs_btree_check_lblock(cur, block, level, bp);
 136        else
 137                return xfs_btree_check_sblock(cur, block, level, bp);
 138}
 139
 140/*
 141 * Check that (long) pointer is ok.
 142 */
 143int                                     /* error (0 or EFSCORRUPTED) */
 144xfs_btree_check_lptr(
 145        struct xfs_btree_cur    *cur,   /* btree cursor */
 146        xfs_dfsbno_t            bno,    /* btree block disk address */
 147        int                     level)  /* btree block level */
 148{
 149        XFS_WANT_CORRUPTED_RETURN(
 150                level > 0 &&
 151                bno != NULLDFSBNO &&
 152                XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
 153        return 0;
 154}
 155
 156#ifdef DEBUG
 157/*
 158 * Check that (short) pointer is ok.
 159 */
 160STATIC int                              /* error (0 or EFSCORRUPTED) */
 161xfs_btree_check_sptr(
 162        struct xfs_btree_cur    *cur,   /* btree cursor */
 163        xfs_agblock_t           bno,    /* btree block disk address */
 164        int                     level)  /* btree block level */
 165{
 166        xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
 167
 168        XFS_WANT_CORRUPTED_RETURN(
 169                level > 0 &&
 170                bno != NULLAGBLOCK &&
 171                bno != 0 &&
 172                bno < agblocks);
 173        return 0;
 174}
 175
 176/*
 177 * Check that block ptr is ok.
 178 */
 179STATIC int                              /* error (0 or EFSCORRUPTED) */
 180xfs_btree_check_ptr(
 181        struct xfs_btree_cur    *cur,   /* btree cursor */
 182        union xfs_btree_ptr     *ptr,   /* btree block disk address */
 183        int                     index,  /* offset from ptr to check */
 184        int                     level)  /* btree block level */
 185{
 186        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 187                return xfs_btree_check_lptr(cur,
 188                                be64_to_cpu((&ptr->l)[index]), level);
 189        } else {
 190                return xfs_btree_check_sptr(cur,
 191                                be32_to_cpu((&ptr->s)[index]), level);
 192        }
 193}
 194#endif
 195
 196/*
 197 * Delete the btree cursor.
 198 */
 199void
 200xfs_btree_del_cursor(
 201        xfs_btree_cur_t *cur,           /* btree cursor */
 202        int             error)          /* del because of error */
 203{
 204        int             i;              /* btree level */
 205
 206        /*
 207         * Clear the buffer pointers, and release the buffers.
 208         * If we're doing this in the face of an error, we
 209         * need to make sure to inspect all of the entries
 210         * in the bc_bufs array for buffers to be unlocked.
 211         * This is because some of the btree code works from
 212         * level n down to 0, and if we get an error along
 213         * the way we won't have initialized all the entries
 214         * down to 0.
 215         */
 216        for (i = 0; i < cur->bc_nlevels; i++) {
 217                if (cur->bc_bufs[i])
 218                        xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
 219                else if (!error)
 220                        break;
 221        }
 222        /*
 223         * Can't free a bmap cursor without having dealt with the
 224         * allocated indirect blocks' accounting.
 225         */
 226        ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
 227               cur->bc_private.b.allocated == 0);
 228        /*
 229         * Free the cursor.
 230         */
 231        kmem_zone_free(xfs_btree_cur_zone, cur);
 232}
 233
 234/*
 235 * Duplicate the btree cursor.
 236 * Allocate a new one, copy the record, re-get the buffers.
 237 */
 238int                                     /* error */
 239xfs_btree_dup_cursor(
 240        xfs_btree_cur_t *cur,           /* input cursor */
 241        xfs_btree_cur_t **ncur)         /* output cursor */
 242{
 243        xfs_buf_t       *bp;            /* btree block's buffer pointer */
 244        int             error;          /* error return value */
 245        int             i;              /* level number of btree block */
 246        xfs_mount_t     *mp;            /* mount structure for filesystem */
 247        xfs_btree_cur_t *new;           /* new cursor value */
 248        xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
 249
 250        tp = cur->bc_tp;
 251        mp = cur->bc_mp;
 252
 253        /*
 254         * Allocate a new cursor like the old one.
 255         */
 256        new = cur->bc_ops->dup_cursor(cur);
 257
 258        /*
 259         * Copy the record currently in the cursor.
 260         */
 261        new->bc_rec = cur->bc_rec;
 262
 263        /*
 264         * For each level current, re-get the buffer and copy the ptr value.
 265         */
 266        for (i = 0; i < new->bc_nlevels; i++) {
 267                new->bc_ptrs[i] = cur->bc_ptrs[i];
 268                new->bc_ra[i] = cur->bc_ra[i];
 269                bp = cur->bc_bufs[i];
 270                if (bp) {
 271                        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 272                                                   XFS_BUF_ADDR(bp), mp->m_bsize,
 273                                                   0, &bp,
 274                                                   cur->bc_ops->buf_ops);
 275                        if (error) {
 276                                xfs_btree_del_cursor(new, error);
 277                                *ncur = NULL;
 278                                return error;
 279                        }
 280                        new->bc_bufs[i] = bp;
 281                        ASSERT(!xfs_buf_geterror(bp));
 282                } else
 283                        new->bc_bufs[i] = NULL;
 284        }
 285        *ncur = new;
 286        return 0;
 287}
 288
 289/*
 290 * XFS btree block layout and addressing:
 291 *
 292 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 293 *
 294 * The leaf record start with a header then followed by records containing
 295 * the values.  A non-leaf block also starts with the same header, and
 296 * then first contains lookup keys followed by an equal number of pointers
 297 * to the btree blocks at the previous level.
 298 *
 299 *              +--------+-------+-------+-------+-------+-------+-------+
 300 * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 301 *              +--------+-------+-------+-------+-------+-------+-------+
 302 *
 303 *              +--------+-------+-------+-------+-------+-------+-------+
 304 * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 305 *              +--------+-------+-------+-------+-------+-------+-------+
 306 *
 307 * The header is called struct xfs_btree_block for reasons better left unknown
 308 * and comes in different versions for short (32bit) and long (64bit) block
 309 * pointers.  The record and key structures are defined by the btree instances
 310 * and opaque to the btree core.  The block pointers are simple disk endian
 311 * integers, available in a short (32bit) and long (64bit) variant.
 312 *
 313 * The helpers below calculate the offset of a given record, key or pointer
 314 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 315 * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
 316 * inside the btree block is done using indices starting at one, not zero!
 317 */
 318
 319/*
 320 * Return size of the btree block header for this btree instance.
 321 */
 322static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
 323{
 324        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 325                XFS_BTREE_LBLOCK_LEN :
 326                XFS_BTREE_SBLOCK_LEN;
 327}
 328
 329/*
 330 * Return size of btree block pointers for this btree instance.
 331 */
 332static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
 333{
 334        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 335                sizeof(__be64) : sizeof(__be32);
 336}
 337
 338/*
 339 * Calculate offset of the n-th record in a btree block.
 340 */
 341STATIC size_t
 342xfs_btree_rec_offset(
 343        struct xfs_btree_cur    *cur,
 344        int                     n)
 345{
 346        return xfs_btree_block_len(cur) +
 347                (n - 1) * cur->bc_ops->rec_len;
 348}
 349
 350/*
 351 * Calculate offset of the n-th key in a btree block.
 352 */
 353STATIC size_t
 354xfs_btree_key_offset(
 355        struct xfs_btree_cur    *cur,
 356        int                     n)
 357{
 358        return xfs_btree_block_len(cur) +
 359                (n - 1) * cur->bc_ops->key_len;
 360}
 361
 362/*
 363 * Calculate offset of the n-th block pointer in a btree block.
 364 */
 365STATIC size_t
 366xfs_btree_ptr_offset(
 367        struct xfs_btree_cur    *cur,
 368        int                     n,
 369        int                     level)
 370{
 371        return xfs_btree_block_len(cur) +
 372                cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
 373                (n - 1) * xfs_btree_ptr_len(cur);
 374}
 375
 376/*
 377 * Return a pointer to the n-th record in the btree block.
 378 */
 379STATIC union xfs_btree_rec *
 380xfs_btree_rec_addr(
 381        struct xfs_btree_cur    *cur,
 382        int                     n,
 383        struct xfs_btree_block  *block)
 384{
 385        return (union xfs_btree_rec *)
 386                ((char *)block + xfs_btree_rec_offset(cur, n));
 387}
 388
 389/*
 390 * Return a pointer to the n-th key in the btree block.
 391 */
 392STATIC union xfs_btree_key *
 393xfs_btree_key_addr(
 394        struct xfs_btree_cur    *cur,
 395        int                     n,
 396        struct xfs_btree_block  *block)
 397{
 398        return (union xfs_btree_key *)
 399                ((char *)block + xfs_btree_key_offset(cur, n));
 400}
 401
 402/*
 403 * Return a pointer to the n-th block pointer in the btree block.
 404 */
 405STATIC union xfs_btree_ptr *
 406xfs_btree_ptr_addr(
 407        struct xfs_btree_cur    *cur,
 408        int                     n,
 409        struct xfs_btree_block  *block)
 410{
 411        int                     level = xfs_btree_get_level(block);
 412
 413        ASSERT(block->bb_level != 0);
 414
 415        return (union xfs_btree_ptr *)
 416                ((char *)block + xfs_btree_ptr_offset(cur, n, level));
 417}
 418
 419/*
 420 * Get a the root block which is stored in the inode.
 421 *
 422 * For now this btree implementation assumes the btree root is always
 423 * stored in the if_broot field of an inode fork.
 424 */
 425STATIC struct xfs_btree_block *
 426xfs_btree_get_iroot(
 427       struct xfs_btree_cur    *cur)
 428{
 429       struct xfs_ifork        *ifp;
 430
 431       ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
 432       return (struct xfs_btree_block *)ifp->if_broot;
 433}
 434
 435/*
 436 * Retrieve the block pointer from the cursor at the given level.
 437 * This may be an inode btree root or from a buffer.
 438 */
 439STATIC struct xfs_btree_block *         /* generic btree block pointer */
 440xfs_btree_get_block(
 441        struct xfs_btree_cur    *cur,   /* btree cursor */
 442        int                     level,  /* level in btree */
 443        struct xfs_buf          **bpp)  /* buffer containing the block */
 444{
 445        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 446            (level == cur->bc_nlevels - 1)) {
 447                *bpp = NULL;
 448                return xfs_btree_get_iroot(cur);
 449        }
 450
 451        *bpp = cur->bc_bufs[level];
 452        return XFS_BUF_TO_BLOCK(*bpp);
 453}
 454
 455/*
 456 * Get a buffer for the block, return it with no data read.
 457 * Long-form addressing.
 458 */
 459xfs_buf_t *                             /* buffer for fsbno */
 460xfs_btree_get_bufl(
 461        xfs_mount_t     *mp,            /* file system mount point */
 462        xfs_trans_t     *tp,            /* transaction pointer */
 463        xfs_fsblock_t   fsbno,          /* file system block number */
 464        uint            lock)           /* lock flags for get_buf */
 465{
 466        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 467        xfs_daddr_t             d;              /* real disk block address */
 468
 469        ASSERT(fsbno != NULLFSBLOCK);
 470        d = XFS_FSB_TO_DADDR(mp, fsbno);
 471        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 472        ASSERT(!xfs_buf_geterror(bp));
 473        return bp;
 474}
 475
 476/*
 477 * Get a buffer for the block, return it with no data read.
 478 * Short-form addressing.
 479 */
 480xfs_buf_t *                             /* buffer for agno/agbno */
 481xfs_btree_get_bufs(
 482        xfs_mount_t     *mp,            /* file system mount point */
 483        xfs_trans_t     *tp,            /* transaction pointer */
 484        xfs_agnumber_t  agno,           /* allocation group number */
 485        xfs_agblock_t   agbno,          /* allocation group block number */
 486        uint            lock)           /* lock flags for get_buf */
 487{
 488        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 489        xfs_daddr_t             d;              /* real disk block address */
 490
 491        ASSERT(agno != NULLAGNUMBER);
 492        ASSERT(agbno != NULLAGBLOCK);
 493        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 494        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 495        ASSERT(!xfs_buf_geterror(bp));
 496        return bp;
 497}
 498
 499/*
 500 * Check for the cursor referring to the last block at the given level.
 501 */
 502int                                     /* 1=is last block, 0=not last block */
 503xfs_btree_islastblock(
 504        xfs_btree_cur_t         *cur,   /* btree cursor */
 505        int                     level)  /* level to check */
 506{
 507        struct xfs_btree_block  *block; /* generic btree block pointer */
 508        xfs_buf_t               *bp;    /* buffer containing block */
 509
 510        block = xfs_btree_get_block(cur, level, &bp);
 511        xfs_btree_check_block(cur, block, level, bp);
 512        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 513                return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
 514        else
 515                return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
 516}
 517
 518/*
 519 * Change the cursor to point to the first record at the given level.
 520 * Other levels are unaffected.
 521 */
 522STATIC int                              /* success=1, failure=0 */
 523xfs_btree_firstrec(
 524        xfs_btree_cur_t         *cur,   /* btree cursor */
 525        int                     level)  /* level to change */
 526{
 527        struct xfs_btree_block  *block; /* generic btree block pointer */
 528        xfs_buf_t               *bp;    /* buffer containing block */
 529
 530        /*
 531         * Get the block pointer for this level.
 532         */
 533        block = xfs_btree_get_block(cur, level, &bp);
 534        xfs_btree_check_block(cur, block, level, bp);
 535        /*
 536         * It's empty, there is no such record.
 537         */
 538        if (!block->bb_numrecs)
 539                return 0;
 540        /*
 541         * Set the ptr value to 1, that's the first record/key.
 542         */
 543        cur->bc_ptrs[level] = 1;
 544        return 1;
 545}
 546
 547/*
 548 * Change the cursor to point to the last record in the current block
 549 * at the given level.  Other levels are unaffected.
 550 */
 551STATIC int                              /* success=1, failure=0 */
 552xfs_btree_lastrec(
 553        xfs_btree_cur_t         *cur,   /* btree cursor */
 554        int                     level)  /* level to change */
 555{
 556        struct xfs_btree_block  *block; /* generic btree block pointer */
 557        xfs_buf_t               *bp;    /* buffer containing block */
 558
 559        /*
 560         * Get the block pointer for this level.
 561         */
 562        block = xfs_btree_get_block(cur, level, &bp);
 563        xfs_btree_check_block(cur, block, level, bp);
 564        /*
 565         * It's empty, there is no such record.
 566         */
 567        if (!block->bb_numrecs)
 568                return 0;
 569        /*
 570         * Set the ptr value to numrecs, that's the last record/key.
 571         */
 572        cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
 573        return 1;
 574}
 575
 576/*
 577 * Compute first and last byte offsets for the fields given.
 578 * Interprets the offsets table, which contains struct field offsets.
 579 */
 580void
 581xfs_btree_offsets(
 582        __int64_t       fields,         /* bitmask of fields */
 583        const short     *offsets,       /* table of field offsets */
 584        int             nbits,          /* number of bits to inspect */
 585        int             *first,         /* output: first byte offset */
 586        int             *last)          /* output: last byte offset */
 587{
 588        int             i;              /* current bit number */
 589        __int64_t       imask;          /* mask for current bit number */
 590
 591        ASSERT(fields != 0);
 592        /*
 593         * Find the lowest bit, so the first byte offset.
 594         */
 595        for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
 596                if (imask & fields) {
 597                        *first = offsets[i];
 598                        break;
 599                }
 600        }
 601        /*
 602         * Find the highest bit, so the last byte offset.
 603         */
 604        for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
 605                if (imask & fields) {
 606                        *last = offsets[i + 1] - 1;
 607                        break;
 608                }
 609        }
 610}
 611
 612/*
 613 * Get a buffer for the block, return it read in.
 614 * Long-form addressing.
 615 */
 616int
 617xfs_btree_read_bufl(
 618        struct xfs_mount        *mp,            /* file system mount point */
 619        struct xfs_trans        *tp,            /* transaction pointer */
 620        xfs_fsblock_t           fsbno,          /* file system block number */
 621        uint                    lock,           /* lock flags for read_buf */
 622        struct xfs_buf          **bpp,          /* buffer for fsbno */
 623        int                     refval,         /* ref count value for buffer */
 624        const struct xfs_buf_ops *ops)
 625{
 626        struct xfs_buf          *bp;            /* return value */
 627        xfs_daddr_t             d;              /* real disk block address */
 628        int                     error;
 629
 630        ASSERT(fsbno != NULLFSBLOCK);
 631        d = XFS_FSB_TO_DADDR(mp, fsbno);
 632        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
 633                                   mp->m_bsize, lock, &bp, ops);
 634        if (error)
 635                return error;
 636        ASSERT(!xfs_buf_geterror(bp));
 637        if (bp)
 638                xfs_buf_set_ref(bp, refval);
 639        *bpp = bp;
 640        return 0;
 641}
 642
 643/*
 644 * Read-ahead the block, don't wait for it, don't return a buffer.
 645 * Long-form addressing.
 646 */
 647/* ARGSUSED */
 648void
 649xfs_btree_reada_bufl(
 650        struct xfs_mount        *mp,            /* file system mount point */
 651        xfs_fsblock_t           fsbno,          /* file system block number */
 652        xfs_extlen_t            count,          /* count of filesystem blocks */
 653        const struct xfs_buf_ops *ops)
 654{
 655        xfs_daddr_t             d;
 656
 657        ASSERT(fsbno != NULLFSBLOCK);
 658        d = XFS_FSB_TO_DADDR(mp, fsbno);
 659        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 660}
 661
 662/*
 663 * Read-ahead the block, don't wait for it, don't return a buffer.
 664 * Short-form addressing.
 665 */
 666/* ARGSUSED */
 667void
 668xfs_btree_reada_bufs(
 669        struct xfs_mount        *mp,            /* file system mount point */
 670        xfs_agnumber_t          agno,           /* allocation group number */
 671        xfs_agblock_t           agbno,          /* allocation group block number */
 672        xfs_extlen_t            count,          /* count of filesystem blocks */
 673        const struct xfs_buf_ops *ops)
 674{
 675        xfs_daddr_t             d;
 676
 677        ASSERT(agno != NULLAGNUMBER);
 678        ASSERT(agbno != NULLAGBLOCK);
 679        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 680        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 681}
 682
 683STATIC int
 684xfs_btree_readahead_lblock(
 685        struct xfs_btree_cur    *cur,
 686        int                     lr,
 687        struct xfs_btree_block  *block)
 688{
 689        int                     rval = 0;
 690        xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
 691        xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
 692
 693        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
 694                xfs_btree_reada_bufl(cur->bc_mp, left, 1,
 695                                     cur->bc_ops->buf_ops);
 696                rval++;
 697        }
 698
 699        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
 700                xfs_btree_reada_bufl(cur->bc_mp, right, 1,
 701                                     cur->bc_ops->buf_ops);
 702                rval++;
 703        }
 704
 705        return rval;
 706}
 707
 708STATIC int
 709xfs_btree_readahead_sblock(
 710        struct xfs_btree_cur    *cur,
 711        int                     lr,
 712        struct xfs_btree_block *block)
 713{
 714        int                     rval = 0;
 715        xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
 716        xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
 717
 718
 719        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
 720                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 721                                     left, 1, cur->bc_ops->buf_ops);
 722                rval++;
 723        }
 724
 725        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
 726                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 727                                     right, 1, cur->bc_ops->buf_ops);
 728                rval++;
 729        }
 730
 731        return rval;
 732}
 733
 734/*
 735 * Read-ahead btree blocks, at the given level.
 736 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 737 */
 738STATIC int
 739xfs_btree_readahead(
 740        struct xfs_btree_cur    *cur,           /* btree cursor */
 741        int                     lev,            /* level in btree */
 742        int                     lr)             /* left/right bits */
 743{
 744        struct xfs_btree_block  *block;
 745
 746        /*
 747         * No readahead needed if we are at the root level and the
 748         * btree root is stored in the inode.
 749         */
 750        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 751            (lev == cur->bc_nlevels - 1))
 752                return 0;
 753
 754        if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
 755                return 0;
 756
 757        cur->bc_ra[lev] |= lr;
 758        block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
 759
 760        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 761                return xfs_btree_readahead_lblock(cur, lr, block);
 762        return xfs_btree_readahead_sblock(cur, lr, block);
 763}
 764
 765/*
 766 * Set the buffer for level "lev" in the cursor to bp, releasing
 767 * any previous buffer.
 768 */
 769STATIC void
 770xfs_btree_setbuf(
 771        xfs_btree_cur_t         *cur,   /* btree cursor */
 772        int                     lev,    /* level in btree */
 773        xfs_buf_t               *bp)    /* new buffer to set */
 774{
 775        struct xfs_btree_block  *b;     /* btree block */
 776
 777        if (cur->bc_bufs[lev])
 778                xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
 779        cur->bc_bufs[lev] = bp;
 780        cur->bc_ra[lev] = 0;
 781
 782        b = XFS_BUF_TO_BLOCK(bp);
 783        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 784                if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
 785                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 786                if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
 787                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 788        } else {
 789                if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
 790                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 791                if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
 792                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 793        }
 794}
 795
 796STATIC int
 797xfs_btree_ptr_is_null(
 798        struct xfs_btree_cur    *cur,
 799        union xfs_btree_ptr     *ptr)
 800{
 801        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 802                return ptr->l == cpu_to_be64(NULLDFSBNO);
 803        else
 804                return ptr->s == cpu_to_be32(NULLAGBLOCK);
 805}
 806
 807STATIC void
 808xfs_btree_set_ptr_null(
 809        struct xfs_btree_cur    *cur,
 810        union xfs_btree_ptr     *ptr)
 811{
 812        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 813                ptr->l = cpu_to_be64(NULLDFSBNO);
 814        else
 815                ptr->s = cpu_to_be32(NULLAGBLOCK);
 816}
 817
 818/*
 819 * Get/set/init sibling pointers
 820 */
 821STATIC void
 822xfs_btree_get_sibling(
 823        struct xfs_btree_cur    *cur,
 824        struct xfs_btree_block  *block,
 825        union xfs_btree_ptr     *ptr,
 826        int                     lr)
 827{
 828        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 829
 830        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 831                if (lr == XFS_BB_RIGHTSIB)
 832                        ptr->l = block->bb_u.l.bb_rightsib;
 833                else
 834                        ptr->l = block->bb_u.l.bb_leftsib;
 835        } else {
 836                if (lr == XFS_BB_RIGHTSIB)
 837                        ptr->s = block->bb_u.s.bb_rightsib;
 838                else
 839                        ptr->s = block->bb_u.s.bb_leftsib;
 840        }
 841}
 842
 843STATIC void
 844xfs_btree_set_sibling(
 845        struct xfs_btree_cur    *cur,
 846        struct xfs_btree_block  *block,
 847        union xfs_btree_ptr     *ptr,
 848        int                     lr)
 849{
 850        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 851
 852        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 853                if (lr == XFS_BB_RIGHTSIB)
 854                        block->bb_u.l.bb_rightsib = ptr->l;
 855                else
 856                        block->bb_u.l.bb_leftsib = ptr->l;
 857        } else {
 858                if (lr == XFS_BB_RIGHTSIB)
 859                        block->bb_u.s.bb_rightsib = ptr->s;
 860                else
 861                        block->bb_u.s.bb_leftsib = ptr->s;
 862        }
 863}
 864
 865void
 866xfs_btree_init_block(
 867        struct xfs_mount *mp,
 868        struct xfs_buf  *bp,
 869        __u32           magic,
 870        __u16           level,
 871        __u16           numrecs,
 872        unsigned int    flags)
 873{
 874        struct xfs_btree_block  *new = XFS_BUF_TO_BLOCK(bp);
 875
 876        new->bb_magic = cpu_to_be32(magic);
 877        new->bb_level = cpu_to_be16(level);
 878        new->bb_numrecs = cpu_to_be16(numrecs);
 879
 880        if (flags & XFS_BTREE_LONG_PTRS) {
 881                new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
 882                new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
 883        } else {
 884                new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
 885                new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 886        }
 887}
 888
 889STATIC void
 890xfs_btree_init_block_cur(
 891        struct xfs_btree_cur    *cur,
 892        int                     level,
 893        int                     numrecs,
 894        struct xfs_buf          *bp)
 895{
 896        xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
 897                               level, numrecs, cur->bc_flags);
 898}
 899
 900/*
 901 * Return true if ptr is the last record in the btree and
 902 * we need to track updateѕ to this record.  The decision
 903 * will be further refined in the update_lastrec method.
 904 */
 905STATIC int
 906xfs_btree_is_lastrec(
 907        struct xfs_btree_cur    *cur,
 908        struct xfs_btree_block  *block,
 909        int                     level)
 910{
 911        union xfs_btree_ptr     ptr;
 912
 913        if (level > 0)
 914                return 0;
 915        if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
 916                return 0;
 917
 918        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
 919        if (!xfs_btree_ptr_is_null(cur, &ptr))
 920                return 0;
 921        return 1;
 922}
 923
 924STATIC void
 925xfs_btree_buf_to_ptr(
 926        struct xfs_btree_cur    *cur,
 927        struct xfs_buf          *bp,
 928        union xfs_btree_ptr     *ptr)
 929{
 930        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 931                ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
 932                                        XFS_BUF_ADDR(bp)));
 933        else {
 934                ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
 935                                        XFS_BUF_ADDR(bp)));
 936        }
 937}
 938
 939STATIC xfs_daddr_t
 940xfs_btree_ptr_to_daddr(
 941        struct xfs_btree_cur    *cur,
 942        union xfs_btree_ptr     *ptr)
 943{
 944        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 945                ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
 946
 947                return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
 948        } else {
 949                ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
 950                ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
 951
 952                return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
 953                                        be32_to_cpu(ptr->s));
 954        }
 955}
 956
 957STATIC void
 958xfs_btree_set_refs(
 959        struct xfs_btree_cur    *cur,
 960        struct xfs_buf          *bp)
 961{
 962        switch (cur->bc_btnum) {
 963        case XFS_BTNUM_BNO:
 964        case XFS_BTNUM_CNT:
 965                xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
 966                break;
 967        case XFS_BTNUM_INO:
 968                xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
 969                break;
 970        case XFS_BTNUM_BMAP:
 971                xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
 972                break;
 973        default:
 974                ASSERT(0);
 975        }
 976}
 977
 978STATIC int
 979xfs_btree_get_buf_block(
 980        struct xfs_btree_cur    *cur,
 981        union xfs_btree_ptr     *ptr,
 982        int                     flags,
 983        struct xfs_btree_block  **block,
 984        struct xfs_buf          **bpp)
 985{
 986        struct xfs_mount        *mp = cur->bc_mp;
 987        xfs_daddr_t             d;
 988
 989        /* need to sort out how callers deal with failures first */
 990        ASSERT(!(flags & XBF_TRYLOCK));
 991
 992        d = xfs_btree_ptr_to_daddr(cur, ptr);
 993        *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
 994                                 mp->m_bsize, flags);
 995
 996        if (!*bpp)
 997                return ENOMEM;
 998
 999        (*bpp)->b_ops = cur->bc_ops->buf_ops;
1000        *block = XFS_BUF_TO_BLOCK(*bpp);
1001        return 0;
1002}
1003
1004/*
1005 * Read in the buffer at the given ptr and return the buffer and
1006 * the block pointer within the buffer.
1007 */
1008STATIC int
1009xfs_btree_read_buf_block(
1010        struct xfs_btree_cur    *cur,
1011        union xfs_btree_ptr     *ptr,
1012        int                     level,
1013        int                     flags,
1014        struct xfs_btree_block  **block,
1015        struct xfs_buf          **bpp)
1016{
1017        struct xfs_mount        *mp = cur->bc_mp;
1018        xfs_daddr_t             d;
1019        int                     error;
1020
1021        /* need to sort out how callers deal with failures first */
1022        ASSERT(!(flags & XBF_TRYLOCK));
1023
1024        d = xfs_btree_ptr_to_daddr(cur, ptr);
1025        error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1026                                   mp->m_bsize, flags, bpp,
1027                                   cur->bc_ops->buf_ops);
1028        if (error)
1029                return error;
1030
1031        ASSERT(!xfs_buf_geterror(*bpp));
1032        xfs_btree_set_refs(cur, *bpp);
1033        *block = XFS_BUF_TO_BLOCK(*bpp);
1034        return 0;
1035}
1036
1037/*
1038 * Copy keys from one btree block to another.
1039 */
1040STATIC void
1041xfs_btree_copy_keys(
1042        struct xfs_btree_cur    *cur,
1043        union xfs_btree_key     *dst_key,
1044        union xfs_btree_key     *src_key,
1045        int                     numkeys)
1046{
1047        ASSERT(numkeys >= 0);
1048        memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1049}
1050
1051/*
1052 * Copy records from one btree block to another.
1053 */
1054STATIC void
1055xfs_btree_copy_recs(
1056        struct xfs_btree_cur    *cur,
1057        union xfs_btree_rec     *dst_rec,
1058        union xfs_btree_rec     *src_rec,
1059        int                     numrecs)
1060{
1061        ASSERT(numrecs >= 0);
1062        memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1063}
1064
1065/*
1066 * Copy block pointers from one btree block to another.
1067 */
1068STATIC void
1069xfs_btree_copy_ptrs(
1070        struct xfs_btree_cur    *cur,
1071        union xfs_btree_ptr     *dst_ptr,
1072        union xfs_btree_ptr     *src_ptr,
1073        int                     numptrs)
1074{
1075        ASSERT(numptrs >= 0);
1076        memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1077}
1078
1079/*
1080 * Shift keys one index left/right inside a single btree block.
1081 */
1082STATIC void
1083xfs_btree_shift_keys(
1084        struct xfs_btree_cur    *cur,
1085        union xfs_btree_key     *key,
1086        int                     dir,
1087        int                     numkeys)
1088{
1089        char                    *dst_key;
1090
1091        ASSERT(numkeys >= 0);
1092        ASSERT(dir == 1 || dir == -1);
1093
1094        dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1095        memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1096}
1097
1098/*
1099 * Shift records one index left/right inside a single btree block.
1100 */
1101STATIC void
1102xfs_btree_shift_recs(
1103        struct xfs_btree_cur    *cur,
1104        union xfs_btree_rec     *rec,
1105        int                     dir,
1106        int                     numrecs)
1107{
1108        char                    *dst_rec;
1109
1110        ASSERT(numrecs >= 0);
1111        ASSERT(dir == 1 || dir == -1);
1112
1113        dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1114        memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1115}
1116
1117/*
1118 * Shift block pointers one index left/right inside a single btree block.
1119 */
1120STATIC void
1121xfs_btree_shift_ptrs(
1122        struct xfs_btree_cur    *cur,
1123        union xfs_btree_ptr     *ptr,
1124        int                     dir,
1125        int                     numptrs)
1126{
1127        char                    *dst_ptr;
1128
1129        ASSERT(numptrs >= 0);
1130        ASSERT(dir == 1 || dir == -1);
1131
1132        dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1133        memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1134}
1135
1136/*
1137 * Log key values from the btree block.
1138 */
1139STATIC void
1140xfs_btree_log_keys(
1141        struct xfs_btree_cur    *cur,
1142        struct xfs_buf          *bp,
1143        int                     first,
1144        int                     last)
1145{
1146        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1147        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1148
1149        if (bp) {
1150                xfs_trans_log_buf(cur->bc_tp, bp,
1151                                  xfs_btree_key_offset(cur, first),
1152                                  xfs_btree_key_offset(cur, last + 1) - 1);
1153        } else {
1154                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1155                                xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1156        }
1157
1158        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1159}
1160
1161/*
1162 * Log record values from the btree block.
1163 */
1164void
1165xfs_btree_log_recs(
1166        struct xfs_btree_cur    *cur,
1167        struct xfs_buf          *bp,
1168        int                     first,
1169        int                     last)
1170{
1171        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1172        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1173
1174        xfs_trans_log_buf(cur->bc_tp, bp,
1175                          xfs_btree_rec_offset(cur, first),
1176                          xfs_btree_rec_offset(cur, last + 1) - 1);
1177
1178        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1179}
1180
1181/*
1182 * Log block pointer fields from a btree block (nonleaf).
1183 */
1184STATIC void
1185xfs_btree_log_ptrs(
1186        struct xfs_btree_cur    *cur,   /* btree cursor */
1187        struct xfs_buf          *bp,    /* buffer containing btree block */
1188        int                     first,  /* index of first pointer to log */
1189        int                     last)   /* index of last pointer to log */
1190{
1191        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1192        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1193
1194        if (bp) {
1195                struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1196                int                     level = xfs_btree_get_level(block);
1197
1198                xfs_trans_log_buf(cur->bc_tp, bp,
1199                                xfs_btree_ptr_offset(cur, first, level),
1200                                xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1201        } else {
1202                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1203                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1204        }
1205
1206        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1207}
1208
1209/*
1210 * Log fields from a btree block header.
1211 */
1212void
1213xfs_btree_log_block(
1214        struct xfs_btree_cur    *cur,   /* btree cursor */
1215        struct xfs_buf          *bp,    /* buffer containing btree block */
1216        int                     fields) /* mask of fields: XFS_BB_... */
1217{
1218        int                     first;  /* first byte offset logged */
1219        int                     last;   /* last byte offset logged */
1220        static const short      soffsets[] = {  /* table of offsets (short) */
1221                offsetof(struct xfs_btree_block, bb_magic),
1222                offsetof(struct xfs_btree_block, bb_level),
1223                offsetof(struct xfs_btree_block, bb_numrecs),
1224                offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1225                offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1226                XFS_BTREE_SBLOCK_LEN
1227        };
1228        static const short      loffsets[] = {  /* table of offsets (long) */
1229                offsetof(struct xfs_btree_block, bb_magic),
1230                offsetof(struct xfs_btree_block, bb_level),
1231                offsetof(struct xfs_btree_block, bb_numrecs),
1232                offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1233                offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1234                XFS_BTREE_LBLOCK_LEN
1235        };
1236
1237        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1238        XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1239
1240        if (bp) {
1241                xfs_btree_offsets(fields,
1242                                  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1243                                        loffsets : soffsets,
1244                                  XFS_BB_NUM_BITS, &first, &last);
1245                xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1246        } else {
1247                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1248                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1249        }
1250
1251        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1252}
1253
1254/*
1255 * Increment cursor by one record at the level.
1256 * For nonzero levels the leaf-ward information is untouched.
1257 */
1258int                                             /* error */
1259xfs_btree_increment(
1260        struct xfs_btree_cur    *cur,
1261        int                     level,
1262        int                     *stat)          /* success/failure */
1263{
1264        struct xfs_btree_block  *block;
1265        union xfs_btree_ptr     ptr;
1266        struct xfs_buf          *bp;
1267        int                     error;          /* error return value */
1268        int                     lev;
1269
1270        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1271        XFS_BTREE_TRACE_ARGI(cur, level);
1272
1273        ASSERT(level < cur->bc_nlevels);
1274
1275        /* Read-ahead to the right at this level. */
1276        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1277
1278        /* Get a pointer to the btree block. */
1279        block = xfs_btree_get_block(cur, level, &bp);
1280
1281#ifdef DEBUG
1282        error = xfs_btree_check_block(cur, block, level, bp);
1283        if (error)
1284                goto error0;
1285#endif
1286
1287        /* We're done if we remain in the block after the increment. */
1288        if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1289                goto out1;
1290
1291        /* Fail if we just went off the right edge of the tree. */
1292        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1293        if (xfs_btree_ptr_is_null(cur, &ptr))
1294                goto out0;
1295
1296        XFS_BTREE_STATS_INC(cur, increment);
1297
1298        /*
1299         * March up the tree incrementing pointers.
1300         * Stop when we don't go off the right edge of a block.
1301         */
1302        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1303                block = xfs_btree_get_block(cur, lev, &bp);
1304
1305#ifdef DEBUG
1306                error = xfs_btree_check_block(cur, block, lev, bp);
1307                if (error)
1308                        goto error0;
1309#endif
1310
1311                if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1312                        break;
1313
1314                /* Read-ahead the right block for the next loop. */
1315                xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1316        }
1317
1318        /*
1319         * If we went off the root then we are either seriously
1320         * confused or have the tree root in an inode.
1321         */
1322        if (lev == cur->bc_nlevels) {
1323                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1324                        goto out0;
1325                ASSERT(0);
1326                error = EFSCORRUPTED;
1327                goto error0;
1328        }
1329        ASSERT(lev < cur->bc_nlevels);
1330
1331        /*
1332         * Now walk back down the tree, fixing up the cursor's buffer
1333         * pointers and key numbers.
1334         */
1335        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1336                union xfs_btree_ptr     *ptrp;
1337
1338                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1339                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1340                                                        0, &block, &bp);
1341                if (error)
1342                        goto error0;
1343
1344                xfs_btree_setbuf(cur, lev, bp);
1345                cur->bc_ptrs[lev] = 1;
1346        }
1347out1:
1348        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1349        *stat = 1;
1350        return 0;
1351
1352out0:
1353        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1354        *stat = 0;
1355        return 0;
1356
1357error0:
1358        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1359        return error;
1360}
1361
1362/*
1363 * Decrement cursor by one record at the level.
1364 * For nonzero levels the leaf-ward information is untouched.
1365 */
1366int                                             /* error */
1367xfs_btree_decrement(
1368        struct xfs_btree_cur    *cur,
1369        int                     level,
1370        int                     *stat)          /* success/failure */
1371{
1372        struct xfs_btree_block  *block;
1373        xfs_buf_t               *bp;
1374        int                     error;          /* error return value */
1375        int                     lev;
1376        union xfs_btree_ptr     ptr;
1377
1378        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1379        XFS_BTREE_TRACE_ARGI(cur, level);
1380
1381        ASSERT(level < cur->bc_nlevels);
1382
1383        /* Read-ahead to the left at this level. */
1384        xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1385
1386        /* We're done if we remain in the block after the decrement. */
1387        if (--cur->bc_ptrs[level] > 0)
1388                goto out1;
1389
1390        /* Get a pointer to the btree block. */
1391        block = xfs_btree_get_block(cur, level, &bp);
1392
1393#ifdef DEBUG
1394        error = xfs_btree_check_block(cur, block, level, bp);
1395        if (error)
1396                goto error0;
1397#endif
1398
1399        /* Fail if we just went off the left edge of the tree. */
1400        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1401        if (xfs_btree_ptr_is_null(cur, &ptr))
1402                goto out0;
1403
1404        XFS_BTREE_STATS_INC(cur, decrement);
1405
1406        /*
1407         * March up the tree decrementing pointers.
1408         * Stop when we don't go off the left edge of a block.
1409         */
1410        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1411                if (--cur->bc_ptrs[lev] > 0)
1412                        break;
1413                /* Read-ahead the left block for the next loop. */
1414                xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1415        }
1416
1417        /*
1418         * If we went off the root then we are seriously confused.
1419         * or the root of the tree is in an inode.
1420         */
1421        if (lev == cur->bc_nlevels) {
1422                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1423                        goto out0;
1424                ASSERT(0);
1425                error = EFSCORRUPTED;
1426                goto error0;
1427        }
1428        ASSERT(lev < cur->bc_nlevels);
1429
1430        /*
1431         * Now walk back down the tree, fixing up the cursor's buffer
1432         * pointers and key numbers.
1433         */
1434        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1435                union xfs_btree_ptr     *ptrp;
1436
1437                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1438                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1439                                                        0, &block, &bp);
1440                if (error)
1441                        goto error0;
1442                xfs_btree_setbuf(cur, lev, bp);
1443                cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1444        }
1445out1:
1446        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1447        *stat = 1;
1448        return 0;
1449
1450out0:
1451        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1452        *stat = 0;
1453        return 0;
1454
1455error0:
1456        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1457        return error;
1458}
1459
1460STATIC int
1461xfs_btree_lookup_get_block(
1462        struct xfs_btree_cur    *cur,   /* btree cursor */
1463        int                     level,  /* level in the btree */
1464        union xfs_btree_ptr     *pp,    /* ptr to btree block */
1465        struct xfs_btree_block  **blkp) /* return btree block */
1466{
1467        struct xfs_buf          *bp;    /* buffer pointer for btree block */
1468        int                     error = 0;
1469
1470        /* special case the root block if in an inode */
1471        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1472            (level == cur->bc_nlevels - 1)) {
1473                *blkp = xfs_btree_get_iroot(cur);
1474                return 0;
1475        }
1476
1477        /*
1478         * If the old buffer at this level for the disk address we are
1479         * looking for re-use it.
1480         *
1481         * Otherwise throw it away and get a new one.
1482         */
1483        bp = cur->bc_bufs[level];
1484        if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1485                *blkp = XFS_BUF_TO_BLOCK(bp);
1486                return 0;
1487        }
1488
1489        error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1490        if (error)
1491                return error;
1492
1493        xfs_btree_setbuf(cur, level, bp);
1494        return 0;
1495}
1496
1497/*
1498 * Get current search key.  For level 0 we don't actually have a key
1499 * structure so we make one up from the record.  For all other levels
1500 * we just return the right key.
1501 */
1502STATIC union xfs_btree_key *
1503xfs_lookup_get_search_key(
1504        struct xfs_btree_cur    *cur,
1505        int                     level,
1506        int                     keyno,
1507        struct xfs_btree_block  *block,
1508        union xfs_btree_key     *kp)
1509{
1510        if (level == 0) {
1511                cur->bc_ops->init_key_from_rec(kp,
1512                                xfs_btree_rec_addr(cur, keyno, block));
1513                return kp;
1514        }
1515
1516        return xfs_btree_key_addr(cur, keyno, block);
1517}
1518
1519/*
1520 * Lookup the record.  The cursor is made to point to it, based on dir.
1521 * Return 0 if can't find any such record, 1 for success.
1522 */
1523int                                     /* error */
1524xfs_btree_lookup(
1525        struct xfs_btree_cur    *cur,   /* btree cursor */
1526        xfs_lookup_t            dir,    /* <=, ==, or >= */
1527        int                     *stat)  /* success/failure */
1528{
1529        struct xfs_btree_block  *block; /* current btree block */
1530        __int64_t               diff;   /* difference for the current key */
1531        int                     error;  /* error return value */
1532        int                     keyno;  /* current key number */
1533        int                     level;  /* level in the btree */
1534        union xfs_btree_ptr     *pp;    /* ptr to btree block */
1535        union xfs_btree_ptr     ptr;    /* ptr to btree block */
1536
1537        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1538        XFS_BTREE_TRACE_ARGI(cur, dir);
1539
1540        XFS_BTREE_STATS_INC(cur, lookup);
1541
1542        block = NULL;
1543        keyno = 0;
1544
1545        /* initialise start pointer from cursor */
1546        cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1547        pp = &ptr;
1548
1549        /*
1550         * Iterate over each level in the btree, starting at the root.
1551         * For each level above the leaves, find the key we need, based
1552         * on the lookup record, then follow the corresponding block
1553         * pointer down to the next level.
1554         */
1555        for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1556                /* Get the block we need to do the lookup on. */
1557                error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1558                if (error)
1559                        goto error0;
1560
1561                if (diff == 0) {
1562                        /*
1563                         * If we already had a key match at a higher level, we
1564                         * know we need to use the first entry in this block.
1565                         */
1566                        keyno = 1;
1567                } else {
1568                        /* Otherwise search this block. Do a binary search. */
1569
1570                        int     high;   /* high entry number */
1571                        int     low;    /* low entry number */
1572
1573                        /* Set low and high entry numbers, 1-based. */
1574                        low = 1;
1575                        high = xfs_btree_get_numrecs(block);
1576                        if (!high) {
1577                                /* Block is empty, must be an empty leaf. */
1578                                ASSERT(level == 0 && cur->bc_nlevels == 1);
1579
1580                                cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1581                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1582                                *stat = 0;
1583                                return 0;
1584                        }
1585
1586                        /* Binary search the block. */
1587                        while (low <= high) {
1588                                union xfs_btree_key     key;
1589                                union xfs_btree_key     *kp;
1590
1591                                XFS_BTREE_STATS_INC(cur, compare);
1592
1593                                /* keyno is average of low and high. */
1594                                keyno = (low + high) >> 1;
1595
1596                                /* Get current search key */
1597                                kp = xfs_lookup_get_search_key(cur, level,
1598                                                keyno, block, &key);
1599
1600                                /*
1601                                 * Compute difference to get next direction:
1602                                 *  - less than, move right
1603                                 *  - greater than, move left
1604                                 *  - equal, we're done
1605                                 */
1606                                diff = cur->bc_ops->key_diff(cur, kp);
1607                                if (diff < 0)
1608                                        low = keyno + 1;
1609                                else if (diff > 0)
1610                                        high = keyno - 1;
1611                                else
1612                                        break;
1613                        }
1614                }
1615
1616                /*
1617                 * If there are more levels, set up for the next level
1618                 * by getting the block number and filling in the cursor.
1619                 */
1620                if (level > 0) {
1621                        /*
1622                         * If we moved left, need the previous key number,
1623                         * unless there isn't one.
1624                         */
1625                        if (diff > 0 && --keyno < 1)
1626                                keyno = 1;
1627                        pp = xfs_btree_ptr_addr(cur, keyno, block);
1628
1629#ifdef DEBUG
1630                        error = xfs_btree_check_ptr(cur, pp, 0, level);
1631                        if (error)
1632                                goto error0;
1633#endif
1634                        cur->bc_ptrs[level] = keyno;
1635                }
1636        }
1637
1638        /* Done with the search. See if we need to adjust the results. */
1639        if (dir != XFS_LOOKUP_LE && diff < 0) {
1640                keyno++;
1641                /*
1642                 * If ge search and we went off the end of the block, but it's
1643                 * not the last block, we're in the wrong block.
1644                 */
1645                xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1646                if (dir == XFS_LOOKUP_GE &&
1647                    keyno > xfs_btree_get_numrecs(block) &&
1648                    !xfs_btree_ptr_is_null(cur, &ptr)) {
1649                        int     i;
1650
1651                        cur->bc_ptrs[0] = keyno;
1652                        error = xfs_btree_increment(cur, 0, &i);
1653                        if (error)
1654                                goto error0;
1655                        XFS_WANT_CORRUPTED_RETURN(i == 1);
1656                        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1657                        *stat = 1;
1658                        return 0;
1659                }
1660        } else if (dir == XFS_LOOKUP_LE && diff > 0)
1661                keyno--;
1662        cur->bc_ptrs[0] = keyno;
1663
1664        /* Return if we succeeded or not. */
1665        if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1666                *stat = 0;
1667        else if (dir != XFS_LOOKUP_EQ || diff == 0)
1668                *stat = 1;
1669        else
1670                *stat = 0;
1671        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1672        return 0;
1673
1674error0:
1675        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1676        return error;
1677}
1678
1679/*
1680 * Update keys at all levels from here to the root along the cursor's path.
1681 */
1682STATIC int
1683xfs_btree_updkey(
1684        struct xfs_btree_cur    *cur,
1685        union xfs_btree_key     *keyp,
1686        int                     level)
1687{
1688        struct xfs_btree_block  *block;
1689        struct xfs_buf          *bp;
1690        union xfs_btree_key     *kp;
1691        int                     ptr;
1692
1693        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1694        XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1695
1696        ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1697
1698        /*
1699         * Go up the tree from this level toward the root.
1700         * At each level, update the key value to the value input.
1701         * Stop when we reach a level where the cursor isn't pointing
1702         * at the first entry in the block.
1703         */
1704        for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1705#ifdef DEBUG
1706                int             error;
1707#endif
1708                block = xfs_btree_get_block(cur, level, &bp);
1709#ifdef DEBUG
1710                error = xfs_btree_check_block(cur, block, level, bp);
1711                if (error) {
1712                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1713                        return error;
1714                }
1715#endif
1716                ptr = cur->bc_ptrs[level];
1717                kp = xfs_btree_key_addr(cur, ptr, block);
1718                xfs_btree_copy_keys(cur, kp, keyp, 1);
1719                xfs_btree_log_keys(cur, bp, ptr, ptr);
1720        }
1721
1722        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1723        return 0;
1724}
1725
1726/*
1727 * Update the record referred to by cur to the value in the
1728 * given record. This either works (return 0) or gets an
1729 * EFSCORRUPTED error.
1730 */
1731int
1732xfs_btree_update(
1733        struct xfs_btree_cur    *cur,
1734        union xfs_btree_rec     *rec)
1735{
1736        struct xfs_btree_block  *block;
1737        struct xfs_buf          *bp;
1738        int                     error;
1739        int                     ptr;
1740        union xfs_btree_rec     *rp;
1741
1742        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1743        XFS_BTREE_TRACE_ARGR(cur, rec);
1744
1745        /* Pick up the current block. */
1746        block = xfs_btree_get_block(cur, 0, &bp);
1747
1748#ifdef DEBUG
1749        error = xfs_btree_check_block(cur, block, 0, bp);
1750        if (error)
1751                goto error0;
1752#endif
1753        /* Get the address of the rec to be updated. */
1754        ptr = cur->bc_ptrs[0];
1755        rp = xfs_btree_rec_addr(cur, ptr, block);
1756
1757        /* Fill in the new contents and log them. */
1758        xfs_btree_copy_recs(cur, rp, rec, 1);
1759        xfs_btree_log_recs(cur, bp, ptr, ptr);
1760
1761        /*
1762         * If we are tracking the last record in the tree and
1763         * we are at the far right edge of the tree, update it.
1764         */
1765        if (xfs_btree_is_lastrec(cur, block, 0)) {
1766                cur->bc_ops->update_lastrec(cur, block, rec,
1767                                            ptr, LASTREC_UPDATE);
1768        }
1769
1770        /* Updating first rec in leaf. Pass new key value up to our parent. */
1771        if (ptr == 1) {
1772                union xfs_btree_key     key;
1773
1774                cur->bc_ops->init_key_from_rec(&key, rec);
1775                error = xfs_btree_updkey(cur, &key, 1);
1776                if (error)
1777                        goto error0;
1778        }
1779
1780        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1781        return 0;
1782
1783error0:
1784        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1785        return error;
1786}
1787
1788/*
1789 * Move 1 record left from cur/level if possible.
1790 * Update cur to reflect the new path.
1791 */
1792STATIC int                                      /* error */
1793xfs_btree_lshift(
1794        struct xfs_btree_cur    *cur,
1795        int                     level,
1796        int                     *stat)          /* success/failure */
1797{
1798        union xfs_btree_key     key;            /* btree key */
1799        struct xfs_buf          *lbp;           /* left buffer pointer */
1800        struct xfs_btree_block  *left;          /* left btree block */
1801        int                     lrecs;          /* left record count */
1802        struct xfs_buf          *rbp;           /* right buffer pointer */
1803        struct xfs_btree_block  *right;         /* right btree block */
1804        int                     rrecs;          /* right record count */
1805        union xfs_btree_ptr     lptr;           /* left btree pointer */
1806        union xfs_btree_key     *rkp = NULL;    /* right btree key */
1807        union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1808        union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1809        int                     error;          /* error return value */
1810
1811        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1812        XFS_BTREE_TRACE_ARGI(cur, level);
1813
1814        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1815            level == cur->bc_nlevels - 1)
1816                goto out0;
1817
1818        /* Set up variables for this block as "right". */
1819        right = xfs_btree_get_block(cur, level, &rbp);
1820
1821#ifdef DEBUG
1822        error = xfs_btree_check_block(cur, right, level, rbp);
1823        if (error)
1824                goto error0;
1825#endif
1826
1827        /* If we've got no left sibling then we can't shift an entry left. */
1828        xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1829        if (xfs_btree_ptr_is_null(cur, &lptr))
1830                goto out0;
1831
1832        /*
1833         * If the cursor entry is the one that would be moved, don't
1834         * do it... it's too complicated.
1835         */
1836        if (cur->bc_ptrs[level] <= 1)
1837                goto out0;
1838
1839        /* Set up the left neighbor as "left". */
1840        error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1841        if (error)
1842                goto error0;
1843
1844        /* If it's full, it can't take another entry. */
1845        lrecs = xfs_btree_get_numrecs(left);
1846        if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1847                goto out0;
1848
1849        rrecs = xfs_btree_get_numrecs(right);
1850
1851        /*
1852         * We add one entry to the left side and remove one for the right side.
1853         * Account for it here, the changes will be updated on disk and logged
1854         * later.
1855         */
1856        lrecs++;
1857        rrecs--;
1858
1859        XFS_BTREE_STATS_INC(cur, lshift);
1860        XFS_BTREE_STATS_ADD(cur, moves, 1);
1861
1862        /*
1863         * If non-leaf, copy a key and a ptr to the left block.
1864         * Log the changes to the left block.
1865         */
1866        if (level > 0) {
1867                /* It's a non-leaf.  Move keys and pointers. */
1868                union xfs_btree_key     *lkp;   /* left btree key */
1869                union xfs_btree_ptr     *lpp;   /* left address pointer */
1870
1871                lkp = xfs_btree_key_addr(cur, lrecs, left);
1872                rkp = xfs_btree_key_addr(cur, 1, right);
1873
1874                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1875                rpp = xfs_btree_ptr_addr(cur, 1, right);
1876#ifdef DEBUG
1877                error = xfs_btree_check_ptr(cur, rpp, 0, level);
1878                if (error)
1879                        goto error0;
1880#endif
1881                xfs_btree_copy_keys(cur, lkp, rkp, 1);
1882                xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1883
1884                xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1885                xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1886
1887                ASSERT(cur->bc_ops->keys_inorder(cur,
1888                        xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1889        } else {
1890                /* It's a leaf.  Move records.  */
1891                union xfs_btree_rec     *lrp;   /* left record pointer */
1892
1893                lrp = xfs_btree_rec_addr(cur, lrecs, left);
1894                rrp = xfs_btree_rec_addr(cur, 1, right);
1895
1896                xfs_btree_copy_recs(cur, lrp, rrp, 1);
1897                xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1898
1899                ASSERT(cur->bc_ops->recs_inorder(cur,
1900                        xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1901        }
1902
1903        xfs_btree_set_numrecs(left, lrecs);
1904        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1905
1906        xfs_btree_set_numrecs(right, rrecs);
1907        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1908
1909        /*
1910         * Slide the contents of right down one entry.
1911         */
1912        XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1913        if (level > 0) {
1914                /* It's a nonleaf. operate on keys and ptrs */
1915#ifdef DEBUG
1916                int                     i;              /* loop index */
1917
1918                for (i = 0; i < rrecs; i++) {
1919                        error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1920                        if (error)
1921                                goto error0;
1922                }
1923#endif
1924                xfs_btree_shift_keys(cur,
1925                                xfs_btree_key_addr(cur, 2, right),
1926                                -1, rrecs);
1927                xfs_btree_shift_ptrs(cur,
1928                                xfs_btree_ptr_addr(cur, 2, right),
1929                                -1, rrecs);
1930
1931                xfs_btree_log_keys(cur, rbp, 1, rrecs);
1932                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1933        } else {
1934                /* It's a leaf. operate on records */
1935                xfs_btree_shift_recs(cur,
1936                        xfs_btree_rec_addr(cur, 2, right),
1937                        -1, rrecs);
1938                xfs_btree_log_recs(cur, rbp, 1, rrecs);
1939
1940                /*
1941                 * If it's the first record in the block, we'll need a key
1942                 * structure to pass up to the next level (updkey).
1943                 */
1944                cur->bc_ops->init_key_from_rec(&key,
1945                        xfs_btree_rec_addr(cur, 1, right));
1946                rkp = &key;
1947        }
1948
1949        /* Update the parent key values of right. */
1950        error = xfs_btree_updkey(cur, rkp, level + 1);
1951        if (error)
1952                goto error0;
1953
1954        /* Slide the cursor value left one. */
1955        cur->bc_ptrs[level]--;
1956
1957        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1958        *stat = 1;
1959        return 0;
1960
1961out0:
1962        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1963        *stat = 0;
1964        return 0;
1965
1966error0:
1967        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1968        return error;
1969}
1970
1971/*
1972 * Move 1 record right from cur/level if possible.
1973 * Update cur to reflect the new path.
1974 */
1975STATIC int                                      /* error */
1976xfs_btree_rshift(
1977        struct xfs_btree_cur    *cur,
1978        int                     level,
1979        int                     *stat)          /* success/failure */
1980{
1981        union xfs_btree_key     key;            /* btree key */
1982        struct xfs_buf          *lbp;           /* left buffer pointer */
1983        struct xfs_btree_block  *left;          /* left btree block */
1984        struct xfs_buf          *rbp;           /* right buffer pointer */
1985        struct xfs_btree_block  *right;         /* right btree block */
1986        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
1987        union xfs_btree_ptr     rptr;           /* right block pointer */
1988        union xfs_btree_key     *rkp;           /* right btree key */
1989        int                     rrecs;          /* right record count */
1990        int                     lrecs;          /* left record count */
1991        int                     error;          /* error return value */
1992        int                     i;              /* loop counter */
1993
1994        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1995        XFS_BTREE_TRACE_ARGI(cur, level);
1996
1997        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1998            (level == cur->bc_nlevels - 1))
1999                goto out0;
2000
2001        /* Set up variables for this block as "left". */
2002        left = xfs_btree_get_block(cur, level, &lbp);
2003
2004#ifdef DEBUG
2005        error = xfs_btree_check_block(cur, left, level, lbp);
2006        if (error)
2007                goto error0;
2008#endif
2009
2010        /* If we've got no right sibling then we can't shift an entry right. */
2011        xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2012        if (xfs_btree_ptr_is_null(cur, &rptr))
2013                goto out0;
2014
2015        /*
2016         * If the cursor entry is the one that would be moved, don't
2017         * do it... it's too complicated.
2018         */
2019        lrecs = xfs_btree_get_numrecs(left);
2020        if (cur->bc_ptrs[level] >= lrecs)
2021                goto out0;
2022
2023        /* Set up the right neighbor as "right". */
2024        error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2025        if (error)
2026                goto error0;
2027
2028        /* If it's full, it can't take another entry. */
2029        rrecs = xfs_btree_get_numrecs(right);
2030        if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2031                goto out0;
2032
2033        XFS_BTREE_STATS_INC(cur, rshift);
2034        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2035
2036        /*
2037         * Make a hole at the start of the right neighbor block, then
2038         * copy the last left block entry to the hole.
2039         */
2040        if (level > 0) {
2041                /* It's a nonleaf. make a hole in the keys and ptrs */
2042                union xfs_btree_key     *lkp;
2043                union xfs_btree_ptr     *lpp;
2044                union xfs_btree_ptr     *rpp;
2045
2046                lkp = xfs_btree_key_addr(cur, lrecs, left);
2047                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2048                rkp = xfs_btree_key_addr(cur, 1, right);
2049                rpp = xfs_btree_ptr_addr(cur, 1, right);
2050
2051#ifdef DEBUG
2052                for (i = rrecs - 1; i >= 0; i--) {
2053                        error = xfs_btree_check_ptr(cur, rpp, i, level);
2054                        if (error)
2055                                goto error0;
2056                }
2057#endif
2058
2059                xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2060                xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2061
2062#ifdef DEBUG
2063                error = xfs_btree_check_ptr(cur, lpp, 0, level);
2064                if (error)
2065                        goto error0;
2066#endif
2067
2068                /* Now put the new data in, and log it. */
2069                xfs_btree_copy_keys(cur, rkp, lkp, 1);
2070                xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2071
2072                xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2073                xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2074
2075                ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2076                        xfs_btree_key_addr(cur, 2, right)));
2077        } else {
2078                /* It's a leaf. make a hole in the records */
2079                union xfs_btree_rec     *lrp;
2080                union xfs_btree_rec     *rrp;
2081
2082                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2083                rrp = xfs_btree_rec_addr(cur, 1, right);
2084
2085                xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2086
2087                /* Now put the new data in, and log it. */
2088                xfs_btree_copy_recs(cur, rrp, lrp, 1);
2089                xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2090
2091                cur->bc_ops->init_key_from_rec(&key, rrp);
2092                rkp = &key;
2093
2094                ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2095                        xfs_btree_rec_addr(cur, 2, right)));
2096        }
2097
2098        /*
2099         * Decrement and log left's numrecs, bump and log right's numrecs.
2100         */
2101        xfs_btree_set_numrecs(left, --lrecs);
2102        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2103
2104        xfs_btree_set_numrecs(right, ++rrecs);
2105        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2106
2107        /*
2108         * Using a temporary cursor, update the parent key values of the
2109         * block on the right.
2110         */
2111        error = xfs_btree_dup_cursor(cur, &tcur);
2112        if (error)
2113                goto error0;
2114        i = xfs_btree_lastrec(tcur, level);
2115        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2116
2117        error = xfs_btree_increment(tcur, level, &i);
2118        if (error)
2119                goto error1;
2120
2121        error = xfs_btree_updkey(tcur, rkp, level + 1);
2122        if (error)
2123                goto error1;
2124
2125        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2126
2127        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2128        *stat = 1;
2129        return 0;
2130
2131out0:
2132        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2133        *stat = 0;
2134        return 0;
2135
2136error0:
2137        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2138        return error;
2139
2140error1:
2141        XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2142        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2143        return error;
2144}
2145
2146/*
2147 * Split cur/level block in half.
2148 * Return new block number and the key to its first
2149 * record (to be inserted into parent).
2150 */
2151STATIC int                                      /* error */
2152xfs_btree_split(
2153        struct xfs_btree_cur    *cur,
2154        int                     level,
2155        union xfs_btree_ptr     *ptrp,
2156        union xfs_btree_key     *key,
2157        struct xfs_btree_cur    **curp,
2158        int                     *stat)          /* success/failure */
2159{
2160        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2161        struct xfs_buf          *lbp;           /* left buffer pointer */
2162        struct xfs_btree_block  *left;          /* left btree block */
2163        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2164        struct xfs_buf          *rbp;           /* right buffer pointer */
2165        struct xfs_btree_block  *right;         /* right btree block */
2166        union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2167        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2168        struct xfs_btree_block  *rrblock;       /* right-right btree block */
2169        int                     lrecs;
2170        int                     rrecs;
2171        int                     src_index;
2172        int                     error;          /* error return value */
2173#ifdef DEBUG
2174        int                     i;
2175#endif
2176
2177        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2178        XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2179
2180        XFS_BTREE_STATS_INC(cur, split);
2181
2182        /* Set up left block (current one). */
2183        left = xfs_btree_get_block(cur, level, &lbp);
2184
2185#ifdef DEBUG
2186        error = xfs_btree_check_block(cur, left, level, lbp);
2187        if (error)
2188                goto error0;
2189#endif
2190
2191        xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2192
2193        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2194        error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2195        if (error)
2196                goto error0;
2197        if (*stat == 0)
2198                goto out0;
2199        XFS_BTREE_STATS_INC(cur, alloc);
2200
2201        /* Set up the new block as "right". */
2202        error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2203        if (error)
2204                goto error0;
2205
2206        /* Fill in the btree header for the new right block. */
2207        xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
2208
2209        /*
2210         * Split the entries between the old and the new block evenly.
2211         * Make sure that if there's an odd number of entries now, that
2212         * each new block will have the same number of entries.
2213         */
2214        lrecs = xfs_btree_get_numrecs(left);
2215        rrecs = lrecs / 2;
2216        if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2217                rrecs++;
2218        src_index = (lrecs - rrecs + 1);
2219
2220        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2221
2222        /*
2223         * Copy btree block entries from the left block over to the
2224         * new block, the right. Update the right block and log the
2225         * changes.
2226         */
2227        if (level > 0) {
2228                /* It's a non-leaf.  Move keys and pointers. */
2229                union xfs_btree_key     *lkp;   /* left btree key */
2230                union xfs_btree_ptr     *lpp;   /* left address pointer */
2231                union xfs_btree_key     *rkp;   /* right btree key */
2232                union xfs_btree_ptr     *rpp;   /* right address pointer */
2233
2234                lkp = xfs_btree_key_addr(cur, src_index, left);
2235                lpp = xfs_btree_ptr_addr(cur, src_index, left);
2236                rkp = xfs_btree_key_addr(cur, 1, right);
2237                rpp = xfs_btree_ptr_addr(cur, 1, right);
2238
2239#ifdef DEBUG
2240                for (i = src_index; i < rrecs; i++) {
2241                        error = xfs_btree_check_ptr(cur, lpp, i, level);
2242                        if (error)
2243                                goto error0;
2244                }
2245#endif
2246
2247                xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2248                xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2249
2250                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2251                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2252
2253                /* Grab the keys to the entries moved to the right block */
2254                xfs_btree_copy_keys(cur, key, rkp, 1);
2255        } else {
2256                /* It's a leaf.  Move records.  */
2257                union xfs_btree_rec     *lrp;   /* left record pointer */
2258                union xfs_btree_rec     *rrp;   /* right record pointer */
2259
2260                lrp = xfs_btree_rec_addr(cur, src_index, left);
2261                rrp = xfs_btree_rec_addr(cur, 1, right);
2262
2263                xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2264                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2265
2266                cur->bc_ops->init_key_from_rec(key,
2267                        xfs_btree_rec_addr(cur, 1, right));
2268        }
2269
2270
2271        /*
2272         * Find the left block number by looking in the buffer.
2273         * Adjust numrecs, sibling pointers.
2274         */
2275        xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2276        xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2277        xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2278        xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2279
2280        lrecs -= rrecs;
2281        xfs_btree_set_numrecs(left, lrecs);
2282        xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2283
2284        xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2285        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2286
2287        /*
2288         * If there's a block to the new block's right, make that block
2289         * point back to right instead of to left.
2290         */
2291        if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2292                error = xfs_btree_read_buf_block(cur, &rrptr, level,
2293                                                        0, &rrblock, &rrbp);
2294                if (error)
2295                        goto error0;
2296                xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2297                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2298        }
2299        /*
2300         * If the cursor is really in the right block, move it there.
2301         * If it's just pointing past the last entry in left, then we'll
2302         * insert there, so don't change anything in that case.
2303         */
2304        if (cur->bc_ptrs[level] > lrecs + 1) {
2305                xfs_btree_setbuf(cur, level, rbp);
2306                cur->bc_ptrs[level] -= lrecs;
2307        }
2308        /*
2309         * If there are more levels, we'll need another cursor which refers
2310         * the right block, no matter where this cursor was.
2311         */
2312        if (level + 1 < cur->bc_nlevels) {
2313                error = xfs_btree_dup_cursor(cur, curp);
2314                if (error)
2315                        goto error0;
2316                (*curp)->bc_ptrs[level + 1]++;
2317        }
2318        *ptrp = rptr;
2319        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2320        *stat = 1;
2321        return 0;
2322out0:
2323        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2324        *stat = 0;
2325        return 0;
2326
2327error0:
2328        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2329        return error;
2330}
2331
2332/*
2333 * Copy the old inode root contents into a real block and make the
2334 * broot point to it.
2335 */
2336int                                             /* error */
2337xfs_btree_new_iroot(
2338        struct xfs_btree_cur    *cur,           /* btree cursor */
2339        int                     *logflags,      /* logging flags for inode */
2340        int                     *stat)          /* return status - 0 fail */
2341{
2342        struct xfs_buf          *cbp;           /* buffer for cblock */
2343        struct xfs_btree_block  *block;         /* btree block */
2344        struct xfs_btree_block  *cblock;        /* child btree block */
2345        union xfs_btree_key     *ckp;           /* child key pointer */
2346        union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2347        union xfs_btree_key     *kp;            /* pointer to btree key */
2348        union xfs_btree_ptr     *pp;            /* pointer to block addr */
2349        union xfs_btree_ptr     nptr;           /* new block addr */
2350        int                     level;          /* btree level */
2351        int                     error;          /* error return code */
2352#ifdef DEBUG
2353        int                     i;              /* loop counter */
2354#endif
2355
2356        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2357        XFS_BTREE_STATS_INC(cur, newroot);
2358
2359        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2360
2361        level = cur->bc_nlevels - 1;
2362
2363        block = xfs_btree_get_iroot(cur);
2364        pp = xfs_btree_ptr_addr(cur, 1, block);
2365
2366        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2367        error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2368        if (error)
2369                goto error0;
2370        if (*stat == 0) {
2371                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2372                return 0;
2373        }
2374        XFS_BTREE_STATS_INC(cur, alloc);
2375
2376        /* Copy the root into a real block. */
2377        error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2378        if (error)
2379                goto error0;
2380
2381        memcpy(cblock, block, xfs_btree_block_len(cur));
2382
2383        be16_add_cpu(&block->bb_level, 1);
2384        xfs_btree_set_numrecs(block, 1);
2385        cur->bc_nlevels++;
2386        cur->bc_ptrs[level + 1] = 1;
2387
2388        kp = xfs_btree_key_addr(cur, 1, block);
2389        ckp = xfs_btree_key_addr(cur, 1, cblock);
2390        xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2391
2392        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2393#ifdef DEBUG
2394        for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2395                error = xfs_btree_check_ptr(cur, pp, i, level);
2396                if (error)
2397                        goto error0;
2398        }
2399#endif
2400        xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2401
2402#ifdef DEBUG
2403        error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2404        if (error)
2405                goto error0;
2406#endif
2407        xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2408
2409        xfs_iroot_realloc(cur->bc_private.b.ip,
2410                          1 - xfs_btree_get_numrecs(cblock),
2411                          cur->bc_private.b.whichfork);
2412
2413        xfs_btree_setbuf(cur, level, cbp);
2414
2415        /*
2416         * Do all this logging at the end so that
2417         * the root is at the right level.
2418         */
2419        xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2420        xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2421        xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2422
2423        *logflags |=
2424                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2425        *stat = 1;
2426        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2427        return 0;
2428error0:
2429        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2430        return error;
2431}
2432
2433/*
2434 * Allocate a new root block, fill it in.
2435 */
2436STATIC int                              /* error */
2437xfs_btree_new_root(
2438        struct xfs_btree_cur    *cur,   /* btree cursor */
2439        int                     *stat)  /* success/failure */
2440{
2441        struct xfs_btree_block  *block; /* one half of the old root block */
2442        struct xfs_buf          *bp;    /* buffer containing block */
2443        int                     error;  /* error return value */
2444        struct xfs_buf          *lbp;   /* left buffer pointer */
2445        struct xfs_btree_block  *left;  /* left btree block */
2446        struct xfs_buf          *nbp;   /* new (root) buffer */
2447        struct xfs_btree_block  *new;   /* new (root) btree block */
2448        int                     nptr;   /* new value for key index, 1 or 2 */
2449        struct xfs_buf          *rbp;   /* right buffer pointer */
2450        struct xfs_btree_block  *right; /* right btree block */
2451        union xfs_btree_ptr     rptr;
2452        union xfs_btree_ptr     lptr;
2453
2454        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2455        XFS_BTREE_STATS_INC(cur, newroot);
2456
2457        /* initialise our start point from the cursor */
2458        cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2459
2460        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2461        error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2462        if (error)
2463                goto error0;
2464        if (*stat == 0)
2465                goto out0;
2466        XFS_BTREE_STATS_INC(cur, alloc);
2467
2468        /* Set up the new block. */
2469        error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2470        if (error)
2471                goto error0;
2472
2473        /* Set the root in the holding structure  increasing the level by 1. */
2474        cur->bc_ops->set_root(cur, &lptr, 1);
2475
2476        /*
2477         * At the previous root level there are now two blocks: the old root,
2478         * and the new block generated when it was split.  We don't know which
2479         * one the cursor is pointing at, so we set up variables "left" and
2480         * "right" for each case.
2481         */
2482        block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2483
2484#ifdef DEBUG
2485        error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2486        if (error)
2487                goto error0;
2488#endif
2489
2490        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2491        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2492                /* Our block is left, pick up the right block. */
2493                lbp = bp;
2494                xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2495                left = block;
2496                error = xfs_btree_read_buf_block(cur, &rptr,
2497                                        cur->bc_nlevels - 1, 0, &right, &rbp);
2498                if (error)
2499                        goto error0;
2500                bp = rbp;
2501                nptr = 1;
2502        } else {
2503                /* Our block is right, pick up the left block. */
2504                rbp = bp;
2505                xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2506                right = block;
2507                xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2508                error = xfs_btree_read_buf_block(cur, &lptr,
2509                                        cur->bc_nlevels - 1, 0, &left, &lbp);
2510                if (error)
2511                        goto error0;
2512                bp = lbp;
2513                nptr = 2;
2514        }
2515        /* Fill in the new block's btree header and log it. */
2516        xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
2517        xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2518        ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2519                        !xfs_btree_ptr_is_null(cur, &rptr));
2520
2521        /* Fill in the key data in the new root. */
2522        if (xfs_btree_get_level(left) > 0) {
2523                xfs_btree_copy_keys(cur,
2524                                xfs_btree_key_addr(cur, 1, new),
2525                                xfs_btree_key_addr(cur, 1, left), 1);
2526                xfs_btree_copy_keys(cur,
2527                                xfs_btree_key_addr(cur, 2, new),
2528                                xfs_btree_key_addr(cur, 1, right), 1);
2529        } else {
2530                cur->bc_ops->init_key_from_rec(
2531                                xfs_btree_key_addr(cur, 1, new),
2532                                xfs_btree_rec_addr(cur, 1, left));
2533                cur->bc_ops->init_key_from_rec(
2534                                xfs_btree_key_addr(cur, 2, new),
2535                                xfs_btree_rec_addr(cur, 1, right));
2536        }
2537        xfs_btree_log_keys(cur, nbp, 1, 2);
2538
2539        /* Fill in the pointer data in the new root. */
2540        xfs_btree_copy_ptrs(cur,
2541                xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2542        xfs_btree_copy_ptrs(cur,
2543                xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2544        xfs_btree_log_ptrs(cur, nbp, 1, 2);
2545
2546        /* Fix up the cursor. */
2547        xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2548        cur->bc_ptrs[cur->bc_nlevels] = nptr;
2549        cur->bc_nlevels++;
2550        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2551        *stat = 1;
2552        return 0;
2553error0:
2554        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2555        return error;
2556out0:
2557        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2558        *stat = 0;
2559        return 0;
2560}
2561
2562STATIC int
2563xfs_btree_make_block_unfull(
2564        struct xfs_btree_cur    *cur,   /* btree cursor */
2565        int                     level,  /* btree level */
2566        int                     numrecs,/* # of recs in block */
2567        int                     *oindex,/* old tree index */
2568        int                     *index, /* new tree index */
2569        union xfs_btree_ptr     *nptr,  /* new btree ptr */
2570        struct xfs_btree_cur    **ncur, /* new btree cursor */
2571        union xfs_btree_rec     *nrec,  /* new record */
2572        int                     *stat)
2573{
2574        union xfs_btree_key     key;    /* new btree key value */
2575        int                     error = 0;
2576
2577        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2578            level == cur->bc_nlevels - 1) {
2579                struct xfs_inode *ip = cur->bc_private.b.ip;
2580
2581                if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2582                        /* A root block that can be made bigger. */
2583
2584                        xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2585                } else {
2586                        /* A root block that needs replacing */
2587                        int     logflags = 0;
2588
2589                        error = xfs_btree_new_iroot(cur, &logflags, stat);
2590                        if (error || *stat == 0)
2591                                return error;
2592
2593                        xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2594                }
2595
2596                return 0;
2597        }
2598
2599        /* First, try shifting an entry to the right neighbor. */
2600        error = xfs_btree_rshift(cur, level, stat);
2601        if (error || *stat)
2602                return error;
2603
2604        /* Next, try shifting an entry to the left neighbor. */
2605        error = xfs_btree_lshift(cur, level, stat);
2606        if (error)
2607                return error;
2608
2609        if (*stat) {
2610                *oindex = *index = cur->bc_ptrs[level];
2611                return 0;
2612        }
2613
2614        /*
2615         * Next, try splitting the current block in half.
2616         *
2617         * If this works we have to re-set our variables because we
2618         * could be in a different block now.
2619         */
2620        error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2621        if (error || *stat == 0)
2622                return error;
2623
2624
2625        *index = cur->bc_ptrs[level];
2626        cur->bc_ops->init_rec_from_key(&key, nrec);
2627        return 0;
2628}
2629
2630/*
2631 * Insert one record/level.  Return information to the caller
2632 * allowing the next level up to proceed if necessary.
2633 */
2634STATIC int
2635xfs_btree_insrec(
2636        struct xfs_btree_cur    *cur,   /* btree cursor */
2637        int                     level,  /* level to insert record at */
2638        union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2639        union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2640        struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2641        int                     *stat)  /* success/failure */
2642{
2643        struct xfs_btree_block  *block; /* btree block */
2644        struct xfs_buf          *bp;    /* buffer for block */
2645        union xfs_btree_key     key;    /* btree key */
2646        union xfs_btree_ptr     nptr;   /* new block ptr */
2647        struct xfs_btree_cur    *ncur;  /* new btree cursor */
2648        union xfs_btree_rec     nrec;   /* new record count */
2649        int                     optr;   /* old key/record index */
2650        int                     ptr;    /* key/record index */
2651        int                     numrecs;/* number of records */
2652        int                     error;  /* error return value */
2653#ifdef DEBUG
2654        int                     i;
2655#endif
2656
2657        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2658        XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2659
2660        ncur = NULL;
2661
2662        /*
2663         * If we have an external root pointer, and we've made it to the
2664         * root level, allocate a new root block and we're done.
2665         */
2666        if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2667            (level >= cur->bc_nlevels)) {
2668                error = xfs_btree_new_root(cur, stat);
2669                xfs_btree_set_ptr_null(cur, ptrp);
2670
2671                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2672                return error;
2673        }
2674
2675        /* If we're off the left edge, return failure. */
2676        ptr = cur->bc_ptrs[level];
2677        if (ptr == 0) {
2678                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2679                *stat = 0;
2680                return 0;
2681        }
2682
2683        /* Make a key out of the record data to be inserted, and save it. */
2684        cur->bc_ops->init_key_from_rec(&key, recp);
2685
2686        optr = ptr;
2687
2688        XFS_BTREE_STATS_INC(cur, insrec);
2689
2690        /* Get pointers to the btree buffer and block. */
2691        block = xfs_btree_get_block(cur, level, &bp);
2692        numrecs = xfs_btree_get_numrecs(block);
2693
2694#ifdef DEBUG
2695        error = xfs_btree_check_block(cur, block, level, bp);
2696        if (error)
2697                goto error0;
2698
2699        /* Check that the new entry is being inserted in the right place. */
2700        if (ptr <= numrecs) {
2701                if (level == 0) {
2702                        ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2703                                xfs_btree_rec_addr(cur, ptr, block)));
2704                } else {
2705                        ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2706                                xfs_btree_key_addr(cur, ptr, block)));
2707                }
2708        }
2709#endif
2710
2711        /*
2712         * If the block is full, we can't insert the new entry until we
2713         * make the block un-full.
2714         */
2715        xfs_btree_set_ptr_null(cur, &nptr);
2716        if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2717                error = xfs_btree_make_block_unfull(cur, level, numrecs,
2718                                        &optr, &ptr, &nptr, &ncur, &nrec, stat);
2719                if (error || *stat == 0)
2720                        goto error0;
2721        }
2722
2723        /*
2724         * The current block may have changed if the block was
2725         * previously full and we have just made space in it.
2726         */
2727        block = xfs_btree_get_block(cur, level, &bp);
2728        numrecs = xfs_btree_get_numrecs(block);
2729
2730#ifdef DEBUG
2731        error = xfs_btree_check_block(cur, block, level, bp);
2732        if (error)
2733                return error;
2734#endif
2735
2736        /*
2737         * At this point we know there's room for our new entry in the block
2738         * we're pointing at.
2739         */
2740        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2741
2742        if (level > 0) {
2743                /* It's a nonleaf. make a hole in the keys and ptrs */
2744                union xfs_btree_key     *kp;
2745                union xfs_btree_ptr     *pp;
2746
2747                kp = xfs_btree_key_addr(cur, ptr, block);
2748                pp = xfs_btree_ptr_addr(cur, ptr, block);
2749
2750#ifdef DEBUG
2751                for (i = numrecs - ptr; i >= 0; i--) {
2752                        error = xfs_btree_check_ptr(cur, pp, i, level);
2753                        if (error)
2754                                return error;
2755                }
2756#endif
2757
2758                xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2759                xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2760
2761#ifdef DEBUG
2762                error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2763                if (error)
2764                        goto error0;
2765#endif
2766
2767                /* Now put the new data in, bump numrecs and log it. */
2768                xfs_btree_copy_keys(cur, kp, &key, 1);
2769                xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2770                numrecs++;
2771                xfs_btree_set_numrecs(block, numrecs);
2772                xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2773                xfs_btree_log_keys(cur, bp, ptr, numrecs);
2774#ifdef DEBUG
2775                if (ptr < numrecs) {
2776                        ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2777                                xfs_btree_key_addr(cur, ptr + 1, block)));
2778                }
2779#endif
2780        } else {
2781                /* It's a leaf. make a hole in the records */
2782                union xfs_btree_rec             *rp;
2783
2784                rp = xfs_btree_rec_addr(cur, ptr, block);
2785
2786                xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2787
2788                /* Now put the new data in, bump numrecs and log it. */
2789                xfs_btree_copy_recs(cur, rp, recp, 1);
2790                xfs_btree_set_numrecs(block, ++numrecs);
2791                xfs_btree_log_recs(cur, bp, ptr, numrecs);
2792#ifdef DEBUG
2793                if (ptr < numrecs) {
2794                        ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2795                                xfs_btree_rec_addr(cur, ptr + 1, block)));
2796                }
2797#endif
2798        }
2799
2800        /* Log the new number of records in the btree header. */
2801        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2802
2803        /* If we inserted at the start of a block, update the parents' keys. */
2804        if (optr == 1) {
2805                error = xfs_btree_updkey(cur, &key, level + 1);
2806                if (error)
2807                        goto error0;
2808        }
2809
2810        /*
2811         * If we are tracking the last record in the tree and
2812         * we are at the far right edge of the tree, update it.
2813         */
2814        if (xfs_btree_is_lastrec(cur, block, level)) {
2815                cur->bc_ops->update_lastrec(cur, block, recp,
2816                                            ptr, LASTREC_INSREC);
2817        }
2818
2819        /*
2820         * Return the new block number, if any.
2821         * If there is one, give back a record value and a cursor too.
2822         */
2823        *ptrp = nptr;
2824        if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2825                *recp = nrec;
2826                *curp = ncur;
2827        }
2828
2829        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2830        *stat = 1;
2831        return 0;
2832
2833error0:
2834        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2835        return error;
2836}
2837
2838/*
2839 * Insert the record at the point referenced by cur.
2840 *
2841 * A multi-level split of the tree on insert will invalidate the original
2842 * cursor.  All callers of this function should assume that the cursor is
2843 * no longer valid and revalidate it.
2844 */
2845int
2846xfs_btree_insert(
2847        struct xfs_btree_cur    *cur,
2848        int                     *stat)
2849{
2850        int                     error;  /* error return value */
2851        int                     i;      /* result value, 0 for failure */
2852        int                     level;  /* current level number in btree */
2853        union xfs_btree_ptr     nptr;   /* new block number (split result) */
2854        struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2855        struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2856        union xfs_btree_rec     rec;    /* record to insert */
2857
2858        level = 0;
2859        ncur = NULL;
2860        pcur = cur;
2861
2862        xfs_btree_set_ptr_null(cur, &nptr);
2863        cur->bc_ops->init_rec_from_cur(cur, &rec);
2864
2865        /*
2866         * Loop going up the tree, starting at the leaf level.
2867         * Stop when we don't get a split block, that must mean that
2868         * the insert is finished with this level.
2869         */
2870        do {
2871                /*
2872                 * Insert nrec/nptr into this level of the tree.
2873                 * Note if we fail, nptr will be null.
2874                 */
2875                error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2876                if (error) {
2877                        if (pcur != cur)
2878                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2879                        goto error0;
2880                }
2881
2882                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2883                level++;
2884
2885                /*
2886                 * See if the cursor we just used is trash.
2887                 * Can't trash the caller's cursor, but otherwise we should
2888                 * if ncur is a new cursor or we're about to be done.
2889                 */
2890                if (pcur != cur &&
2891                    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2892                        /* Save the state from the cursor before we trash it */
2893                        if (cur->bc_ops->update_cursor)
2894                                cur->bc_ops->update_cursor(pcur, cur);
2895                        cur->bc_nlevels = pcur->bc_nlevels;
2896                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2897                }
2898                /* If we got a new cursor, switch to it. */
2899                if (ncur) {
2900                        pcur = ncur;
2901                        ncur = NULL;
2902                }
2903        } while (!xfs_btree_ptr_is_null(cur, &nptr));
2904
2905        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2906        *stat = i;
2907        return 0;
2908error0:
2909        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2910        return error;
2911}
2912
2913/*
2914 * Try to merge a non-leaf block back into the inode root.
2915 *
2916 * Note: the killroot names comes from the fact that we're effectively
2917 * killing the old root block.  But because we can't just delete the
2918 * inode we have to copy the single block it was pointing to into the
2919 * inode.
2920 */
2921STATIC int
2922xfs_btree_kill_iroot(
2923        struct xfs_btree_cur    *cur)
2924{
2925        int                     whichfork = cur->bc_private.b.whichfork;
2926        struct xfs_inode        *ip = cur->bc_private.b.ip;
2927        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2928        struct xfs_btree_block  *block;
2929        struct xfs_btree_block  *cblock;
2930        union xfs_btree_key     *kp;
2931        union xfs_btree_key     *ckp;
2932        union xfs_btree_ptr     *pp;
2933        union xfs_btree_ptr     *cpp;
2934        struct xfs_buf          *cbp;
2935        int                     level;
2936        int                     index;
2937        int                     numrecs;
2938#ifdef DEBUG
2939        union xfs_btree_ptr     ptr;
2940        int                     i;
2941#endif
2942
2943        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2944
2945        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2946        ASSERT(cur->bc_nlevels > 1);
2947
2948        /*
2949         * Don't deal with the root block needs to be a leaf case.
2950         * We're just going to turn the thing back into extents anyway.
2951         */
2952        level = cur->bc_nlevels - 1;
2953        if (level == 1)
2954                goto out0;
2955
2956        /*
2957         * Give up if the root has multiple children.
2958         */
2959        block = xfs_btree_get_iroot(cur);
2960        if (xfs_btree_get_numrecs(block) != 1)
2961                goto out0;
2962
2963        cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2964        numrecs = xfs_btree_get_numrecs(cblock);
2965
2966        /*
2967         * Only do this if the next level will fit.
2968         * Then the data must be copied up to the inode,
2969         * instead of freeing the root you free the next level.
2970         */
2971        if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
2972                goto out0;
2973
2974        XFS_BTREE_STATS_INC(cur, killroot);
2975
2976#ifdef DEBUG
2977        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
2978        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2979        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2980        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2981#endif
2982
2983        index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
2984        if (index) {
2985                xfs_iroot_realloc(cur->bc_private.b.ip, index,
2986                                  cur->bc_private.b.whichfork);
2987                block = ifp->if_broot;
2988        }
2989
2990        be16_add_cpu(&block->bb_numrecs, index);
2991        ASSERT(block->bb_numrecs == cblock->bb_numrecs);
2992
2993        kp = xfs_btree_key_addr(cur, 1, block);
2994        ckp = xfs_btree_key_addr(cur, 1, cblock);
2995        xfs_btree_copy_keys(cur, kp, ckp, numrecs);
2996
2997        pp = xfs_btree_ptr_addr(cur, 1, block);
2998        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2999#ifdef DEBUG
3000        for (i = 0; i < numrecs; i++) {
3001                int             error;
3002
3003                error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3004                if (error) {
3005                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3006                        return error;
3007                }
3008        }
3009#endif
3010        xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3011
3012        cur->bc_ops->free_block(cur, cbp);
3013        XFS_BTREE_STATS_INC(cur, free);
3014
3015        cur->bc_bufs[level - 1] = NULL;
3016        be16_add_cpu(&block->bb_level, -1);
3017        xfs_trans_log_inode(cur->bc_tp, ip,
3018                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3019        cur->bc_nlevels--;
3020out0:
3021        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3022        return 0;
3023}
3024
3025/*
3026 * Kill the current root node, and replace it with it's only child node.
3027 */
3028STATIC int
3029xfs_btree_kill_root(
3030        struct xfs_btree_cur    *cur,
3031        struct xfs_buf          *bp,
3032        int                     level,
3033        union xfs_btree_ptr     *newroot)
3034{
3035        int                     error;
3036
3037        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3038        XFS_BTREE_STATS_INC(cur, killroot);
3039
3040        /*
3041         * Update the root pointer, decreasing the level by 1 and then
3042         * free the old root.
3043         */
3044        cur->bc_ops->set_root(cur, newroot, -1);
3045
3046        error = cur->bc_ops->free_block(cur, bp);
3047        if (error) {
3048                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3049                return error;
3050        }
3051
3052        XFS_BTREE_STATS_INC(cur, free);
3053
3054        cur->bc_bufs[level] = NULL;
3055        cur->bc_ra[level] = 0;
3056        cur->bc_nlevels--;
3057
3058        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3059        return 0;
3060}
3061
3062STATIC int
3063xfs_btree_dec_cursor(
3064        struct xfs_btree_cur    *cur,
3065        int                     level,
3066        int                     *stat)
3067{
3068        int                     error;
3069        int                     i;
3070
3071        if (level > 0) {
3072                error = xfs_btree_decrement(cur, level, &i);
3073                if (error)
3074                        return error;
3075        }
3076
3077        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3078        *stat = 1;
3079        return 0;
3080}
3081
3082/*
3083 * Single level of the btree record deletion routine.
3084 * Delete record pointed to by cur/level.
3085 * Remove the record from its block then rebalance the tree.
3086 * Return 0 for error, 1 for done, 2 to go on to the next level.
3087 */
3088STATIC int                                      /* error */
3089xfs_btree_delrec(
3090        struct xfs_btree_cur    *cur,           /* btree cursor */
3091        int                     level,          /* level removing record from */
3092        int                     *stat)          /* fail/done/go-on */
3093{
3094        struct xfs_btree_block  *block;         /* btree block */
3095        union xfs_btree_ptr     cptr;           /* current block ptr */
3096        struct xfs_buf          *bp;            /* buffer for block */
3097        int                     error;          /* error return value */
3098        int                     i;              /* loop counter */
3099        union xfs_btree_key     key;            /* storage for keyp */
3100        union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3101        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3102        struct xfs_buf          *lbp;           /* left buffer pointer */
3103        struct xfs_btree_block  *left;          /* left btree block */
3104        int                     lrecs = 0;      /* left record count */
3105        int                     ptr;            /* key/record index */
3106        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3107        struct xfs_buf          *rbp;           /* right buffer pointer */
3108        struct xfs_btree_block  *right;         /* right btree block */
3109        struct xfs_btree_block  *rrblock;       /* right-right btree block */
3110        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3111        int                     rrecs = 0;      /* right record count */
3112        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3113        int                     numrecs;        /* temporary numrec count */
3114
3115        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3116        XFS_BTREE_TRACE_ARGI(cur, level);
3117
3118        tcur = NULL;
3119
3120        /* Get the index of the entry being deleted, check for nothing there. */
3121        ptr = cur->bc_ptrs[level];
3122        if (ptr == 0) {
3123                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3124                *stat = 0;
3125                return 0;
3126        }
3127
3128        /* Get the buffer & block containing the record or key/ptr. */
3129        block = xfs_btree_get_block(cur, level, &bp);
3130        numrecs = xfs_btree_get_numrecs(block);
3131
3132#ifdef DEBUG
3133        error = xfs_btree_check_block(cur, block, level, bp);
3134        if (error)
3135                goto error0;
3136#endif
3137
3138        /* Fail if we're off the end of the block. */
3139        if (ptr > numrecs) {
3140                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3141                *stat = 0;
3142                return 0;
3143        }
3144
3145        XFS_BTREE_STATS_INC(cur, delrec);
3146        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3147
3148        /* Excise the entries being deleted. */
3149        if (level > 0) {
3150                /* It's a nonleaf. operate on keys and ptrs */
3151                union xfs_btree_key     *lkp;
3152                union xfs_btree_ptr     *lpp;
3153
3154                lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3155                lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3156
3157#ifdef DEBUG
3158                for (i = 0; i < numrecs - ptr; i++) {
3159                        error = xfs_btree_check_ptr(cur, lpp, i, level);
3160                        if (error)
3161                                goto error0;
3162                }
3163#endif
3164
3165                if (ptr < numrecs) {
3166                        xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3167                        xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3168                        xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3169                        xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3170                }
3171
3172                /*
3173                 * If it's the first record in the block, we'll need to pass a
3174                 * key up to the next level (updkey).
3175                 */
3176                if (ptr == 1)
3177                        keyp = xfs_btree_key_addr(cur, 1, block);
3178        } else {
3179                /* It's a leaf. operate on records */
3180                if (ptr < numrecs) {
3181                        xfs_btree_shift_recs(cur,
3182                                xfs_btree_rec_addr(cur, ptr + 1, block),
3183                                -1, numrecs - ptr);
3184                        xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3185                }
3186
3187                /*
3188                 * If it's the first record in the block, we'll need a key
3189                 * structure to pass up to the next level (updkey).
3190                 */
3191                if (ptr == 1) {
3192                        cur->bc_ops->init_key_from_rec(&key,
3193                                        xfs_btree_rec_addr(cur, 1, block));
3194                        keyp = &key;
3195                }
3196        }
3197
3198        /*
3199         * Decrement and log the number of entries in the block.
3200         */
3201        xfs_btree_set_numrecs(block, --numrecs);
3202        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3203
3204        /*
3205         * If we are tracking the last record in the tree and
3206         * we are at the far right edge of the tree, update it.
3207         */
3208        if (xfs_btree_is_lastrec(cur, block, level)) {
3209                cur->bc_ops->update_lastrec(cur, block, NULL,
3210                                            ptr, LASTREC_DELREC);
3211        }
3212
3213        /*
3214         * We're at the root level.  First, shrink the root block in-memory.
3215         * Try to get rid of the next level down.  If we can't then there's
3216         * nothing left to do.
3217         */
3218        if (level == cur->bc_nlevels - 1) {
3219                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3220                        xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3221                                          cur->bc_private.b.whichfork);
3222
3223                        error = xfs_btree_kill_iroot(cur);
3224                        if (error)
3225                                goto error0;
3226
3227                        error = xfs_btree_dec_cursor(cur, level, stat);
3228                        if (error)
3229                                goto error0;
3230                        *stat = 1;
3231                        return 0;
3232                }
3233
3234                /*
3235                 * If this is the root level, and there's only one entry left,
3236                 * and it's NOT the leaf level, then we can get rid of this
3237                 * level.
3238                 */
3239                if (numrecs == 1 && level > 0) {
3240                        union xfs_btree_ptr     *pp;
3241                        /*
3242                         * pp is still set to the first pointer in the block.
3243                         * Make it the new root of the btree.
3244                         */
3245                        pp = xfs_btree_ptr_addr(cur, 1, block);
3246                        error = xfs_btree_kill_root(cur, bp, level, pp);
3247                        if (error)
3248                                goto error0;
3249                } else if (level > 0) {
3250                        error = xfs_btree_dec_cursor(cur, level, stat);
3251                        if (error)
3252                                goto error0;
3253                }
3254                *stat = 1;
3255                return 0;
3256        }
3257
3258        /*
3259         * If we deleted the leftmost entry in the block, update the
3260         * key values above us in the tree.
3261         */
3262        if (ptr == 1) {
3263                error = xfs_btree_updkey(cur, keyp, level + 1);
3264                if (error)
3265                        goto error0;
3266        }
3267
3268        /*
3269         * If the number of records remaining in the block is at least
3270         * the minimum, we're done.
3271         */
3272        if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3273                error = xfs_btree_dec_cursor(cur, level, stat);
3274                if (error)
3275                        goto error0;
3276                return 0;
3277        }
3278
3279        /*
3280         * Otherwise, we have to move some records around to keep the
3281         * tree balanced.  Look at the left and right sibling blocks to
3282         * see if we can re-balance by moving only one record.
3283         */
3284        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3285        xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3286
3287        if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3288                /*
3289                 * One child of root, need to get a chance to copy its contents
3290                 * into the root and delete it. Can't go up to next level,
3291                 * there's nothing to delete there.
3292                 */
3293                if (xfs_btree_ptr_is_null(cur, &rptr) &&
3294                    xfs_btree_ptr_is_null(cur, &lptr) &&
3295                    level == cur->bc_nlevels - 2) {
3296                        error = xfs_btree_kill_iroot(cur);
3297                        if (!error)
3298                                error = xfs_btree_dec_cursor(cur, level, stat);
3299                        if (error)
3300                                goto error0;
3301                        return 0;
3302                }
3303        }
3304
3305        ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3306               !xfs_btree_ptr_is_null(cur, &lptr));
3307
3308        /*
3309         * Duplicate the cursor so our btree manipulations here won't
3310         * disrupt the next level up.
3311         */
3312        error = xfs_btree_dup_cursor(cur, &tcur);
3313        if (error)
3314                goto error0;
3315
3316        /*
3317         * If there's a right sibling, see if it's ok to shift an entry
3318         * out of it.
3319         */
3320        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3321                /*
3322                 * Move the temp cursor to the last entry in the next block.
3323                 * Actually any entry but the first would suffice.
3324                 */
3325                i = xfs_btree_lastrec(tcur, level);
3326                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3327
3328                error = xfs_btree_increment(tcur, level, &i);
3329                if (error)
3330                        goto error0;
3331                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3332
3333                i = xfs_btree_lastrec(tcur, level);
3334                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3335
3336                /* Grab a pointer to the block. */
3337                right = xfs_btree_get_block(tcur, level, &rbp);
3338#ifdef DEBUG
3339                error = xfs_btree_check_block(tcur, right, level, rbp);
3340                if (error)
3341                        goto error0;
3342#endif
3343                /* Grab the current block number, for future use. */
3344                xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3345
3346                /*
3347                 * If right block is full enough so that removing one entry
3348                 * won't make it too empty, and left-shifting an entry out
3349                 * of right to us works, we're done.
3350                 */
3351                if (xfs_btree_get_numrecs(right) - 1 >=
3352                    cur->bc_ops->get_minrecs(tcur, level)) {
3353                        error = xfs_btree_lshift(tcur, level, &i);
3354                        if (error)
3355                                goto error0;
3356                        if (i) {
3357                                ASSERT(xfs_btree_get_numrecs(block) >=
3358                                       cur->bc_ops->get_minrecs(tcur, level));
3359
3360                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3361                                tcur = NULL;
3362
3363                                error = xfs_btree_dec_cursor(cur, level, stat);
3364                                if (error)
3365                                        goto error0;
3366                                return 0;
3367                        }
3368                }
3369
3370                /*
3371                 * Otherwise, grab the number of records in right for
3372                 * future reference, and fix up the temp cursor to point
3373                 * to our block again (last record).
3374                 */
3375                rrecs = xfs_btree_get_numrecs(right);
3376                if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3377                        i = xfs_btree_firstrec(tcur, level);
3378                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3379
3380                        error = xfs_btree_decrement(tcur, level, &i);
3381                        if (error)
3382                                goto error0;
3383                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3384                }
3385        }
3386
3387        /*
3388         * If there's a left sibling, see if it's ok to shift an entry
3389         * out of it.
3390         */
3391        if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3392                /*
3393                 * Move the temp cursor to the first entry in the
3394                 * previous block.
3395                 */
3396                i = xfs_btree_firstrec(tcur, level);
3397                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3398
3399                error = xfs_btree_decrement(tcur, level, &i);
3400                if (error)
3401                        goto error0;
3402                i = xfs_btree_firstrec(tcur, level);
3403                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3404
3405                /* Grab a pointer to the block. */
3406                left = xfs_btree_get_block(tcur, level, &lbp);
3407#ifdef DEBUG
3408                error = xfs_btree_check_block(cur, left, level, lbp);
3409                if (error)
3410                        goto error0;
3411#endif
3412                /* Grab the current block number, for future use. */
3413                xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3414
3415                /*
3416                 * If left block is full enough so that removing one entry
3417                 * won't make it too empty, and right-shifting an entry out
3418                 * of left to us works, we're done.
3419                 */
3420                if (xfs_btree_get_numrecs(left) - 1 >=
3421                    cur->bc_ops->get_minrecs(tcur, level)) {
3422                        error = xfs_btree_rshift(tcur, level, &i);
3423                        if (error)
3424                                goto error0;
3425                        if (i) {
3426                                ASSERT(xfs_btree_get_numrecs(block) >=
3427                                       cur->bc_ops->get_minrecs(tcur, level));
3428                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3429                                tcur = NULL;
3430                                if (level == 0)
3431                                        cur->bc_ptrs[0]++;
3432                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3433                                *stat = 1;
3434                                return 0;
3435                        }
3436                }
3437
3438                /*
3439                 * Otherwise, grab the number of records in right for
3440                 * future reference.
3441                 */
3442                lrecs = xfs_btree_get_numrecs(left);
3443        }
3444
3445        /* Delete the temp cursor, we're done with it. */
3446        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3447        tcur = NULL;
3448
3449        /* If here, we need to do a join to keep the tree balanced. */
3450        ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3451
3452        if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3453            lrecs + xfs_btree_get_numrecs(block) <=
3454                        cur->bc_ops->get_maxrecs(cur, level)) {
3455                /*
3456                 * Set "right" to be the starting block,
3457                 * "left" to be the left neighbor.
3458                 */
3459                rptr = cptr;
3460                right = block;
3461                rbp = bp;
3462                error = xfs_btree_read_buf_block(cur, &lptr, level,
3463                                                        0, &left, &lbp);
3464                if (error)
3465                        goto error0;
3466
3467        /*
3468         * If that won't work, see if we can join with the right neighbor block.
3469         */
3470        } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3471                   rrecs + xfs_btree_get_numrecs(block) <=
3472                        cur->bc_ops->get_maxrecs(cur, level)) {
3473                /*
3474                 * Set "left" to be the starting block,
3475                 * "right" to be the right neighbor.
3476                 */
3477                lptr = cptr;
3478                left = block;
3479                lbp = bp;
3480                error = xfs_btree_read_buf_block(cur, &rptr, level,
3481                                                        0, &right, &rbp);
3482                if (error)
3483                        goto error0;
3484
3485        /*
3486         * Otherwise, we can't fix the imbalance.
3487         * Just return.  This is probably a logic error, but it's not fatal.
3488         */
3489        } else {
3490                error = xfs_btree_dec_cursor(cur, level, stat);
3491                if (error)
3492                        goto error0;
3493                return 0;
3494        }
3495
3496        rrecs = xfs_btree_get_numrecs(right);
3497        lrecs = xfs_btree_get_numrecs(left);
3498
3499        /*
3500         * We're now going to join "left" and "right" by moving all the stuff
3501         * in "right" to "left" and deleting "right".
3502         */
3503        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3504        if (level > 0) {
3505                /* It's a non-leaf.  Move keys and pointers. */
3506                union xfs_btree_key     *lkp;   /* left btree key */
3507                union xfs_btree_ptr     *lpp;   /* left address pointer */
3508                union xfs_btree_key     *rkp;   /* right btree key */
3509                union xfs_btree_ptr     *rpp;   /* right address pointer */
3510
3511                lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3512                lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3513                rkp = xfs_btree_key_addr(cur, 1, right);
3514                rpp = xfs_btree_ptr_addr(cur, 1, right);
3515#ifdef DEBUG
3516                for (i = 1; i < rrecs; i++) {
3517                        error = xfs_btree_check_ptr(cur, rpp, i, level);
3518                        if (error)
3519                                goto error0;
3520                }
3521#endif
3522                xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3523                xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3524
3525                xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3526                xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3527        } else {
3528                /* It's a leaf.  Move records.  */
3529                union xfs_btree_rec     *lrp;   /* left record pointer */
3530                union xfs_btree_rec     *rrp;   /* right record pointer */
3531
3532                lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3533                rrp = xfs_btree_rec_addr(cur, 1, right);
3534
3535                xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3536                xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3537        }
3538
3539        XFS_BTREE_STATS_INC(cur, join);
3540
3541        /*
3542         * Fix up the number of records and right block pointer in the
3543         * surviving block, and log it.
3544         */
3545        xfs_btree_set_numrecs(left, lrecs + rrecs);
3546        xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3547        xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3549
3550        /* If there is a right sibling, point it to the remaining block. */
3551        xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3552        if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3553                error = xfs_btree_read_buf_block(cur, &cptr, level,
3554                                                        0, &rrblock, &rrbp);
3555                if (error)
3556                        goto error0;
3557                xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3558                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3559        }
3560
3561        /* Free the deleted block. */
3562        error = cur->bc_ops->free_block(cur, rbp);
3563        if (error)
3564                goto error0;
3565        XFS_BTREE_STATS_INC(cur, free);
3566
3567        /*
3568         * If we joined with the left neighbor, set the buffer in the
3569         * cursor to the left block, and fix up the index.
3570         */
3571        if (bp != lbp) {
3572                cur->bc_bufs[level] = lbp;
3573                cur->bc_ptrs[level] += lrecs;
3574                cur->bc_ra[level] = 0;
3575        }
3576        /*
3577         * If we joined with the right neighbor and there's a level above
3578         * us, increment the cursor at that level.
3579         */
3580        else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3581                   (level + 1 < cur->bc_nlevels)) {
3582                error = xfs_btree_increment(cur, level + 1, &i);
3583                if (error)
3584                        goto error0;
3585        }
3586
3587        /*
3588         * Readjust the ptr at this level if it's not a leaf, since it's
3589         * still pointing at the deletion point, which makes the cursor
3590         * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3591         * We can't use decrement because it would change the next level up.
3592         */
3593        if (level > 0)
3594                cur->bc_ptrs[level]--;
3595
3596        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3597        /* Return value means the next level up has something to do. */
3598        *stat = 2;
3599        return 0;
3600
3601error0:
3602        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3603        if (tcur)
3604                xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3605        return error;
3606}
3607
3608/*
3609 * Delete the record pointed to by cur.
3610 * The cursor refers to the place where the record was (could be inserted)
3611 * when the operation returns.
3612 */
3613int                                     /* error */
3614xfs_btree_delete(
3615        struct xfs_btree_cur    *cur,
3616        int                     *stat)  /* success/failure */
3617{
3618        int                     error;  /* error return value */
3619        int                     level;
3620        int                     i;
3621
3622        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3623
3624        /*
3625         * Go up the tree, starting at leaf level.
3626         *
3627         * If 2 is returned then a join was done; go to the next level.
3628         * Otherwise we are done.
3629         */
3630        for (level = 0, i = 2; i == 2; level++) {
3631                error = xfs_btree_delrec(cur, level, &i);
3632                if (error)
3633                        goto error0;
3634        }
3635
3636        if (i == 0) {
3637                for (level = 1; level < cur->bc_nlevels; level++) {
3638                        if (cur->bc_ptrs[level] == 0) {
3639                                error = xfs_btree_decrement(cur, level, &i);
3640                                if (error)
3641                                        goto error0;
3642                                break;
3643                        }
3644                }
3645        }
3646
3647        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3648        *stat = i;
3649        return 0;
3650error0:
3651        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3652        return error;
3653}
3654
3655/*
3656 * Get the data from the pointed-to record.
3657 */
3658int                                     /* error */
3659xfs_btree_get_rec(
3660        struct xfs_btree_cur    *cur,   /* btree cursor */
3661        union xfs_btree_rec     **recp, /* output: btree record */
3662        int                     *stat)  /* output: success/failure */
3663{
3664        struct xfs_btree_block  *block; /* btree block */
3665        struct xfs_buf          *bp;    /* buffer pointer */
3666        int                     ptr;    /* record number */
3667#ifdef DEBUG
3668        int                     error;  /* error return value */
3669#endif
3670
3671        ptr = cur->bc_ptrs[0];
3672        block = xfs_btree_get_block(cur, 0, &bp);
3673
3674#ifdef DEBUG
3675        error = xfs_btree_check_block(cur, block, 0, bp);
3676        if (error)
3677                return error;
3678#endif
3679
3680        /*
3681         * Off the right end or left end, return failure.
3682         */
3683        if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3684                *stat = 0;
3685                return 0;
3686        }
3687
3688        /*
3689         * Point to the record and extract its data.
3690         */
3691        *recp = xfs_btree_rec_addr(cur, ptr, block);
3692        *stat = 1;
3693        return 0;
3694}
3695