linux/fs/xfs/xfs_btree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_mount.h"
  27#include "xfs_bmap_btree.h"
  28#include "xfs_alloc_btree.h"
  29#include "xfs_ialloc_btree.h"
  30#include "xfs_dinode.h"
  31#include "xfs_inode.h"
  32#include "xfs_inode_item.h"
  33#include "xfs_buf_item.h"
  34#include "xfs_btree.h"
  35#include "xfs_error.h"
  36#include "xfs_trace.h"
  37#include "xfs_cksum.h"
  38
  39/*
  40 * Cursor allocation zone.
  41 */
  42kmem_zone_t     *xfs_btree_cur_zone;
  43
  44/*
  45 * Btree magic numbers.
  46 */
  47static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
  48        { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
  49        { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
  50          XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
  51};
  52#define xfs_btree_magic(cur) \
  53        xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
  54
  55
  56STATIC int                              /* error (0 or EFSCORRUPTED) */
  57xfs_btree_check_lblock(
  58        struct xfs_btree_cur    *cur,   /* btree cursor */
  59        struct xfs_btree_block  *block, /* btree long form block pointer */
  60        int                     level,  /* level of the btree block */
  61        struct xfs_buf          *bp)    /* buffer for block, if any */
  62{
  63        int                     lblock_ok = 1; /* block passes checks */
  64        struct xfs_mount        *mp;    /* file system mount point */
  65
  66        mp = cur->bc_mp;
  67
  68        if (xfs_sb_version_hascrc(&mp->m_sb)) {
  69                lblock_ok = lblock_ok &&
  70                        uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
  71                        block->bb_u.l.bb_blkno == cpu_to_be64(
  72                                bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
  73        }
  74
  75        lblock_ok = lblock_ok &&
  76                be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
  77                be16_to_cpu(block->bb_level) == level &&
  78                be16_to_cpu(block->bb_numrecs) <=
  79                        cur->bc_ops->get_maxrecs(cur, level) &&
  80                block->bb_u.l.bb_leftsib &&
  81                (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
  82                 XFS_FSB_SANITY_CHECK(mp,
  83                        be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
  84                block->bb_u.l.bb_rightsib &&
  85                (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
  86                 XFS_FSB_SANITY_CHECK(mp,
  87                        be64_to_cpu(block->bb_u.l.bb_rightsib)));
  88
  89        if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
  90                        XFS_ERRTAG_BTREE_CHECK_LBLOCK,
  91                        XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
  92                if (bp)
  93                        trace_xfs_btree_corrupt(bp, _RET_IP_);
  94                XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
  95                return XFS_ERROR(EFSCORRUPTED);
  96        }
  97        return 0;
  98}
  99
 100STATIC int                              /* error (0 or EFSCORRUPTED) */
 101xfs_btree_check_sblock(
 102        struct xfs_btree_cur    *cur,   /* btree cursor */
 103        struct xfs_btree_block  *block, /* btree short form block pointer */
 104        int                     level,  /* level of the btree block */
 105        struct xfs_buf          *bp)    /* buffer containing block */
 106{
 107        struct xfs_mount        *mp;    /* file system mount point */
 108        struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
 109        struct xfs_agf          *agf;   /* ag. freespace structure */
 110        xfs_agblock_t           agflen; /* native ag. freespace length */
 111        int                     sblock_ok = 1; /* block passes checks */
 112
 113        mp = cur->bc_mp;
 114        agbp = cur->bc_private.a.agbp;
 115        agf = XFS_BUF_TO_AGF(agbp);
 116        agflen = be32_to_cpu(agf->agf_length);
 117
 118        if (xfs_sb_version_hascrc(&mp->m_sb)) {
 119                sblock_ok = sblock_ok &&
 120                        uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
 121                        block->bb_u.s.bb_blkno == cpu_to_be64(
 122                                bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
 123        }
 124
 125        sblock_ok = sblock_ok &&
 126                be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
 127                be16_to_cpu(block->bb_level) == level &&
 128                be16_to_cpu(block->bb_numrecs) <=
 129                        cur->bc_ops->get_maxrecs(cur, level) &&
 130                (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
 131                 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
 132                block->bb_u.s.bb_leftsib &&
 133                (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
 134                 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
 135                block->bb_u.s.bb_rightsib;
 136
 137        if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
 138                        XFS_ERRTAG_BTREE_CHECK_SBLOCK,
 139                        XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
 140                if (bp)
 141                        trace_xfs_btree_corrupt(bp, _RET_IP_);
 142                XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 143                return XFS_ERROR(EFSCORRUPTED);
 144        }
 145        return 0;
 146}
 147
 148/*
 149 * Debug routine: check that block header is ok.
 150 */
 151int
 152xfs_btree_check_block(
 153        struct xfs_btree_cur    *cur,   /* btree cursor */
 154        struct xfs_btree_block  *block, /* generic btree block pointer */
 155        int                     level,  /* level of the btree block */
 156        struct xfs_buf          *bp)    /* buffer containing block, if any */
 157{
 158        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 159                return xfs_btree_check_lblock(cur, block, level, bp);
 160        else
 161                return xfs_btree_check_sblock(cur, block, level, bp);
 162}
 163
 164/*
 165 * Check that (long) pointer is ok.
 166 */
 167int                                     /* error (0 or EFSCORRUPTED) */
 168xfs_btree_check_lptr(
 169        struct xfs_btree_cur    *cur,   /* btree cursor */
 170        xfs_dfsbno_t            bno,    /* btree block disk address */
 171        int                     level)  /* btree block level */
 172{
 173        XFS_WANT_CORRUPTED_RETURN(
 174                level > 0 &&
 175                bno != NULLDFSBNO &&
 176                XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
 177        return 0;
 178}
 179
 180#ifdef DEBUG
 181/*
 182 * Check that (short) pointer is ok.
 183 */
 184STATIC int                              /* error (0 or EFSCORRUPTED) */
 185xfs_btree_check_sptr(
 186        struct xfs_btree_cur    *cur,   /* btree cursor */
 187        xfs_agblock_t           bno,    /* btree block disk address */
 188        int                     level)  /* btree block level */
 189{
 190        xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
 191
 192        XFS_WANT_CORRUPTED_RETURN(
 193                level > 0 &&
 194                bno != NULLAGBLOCK &&
 195                bno != 0 &&
 196                bno < agblocks);
 197        return 0;
 198}
 199
 200/*
 201 * Check that block ptr is ok.
 202 */
 203STATIC int                              /* error (0 or EFSCORRUPTED) */
 204xfs_btree_check_ptr(
 205        struct xfs_btree_cur    *cur,   /* btree cursor */
 206        union xfs_btree_ptr     *ptr,   /* btree block disk address */
 207        int                     index,  /* offset from ptr to check */
 208        int                     level)  /* btree block level */
 209{
 210        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 211                return xfs_btree_check_lptr(cur,
 212                                be64_to_cpu((&ptr->l)[index]), level);
 213        } else {
 214                return xfs_btree_check_sptr(cur,
 215                                be32_to_cpu((&ptr->s)[index]), level);
 216        }
 217}
 218#endif
 219
 220/*
 221 * Calculate CRC on the whole btree block and stuff it into the
 222 * long-form btree header.
 223 *
 224 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
 225 * it into the buffer so recovery knows what the last modifcation was that made
 226 * it to disk.
 227 */
 228void
 229xfs_btree_lblock_calc_crc(
 230        struct xfs_buf          *bp)
 231{
 232        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 233        struct xfs_buf_log_item *bip = bp->b_fspriv;
 234
 235        if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
 236                return;
 237        if (bip)
 238                block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 239        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
 240                         XFS_BTREE_LBLOCK_CRC_OFF);
 241}
 242
 243bool
 244xfs_btree_lblock_verify_crc(
 245        struct xfs_buf          *bp)
 246{
 247        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
 248                return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
 249                                        XFS_BTREE_LBLOCK_CRC_OFF);
 250        return true;
 251}
 252
 253/*
 254 * Calculate CRC on the whole btree block and stuff it into the
 255 * short-form btree header.
 256 *
 257 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
 258 * it into the buffer so recovery knows what the last modifcation was that made
 259 * it to disk.
 260 */
 261void
 262xfs_btree_sblock_calc_crc(
 263        struct xfs_buf          *bp)
 264{
 265        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 266        struct xfs_buf_log_item *bip = bp->b_fspriv;
 267
 268        if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
 269                return;
 270        if (bip)
 271                block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 272        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
 273                         XFS_BTREE_SBLOCK_CRC_OFF);
 274}
 275
 276bool
 277xfs_btree_sblock_verify_crc(
 278        struct xfs_buf          *bp)
 279{
 280        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
 281                return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
 282                                        XFS_BTREE_SBLOCK_CRC_OFF);
 283        return true;
 284}
 285
 286/*
 287 * Delete the btree cursor.
 288 */
 289void
 290xfs_btree_del_cursor(
 291        xfs_btree_cur_t *cur,           /* btree cursor */
 292        int             error)          /* del because of error */
 293{
 294        int             i;              /* btree level */
 295
 296        /*
 297         * Clear the buffer pointers, and release the buffers.
 298         * If we're doing this in the face of an error, we
 299         * need to make sure to inspect all of the entries
 300         * in the bc_bufs array for buffers to be unlocked.
 301         * This is because some of the btree code works from
 302         * level n down to 0, and if we get an error along
 303         * the way we won't have initialized all the entries
 304         * down to 0.
 305         */
 306        for (i = 0; i < cur->bc_nlevels; i++) {
 307                if (cur->bc_bufs[i])
 308                        xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
 309                else if (!error)
 310                        break;
 311        }
 312        /*
 313         * Can't free a bmap cursor without having dealt with the
 314         * allocated indirect blocks' accounting.
 315         */
 316        ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
 317               cur->bc_private.b.allocated == 0);
 318        /*
 319         * Free the cursor.
 320         */
 321        kmem_zone_free(xfs_btree_cur_zone, cur);
 322}
 323
 324/*
 325 * Duplicate the btree cursor.
 326 * Allocate a new one, copy the record, re-get the buffers.
 327 */
 328int                                     /* error */
 329xfs_btree_dup_cursor(
 330        xfs_btree_cur_t *cur,           /* input cursor */
 331        xfs_btree_cur_t **ncur)         /* output cursor */
 332{
 333        xfs_buf_t       *bp;            /* btree block's buffer pointer */
 334        int             error;          /* error return value */
 335        int             i;              /* level number of btree block */
 336        xfs_mount_t     *mp;            /* mount structure for filesystem */
 337        xfs_btree_cur_t *new;           /* new cursor value */
 338        xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
 339
 340        tp = cur->bc_tp;
 341        mp = cur->bc_mp;
 342
 343        /*
 344         * Allocate a new cursor like the old one.
 345         */
 346        new = cur->bc_ops->dup_cursor(cur);
 347
 348        /*
 349         * Copy the record currently in the cursor.
 350         */
 351        new->bc_rec = cur->bc_rec;
 352
 353        /*
 354         * For each level current, re-get the buffer and copy the ptr value.
 355         */
 356        for (i = 0; i < new->bc_nlevels; i++) {
 357                new->bc_ptrs[i] = cur->bc_ptrs[i];
 358                new->bc_ra[i] = cur->bc_ra[i];
 359                bp = cur->bc_bufs[i];
 360                if (bp) {
 361                        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 362                                                   XFS_BUF_ADDR(bp), mp->m_bsize,
 363                                                   0, &bp,
 364                                                   cur->bc_ops->buf_ops);
 365                        if (error) {
 366                                xfs_btree_del_cursor(new, error);
 367                                *ncur = NULL;
 368                                return error;
 369                        }
 370                }
 371                new->bc_bufs[i] = bp;
 372        }
 373        *ncur = new;
 374        return 0;
 375}
 376
 377/*
 378 * XFS btree block layout and addressing:
 379 *
 380 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 381 *
 382 * The leaf record start with a header then followed by records containing
 383 * the values.  A non-leaf block also starts with the same header, and
 384 * then first contains lookup keys followed by an equal number of pointers
 385 * to the btree blocks at the previous level.
 386 *
 387 *              +--------+-------+-------+-------+-------+-------+-------+
 388 * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 389 *              +--------+-------+-------+-------+-------+-------+-------+
 390 *
 391 *              +--------+-------+-------+-------+-------+-------+-------+
 392 * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 393 *              +--------+-------+-------+-------+-------+-------+-------+
 394 *
 395 * The header is called struct xfs_btree_block for reasons better left unknown
 396 * and comes in different versions for short (32bit) and long (64bit) block
 397 * pointers.  The record and key structures are defined by the btree instances
 398 * and opaque to the btree core.  The block pointers are simple disk endian
 399 * integers, available in a short (32bit) and long (64bit) variant.
 400 *
 401 * The helpers below calculate the offset of a given record, key or pointer
 402 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 403 * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
 404 * inside the btree block is done using indices starting at one, not zero!
 405 */
 406
 407/*
 408 * Return size of the btree block header for this btree instance.
 409 */
 410static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
 411{
 412        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 413                if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
 414                        return XFS_BTREE_LBLOCK_CRC_LEN;
 415                return XFS_BTREE_LBLOCK_LEN;
 416        }
 417        if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
 418                return XFS_BTREE_SBLOCK_CRC_LEN;
 419        return XFS_BTREE_SBLOCK_LEN;
 420}
 421
 422/*
 423 * Return size of btree block pointers for this btree instance.
 424 */
 425static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
 426{
 427        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 428                sizeof(__be64) : sizeof(__be32);
 429}
 430
 431/*
 432 * Calculate offset of the n-th record in a btree block.
 433 */
 434STATIC size_t
 435xfs_btree_rec_offset(
 436        struct xfs_btree_cur    *cur,
 437        int                     n)
 438{
 439        return xfs_btree_block_len(cur) +
 440                (n - 1) * cur->bc_ops->rec_len;
 441}
 442
 443/*
 444 * Calculate offset of the n-th key in a btree block.
 445 */
 446STATIC size_t
 447xfs_btree_key_offset(
 448        struct xfs_btree_cur    *cur,
 449        int                     n)
 450{
 451        return xfs_btree_block_len(cur) +
 452                (n - 1) * cur->bc_ops->key_len;
 453}
 454
 455/*
 456 * Calculate offset of the n-th block pointer in a btree block.
 457 */
 458STATIC size_t
 459xfs_btree_ptr_offset(
 460        struct xfs_btree_cur    *cur,
 461        int                     n,
 462        int                     level)
 463{
 464        return xfs_btree_block_len(cur) +
 465                cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
 466                (n - 1) * xfs_btree_ptr_len(cur);
 467}
 468
 469/*
 470 * Return a pointer to the n-th record in the btree block.
 471 */
 472STATIC union xfs_btree_rec *
 473xfs_btree_rec_addr(
 474        struct xfs_btree_cur    *cur,
 475        int                     n,
 476        struct xfs_btree_block  *block)
 477{
 478        return (union xfs_btree_rec *)
 479                ((char *)block + xfs_btree_rec_offset(cur, n));
 480}
 481
 482/*
 483 * Return a pointer to the n-th key in the btree block.
 484 */
 485STATIC union xfs_btree_key *
 486xfs_btree_key_addr(
 487        struct xfs_btree_cur    *cur,
 488        int                     n,
 489        struct xfs_btree_block  *block)
 490{
 491        return (union xfs_btree_key *)
 492                ((char *)block + xfs_btree_key_offset(cur, n));
 493}
 494
 495/*
 496 * Return a pointer to the n-th block pointer in the btree block.
 497 */
 498STATIC union xfs_btree_ptr *
 499xfs_btree_ptr_addr(
 500        struct xfs_btree_cur    *cur,
 501        int                     n,
 502        struct xfs_btree_block  *block)
 503{
 504        int                     level = xfs_btree_get_level(block);
 505
 506        ASSERT(block->bb_level != 0);
 507
 508        return (union xfs_btree_ptr *)
 509                ((char *)block + xfs_btree_ptr_offset(cur, n, level));
 510}
 511
 512/*
 513 * Get the root block which is stored in the inode.
 514 *
 515 * For now this btree implementation assumes the btree root is always
 516 * stored in the if_broot field of an inode fork.
 517 */
 518STATIC struct xfs_btree_block *
 519xfs_btree_get_iroot(
 520       struct xfs_btree_cur    *cur)
 521{
 522       struct xfs_ifork        *ifp;
 523
 524       ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
 525       return (struct xfs_btree_block *)ifp->if_broot;
 526}
 527
 528/*
 529 * Retrieve the block pointer from the cursor at the given level.
 530 * This may be an inode btree root or from a buffer.
 531 */
 532STATIC struct xfs_btree_block *         /* generic btree block pointer */
 533xfs_btree_get_block(
 534        struct xfs_btree_cur    *cur,   /* btree cursor */
 535        int                     level,  /* level in btree */
 536        struct xfs_buf          **bpp)  /* buffer containing the block */
 537{
 538        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 539            (level == cur->bc_nlevels - 1)) {
 540                *bpp = NULL;
 541                return xfs_btree_get_iroot(cur);
 542        }
 543
 544        *bpp = cur->bc_bufs[level];
 545        return XFS_BUF_TO_BLOCK(*bpp);
 546}
 547
 548/*
 549 * Get a buffer for the block, return it with no data read.
 550 * Long-form addressing.
 551 */
 552xfs_buf_t *                             /* buffer for fsbno */
 553xfs_btree_get_bufl(
 554        xfs_mount_t     *mp,            /* file system mount point */
 555        xfs_trans_t     *tp,            /* transaction pointer */
 556        xfs_fsblock_t   fsbno,          /* file system block number */
 557        uint            lock)           /* lock flags for get_buf */
 558{
 559        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 560        xfs_daddr_t             d;              /* real disk block address */
 561
 562        ASSERT(fsbno != NULLFSBLOCK);
 563        d = XFS_FSB_TO_DADDR(mp, fsbno);
 564        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 565        ASSERT(!xfs_buf_geterror(bp));
 566        return bp;
 567}
 568
 569/*
 570 * Get a buffer for the block, return it with no data read.
 571 * Short-form addressing.
 572 */
 573xfs_buf_t *                             /* buffer for agno/agbno */
 574xfs_btree_get_bufs(
 575        xfs_mount_t     *mp,            /* file system mount point */
 576        xfs_trans_t     *tp,            /* transaction pointer */
 577        xfs_agnumber_t  agno,           /* allocation group number */
 578        xfs_agblock_t   agbno,          /* allocation group block number */
 579        uint            lock)           /* lock flags for get_buf */
 580{
 581        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 582        xfs_daddr_t             d;              /* real disk block address */
 583
 584        ASSERT(agno != NULLAGNUMBER);
 585        ASSERT(agbno != NULLAGBLOCK);
 586        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 587        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 588        ASSERT(!xfs_buf_geterror(bp));
 589        return bp;
 590}
 591
 592/*
 593 * Check for the cursor referring to the last block at the given level.
 594 */
 595int                                     /* 1=is last block, 0=not last block */
 596xfs_btree_islastblock(
 597        xfs_btree_cur_t         *cur,   /* btree cursor */
 598        int                     level)  /* level to check */
 599{
 600        struct xfs_btree_block  *block; /* generic btree block pointer */
 601        xfs_buf_t               *bp;    /* buffer containing block */
 602
 603        block = xfs_btree_get_block(cur, level, &bp);
 604        xfs_btree_check_block(cur, block, level, bp);
 605        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 606                return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
 607        else
 608                return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
 609}
 610
 611/*
 612 * Change the cursor to point to the first record at the given level.
 613 * Other levels are unaffected.
 614 */
 615STATIC int                              /* success=1, failure=0 */
 616xfs_btree_firstrec(
 617        xfs_btree_cur_t         *cur,   /* btree cursor */
 618        int                     level)  /* level to change */
 619{
 620        struct xfs_btree_block  *block; /* generic btree block pointer */
 621        xfs_buf_t               *bp;    /* buffer containing block */
 622
 623        /*
 624         * Get the block pointer for this level.
 625         */
 626        block = xfs_btree_get_block(cur, level, &bp);
 627        xfs_btree_check_block(cur, block, level, bp);
 628        /*
 629         * It's empty, there is no such record.
 630         */
 631        if (!block->bb_numrecs)
 632                return 0;
 633        /*
 634         * Set the ptr value to 1, that's the first record/key.
 635         */
 636        cur->bc_ptrs[level] = 1;
 637        return 1;
 638}
 639
 640/*
 641 * Change the cursor to point to the last record in the current block
 642 * at the given level.  Other levels are unaffected.
 643 */
 644STATIC int                              /* success=1, failure=0 */
 645xfs_btree_lastrec(
 646        xfs_btree_cur_t         *cur,   /* btree cursor */
 647        int                     level)  /* level to change */
 648{
 649        struct xfs_btree_block  *block; /* generic btree block pointer */
 650        xfs_buf_t               *bp;    /* buffer containing block */
 651
 652        /*
 653         * Get the block pointer for this level.
 654         */
 655        block = xfs_btree_get_block(cur, level, &bp);
 656        xfs_btree_check_block(cur, block, level, bp);
 657        /*
 658         * It's empty, there is no such record.
 659         */
 660        if (!block->bb_numrecs)
 661                return 0;
 662        /*
 663         * Set the ptr value to numrecs, that's the last record/key.
 664         */
 665        cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
 666        return 1;
 667}
 668
 669/*
 670 * Compute first and last byte offsets for the fields given.
 671 * Interprets the offsets table, which contains struct field offsets.
 672 */
 673void
 674xfs_btree_offsets(
 675        __int64_t       fields,         /* bitmask of fields */
 676        const short     *offsets,       /* table of field offsets */
 677        int             nbits,          /* number of bits to inspect */
 678        int             *first,         /* output: first byte offset */
 679        int             *last)          /* output: last byte offset */
 680{
 681        int             i;              /* current bit number */
 682        __int64_t       imask;          /* mask for current bit number */
 683
 684        ASSERT(fields != 0);
 685        /*
 686         * Find the lowest bit, so the first byte offset.
 687         */
 688        for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
 689                if (imask & fields) {
 690                        *first = offsets[i];
 691                        break;
 692                }
 693        }
 694        /*
 695         * Find the highest bit, so the last byte offset.
 696         */
 697        for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
 698                if (imask & fields) {
 699                        *last = offsets[i + 1] - 1;
 700                        break;
 701                }
 702        }
 703}
 704
 705/*
 706 * Get a buffer for the block, return it read in.
 707 * Long-form addressing.
 708 */
 709int
 710xfs_btree_read_bufl(
 711        struct xfs_mount        *mp,            /* file system mount point */
 712        struct xfs_trans        *tp,            /* transaction pointer */
 713        xfs_fsblock_t           fsbno,          /* file system block number */
 714        uint                    lock,           /* lock flags for read_buf */
 715        struct xfs_buf          **bpp,          /* buffer for fsbno */
 716        int                     refval,         /* ref count value for buffer */
 717        const struct xfs_buf_ops *ops)
 718{
 719        struct xfs_buf          *bp;            /* return value */
 720        xfs_daddr_t             d;              /* real disk block address */
 721        int                     error;
 722
 723        ASSERT(fsbno != NULLFSBLOCK);
 724        d = XFS_FSB_TO_DADDR(mp, fsbno);
 725        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
 726                                   mp->m_bsize, lock, &bp, ops);
 727        if (error)
 728                return error;
 729        ASSERT(!xfs_buf_geterror(bp));
 730        if (bp)
 731                xfs_buf_set_ref(bp, refval);
 732        *bpp = bp;
 733        return 0;
 734}
 735
 736/*
 737 * Read-ahead the block, don't wait for it, don't return a buffer.
 738 * Long-form addressing.
 739 */
 740/* ARGSUSED */
 741void
 742xfs_btree_reada_bufl(
 743        struct xfs_mount        *mp,            /* file system mount point */
 744        xfs_fsblock_t           fsbno,          /* file system block number */
 745        xfs_extlen_t            count,          /* count of filesystem blocks */
 746        const struct xfs_buf_ops *ops)
 747{
 748        xfs_daddr_t             d;
 749
 750        ASSERT(fsbno != NULLFSBLOCK);
 751        d = XFS_FSB_TO_DADDR(mp, fsbno);
 752        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 753}
 754
 755/*
 756 * Read-ahead the block, don't wait for it, don't return a buffer.
 757 * Short-form addressing.
 758 */
 759/* ARGSUSED */
 760void
 761xfs_btree_reada_bufs(
 762        struct xfs_mount        *mp,            /* file system mount point */
 763        xfs_agnumber_t          agno,           /* allocation group number */
 764        xfs_agblock_t           agbno,          /* allocation group block number */
 765        xfs_extlen_t            count,          /* count of filesystem blocks */
 766        const struct xfs_buf_ops *ops)
 767{
 768        xfs_daddr_t             d;
 769
 770        ASSERT(agno != NULLAGNUMBER);
 771        ASSERT(agbno != NULLAGBLOCK);
 772        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 773        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 774}
 775
 776STATIC int
 777xfs_btree_readahead_lblock(
 778        struct xfs_btree_cur    *cur,
 779        int                     lr,
 780        struct xfs_btree_block  *block)
 781{
 782        int                     rval = 0;
 783        xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
 784        xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
 785
 786        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
 787                xfs_btree_reada_bufl(cur->bc_mp, left, 1,
 788                                     cur->bc_ops->buf_ops);
 789                rval++;
 790        }
 791
 792        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
 793                xfs_btree_reada_bufl(cur->bc_mp, right, 1,
 794                                     cur->bc_ops->buf_ops);
 795                rval++;
 796        }
 797
 798        return rval;
 799}
 800
 801STATIC int
 802xfs_btree_readahead_sblock(
 803        struct xfs_btree_cur    *cur,
 804        int                     lr,
 805        struct xfs_btree_block *block)
 806{
 807        int                     rval = 0;
 808        xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
 809        xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
 810
 811
 812        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
 813                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 814                                     left, 1, cur->bc_ops->buf_ops);
 815                rval++;
 816        }
 817
 818        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
 819                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 820                                     right, 1, cur->bc_ops->buf_ops);
 821                rval++;
 822        }
 823
 824        return rval;
 825}
 826
 827/*
 828 * Read-ahead btree blocks, at the given level.
 829 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 830 */
 831STATIC int
 832xfs_btree_readahead(
 833        struct xfs_btree_cur    *cur,           /* btree cursor */
 834        int                     lev,            /* level in btree */
 835        int                     lr)             /* left/right bits */
 836{
 837        struct xfs_btree_block  *block;
 838
 839        /*
 840         * No readahead needed if we are at the root level and the
 841         * btree root is stored in the inode.
 842         */
 843        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 844            (lev == cur->bc_nlevels - 1))
 845                return 0;
 846
 847        if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
 848                return 0;
 849
 850        cur->bc_ra[lev] |= lr;
 851        block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
 852
 853        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 854                return xfs_btree_readahead_lblock(cur, lr, block);
 855        return xfs_btree_readahead_sblock(cur, lr, block);
 856}
 857
 858STATIC xfs_daddr_t
 859xfs_btree_ptr_to_daddr(
 860        struct xfs_btree_cur    *cur,
 861        union xfs_btree_ptr     *ptr)
 862{
 863        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 864                ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
 865
 866                return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
 867        } else {
 868                ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
 869                ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
 870
 871                return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
 872                                        be32_to_cpu(ptr->s));
 873        }
 874}
 875
 876/*
 877 * Readahead @count btree blocks at the given @ptr location.
 878 *
 879 * We don't need to care about long or short form btrees here as we have a
 880 * method of converting the ptr directly to a daddr available to us.
 881 */
 882STATIC void
 883xfs_btree_readahead_ptr(
 884        struct xfs_btree_cur    *cur,
 885        union xfs_btree_ptr     *ptr,
 886        xfs_extlen_t            count)
 887{
 888        xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
 889                          xfs_btree_ptr_to_daddr(cur, ptr),
 890                          cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
 891}
 892
 893/*
 894 * Set the buffer for level "lev" in the cursor to bp, releasing
 895 * any previous buffer.
 896 */
 897STATIC void
 898xfs_btree_setbuf(
 899        xfs_btree_cur_t         *cur,   /* btree cursor */
 900        int                     lev,    /* level in btree */
 901        xfs_buf_t               *bp)    /* new buffer to set */
 902{
 903        struct xfs_btree_block  *b;     /* btree block */
 904
 905        if (cur->bc_bufs[lev])
 906                xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
 907        cur->bc_bufs[lev] = bp;
 908        cur->bc_ra[lev] = 0;
 909
 910        b = XFS_BUF_TO_BLOCK(bp);
 911        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 912                if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
 913                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 914                if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
 915                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 916        } else {
 917                if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
 918                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 919                if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
 920                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 921        }
 922}
 923
 924STATIC int
 925xfs_btree_ptr_is_null(
 926        struct xfs_btree_cur    *cur,
 927        union xfs_btree_ptr     *ptr)
 928{
 929        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 930                return ptr->l == cpu_to_be64(NULLDFSBNO);
 931        else
 932                return ptr->s == cpu_to_be32(NULLAGBLOCK);
 933}
 934
 935STATIC void
 936xfs_btree_set_ptr_null(
 937        struct xfs_btree_cur    *cur,
 938        union xfs_btree_ptr     *ptr)
 939{
 940        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 941                ptr->l = cpu_to_be64(NULLDFSBNO);
 942        else
 943                ptr->s = cpu_to_be32(NULLAGBLOCK);
 944}
 945
 946/*
 947 * Get/set/init sibling pointers
 948 */
 949STATIC void
 950xfs_btree_get_sibling(
 951        struct xfs_btree_cur    *cur,
 952        struct xfs_btree_block  *block,
 953        union xfs_btree_ptr     *ptr,
 954        int                     lr)
 955{
 956        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 957
 958        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 959                if (lr == XFS_BB_RIGHTSIB)
 960                        ptr->l = block->bb_u.l.bb_rightsib;
 961                else
 962                        ptr->l = block->bb_u.l.bb_leftsib;
 963        } else {
 964                if (lr == XFS_BB_RIGHTSIB)
 965                        ptr->s = block->bb_u.s.bb_rightsib;
 966                else
 967                        ptr->s = block->bb_u.s.bb_leftsib;
 968        }
 969}
 970
 971STATIC void
 972xfs_btree_set_sibling(
 973        struct xfs_btree_cur    *cur,
 974        struct xfs_btree_block  *block,
 975        union xfs_btree_ptr     *ptr,
 976        int                     lr)
 977{
 978        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 979
 980        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 981                if (lr == XFS_BB_RIGHTSIB)
 982                        block->bb_u.l.bb_rightsib = ptr->l;
 983                else
 984                        block->bb_u.l.bb_leftsib = ptr->l;
 985        } else {
 986                if (lr == XFS_BB_RIGHTSIB)
 987                        block->bb_u.s.bb_rightsib = ptr->s;
 988                else
 989                        block->bb_u.s.bb_leftsib = ptr->s;
 990        }
 991}
 992
 993void
 994xfs_btree_init_block_int(
 995        struct xfs_mount        *mp,
 996        struct xfs_btree_block  *buf,
 997        xfs_daddr_t             blkno,
 998        __u32                   magic,
 999        __u16                   level,
1000        __u16                   numrecs,
1001        __u64                   owner,
1002        unsigned int            flags)
1003{
1004        buf->bb_magic = cpu_to_be32(magic);
1005        buf->bb_level = cpu_to_be16(level);
1006        buf->bb_numrecs = cpu_to_be16(numrecs);
1007
1008        if (flags & XFS_BTREE_LONG_PTRS) {
1009                buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
1010                buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
1011                if (flags & XFS_BTREE_CRC_BLOCKS) {
1012                        buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1013                        buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1014                        uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
1015                        buf->bb_u.l.bb_pad = 0;
1016                        buf->bb_u.l.bb_lsn = 0;
1017                }
1018        } else {
1019                /* owner is a 32 bit value on short blocks */
1020                __u32 __owner = (__u32)owner;
1021
1022                buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1023                buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1024                if (flags & XFS_BTREE_CRC_BLOCKS) {
1025                        buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1026                        buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1027                        uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
1028                        buf->bb_u.s.bb_lsn = 0;
1029                }
1030        }
1031}
1032
1033void
1034xfs_btree_init_block(
1035        struct xfs_mount *mp,
1036        struct xfs_buf  *bp,
1037        __u32           magic,
1038        __u16           level,
1039        __u16           numrecs,
1040        __u64           owner,
1041        unsigned int    flags)
1042{
1043        xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1044                                 magic, level, numrecs, owner, flags);
1045}
1046
1047STATIC void
1048xfs_btree_init_block_cur(
1049        struct xfs_btree_cur    *cur,
1050        struct xfs_buf          *bp,
1051        int                     level,
1052        int                     numrecs)
1053{
1054        __u64 owner;
1055
1056        /*
1057         * we can pull the owner from the cursor right now as the different
1058         * owners align directly with the pointer size of the btree. This may
1059         * change in future, but is safe for current users of the generic btree
1060         * code.
1061         */
1062        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1063                owner = cur->bc_private.b.ip->i_ino;
1064        else
1065                owner = cur->bc_private.a.agno;
1066
1067        xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1068                                 xfs_btree_magic(cur), level, numrecs,
1069                                 owner, cur->bc_flags);
1070}
1071
1072/*
1073 * Return true if ptr is the last record in the btree and
1074 * we need to track updates to this record.  The decision
1075 * will be further refined in the update_lastrec method.
1076 */
1077STATIC int
1078xfs_btree_is_lastrec(
1079        struct xfs_btree_cur    *cur,
1080        struct xfs_btree_block  *block,
1081        int                     level)
1082{
1083        union xfs_btree_ptr     ptr;
1084
1085        if (level > 0)
1086                return 0;
1087        if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1088                return 0;
1089
1090        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1091        if (!xfs_btree_ptr_is_null(cur, &ptr))
1092                return 0;
1093        return 1;
1094}
1095
1096STATIC void
1097xfs_btree_buf_to_ptr(
1098        struct xfs_btree_cur    *cur,
1099        struct xfs_buf          *bp,
1100        union xfs_btree_ptr     *ptr)
1101{
1102        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1103                ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1104                                        XFS_BUF_ADDR(bp)));
1105        else {
1106                ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1107                                        XFS_BUF_ADDR(bp)));
1108        }
1109}
1110
1111STATIC void
1112xfs_btree_set_refs(
1113        struct xfs_btree_cur    *cur,
1114        struct xfs_buf          *bp)
1115{
1116        switch (cur->bc_btnum) {
1117        case XFS_BTNUM_BNO:
1118        case XFS_BTNUM_CNT:
1119                xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1120                break;
1121        case XFS_BTNUM_INO:
1122                xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1123                break;
1124        case XFS_BTNUM_BMAP:
1125                xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1126                break;
1127        default:
1128                ASSERT(0);
1129        }
1130}
1131
1132STATIC int
1133xfs_btree_get_buf_block(
1134        struct xfs_btree_cur    *cur,
1135        union xfs_btree_ptr     *ptr,
1136        int                     flags,
1137        struct xfs_btree_block  **block,
1138        struct xfs_buf          **bpp)
1139{
1140        struct xfs_mount        *mp = cur->bc_mp;
1141        xfs_daddr_t             d;
1142
1143        /* need to sort out how callers deal with failures first */
1144        ASSERT(!(flags & XBF_TRYLOCK));
1145
1146        d = xfs_btree_ptr_to_daddr(cur, ptr);
1147        *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1148                                 mp->m_bsize, flags);
1149
1150        if (!*bpp)
1151                return ENOMEM;
1152
1153        (*bpp)->b_ops = cur->bc_ops->buf_ops;
1154        *block = XFS_BUF_TO_BLOCK(*bpp);
1155        return 0;
1156}
1157
1158/*
1159 * Read in the buffer at the given ptr and return the buffer and
1160 * the block pointer within the buffer.
1161 */
1162STATIC int
1163xfs_btree_read_buf_block(
1164        struct xfs_btree_cur    *cur,
1165        union xfs_btree_ptr     *ptr,
1166        int                     level,
1167        int                     flags,
1168        struct xfs_btree_block  **block,
1169        struct xfs_buf          **bpp)
1170{
1171        struct xfs_mount        *mp = cur->bc_mp;
1172        xfs_daddr_t             d;
1173        int                     error;
1174
1175        /* need to sort out how callers deal with failures first */
1176        ASSERT(!(flags & XBF_TRYLOCK));
1177
1178        d = xfs_btree_ptr_to_daddr(cur, ptr);
1179        error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1180                                   mp->m_bsize, flags, bpp,
1181                                   cur->bc_ops->buf_ops);
1182        if (error)
1183                return error;
1184
1185        ASSERT(!xfs_buf_geterror(*bpp));
1186        xfs_btree_set_refs(cur, *bpp);
1187        *block = XFS_BUF_TO_BLOCK(*bpp);
1188        return 0;
1189}
1190
1191/*
1192 * Copy keys from one btree block to another.
1193 */
1194STATIC void
1195xfs_btree_copy_keys(
1196        struct xfs_btree_cur    *cur,
1197        union xfs_btree_key     *dst_key,
1198        union xfs_btree_key     *src_key,
1199        int                     numkeys)
1200{
1201        ASSERT(numkeys >= 0);
1202        memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1203}
1204
1205/*
1206 * Copy records from one btree block to another.
1207 */
1208STATIC void
1209xfs_btree_copy_recs(
1210        struct xfs_btree_cur    *cur,
1211        union xfs_btree_rec     *dst_rec,
1212        union xfs_btree_rec     *src_rec,
1213        int                     numrecs)
1214{
1215        ASSERT(numrecs >= 0);
1216        memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1217}
1218
1219/*
1220 * Copy block pointers from one btree block to another.
1221 */
1222STATIC void
1223xfs_btree_copy_ptrs(
1224        struct xfs_btree_cur    *cur,
1225        union xfs_btree_ptr     *dst_ptr,
1226        union xfs_btree_ptr     *src_ptr,
1227        int                     numptrs)
1228{
1229        ASSERT(numptrs >= 0);
1230        memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1231}
1232
1233/*
1234 * Shift keys one index left/right inside a single btree block.
1235 */
1236STATIC void
1237xfs_btree_shift_keys(
1238        struct xfs_btree_cur    *cur,
1239        union xfs_btree_key     *key,
1240        int                     dir,
1241        int                     numkeys)
1242{
1243        char                    *dst_key;
1244
1245        ASSERT(numkeys >= 0);
1246        ASSERT(dir == 1 || dir == -1);
1247
1248        dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1249        memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1250}
1251
1252/*
1253 * Shift records one index left/right inside a single btree block.
1254 */
1255STATIC void
1256xfs_btree_shift_recs(
1257        struct xfs_btree_cur    *cur,
1258        union xfs_btree_rec     *rec,
1259        int                     dir,
1260        int                     numrecs)
1261{
1262        char                    *dst_rec;
1263
1264        ASSERT(numrecs >= 0);
1265        ASSERT(dir == 1 || dir == -1);
1266
1267        dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1268        memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1269}
1270
1271/*
1272 * Shift block pointers one index left/right inside a single btree block.
1273 */
1274STATIC void
1275xfs_btree_shift_ptrs(
1276        struct xfs_btree_cur    *cur,
1277        union xfs_btree_ptr     *ptr,
1278        int                     dir,
1279        int                     numptrs)
1280{
1281        char                    *dst_ptr;
1282
1283        ASSERT(numptrs >= 0);
1284        ASSERT(dir == 1 || dir == -1);
1285
1286        dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1287        memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1288}
1289
1290/*
1291 * Log key values from the btree block.
1292 */
1293STATIC void
1294xfs_btree_log_keys(
1295        struct xfs_btree_cur    *cur,
1296        struct xfs_buf          *bp,
1297        int                     first,
1298        int                     last)
1299{
1300        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1301        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1302
1303        if (bp) {
1304                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1305                xfs_trans_log_buf(cur->bc_tp, bp,
1306                                  xfs_btree_key_offset(cur, first),
1307                                  xfs_btree_key_offset(cur, last + 1) - 1);
1308        } else {
1309                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1310                                xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1311        }
1312
1313        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1314}
1315
1316/*
1317 * Log record values from the btree block.
1318 */
1319void
1320xfs_btree_log_recs(
1321        struct xfs_btree_cur    *cur,
1322        struct xfs_buf          *bp,
1323        int                     first,
1324        int                     last)
1325{
1326        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1327        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1328
1329        xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1330        xfs_trans_log_buf(cur->bc_tp, bp,
1331                          xfs_btree_rec_offset(cur, first),
1332                          xfs_btree_rec_offset(cur, last + 1) - 1);
1333
1334        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1335}
1336
1337/*
1338 * Log block pointer fields from a btree block (nonleaf).
1339 */
1340STATIC void
1341xfs_btree_log_ptrs(
1342        struct xfs_btree_cur    *cur,   /* btree cursor */
1343        struct xfs_buf          *bp,    /* buffer containing btree block */
1344        int                     first,  /* index of first pointer to log */
1345        int                     last)   /* index of last pointer to log */
1346{
1347        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1348        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1349
1350        if (bp) {
1351                struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1352                int                     level = xfs_btree_get_level(block);
1353
1354                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1355                xfs_trans_log_buf(cur->bc_tp, bp,
1356                                xfs_btree_ptr_offset(cur, first, level),
1357                                xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1358        } else {
1359                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1360                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1361        }
1362
1363        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1364}
1365
1366/*
1367 * Log fields from a btree block header.
1368 */
1369void
1370xfs_btree_log_block(
1371        struct xfs_btree_cur    *cur,   /* btree cursor */
1372        struct xfs_buf          *bp,    /* buffer containing btree block */
1373        int                     fields) /* mask of fields: XFS_BB_... */
1374{
1375        int                     first;  /* first byte offset logged */
1376        int                     last;   /* last byte offset logged */
1377        static const short      soffsets[] = {  /* table of offsets (short) */
1378                offsetof(struct xfs_btree_block, bb_magic),
1379                offsetof(struct xfs_btree_block, bb_level),
1380                offsetof(struct xfs_btree_block, bb_numrecs),
1381                offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1382                offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1383                offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1384                offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1385                offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1386                offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1387                offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1388                XFS_BTREE_SBLOCK_CRC_LEN
1389        };
1390        static const short      loffsets[] = {  /* table of offsets (long) */
1391                offsetof(struct xfs_btree_block, bb_magic),
1392                offsetof(struct xfs_btree_block, bb_level),
1393                offsetof(struct xfs_btree_block, bb_numrecs),
1394                offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1395                offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1396                offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1397                offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1398                offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1399                offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1400                offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1401                offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1402                XFS_BTREE_LBLOCK_CRC_LEN
1403        };
1404
1405        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1406        XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1407
1408        if (bp) {
1409                int nbits;
1410
1411                if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1412                        /*
1413                         * We don't log the CRC when updating a btree
1414                         * block but instead recreate it during log
1415                         * recovery.  As the log buffers have checksums
1416                         * of their own this is safe and avoids logging a crc
1417                         * update in a lot of places.
1418                         */
1419                        if (fields == XFS_BB_ALL_BITS)
1420                                fields = XFS_BB_ALL_BITS_CRC;
1421                        nbits = XFS_BB_NUM_BITS_CRC;
1422                } else {
1423                        nbits = XFS_BB_NUM_BITS;
1424                }
1425                xfs_btree_offsets(fields,
1426                                  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1427                                        loffsets : soffsets,
1428                                  nbits, &first, &last);
1429                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1430                xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1431        } else {
1432                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1433                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1434        }
1435
1436        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1437}
1438
1439/*
1440 * Increment cursor by one record at the level.
1441 * For nonzero levels the leaf-ward information is untouched.
1442 */
1443int                                             /* error */
1444xfs_btree_increment(
1445        struct xfs_btree_cur    *cur,
1446        int                     level,
1447        int                     *stat)          /* success/failure */
1448{
1449        struct xfs_btree_block  *block;
1450        union xfs_btree_ptr     ptr;
1451        struct xfs_buf          *bp;
1452        int                     error;          /* error return value */
1453        int                     lev;
1454
1455        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1456        XFS_BTREE_TRACE_ARGI(cur, level);
1457
1458        ASSERT(level < cur->bc_nlevels);
1459
1460        /* Read-ahead to the right at this level. */
1461        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1462
1463        /* Get a pointer to the btree block. */
1464        block = xfs_btree_get_block(cur, level, &bp);
1465
1466#ifdef DEBUG
1467        error = xfs_btree_check_block(cur, block, level, bp);
1468        if (error)
1469                goto error0;
1470#endif
1471
1472        /* We're done if we remain in the block after the increment. */
1473        if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1474                goto out1;
1475
1476        /* Fail if we just went off the right edge of the tree. */
1477        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1478        if (xfs_btree_ptr_is_null(cur, &ptr))
1479                goto out0;
1480
1481        XFS_BTREE_STATS_INC(cur, increment);
1482
1483        /*
1484         * March up the tree incrementing pointers.
1485         * Stop when we don't go off the right edge of a block.
1486         */
1487        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1488                block = xfs_btree_get_block(cur, lev, &bp);
1489
1490#ifdef DEBUG
1491                error = xfs_btree_check_block(cur, block, lev, bp);
1492                if (error)
1493                        goto error0;
1494#endif
1495
1496                if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1497                        break;
1498
1499                /* Read-ahead the right block for the next loop. */
1500                xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1501        }
1502
1503        /*
1504         * If we went off the root then we are either seriously
1505         * confused or have the tree root in an inode.
1506         */
1507        if (lev == cur->bc_nlevels) {
1508                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1509                        goto out0;
1510                ASSERT(0);
1511                error = EFSCORRUPTED;
1512                goto error0;
1513        }
1514        ASSERT(lev < cur->bc_nlevels);
1515
1516        /*
1517         * Now walk back down the tree, fixing up the cursor's buffer
1518         * pointers and key numbers.
1519         */
1520        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1521                union xfs_btree_ptr     *ptrp;
1522
1523                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1524                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1525                                                        0, &block, &bp);
1526                if (error)
1527                        goto error0;
1528
1529                xfs_btree_setbuf(cur, lev, bp);
1530                cur->bc_ptrs[lev] = 1;
1531        }
1532out1:
1533        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1534        *stat = 1;
1535        return 0;
1536
1537out0:
1538        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1539        *stat = 0;
1540        return 0;
1541
1542error0:
1543        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1544        return error;
1545}
1546
1547/*
1548 * Decrement cursor by one record at the level.
1549 * For nonzero levels the leaf-ward information is untouched.
1550 */
1551int                                             /* error */
1552xfs_btree_decrement(
1553        struct xfs_btree_cur    *cur,
1554        int                     level,
1555        int                     *stat)          /* success/failure */
1556{
1557        struct xfs_btree_block  *block;
1558        xfs_buf_t               *bp;
1559        int                     error;          /* error return value */
1560        int                     lev;
1561        union xfs_btree_ptr     ptr;
1562
1563        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1564        XFS_BTREE_TRACE_ARGI(cur, level);
1565
1566        ASSERT(level < cur->bc_nlevels);
1567
1568        /* Read-ahead to the left at this level. */
1569        xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1570
1571        /* We're done if we remain in the block after the decrement. */
1572        if (--cur->bc_ptrs[level] > 0)
1573                goto out1;
1574
1575        /* Get a pointer to the btree block. */
1576        block = xfs_btree_get_block(cur, level, &bp);
1577
1578#ifdef DEBUG
1579        error = xfs_btree_check_block(cur, block, level, bp);
1580        if (error)
1581                goto error0;
1582#endif
1583
1584        /* Fail if we just went off the left edge of the tree. */
1585        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1586        if (xfs_btree_ptr_is_null(cur, &ptr))
1587                goto out0;
1588
1589        XFS_BTREE_STATS_INC(cur, decrement);
1590
1591        /*
1592         * March up the tree decrementing pointers.
1593         * Stop when we don't go off the left edge of a block.
1594         */
1595        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1596                if (--cur->bc_ptrs[lev] > 0)
1597                        break;
1598                /* Read-ahead the left block for the next loop. */
1599                xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1600        }
1601
1602        /*
1603         * If we went off the root then we are seriously confused.
1604         * or the root of the tree is in an inode.
1605         */
1606        if (lev == cur->bc_nlevels) {
1607                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1608                        goto out0;
1609                ASSERT(0);
1610                error = EFSCORRUPTED;
1611                goto error0;
1612        }
1613        ASSERT(lev < cur->bc_nlevels);
1614
1615        /*
1616         * Now walk back down the tree, fixing up the cursor's buffer
1617         * pointers and key numbers.
1618         */
1619        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1620                union xfs_btree_ptr     *ptrp;
1621
1622                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1623                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1624                                                        0, &block, &bp);
1625                if (error)
1626                        goto error0;
1627                xfs_btree_setbuf(cur, lev, bp);
1628                cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1629        }
1630out1:
1631        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1632        *stat = 1;
1633        return 0;
1634
1635out0:
1636        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1637        *stat = 0;
1638        return 0;
1639
1640error0:
1641        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1642        return error;
1643}
1644
1645STATIC int
1646xfs_btree_lookup_get_block(
1647        struct xfs_btree_cur    *cur,   /* btree cursor */
1648        int                     level,  /* level in the btree */
1649        union xfs_btree_ptr     *pp,    /* ptr to btree block */
1650        struct xfs_btree_block  **blkp) /* return btree block */
1651{
1652        struct xfs_buf          *bp;    /* buffer pointer for btree block */
1653        int                     error = 0;
1654
1655        /* special case the root block if in an inode */
1656        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1657            (level == cur->bc_nlevels - 1)) {
1658                *blkp = xfs_btree_get_iroot(cur);
1659                return 0;
1660        }
1661
1662        /*
1663         * If the old buffer at this level for the disk address we are
1664         * looking for re-use it.
1665         *
1666         * Otherwise throw it away and get a new one.
1667         */
1668        bp = cur->bc_bufs[level];
1669        if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1670                *blkp = XFS_BUF_TO_BLOCK(bp);
1671                return 0;
1672        }
1673
1674        error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1675        if (error)
1676                return error;
1677
1678        xfs_btree_setbuf(cur, level, bp);
1679        return 0;
1680}
1681
1682/*
1683 * Get current search key.  For level 0 we don't actually have a key
1684 * structure so we make one up from the record.  For all other levels
1685 * we just return the right key.
1686 */
1687STATIC union xfs_btree_key *
1688xfs_lookup_get_search_key(
1689        struct xfs_btree_cur    *cur,
1690        int                     level,
1691        int                     keyno,
1692        struct xfs_btree_block  *block,
1693        union xfs_btree_key     *kp)
1694{
1695        if (level == 0) {
1696                cur->bc_ops->init_key_from_rec(kp,
1697                                xfs_btree_rec_addr(cur, keyno, block));
1698                return kp;
1699        }
1700
1701        return xfs_btree_key_addr(cur, keyno, block);
1702}
1703
1704/*
1705 * Lookup the record.  The cursor is made to point to it, based on dir.
1706 * stat is set to 0 if can't find any such record, 1 for success.
1707 */
1708int                                     /* error */
1709xfs_btree_lookup(
1710        struct xfs_btree_cur    *cur,   /* btree cursor */
1711        xfs_lookup_t            dir,    /* <=, ==, or >= */
1712        int                     *stat)  /* success/failure */
1713{
1714        struct xfs_btree_block  *block; /* current btree block */
1715        __int64_t               diff;   /* difference for the current key */
1716        int                     error;  /* error return value */
1717        int                     keyno;  /* current key number */
1718        int                     level;  /* level in the btree */
1719        union xfs_btree_ptr     *pp;    /* ptr to btree block */
1720        union xfs_btree_ptr     ptr;    /* ptr to btree block */
1721
1722        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1723        XFS_BTREE_TRACE_ARGI(cur, dir);
1724
1725        XFS_BTREE_STATS_INC(cur, lookup);
1726
1727        block = NULL;
1728        keyno = 0;
1729
1730        /* initialise start pointer from cursor */
1731        cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1732        pp = &ptr;
1733
1734        /*
1735         * Iterate over each level in the btree, starting at the root.
1736         * For each level above the leaves, find the key we need, based
1737         * on the lookup record, then follow the corresponding block
1738         * pointer down to the next level.
1739         */
1740        for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1741                /* Get the block we need to do the lookup on. */
1742                error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1743                if (error)
1744                        goto error0;
1745
1746                if (diff == 0) {
1747                        /*
1748                         * If we already had a key match at a higher level, we
1749                         * know we need to use the first entry in this block.
1750                         */
1751                        keyno = 1;
1752                } else {
1753                        /* Otherwise search this block. Do a binary search. */
1754
1755                        int     high;   /* high entry number */
1756                        int     low;    /* low entry number */
1757
1758                        /* Set low and high entry numbers, 1-based. */
1759                        low = 1;
1760                        high = xfs_btree_get_numrecs(block);
1761                        if (!high) {
1762                                /* Block is empty, must be an empty leaf. */
1763                                ASSERT(level == 0 && cur->bc_nlevels == 1);
1764
1765                                cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1766                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1767                                *stat = 0;
1768                                return 0;
1769                        }
1770
1771                        /* Binary search the block. */
1772                        while (low <= high) {
1773                                union xfs_btree_key     key;
1774                                union xfs_btree_key     *kp;
1775
1776                                XFS_BTREE_STATS_INC(cur, compare);
1777
1778                                /* keyno is average of low and high. */
1779                                keyno = (low + high) >> 1;
1780
1781                                /* Get current search key */
1782                                kp = xfs_lookup_get_search_key(cur, level,
1783                                                keyno, block, &key);
1784
1785                                /*
1786                                 * Compute difference to get next direction:
1787                                 *  - less than, move right
1788                                 *  - greater than, move left
1789                                 *  - equal, we're done
1790                                 */
1791                                diff = cur->bc_ops->key_diff(cur, kp);
1792                                if (diff < 0)
1793                                        low = keyno + 1;
1794                                else if (diff > 0)
1795                                        high = keyno - 1;
1796                                else
1797                                        break;
1798                        }
1799                }
1800
1801                /*
1802                 * If there are more levels, set up for the next level
1803                 * by getting the block number and filling in the cursor.
1804                 */
1805                if (level > 0) {
1806                        /*
1807                         * If we moved left, need the previous key number,
1808                         * unless there isn't one.
1809                         */
1810                        if (diff > 0 && --keyno < 1)
1811                                keyno = 1;
1812                        pp = xfs_btree_ptr_addr(cur, keyno, block);
1813
1814#ifdef DEBUG
1815                        error = xfs_btree_check_ptr(cur, pp, 0, level);
1816                        if (error)
1817                                goto error0;
1818#endif
1819                        cur->bc_ptrs[level] = keyno;
1820                }
1821        }
1822
1823        /* Done with the search. See if we need to adjust the results. */
1824        if (dir != XFS_LOOKUP_LE && diff < 0) {
1825                keyno++;
1826                /*
1827                 * If ge search and we went off the end of the block, but it's
1828                 * not the last block, we're in the wrong block.
1829                 */
1830                xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1831                if (dir == XFS_LOOKUP_GE &&
1832                    keyno > xfs_btree_get_numrecs(block) &&
1833                    !xfs_btree_ptr_is_null(cur, &ptr)) {
1834                        int     i;
1835
1836                        cur->bc_ptrs[0] = keyno;
1837                        error = xfs_btree_increment(cur, 0, &i);
1838                        if (error)
1839                                goto error0;
1840                        XFS_WANT_CORRUPTED_RETURN(i == 1);
1841                        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1842                        *stat = 1;
1843                        return 0;
1844                }
1845        } else if (dir == XFS_LOOKUP_LE && diff > 0)
1846                keyno--;
1847        cur->bc_ptrs[0] = keyno;
1848
1849        /* Return if we succeeded or not. */
1850        if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1851                *stat = 0;
1852        else if (dir != XFS_LOOKUP_EQ || diff == 0)
1853                *stat = 1;
1854        else
1855                *stat = 0;
1856        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1857        return 0;
1858
1859error0:
1860        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1861        return error;
1862}
1863
1864/*
1865 * Update keys at all levels from here to the root along the cursor's path.
1866 */
1867STATIC int
1868xfs_btree_updkey(
1869        struct xfs_btree_cur    *cur,
1870        union xfs_btree_key     *keyp,
1871        int                     level)
1872{
1873        struct xfs_btree_block  *block;
1874        struct xfs_buf          *bp;
1875        union xfs_btree_key     *kp;
1876        int                     ptr;
1877
1878        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1879        XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1880
1881        ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1882
1883        /*
1884         * Go up the tree from this level toward the root.
1885         * At each level, update the key value to the value input.
1886         * Stop when we reach a level where the cursor isn't pointing
1887         * at the first entry in the block.
1888         */
1889        for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1890#ifdef DEBUG
1891                int             error;
1892#endif
1893                block = xfs_btree_get_block(cur, level, &bp);
1894#ifdef DEBUG
1895                error = xfs_btree_check_block(cur, block, level, bp);
1896                if (error) {
1897                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1898                        return error;
1899                }
1900#endif
1901                ptr = cur->bc_ptrs[level];
1902                kp = xfs_btree_key_addr(cur, ptr, block);
1903                xfs_btree_copy_keys(cur, kp, keyp, 1);
1904                xfs_btree_log_keys(cur, bp, ptr, ptr);
1905        }
1906
1907        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1908        return 0;
1909}
1910
1911/*
1912 * Update the record referred to by cur to the value in the
1913 * given record. This either works (return 0) or gets an
1914 * EFSCORRUPTED error.
1915 */
1916int
1917xfs_btree_update(
1918        struct xfs_btree_cur    *cur,
1919        union xfs_btree_rec     *rec)
1920{
1921        struct xfs_btree_block  *block;
1922        struct xfs_buf          *bp;
1923        int                     error;
1924        int                     ptr;
1925        union xfs_btree_rec     *rp;
1926
1927        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1928        XFS_BTREE_TRACE_ARGR(cur, rec);
1929
1930        /* Pick up the current block. */
1931        block = xfs_btree_get_block(cur, 0, &bp);
1932
1933#ifdef DEBUG
1934        error = xfs_btree_check_block(cur, block, 0, bp);
1935        if (error)
1936                goto error0;
1937#endif
1938        /* Get the address of the rec to be updated. */
1939        ptr = cur->bc_ptrs[0];
1940        rp = xfs_btree_rec_addr(cur, ptr, block);
1941
1942        /* Fill in the new contents and log them. */
1943        xfs_btree_copy_recs(cur, rp, rec, 1);
1944        xfs_btree_log_recs(cur, bp, ptr, ptr);
1945
1946        /*
1947         * If we are tracking the last record in the tree and
1948         * we are at the far right edge of the tree, update it.
1949         */
1950        if (xfs_btree_is_lastrec(cur, block, 0)) {
1951                cur->bc_ops->update_lastrec(cur, block, rec,
1952                                            ptr, LASTREC_UPDATE);
1953        }
1954
1955        /* Updating first rec in leaf. Pass new key value up to our parent. */
1956        if (ptr == 1) {
1957                union xfs_btree_key     key;
1958
1959                cur->bc_ops->init_key_from_rec(&key, rec);
1960                error = xfs_btree_updkey(cur, &key, 1);
1961                if (error)
1962                        goto error0;
1963        }
1964
1965        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1966        return 0;
1967
1968error0:
1969        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1970        return error;
1971}
1972
1973/*
1974 * Move 1 record left from cur/level if possible.
1975 * Update cur to reflect the new path.
1976 */
1977STATIC int                                      /* error */
1978xfs_btree_lshift(
1979        struct xfs_btree_cur    *cur,
1980        int                     level,
1981        int                     *stat)          /* success/failure */
1982{
1983        union xfs_btree_key     key;            /* btree key */
1984        struct xfs_buf          *lbp;           /* left buffer pointer */
1985        struct xfs_btree_block  *left;          /* left btree block */
1986        int                     lrecs;          /* left record count */
1987        struct xfs_buf          *rbp;           /* right buffer pointer */
1988        struct xfs_btree_block  *right;         /* right btree block */
1989        int                     rrecs;          /* right record count */
1990        union xfs_btree_ptr     lptr;           /* left btree pointer */
1991        union xfs_btree_key     *rkp = NULL;    /* right btree key */
1992        union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1993        union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1994        int                     error;          /* error return value */
1995
1996        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1997        XFS_BTREE_TRACE_ARGI(cur, level);
1998
1999        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2000            level == cur->bc_nlevels - 1)
2001                goto out0;
2002
2003        /* Set up variables for this block as "right". */
2004        right = xfs_btree_get_block(cur, level, &rbp);
2005
2006#ifdef DEBUG
2007        error = xfs_btree_check_block(cur, right, level, rbp);
2008        if (error)
2009                goto error0;
2010#endif
2011
2012        /* If we've got no left sibling then we can't shift an entry left. */
2013        xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2014        if (xfs_btree_ptr_is_null(cur, &lptr))
2015                goto out0;
2016
2017        /*
2018         * If the cursor entry is the one that would be moved, don't
2019         * do it... it's too complicated.
2020         */
2021        if (cur->bc_ptrs[level] <= 1)
2022                goto out0;
2023
2024        /* Set up the left neighbor as "left". */
2025        error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
2026        if (error)
2027                goto error0;
2028
2029        /* If it's full, it can't take another entry. */
2030        lrecs = xfs_btree_get_numrecs(left);
2031        if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2032                goto out0;
2033
2034        rrecs = xfs_btree_get_numrecs(right);
2035
2036        /*
2037         * We add one entry to the left side and remove one for the right side.
2038         * Account for it here, the changes will be updated on disk and logged
2039         * later.
2040         */
2041        lrecs++;
2042        rrecs--;
2043
2044        XFS_BTREE_STATS_INC(cur, lshift);
2045        XFS_BTREE_STATS_ADD(cur, moves, 1);
2046
2047        /*
2048         * If non-leaf, copy a key and a ptr to the left block.
2049         * Log the changes to the left block.
2050         */
2051        if (level > 0) {
2052                /* It's a non-leaf.  Move keys and pointers. */
2053                union xfs_btree_key     *lkp;   /* left btree key */
2054                union xfs_btree_ptr     *lpp;   /* left address pointer */
2055
2056                lkp = xfs_btree_key_addr(cur, lrecs, left);
2057                rkp = xfs_btree_key_addr(cur, 1, right);
2058
2059                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2060                rpp = xfs_btree_ptr_addr(cur, 1, right);
2061#ifdef DEBUG
2062                error = xfs_btree_check_ptr(cur, rpp, 0, level);
2063                if (error)
2064                        goto error0;
2065#endif
2066                xfs_btree_copy_keys(cur, lkp, rkp, 1);
2067                xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2068
2069                xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2070                xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2071
2072                ASSERT(cur->bc_ops->keys_inorder(cur,
2073                        xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2074        } else {
2075                /* It's a leaf.  Move records.  */
2076                union xfs_btree_rec     *lrp;   /* left record pointer */
2077
2078                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2079                rrp = xfs_btree_rec_addr(cur, 1, right);
2080
2081                xfs_btree_copy_recs(cur, lrp, rrp, 1);
2082                xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2083
2084                ASSERT(cur->bc_ops->recs_inorder(cur,
2085                        xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2086        }
2087
2088        xfs_btree_set_numrecs(left, lrecs);
2089        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2090
2091        xfs_btree_set_numrecs(right, rrecs);
2092        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2093
2094        /*
2095         * Slide the contents of right down one entry.
2096         */
2097        XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2098        if (level > 0) {
2099                /* It's a nonleaf. operate on keys and ptrs */
2100#ifdef DEBUG
2101                int                     i;              /* loop index */
2102
2103                for (i = 0; i < rrecs; i++) {
2104                        error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2105                        if (error)
2106                                goto error0;
2107                }
2108#endif
2109                xfs_btree_shift_keys(cur,
2110                                xfs_btree_key_addr(cur, 2, right),
2111                                -1, rrecs);
2112                xfs_btree_shift_ptrs(cur,
2113                                xfs_btree_ptr_addr(cur, 2, right),
2114                                -1, rrecs);
2115
2116                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2117                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2118        } else {
2119                /* It's a leaf. operate on records */
2120                xfs_btree_shift_recs(cur,
2121                        xfs_btree_rec_addr(cur, 2, right),
2122                        -1, rrecs);
2123                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2124
2125                /*
2126                 * If it's the first record in the block, we'll need a key
2127                 * structure to pass up to the next level (updkey).
2128                 */
2129                cur->bc_ops->init_key_from_rec(&key,
2130                        xfs_btree_rec_addr(cur, 1, right));
2131                rkp = &key;
2132        }
2133
2134        /* Update the parent key values of right. */
2135        error = xfs_btree_updkey(cur, rkp, level + 1);
2136        if (error)
2137                goto error0;
2138
2139        /* Slide the cursor value left one. */
2140        cur->bc_ptrs[level]--;
2141
2142        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2143        *stat = 1;
2144        return 0;
2145
2146out0:
2147        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2148        *stat = 0;
2149        return 0;
2150
2151error0:
2152        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2153        return error;
2154}
2155
2156/*
2157 * Move 1 record right from cur/level if possible.
2158 * Update cur to reflect the new path.
2159 */
2160STATIC int                                      /* error */
2161xfs_btree_rshift(
2162        struct xfs_btree_cur    *cur,
2163        int                     level,
2164        int                     *stat)          /* success/failure */
2165{
2166        union xfs_btree_key     key;            /* btree key */
2167        struct xfs_buf          *lbp;           /* left buffer pointer */
2168        struct xfs_btree_block  *left;          /* left btree block */
2169        struct xfs_buf          *rbp;           /* right buffer pointer */
2170        struct xfs_btree_block  *right;         /* right btree block */
2171        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2172        union xfs_btree_ptr     rptr;           /* right block pointer */
2173        union xfs_btree_key     *rkp;           /* right btree key */
2174        int                     rrecs;          /* right record count */
2175        int                     lrecs;          /* left record count */
2176        int                     error;          /* error return value */
2177        int                     i;              /* loop counter */
2178
2179        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2180        XFS_BTREE_TRACE_ARGI(cur, level);
2181
2182        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2183            (level == cur->bc_nlevels - 1))
2184                goto out0;
2185
2186        /* Set up variables for this block as "left". */
2187        left = xfs_btree_get_block(cur, level, &lbp);
2188
2189#ifdef DEBUG
2190        error = xfs_btree_check_block(cur, left, level, lbp);
2191        if (error)
2192                goto error0;
2193#endif
2194
2195        /* If we've got no right sibling then we can't shift an entry right. */
2196        xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2197        if (xfs_btree_ptr_is_null(cur, &rptr))
2198                goto out0;
2199
2200        /*
2201         * If the cursor entry is the one that would be moved, don't
2202         * do it... it's too complicated.
2203         */
2204        lrecs = xfs_btree_get_numrecs(left);
2205        if (cur->bc_ptrs[level] >= lrecs)
2206                goto out0;
2207
2208        /* Set up the right neighbor as "right". */
2209        error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2210        if (error)
2211                goto error0;
2212
2213        /* If it's full, it can't take another entry. */
2214        rrecs = xfs_btree_get_numrecs(right);
2215        if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2216                goto out0;
2217
2218        XFS_BTREE_STATS_INC(cur, rshift);
2219        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2220
2221        /*
2222         * Make a hole at the start of the right neighbor block, then
2223         * copy the last left block entry to the hole.
2224         */
2225        if (level > 0) {
2226                /* It's a nonleaf. make a hole in the keys and ptrs */
2227                union xfs_btree_key     *lkp;
2228                union xfs_btree_ptr     *lpp;
2229                union xfs_btree_ptr     *rpp;
2230
2231                lkp = xfs_btree_key_addr(cur, lrecs, left);
2232                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2233                rkp = xfs_btree_key_addr(cur, 1, right);
2234                rpp = xfs_btree_ptr_addr(cur, 1, right);
2235
2236#ifdef DEBUG
2237                for (i = rrecs - 1; i >= 0; i--) {
2238                        error = xfs_btree_check_ptr(cur, rpp, i, level);
2239                        if (error)
2240                                goto error0;
2241                }
2242#endif
2243
2244                xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2245                xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2246
2247#ifdef DEBUG
2248                error = xfs_btree_check_ptr(cur, lpp, 0, level);
2249                if (error)
2250                        goto error0;
2251#endif
2252
2253                /* Now put the new data in, and log it. */
2254                xfs_btree_copy_keys(cur, rkp, lkp, 1);
2255                xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2256
2257                xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2258                xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2259
2260                ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2261                        xfs_btree_key_addr(cur, 2, right)));
2262        } else {
2263                /* It's a leaf. make a hole in the records */
2264                union xfs_btree_rec     *lrp;
2265                union xfs_btree_rec     *rrp;
2266
2267                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2268                rrp = xfs_btree_rec_addr(cur, 1, right);
2269
2270                xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2271
2272                /* Now put the new data in, and log it. */
2273                xfs_btree_copy_recs(cur, rrp, lrp, 1);
2274                xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2275
2276                cur->bc_ops->init_key_from_rec(&key, rrp);
2277                rkp = &key;
2278
2279                ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2280                        xfs_btree_rec_addr(cur, 2, right)));
2281        }
2282
2283        /*
2284         * Decrement and log left's numrecs, bump and log right's numrecs.
2285         */
2286        xfs_btree_set_numrecs(left, --lrecs);
2287        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2288
2289        xfs_btree_set_numrecs(right, ++rrecs);
2290        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2291
2292        /*
2293         * Using a temporary cursor, update the parent key values of the
2294         * block on the right.
2295         */
2296        error = xfs_btree_dup_cursor(cur, &tcur);
2297        if (error)
2298                goto error0;
2299        i = xfs_btree_lastrec(tcur, level);
2300        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2301
2302        error = xfs_btree_increment(tcur, level, &i);
2303        if (error)
2304                goto error1;
2305
2306        error = xfs_btree_updkey(tcur, rkp, level + 1);
2307        if (error)
2308                goto error1;
2309
2310        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2311
2312        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2313        *stat = 1;
2314        return 0;
2315
2316out0:
2317        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2318        *stat = 0;
2319        return 0;
2320
2321error0:
2322        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2323        return error;
2324
2325error1:
2326        XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2327        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2328        return error;
2329}
2330
2331/*
2332 * Split cur/level block in half.
2333 * Return new block number and the key to its first
2334 * record (to be inserted into parent).
2335 */
2336STATIC int                                      /* error */
2337xfs_btree_split(
2338        struct xfs_btree_cur    *cur,
2339        int                     level,
2340        union xfs_btree_ptr     *ptrp,
2341        union xfs_btree_key     *key,
2342        struct xfs_btree_cur    **curp,
2343        int                     *stat)          /* success/failure */
2344{
2345        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2346        struct xfs_buf          *lbp;           /* left buffer pointer */
2347        struct xfs_btree_block  *left;          /* left btree block */
2348        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2349        struct xfs_buf          *rbp;           /* right buffer pointer */
2350        struct xfs_btree_block  *right;         /* right btree block */
2351        union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2352        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2353        struct xfs_btree_block  *rrblock;       /* right-right btree block */
2354        int                     lrecs;
2355        int                     rrecs;
2356        int                     src_index;
2357        int                     error;          /* error return value */
2358#ifdef DEBUG
2359        int                     i;
2360#endif
2361
2362        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2363        XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2364
2365        XFS_BTREE_STATS_INC(cur, split);
2366
2367        /* Set up left block (current one). */
2368        left = xfs_btree_get_block(cur, level, &lbp);
2369
2370#ifdef DEBUG
2371        error = xfs_btree_check_block(cur, left, level, lbp);
2372        if (error)
2373                goto error0;
2374#endif
2375
2376        xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2377
2378        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2379        error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2380        if (error)
2381                goto error0;
2382        if (*stat == 0)
2383                goto out0;
2384        XFS_BTREE_STATS_INC(cur, alloc);
2385
2386        /* Set up the new block as "right". */
2387        error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2388        if (error)
2389                goto error0;
2390
2391        /* Fill in the btree header for the new right block. */
2392        xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2393
2394        /*
2395         * Split the entries between the old and the new block evenly.
2396         * Make sure that if there's an odd number of entries now, that
2397         * each new block will have the same number of entries.
2398         */
2399        lrecs = xfs_btree_get_numrecs(left);
2400        rrecs = lrecs / 2;
2401        if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2402                rrecs++;
2403        src_index = (lrecs - rrecs + 1);
2404
2405        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2406
2407        /*
2408         * Copy btree block entries from the left block over to the
2409         * new block, the right. Update the right block and log the
2410         * changes.
2411         */
2412        if (level > 0) {
2413                /* It's a non-leaf.  Move keys and pointers. */
2414                union xfs_btree_key     *lkp;   /* left btree key */
2415                union xfs_btree_ptr     *lpp;   /* left address pointer */
2416                union xfs_btree_key     *rkp;   /* right btree key */
2417                union xfs_btree_ptr     *rpp;   /* right address pointer */
2418
2419                lkp = xfs_btree_key_addr(cur, src_index, left);
2420                lpp = xfs_btree_ptr_addr(cur, src_index, left);
2421                rkp = xfs_btree_key_addr(cur, 1, right);
2422                rpp = xfs_btree_ptr_addr(cur, 1, right);
2423
2424#ifdef DEBUG
2425                for (i = src_index; i < rrecs; i++) {
2426                        error = xfs_btree_check_ptr(cur, lpp, i, level);
2427                        if (error)
2428                                goto error0;
2429                }
2430#endif
2431
2432                xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2433                xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2434
2435                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2436                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2437
2438                /* Grab the keys to the entries moved to the right block */
2439                xfs_btree_copy_keys(cur, key, rkp, 1);
2440        } else {
2441                /* It's a leaf.  Move records.  */
2442                union xfs_btree_rec     *lrp;   /* left record pointer */
2443                union xfs_btree_rec     *rrp;   /* right record pointer */
2444
2445                lrp = xfs_btree_rec_addr(cur, src_index, left);
2446                rrp = xfs_btree_rec_addr(cur, 1, right);
2447
2448                xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2449                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2450
2451                cur->bc_ops->init_key_from_rec(key,
2452                        xfs_btree_rec_addr(cur, 1, right));
2453        }
2454
2455
2456        /*
2457         * Find the left block number by looking in the buffer.
2458         * Adjust numrecs, sibling pointers.
2459         */
2460        xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2461        xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2462        xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2463        xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2464
2465        lrecs -= rrecs;
2466        xfs_btree_set_numrecs(left, lrecs);
2467        xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2468
2469        xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2470        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2471
2472        /*
2473         * If there's a block to the new block's right, make that block
2474         * point back to right instead of to left.
2475         */
2476        if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2477                error = xfs_btree_read_buf_block(cur, &rrptr, level,
2478                                                        0, &rrblock, &rrbp);
2479                if (error)
2480                        goto error0;
2481                xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2482                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2483        }
2484        /*
2485         * If the cursor is really in the right block, move it there.
2486         * If it's just pointing past the last entry in left, then we'll
2487         * insert there, so don't change anything in that case.
2488         */
2489        if (cur->bc_ptrs[level] > lrecs + 1) {
2490                xfs_btree_setbuf(cur, level, rbp);
2491                cur->bc_ptrs[level] -= lrecs;
2492        }
2493        /*
2494         * If there are more levels, we'll need another cursor which refers
2495         * the right block, no matter where this cursor was.
2496         */
2497        if (level + 1 < cur->bc_nlevels) {
2498                error = xfs_btree_dup_cursor(cur, curp);
2499                if (error)
2500                        goto error0;
2501                (*curp)->bc_ptrs[level + 1]++;
2502        }
2503        *ptrp = rptr;
2504        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2505        *stat = 1;
2506        return 0;
2507out0:
2508        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2509        *stat = 0;
2510        return 0;
2511
2512error0:
2513        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2514        return error;
2515}
2516
2517/*
2518 * Copy the old inode root contents into a real block and make the
2519 * broot point to it.
2520 */
2521int                                             /* error */
2522xfs_btree_new_iroot(
2523        struct xfs_btree_cur    *cur,           /* btree cursor */
2524        int                     *logflags,      /* logging flags for inode */
2525        int                     *stat)          /* return status - 0 fail */
2526{
2527        struct xfs_buf          *cbp;           /* buffer for cblock */
2528        struct xfs_btree_block  *block;         /* btree block */
2529        struct xfs_btree_block  *cblock;        /* child btree block */
2530        union xfs_btree_key     *ckp;           /* child key pointer */
2531        union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2532        union xfs_btree_key     *kp;            /* pointer to btree key */
2533        union xfs_btree_ptr     *pp;            /* pointer to block addr */
2534        union xfs_btree_ptr     nptr;           /* new block addr */
2535        int                     level;          /* btree level */
2536        int                     error;          /* error return code */
2537#ifdef DEBUG
2538        int                     i;              /* loop counter */
2539#endif
2540
2541        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2542        XFS_BTREE_STATS_INC(cur, newroot);
2543
2544        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2545
2546        level = cur->bc_nlevels - 1;
2547
2548        block = xfs_btree_get_iroot(cur);
2549        pp = xfs_btree_ptr_addr(cur, 1, block);
2550
2551        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2552        error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2553        if (error)
2554                goto error0;
2555        if (*stat == 0) {
2556                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2557                return 0;
2558        }
2559        XFS_BTREE_STATS_INC(cur, alloc);
2560
2561        /* Copy the root into a real block. */
2562        error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2563        if (error)
2564                goto error0;
2565
2566        /*
2567         * we can't just memcpy() the root in for CRC enabled btree blocks.
2568         * In that case have to also ensure the blkno remains correct
2569         */
2570        memcpy(cblock, block, xfs_btree_block_len(cur));
2571        if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2572                if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2573                        cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2574                else
2575                        cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2576        }
2577
2578        be16_add_cpu(&block->bb_level, 1);
2579        xfs_btree_set_numrecs(block, 1);
2580        cur->bc_nlevels++;
2581        cur->bc_ptrs[level + 1] = 1;
2582
2583        kp = xfs_btree_key_addr(cur, 1, block);
2584        ckp = xfs_btree_key_addr(cur, 1, cblock);
2585        xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2586
2587        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2588#ifdef DEBUG
2589        for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2590                error = xfs_btree_check_ptr(cur, pp, i, level);
2591                if (error)
2592                        goto error0;
2593        }
2594#endif
2595        xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2596
2597#ifdef DEBUG
2598        error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2599        if (error)
2600                goto error0;
2601#endif
2602        xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2603
2604        xfs_iroot_realloc(cur->bc_private.b.ip,
2605                          1 - xfs_btree_get_numrecs(cblock),
2606                          cur->bc_private.b.whichfork);
2607
2608        xfs_btree_setbuf(cur, level, cbp);
2609
2610        /*
2611         * Do all this logging at the end so that
2612         * the root is at the right level.
2613         */
2614        xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2615        xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2616        xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2617
2618        *logflags |=
2619                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2620        *stat = 1;
2621        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2622        return 0;
2623error0:
2624        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2625        return error;
2626}
2627
2628/*
2629 * Allocate a new root block, fill it in.
2630 */
2631STATIC int                              /* error */
2632xfs_btree_new_root(
2633        struct xfs_btree_cur    *cur,   /* btree cursor */
2634        int                     *stat)  /* success/failure */
2635{
2636        struct xfs_btree_block  *block; /* one half of the old root block */
2637        struct xfs_buf          *bp;    /* buffer containing block */
2638        int                     error;  /* error return value */
2639        struct xfs_buf          *lbp;   /* left buffer pointer */
2640        struct xfs_btree_block  *left;  /* left btree block */
2641        struct xfs_buf          *nbp;   /* new (root) buffer */
2642        struct xfs_btree_block  *new;   /* new (root) btree block */
2643        int                     nptr;   /* new value for key index, 1 or 2 */
2644        struct xfs_buf          *rbp;   /* right buffer pointer */
2645        struct xfs_btree_block  *right; /* right btree block */
2646        union xfs_btree_ptr     rptr;
2647        union xfs_btree_ptr     lptr;
2648
2649        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2650        XFS_BTREE_STATS_INC(cur, newroot);
2651
2652        /* initialise our start point from the cursor */
2653        cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2654
2655        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2656        error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2657        if (error)
2658                goto error0;
2659        if (*stat == 0)
2660                goto out0;
2661        XFS_BTREE_STATS_INC(cur, alloc);
2662
2663        /* Set up the new block. */
2664        error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2665        if (error)
2666                goto error0;
2667
2668        /* Set the root in the holding structure  increasing the level by 1. */
2669        cur->bc_ops->set_root(cur, &lptr, 1);
2670
2671        /*
2672         * At the previous root level there are now two blocks: the old root,
2673         * and the new block generated when it was split.  We don't know which
2674         * one the cursor is pointing at, so we set up variables "left" and
2675         * "right" for each case.
2676         */
2677        block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2678
2679#ifdef DEBUG
2680        error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2681        if (error)
2682                goto error0;
2683#endif
2684
2685        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2686        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2687                /* Our block is left, pick up the right block. */
2688                lbp = bp;
2689                xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2690                left = block;
2691                error = xfs_btree_read_buf_block(cur, &rptr,
2692                                        cur->bc_nlevels - 1, 0, &right, &rbp);
2693                if (error)
2694                        goto error0;
2695                bp = rbp;
2696                nptr = 1;
2697        } else {
2698                /* Our block is right, pick up the left block. */
2699                rbp = bp;
2700                xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2701                right = block;
2702                xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2703                error = xfs_btree_read_buf_block(cur, &lptr,
2704                                        cur->bc_nlevels - 1, 0, &left, &lbp);
2705                if (error)
2706                        goto error0;
2707                bp = lbp;
2708                nptr = 2;
2709        }
2710        /* Fill in the new block's btree header and log it. */
2711        xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2712        xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2713        ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2714                        !xfs_btree_ptr_is_null(cur, &rptr));
2715
2716        /* Fill in the key data in the new root. */
2717        if (xfs_btree_get_level(left) > 0) {
2718                xfs_btree_copy_keys(cur,
2719                                xfs_btree_key_addr(cur, 1, new),
2720                                xfs_btree_key_addr(cur, 1, left), 1);
2721                xfs_btree_copy_keys(cur,
2722                                xfs_btree_key_addr(cur, 2, new),
2723                                xfs_btree_key_addr(cur, 1, right), 1);
2724        } else {
2725                cur->bc_ops->init_key_from_rec(
2726                                xfs_btree_key_addr(cur, 1, new),
2727                                xfs_btree_rec_addr(cur, 1, left));
2728                cur->bc_ops->init_key_from_rec(
2729                                xfs_btree_key_addr(cur, 2, new),
2730                                xfs_btree_rec_addr(cur, 1, right));
2731        }
2732        xfs_btree_log_keys(cur, nbp, 1, 2);
2733
2734        /* Fill in the pointer data in the new root. */
2735        xfs_btree_copy_ptrs(cur,
2736                xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2737        xfs_btree_copy_ptrs(cur,
2738                xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2739        xfs_btree_log_ptrs(cur, nbp, 1, 2);
2740
2741        /* Fix up the cursor. */
2742        xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2743        cur->bc_ptrs[cur->bc_nlevels] = nptr;
2744        cur->bc_nlevels++;
2745        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2746        *stat = 1;
2747        return 0;
2748error0:
2749        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2750        return error;
2751out0:
2752        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2753        *stat = 0;
2754        return 0;
2755}
2756
2757STATIC int
2758xfs_btree_make_block_unfull(
2759        struct xfs_btree_cur    *cur,   /* btree cursor */
2760        int                     level,  /* btree level */
2761        int                     numrecs,/* # of recs in block */
2762        int                     *oindex,/* old tree index */
2763        int                     *index, /* new tree index */
2764        union xfs_btree_ptr     *nptr,  /* new btree ptr */
2765        struct xfs_btree_cur    **ncur, /* new btree cursor */
2766        union xfs_btree_rec     *nrec,  /* new record */
2767        int                     *stat)
2768{
2769        union xfs_btree_key     key;    /* new btree key value */
2770        int                     error = 0;
2771
2772        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2773            level == cur->bc_nlevels - 1) {
2774                struct xfs_inode *ip = cur->bc_private.b.ip;
2775
2776                if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2777                        /* A root block that can be made bigger. */
2778                        xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2779                } else {
2780                        /* A root block that needs replacing */
2781                        int     logflags = 0;
2782
2783                        error = xfs_btree_new_iroot(cur, &logflags, stat);
2784                        if (error || *stat == 0)
2785                                return error;
2786
2787                        xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2788                }
2789
2790                return 0;
2791        }
2792
2793        /* First, try shifting an entry to the right neighbor. */
2794        error = xfs_btree_rshift(cur, level, stat);
2795        if (error || *stat)
2796                return error;
2797
2798        /* Next, try shifting an entry to the left neighbor. */
2799        error = xfs_btree_lshift(cur, level, stat);
2800        if (error)
2801                return error;
2802
2803        if (*stat) {
2804                *oindex = *index = cur->bc_ptrs[level];
2805                return 0;
2806        }
2807
2808        /*
2809         * Next, try splitting the current block in half.
2810         *
2811         * If this works we have to re-set our variables because we
2812         * could be in a different block now.
2813         */
2814        error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2815        if (error || *stat == 0)
2816                return error;
2817
2818
2819        *index = cur->bc_ptrs[level];
2820        cur->bc_ops->init_rec_from_key(&key, nrec);
2821        return 0;
2822}
2823
2824/*
2825 * Insert one record/level.  Return information to the caller
2826 * allowing the next level up to proceed if necessary.
2827 */
2828STATIC int
2829xfs_btree_insrec(
2830        struct xfs_btree_cur    *cur,   /* btree cursor */
2831        int                     level,  /* level to insert record at */
2832        union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2833        union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2834        struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2835        int                     *stat)  /* success/failure */
2836{
2837        struct xfs_btree_block  *block; /* btree block */
2838        struct xfs_buf          *bp;    /* buffer for block */
2839        union xfs_btree_key     key;    /* btree key */
2840        union xfs_btree_ptr     nptr;   /* new block ptr */
2841        struct xfs_btree_cur    *ncur;  /* new btree cursor */
2842        union xfs_btree_rec     nrec;   /* new record count */
2843        int                     optr;   /* old key/record index */
2844        int                     ptr;    /* key/record index */
2845        int                     numrecs;/* number of records */
2846        int                     error;  /* error return value */
2847#ifdef DEBUG
2848        int                     i;
2849#endif
2850
2851        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2852        XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2853
2854        ncur = NULL;
2855
2856        /*
2857         * If we have an external root pointer, and we've made it to the
2858         * root level, allocate a new root block and we're done.
2859         */
2860        if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2861            (level >= cur->bc_nlevels)) {
2862                error = xfs_btree_new_root(cur, stat);
2863                xfs_btree_set_ptr_null(cur, ptrp);
2864
2865                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2866                return error;
2867        }
2868
2869        /* If we're off the left edge, return failure. */
2870        ptr = cur->bc_ptrs[level];
2871        if (ptr == 0) {
2872                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2873                *stat = 0;
2874                return 0;
2875        }
2876
2877        /* Make a key out of the record data to be inserted, and save it. */
2878        cur->bc_ops->init_key_from_rec(&key, recp);
2879
2880        optr = ptr;
2881
2882        XFS_BTREE_STATS_INC(cur, insrec);
2883
2884        /* Get pointers to the btree buffer and block. */
2885        block = xfs_btree_get_block(cur, level, &bp);
2886        numrecs = xfs_btree_get_numrecs(block);
2887
2888#ifdef DEBUG
2889        error = xfs_btree_check_block(cur, block, level, bp);
2890        if (error)
2891                goto error0;
2892
2893        /* Check that the new entry is being inserted in the right place. */
2894        if (ptr <= numrecs) {
2895                if (level == 0) {
2896                        ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2897                                xfs_btree_rec_addr(cur, ptr, block)));
2898                } else {
2899                        ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2900                                xfs_btree_key_addr(cur, ptr, block)));
2901                }
2902        }
2903#endif
2904
2905        /*
2906         * If the block is full, we can't insert the new entry until we
2907         * make the block un-full.
2908         */
2909        xfs_btree_set_ptr_null(cur, &nptr);
2910        if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2911                error = xfs_btree_make_block_unfull(cur, level, numrecs,
2912                                        &optr, &ptr, &nptr, &ncur, &nrec, stat);
2913                if (error || *stat == 0)
2914                        goto error0;
2915        }
2916
2917        /*
2918         * The current block may have changed if the block was
2919         * previously full and we have just made space in it.
2920         */
2921        block = xfs_btree_get_block(cur, level, &bp);
2922        numrecs = xfs_btree_get_numrecs(block);
2923
2924#ifdef DEBUG
2925        error = xfs_btree_check_block(cur, block, level, bp);
2926        if (error)
2927                return error;
2928#endif
2929
2930        /*
2931         * At this point we know there's room for our new entry in the block
2932         * we're pointing at.
2933         */
2934        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2935
2936        if (level > 0) {
2937                /* It's a nonleaf. make a hole in the keys and ptrs */
2938                union xfs_btree_key     *kp;
2939                union xfs_btree_ptr     *pp;
2940
2941                kp = xfs_btree_key_addr(cur, ptr, block);
2942                pp = xfs_btree_ptr_addr(cur, ptr, block);
2943
2944#ifdef DEBUG
2945                for (i = numrecs - ptr; i >= 0; i--) {
2946                        error = xfs_btree_check_ptr(cur, pp, i, level);
2947                        if (error)
2948                                return error;
2949                }
2950#endif
2951
2952                xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2953                xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2954
2955#ifdef DEBUG
2956                error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2957                if (error)
2958                        goto error0;
2959#endif
2960
2961                /* Now put the new data in, bump numrecs and log it. */
2962                xfs_btree_copy_keys(cur, kp, &key, 1);
2963                xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2964                numrecs++;
2965                xfs_btree_set_numrecs(block, numrecs);
2966                xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2967                xfs_btree_log_keys(cur, bp, ptr, numrecs);
2968#ifdef DEBUG
2969                if (ptr < numrecs) {
2970                        ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2971                                xfs_btree_key_addr(cur, ptr + 1, block)));
2972                }
2973#endif
2974        } else {
2975                /* It's a leaf. make a hole in the records */
2976                union xfs_btree_rec             *rp;
2977
2978                rp = xfs_btree_rec_addr(cur, ptr, block);
2979
2980                xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2981
2982                /* Now put the new data in, bump numrecs and log it. */
2983                xfs_btree_copy_recs(cur, rp, recp, 1);
2984                xfs_btree_set_numrecs(block, ++numrecs);
2985                xfs_btree_log_recs(cur, bp, ptr, numrecs);
2986#ifdef DEBUG
2987                if (ptr < numrecs) {
2988                        ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2989                                xfs_btree_rec_addr(cur, ptr + 1, block)));
2990                }
2991#endif
2992        }
2993
2994        /* Log the new number of records in the btree header. */
2995        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2996
2997        /* If we inserted at the start of a block, update the parents' keys. */
2998        if (optr == 1) {
2999                error = xfs_btree_updkey(cur, &key, level + 1);
3000                if (error)
3001                        goto error0;
3002        }
3003
3004        /*
3005         * If we are tracking the last record in the tree and
3006         * we are at the far right edge of the tree, update it.
3007         */
3008        if (xfs_btree_is_lastrec(cur, block, level)) {
3009                cur->bc_ops->update_lastrec(cur, block, recp,
3010                                            ptr, LASTREC_INSREC);
3011        }
3012
3013        /*
3014         * Return the new block number, if any.
3015         * If there is one, give back a record value and a cursor too.
3016         */
3017        *ptrp = nptr;
3018        if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3019                *recp = nrec;
3020                *curp = ncur;
3021        }
3022
3023        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3024        *stat = 1;
3025        return 0;
3026
3027error0:
3028        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3029        return error;
3030}
3031
3032/*
3033 * Insert the record at the point referenced by cur.
3034 *
3035 * A multi-level split of the tree on insert will invalidate the original
3036 * cursor.  All callers of this function should assume that the cursor is
3037 * no longer valid and revalidate it.
3038 */
3039int
3040xfs_btree_insert(
3041        struct xfs_btree_cur    *cur,
3042        int                     *stat)
3043{
3044        int                     error;  /* error return value */
3045        int                     i;      /* result value, 0 for failure */
3046        int                     level;  /* current level number in btree */
3047        union xfs_btree_ptr     nptr;   /* new block number (split result) */
3048        struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
3049        struct xfs_btree_cur    *pcur;  /* previous level's cursor */
3050        union xfs_btree_rec     rec;    /* record to insert */
3051
3052        level = 0;
3053        ncur = NULL;
3054        pcur = cur;
3055
3056        xfs_btree_set_ptr_null(cur, &nptr);
3057        cur->bc_ops->init_rec_from_cur(cur, &rec);
3058
3059        /*
3060         * Loop going up the tree, starting at the leaf level.
3061         * Stop when we don't get a split block, that must mean that
3062         * the insert is finished with this level.
3063         */
3064        do {
3065                /*
3066                 * Insert nrec/nptr into this level of the tree.
3067                 * Note if we fail, nptr will be null.
3068                 */
3069                error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
3070                if (error) {
3071                        if (pcur != cur)
3072                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3073                        goto error0;
3074                }
3075
3076                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3077                level++;
3078
3079                /*
3080                 * See if the cursor we just used is trash.
3081                 * Can't trash the caller's cursor, but otherwise we should
3082                 * if ncur is a new cursor or we're about to be done.
3083                 */
3084                if (pcur != cur &&
3085                    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3086                        /* Save the state from the cursor before we trash it */
3087                        if (cur->bc_ops->update_cursor)
3088                                cur->bc_ops->update_cursor(pcur, cur);
3089                        cur->bc_nlevels = pcur->bc_nlevels;
3090                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3091                }
3092                /* If we got a new cursor, switch to it. */
3093                if (ncur) {
3094                        pcur = ncur;
3095                        ncur = NULL;
3096                }
3097        } while (!xfs_btree_ptr_is_null(cur, &nptr));
3098
3099        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3100        *stat = i;
3101        return 0;
3102error0:
3103        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3104        return error;
3105}
3106
3107/*
3108 * Try to merge a non-leaf block back into the inode root.
3109 *
3110 * Note: the killroot names comes from the fact that we're effectively
3111 * killing the old root block.  But because we can't just delete the
3112 * inode we have to copy the single block it was pointing to into the
3113 * inode.
3114 */
3115STATIC int
3116xfs_btree_kill_iroot(
3117        struct xfs_btree_cur    *cur)
3118{
3119        int                     whichfork = cur->bc_private.b.whichfork;
3120        struct xfs_inode        *ip = cur->bc_private.b.ip;
3121        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3122        struct xfs_btree_block  *block;
3123        struct xfs_btree_block  *cblock;
3124        union xfs_btree_key     *kp;
3125        union xfs_btree_key     *ckp;
3126        union xfs_btree_ptr     *pp;
3127        union xfs_btree_ptr     *cpp;
3128        struct xfs_buf          *cbp;
3129        int                     level;
3130        int                     index;
3131        int                     numrecs;
3132#ifdef DEBUG
3133        union xfs_btree_ptr     ptr;
3134        int                     i;
3135#endif
3136
3137        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3138
3139        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3140        ASSERT(cur->bc_nlevels > 1);
3141
3142        /*
3143         * Don't deal with the root block needs to be a leaf case.
3144         * We're just going to turn the thing back into extents anyway.
3145         */
3146        level = cur->bc_nlevels - 1;
3147        if (level == 1)
3148                goto out0;
3149
3150        /*
3151         * Give up if the root has multiple children.
3152         */
3153        block = xfs_btree_get_iroot(cur);
3154        if (xfs_btree_get_numrecs(block) != 1)
3155                goto out0;
3156
3157        cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3158        numrecs = xfs_btree_get_numrecs(cblock);
3159
3160        /*
3161         * Only do this if the next level will fit.
3162         * Then the data must be copied up to the inode,
3163         * instead of freeing the root you free the next level.
3164         */
3165        if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3166                goto out0;
3167
3168        XFS_BTREE_STATS_INC(cur, killroot);
3169
3170#ifdef DEBUG
3171        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3172        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3173        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3174        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3175#endif
3176
3177        index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3178        if (index) {
3179                xfs_iroot_realloc(cur->bc_private.b.ip, index,
3180                                  cur->bc_private.b.whichfork);
3181                block = ifp->if_broot;
3182        }
3183
3184        be16_add_cpu(&block->bb_numrecs, index);
3185        ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3186
3187        kp = xfs_btree_key_addr(cur, 1, block);
3188        ckp = xfs_btree_key_addr(cur, 1, cblock);
3189        xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3190
3191        pp = xfs_btree_ptr_addr(cur, 1, block);
3192        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3193#ifdef DEBUG
3194        for (i = 0; i < numrecs; i++) {
3195                int             error;
3196
3197                error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3198                if (error) {
3199                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3200                        return error;
3201                }
3202        }
3203#endif
3204        xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3205
3206        cur->bc_ops->free_block(cur, cbp);
3207        XFS_BTREE_STATS_INC(cur, free);
3208
3209        cur->bc_bufs[level - 1] = NULL;
3210        be16_add_cpu(&block->bb_level, -1);
3211        xfs_trans_log_inode(cur->bc_tp, ip,
3212                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3213        cur->bc_nlevels--;
3214out0:
3215        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3216        return 0;
3217}
3218
3219/*
3220 * Kill the current root node, and replace it with it's only child node.
3221 */
3222STATIC int
3223xfs_btree_kill_root(
3224        struct xfs_btree_cur    *cur,
3225        struct xfs_buf          *bp,
3226        int                     level,
3227        union xfs_btree_ptr     *newroot)
3228{
3229        int                     error;
3230
3231        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3232        XFS_BTREE_STATS_INC(cur, killroot);
3233
3234        /*
3235         * Update the root pointer, decreasing the level by 1 and then
3236         * free the old root.
3237         */
3238        cur->bc_ops->set_root(cur, newroot, -1);
3239
3240        error = cur->bc_ops->free_block(cur, bp);
3241        if (error) {
3242                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3243                return error;
3244        }
3245
3246        XFS_BTREE_STATS_INC(cur, free);
3247
3248        cur->bc_bufs[level] = NULL;
3249        cur->bc_ra[level] = 0;
3250        cur->bc_nlevels--;
3251
3252        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3253        return 0;
3254}
3255
3256STATIC int
3257xfs_btree_dec_cursor(
3258        struct xfs_btree_cur    *cur,
3259        int                     level,
3260        int                     *stat)
3261{
3262        int                     error;
3263        int                     i;
3264
3265        if (level > 0) {
3266                error = xfs_btree_decrement(cur, level, &i);
3267                if (error)
3268                        return error;
3269        }
3270
3271        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3272        *stat = 1;
3273        return 0;
3274}
3275
3276/*
3277 * Single level of the btree record deletion routine.
3278 * Delete record pointed to by cur/level.
3279 * Remove the record from its block then rebalance the tree.
3280 * Return 0 for error, 1 for done, 2 to go on to the next level.
3281 */
3282STATIC int                                      /* error */
3283xfs_btree_delrec(
3284        struct xfs_btree_cur    *cur,           /* btree cursor */
3285        int                     level,          /* level removing record from */
3286        int                     *stat)          /* fail/done/go-on */
3287{
3288        struct xfs_btree_block  *block;         /* btree block */
3289        union xfs_btree_ptr     cptr;           /* current block ptr */
3290        struct xfs_buf          *bp;            /* buffer for block */
3291        int                     error;          /* error return value */
3292        int                     i;              /* loop counter */
3293        union xfs_btree_key     key;            /* storage for keyp */
3294        union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3295        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3296        struct xfs_buf          *lbp;           /* left buffer pointer */
3297        struct xfs_btree_block  *left;          /* left btree block */
3298        int                     lrecs = 0;      /* left record count */
3299        int                     ptr;            /* key/record index */
3300        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3301        struct xfs_buf          *rbp;           /* right buffer pointer */
3302        struct xfs_btree_block  *right;         /* right btree block */
3303        struct xfs_btree_block  *rrblock;       /* right-right btree block */
3304        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3305        int                     rrecs = 0;      /* right record count */
3306        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3307        int                     numrecs;        /* temporary numrec count */
3308
3309        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3310        XFS_BTREE_TRACE_ARGI(cur, level);
3311
3312        tcur = NULL;
3313
3314        /* Get the index of the entry being deleted, check for nothing there. */
3315        ptr = cur->bc_ptrs[level];
3316        if (ptr == 0) {
3317                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3318                *stat = 0;
3319                return 0;
3320        }
3321
3322        /* Get the buffer & block containing the record or key/ptr. */
3323        block = xfs_btree_get_block(cur, level, &bp);
3324        numrecs = xfs_btree_get_numrecs(block);
3325
3326#ifdef DEBUG
3327        error = xfs_btree_check_block(cur, block, level, bp);
3328        if (error)
3329                goto error0;
3330#endif
3331
3332        /* Fail if we're off the end of the block. */
3333        if (ptr > numrecs) {
3334                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3335                *stat = 0;
3336                return 0;
3337        }
3338
3339        XFS_BTREE_STATS_INC(cur, delrec);
3340        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3341
3342        /* Excise the entries being deleted. */
3343        if (level > 0) {
3344                /* It's a nonleaf. operate on keys and ptrs */
3345                union xfs_btree_key     *lkp;
3346                union xfs_btree_ptr     *lpp;
3347
3348                lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3349                lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3350
3351#ifdef DEBUG
3352                for (i = 0; i < numrecs - ptr; i++) {
3353                        error = xfs_btree_check_ptr(cur, lpp, i, level);
3354                        if (error)
3355                                goto error0;
3356                }
3357#endif
3358
3359                if (ptr < numrecs) {
3360                        xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3361                        xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3362                        xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3363                        xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3364                }
3365
3366                /*
3367                 * If it's the first record in the block, we'll need to pass a
3368                 * key up to the next level (updkey).
3369                 */
3370                if (ptr == 1)
3371                        keyp = xfs_btree_key_addr(cur, 1, block);
3372        } else {
3373                /* It's a leaf. operate on records */
3374                if (ptr < numrecs) {
3375                        xfs_btree_shift_recs(cur,
3376                                xfs_btree_rec_addr(cur, ptr + 1, block),
3377                                -1, numrecs - ptr);
3378                        xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3379                }
3380
3381                /*
3382                 * If it's the first record in the block, we'll need a key
3383                 * structure to pass up to the next level (updkey).
3384                 */
3385                if (ptr == 1) {
3386                        cur->bc_ops->init_key_from_rec(&key,
3387                                        xfs_btree_rec_addr(cur, 1, block));
3388                        keyp = &key;
3389                }
3390        }
3391
3392        /*
3393         * Decrement and log the number of entries in the block.
3394         */
3395        xfs_btree_set_numrecs(block, --numrecs);
3396        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3397
3398        /*
3399         * If we are tracking the last record in the tree and
3400         * we are at the far right edge of the tree, update it.
3401         */
3402        if (xfs_btree_is_lastrec(cur, block, level)) {
3403                cur->bc_ops->update_lastrec(cur, block, NULL,
3404                                            ptr, LASTREC_DELREC);
3405        }
3406
3407        /*
3408         * We're at the root level.  First, shrink the root block in-memory.
3409         * Try to get rid of the next level down.  If we can't then there's
3410         * nothing left to do.
3411         */
3412        if (level == cur->bc_nlevels - 1) {
3413                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3414                        xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3415                                          cur->bc_private.b.whichfork);
3416
3417                        error = xfs_btree_kill_iroot(cur);
3418                        if (error)
3419                                goto error0;
3420
3421                        error = xfs_btree_dec_cursor(cur, level, stat);
3422                        if (error)
3423                                goto error0;
3424                        *stat = 1;
3425                        return 0;
3426                }
3427
3428                /*
3429                 * If this is the root level, and there's only one entry left,
3430                 * and it's NOT the leaf level, then we can get rid of this
3431                 * level.
3432                 */
3433                if (numrecs == 1 && level > 0) {
3434                        union xfs_btree_ptr     *pp;
3435                        /*
3436                         * pp is still set to the first pointer in the block.
3437                         * Make it the new root of the btree.
3438                         */
3439                        pp = xfs_btree_ptr_addr(cur, 1, block);
3440                        error = xfs_btree_kill_root(cur, bp, level, pp);
3441                        if (error)
3442                                goto error0;
3443                } else if (level > 0) {
3444                        error = xfs_btree_dec_cursor(cur, level, stat);
3445                        if (error)
3446                                goto error0;
3447                }
3448                *stat = 1;
3449                return 0;
3450        }
3451
3452        /*
3453         * If we deleted the leftmost entry in the block, update the
3454         * key values above us in the tree.
3455         */
3456        if (ptr == 1) {
3457                error = xfs_btree_updkey(cur, keyp, level + 1);
3458                if (error)
3459                        goto error0;
3460        }
3461
3462        /*
3463         * If the number of records remaining in the block is at least
3464         * the minimum, we're done.
3465         */
3466        if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3467                error = xfs_btree_dec_cursor(cur, level, stat);
3468                if (error)
3469                        goto error0;
3470                return 0;
3471        }
3472
3473        /*
3474         * Otherwise, we have to move some records around to keep the
3475         * tree balanced.  Look at the left and right sibling blocks to
3476         * see if we can re-balance by moving only one record.
3477         */
3478        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3479        xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3480
3481        if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3482                /*
3483                 * One child of root, need to get a chance to copy its contents
3484                 * into the root and delete it. Can't go up to next level,
3485                 * there's nothing to delete there.
3486                 */
3487                if (xfs_btree_ptr_is_null(cur, &rptr) &&
3488                    xfs_btree_ptr_is_null(cur, &lptr) &&
3489                    level == cur->bc_nlevels - 2) {
3490                        error = xfs_btree_kill_iroot(cur);
3491                        if (!error)
3492                                error = xfs_btree_dec_cursor(cur, level, stat);
3493                        if (error)
3494                                goto error0;
3495                        return 0;
3496                }
3497        }
3498
3499        ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3500               !xfs_btree_ptr_is_null(cur, &lptr));
3501
3502        /*
3503         * Duplicate the cursor so our btree manipulations here won't
3504         * disrupt the next level up.
3505         */
3506        error = xfs_btree_dup_cursor(cur, &tcur);
3507        if (error)
3508                goto error0;
3509
3510        /*
3511         * If there's a right sibling, see if it's ok to shift an entry
3512         * out of it.
3513         */
3514        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3515                /*
3516                 * Move the temp cursor to the last entry in the next block.
3517                 * Actually any entry but the first would suffice.
3518                 */
3519                i = xfs_btree_lastrec(tcur, level);
3520                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3521
3522                error = xfs_btree_increment(tcur, level, &i);
3523                if (error)
3524                        goto error0;
3525                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3526
3527                i = xfs_btree_lastrec(tcur, level);
3528                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3529
3530                /* Grab a pointer to the block. */
3531                right = xfs_btree_get_block(tcur, level, &rbp);
3532#ifdef DEBUG
3533                error = xfs_btree_check_block(tcur, right, level, rbp);
3534                if (error)
3535                        goto error0;
3536#endif
3537                /* Grab the current block number, for future use. */
3538                xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3539
3540                /*
3541                 * If right block is full enough so that removing one entry
3542                 * won't make it too empty, and left-shifting an entry out
3543                 * of right to us works, we're done.
3544                 */
3545                if (xfs_btree_get_numrecs(right) - 1 >=
3546                    cur->bc_ops->get_minrecs(tcur, level)) {
3547                        error = xfs_btree_lshift(tcur, level, &i);
3548                        if (error)
3549                                goto error0;
3550                        if (i) {
3551                                ASSERT(xfs_btree_get_numrecs(block) >=
3552                                       cur->bc_ops->get_minrecs(tcur, level));
3553
3554                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3555                                tcur = NULL;
3556
3557                                error = xfs_btree_dec_cursor(cur, level, stat);
3558                                if (error)
3559                                        goto error0;
3560                                return 0;
3561                        }
3562                }
3563
3564                /*
3565                 * Otherwise, grab the number of records in right for
3566                 * future reference, and fix up the temp cursor to point
3567                 * to our block again (last record).
3568                 */
3569                rrecs = xfs_btree_get_numrecs(right);
3570                if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3571                        i = xfs_btree_firstrec(tcur, level);
3572                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3573
3574                        error = xfs_btree_decrement(tcur, level, &i);
3575                        if (error)
3576                                goto error0;
3577                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3578                }
3579        }
3580
3581        /*
3582         * If there's a left sibling, see if it's ok to shift an entry
3583         * out of it.
3584         */
3585        if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3586                /*
3587                 * Move the temp cursor to the first entry in the
3588                 * previous block.
3589                 */
3590                i = xfs_btree_firstrec(tcur, level);
3591                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3592
3593                error = xfs_btree_decrement(tcur, level, &i);
3594                if (error)
3595                        goto error0;
3596                i = xfs_btree_firstrec(tcur, level);
3597                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3598
3599                /* Grab a pointer to the block. */
3600                left = xfs_btree_get_block(tcur, level, &lbp);
3601#ifdef DEBUG
3602                error = xfs_btree_check_block(cur, left, level, lbp);
3603                if (error)
3604                        goto error0;
3605#endif
3606                /* Grab the current block number, for future use. */
3607                xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3608
3609                /*
3610                 * If left block is full enough so that removing one entry
3611                 * won't make it too empty, and right-shifting an entry out
3612                 * of left to us works, we're done.
3613                 */
3614                if (xfs_btree_get_numrecs(left) - 1 >=
3615                    cur->bc_ops->get_minrecs(tcur, level)) {
3616                        error = xfs_btree_rshift(tcur, level, &i);
3617                        if (error)
3618                                goto error0;
3619                        if (i) {
3620                                ASSERT(xfs_btree_get_numrecs(block) >=
3621                                       cur->bc_ops->get_minrecs(tcur, level));
3622                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3623                                tcur = NULL;
3624                                if (level == 0)
3625                                        cur->bc_ptrs[0]++;
3626                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3627                                *stat = 1;
3628                                return 0;
3629                        }
3630                }
3631
3632                /*
3633                 * Otherwise, grab the number of records in right for
3634                 * future reference.
3635                 */
3636                lrecs = xfs_btree_get_numrecs(left);
3637        }
3638
3639        /* Delete the temp cursor, we're done with it. */
3640        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3641        tcur = NULL;
3642
3643        /* If here, we need to do a join to keep the tree balanced. */
3644        ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3645
3646        if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3647            lrecs + xfs_btree_get_numrecs(block) <=
3648                        cur->bc_ops->get_maxrecs(cur, level)) {
3649                /*
3650                 * Set "right" to be the starting block,
3651                 * "left" to be the left neighbor.
3652                 */
3653                rptr = cptr;
3654                right = block;
3655                rbp = bp;
3656                error = xfs_btree_read_buf_block(cur, &lptr, level,
3657                                                        0, &left, &lbp);
3658                if (error)
3659                        goto error0;
3660
3661        /*
3662         * If that won't work, see if we can join with the right neighbor block.
3663         */
3664        } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3665                   rrecs + xfs_btree_get_numrecs(block) <=
3666                        cur->bc_ops->get_maxrecs(cur, level)) {
3667                /*
3668                 * Set "left" to be the starting block,
3669                 * "right" to be the right neighbor.
3670                 */
3671                lptr = cptr;
3672                left = block;
3673                lbp = bp;
3674                error = xfs_btree_read_buf_block(cur, &rptr, level,
3675                                                        0, &right, &rbp);
3676                if (error)
3677                        goto error0;
3678
3679        /*
3680         * Otherwise, we can't fix the imbalance.
3681         * Just return.  This is probably a logic error, but it's not fatal.
3682         */
3683        } else {
3684                error = xfs_btree_dec_cursor(cur, level, stat);
3685                if (error)
3686                        goto error0;
3687                return 0;
3688        }
3689
3690        rrecs = xfs_btree_get_numrecs(right);
3691        lrecs = xfs_btree_get_numrecs(left);
3692
3693        /*
3694         * We're now going to join "left" and "right" by moving all the stuff
3695         * in "right" to "left" and deleting "right".
3696         */
3697        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3698        if (level > 0) {
3699                /* It's a non-leaf.  Move keys and pointers. */
3700                union xfs_btree_key     *lkp;   /* left btree key */
3701                union xfs_btree_ptr     *lpp;   /* left address pointer */
3702                union xfs_btree_key     *rkp;   /* right btree key */
3703                union xfs_btree_ptr     *rpp;   /* right address pointer */
3704
3705                lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3706                lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3707                rkp = xfs_btree_key_addr(cur, 1, right);
3708                rpp = xfs_btree_ptr_addr(cur, 1, right);
3709#ifdef DEBUG
3710                for (i = 1; i < rrecs; i++) {
3711                        error = xfs_btree_check_ptr(cur, rpp, i, level);
3712                        if (error)
3713                                goto error0;
3714                }
3715#endif
3716                xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3717                xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3718
3719                xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3720                xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3721        } else {
3722                /* It's a leaf.  Move records.  */
3723                union xfs_btree_rec     *lrp;   /* left record pointer */
3724                union xfs_btree_rec     *rrp;   /* right record pointer */
3725
3726                lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3727                rrp = xfs_btree_rec_addr(cur, 1, right);
3728
3729                xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3730                xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3731        }
3732
3733        XFS_BTREE_STATS_INC(cur, join);
3734
3735        /*
3736         * Fix up the number of records and right block pointer in the
3737         * surviving block, and log it.
3738         */
3739        xfs_btree_set_numrecs(left, lrecs + rrecs);
3740        xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3741        xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3742        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3743
3744        /* If there is a right sibling, point it to the remaining block. */
3745        xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3746        if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3747                error = xfs_btree_read_buf_block(cur, &cptr, level,
3748                                                        0, &rrblock, &rrbp);
3749                if (error)
3750                        goto error0;
3751                xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3752                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3753        }
3754
3755        /* Free the deleted block. */
3756        error = cur->bc_ops->free_block(cur, rbp);
3757        if (error)
3758                goto error0;
3759        XFS_BTREE_STATS_INC(cur, free);
3760
3761        /*
3762         * If we joined with the left neighbor, set the buffer in the
3763         * cursor to the left block, and fix up the index.
3764         */
3765        if (bp != lbp) {
3766                cur->bc_bufs[level] = lbp;
3767                cur->bc_ptrs[level] += lrecs;
3768                cur->bc_ra[level] = 0;
3769        }
3770        /*
3771         * If we joined with the right neighbor and there's a level above
3772         * us, increment the cursor at that level.
3773         */
3774        else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3775                   (level + 1 < cur->bc_nlevels)) {
3776                error = xfs_btree_increment(cur, level + 1, &i);
3777                if (error)
3778                        goto error0;
3779        }
3780
3781        /*
3782         * Readjust the ptr at this level if it's not a leaf, since it's
3783         * still pointing at the deletion point, which makes the cursor
3784         * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3785         * We can't use decrement because it would change the next level up.
3786         */
3787        if (level > 0)
3788                cur->bc_ptrs[level]--;
3789
3790        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3791        /* Return value means the next level up has something to do. */
3792        *stat = 2;
3793        return 0;
3794
3795error0:
3796        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3797        if (tcur)
3798                xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3799        return error;
3800}
3801
3802/*
3803 * Delete the record pointed to by cur.
3804 * The cursor refers to the place where the record was (could be inserted)
3805 * when the operation returns.
3806 */
3807int                                     /* error */
3808xfs_btree_delete(
3809        struct xfs_btree_cur    *cur,
3810        int                     *stat)  /* success/failure */
3811{
3812        int                     error;  /* error return value */
3813        int                     level;
3814        int                     i;
3815
3816        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3817
3818        /*
3819         * Go up the tree, starting at leaf level.
3820         *
3821         * If 2 is returned then a join was done; go to the next level.
3822         * Otherwise we are done.
3823         */
3824        for (level = 0, i = 2; i == 2; level++) {
3825                error = xfs_btree_delrec(cur, level, &i);
3826                if (error)
3827                        goto error0;
3828        }
3829
3830        if (i == 0) {
3831                for (level = 1; level < cur->bc_nlevels; level++) {
3832                        if (cur->bc_ptrs[level] == 0) {
3833                                error = xfs_btree_decrement(cur, level, &i);
3834                                if (error)
3835                                        goto error0;
3836                                break;
3837                        }
3838                }
3839        }
3840
3841        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3842        *stat = i;
3843        return 0;
3844error0:
3845        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3846        return error;
3847}
3848
3849/*
3850 * Get the data from the pointed-to record.
3851 */
3852int                                     /* error */
3853xfs_btree_get_rec(
3854        struct xfs_btree_cur    *cur,   /* btree cursor */
3855        union xfs_btree_rec     **recp, /* output: btree record */
3856        int                     *stat)  /* output: success/failure */
3857{
3858        struct xfs_btree_block  *block; /* btree block */
3859        struct xfs_buf          *bp;    /* buffer pointer */
3860        int                     ptr;    /* record number */
3861#ifdef DEBUG
3862        int                     error;  /* error return value */
3863#endif
3864
3865        ptr = cur->bc_ptrs[0];
3866        block = xfs_btree_get_block(cur, 0, &bp);
3867
3868#ifdef DEBUG
3869        error = xfs_btree_check_block(cur, block, 0, bp);
3870        if (error)
3871                return error;
3872#endif
3873
3874        /*
3875         * Off the right end or left end, return failure.
3876         */
3877        if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3878                *stat = 0;
3879                return 0;
3880        }
3881
3882        /*
3883         * Point to the record and extract its data.
3884         */
3885        *recp = xfs_btree_rec_addr(cur, ptr, block);
3886        *stat = 1;
3887        return 0;
3888}
3889
3890/*
3891 * Change the owner of a btree.
3892 *
3893 * The mechanism we use here is ordered buffer logging. Because we don't know
3894 * how many buffers were are going to need to modify, we don't really want to
3895 * have to make transaction reservations for the worst case of every buffer in a
3896 * full size btree as that may be more space that we can fit in the log....
3897 *
3898 * We do the btree walk in the most optimal manner possible - we have sibling
3899 * pointers so we can just walk all the blocks on each level from left to right
3900 * in a single pass, and then move to the next level and do the same. We can
3901 * also do readahead on the sibling pointers to get IO moving more quickly,
3902 * though for slow disks this is unlikely to make much difference to performance
3903 * as the amount of CPU work we have to do before moving to the next block is
3904 * relatively small.
3905 *
3906 * For each btree block that we load, modify the owner appropriately, set the
3907 * buffer as an ordered buffer and log it appropriately. We need to ensure that
3908 * we mark the region we change dirty so that if the buffer is relogged in
3909 * a subsequent transaction the changes we make here as an ordered buffer are
3910 * correctly relogged in that transaction.  If we are in recovery context, then
3911 * just queue the modified buffer as delayed write buffer so the transaction
3912 * recovery completion writes the changes to disk.
3913 */
3914static int
3915xfs_btree_block_change_owner(
3916        struct xfs_btree_cur    *cur,
3917        int                     level,
3918        __uint64_t              new_owner,
3919        struct list_head        *buffer_list)
3920{
3921        struct xfs_btree_block  *block;
3922        struct xfs_buf          *bp;
3923        union xfs_btree_ptr     rptr;
3924
3925        /* do right sibling readahead */
3926        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
3927
3928        /* modify the owner */
3929        block = xfs_btree_get_block(cur, level, &bp);
3930        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3931                block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
3932        else
3933                block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
3934
3935        /*
3936         * If the block is a root block hosted in an inode, we might not have a
3937         * buffer pointer here and we shouldn't attempt to log the change as the
3938         * information is already held in the inode and discarded when the root
3939         * block is formatted into the on-disk inode fork. We still change it,
3940         * though, so everything is consistent in memory.
3941         */
3942        if (bp) {
3943                if (cur->bc_tp) {
3944                        xfs_trans_ordered_buf(cur->bc_tp, bp);
3945                        xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
3946                } else {
3947                        xfs_buf_delwri_queue(bp, buffer_list);
3948                }
3949        } else {
3950                ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3951                ASSERT(level == cur->bc_nlevels - 1);
3952        }
3953
3954        /* now read rh sibling block for next iteration */
3955        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3956        if (xfs_btree_ptr_is_null(cur, &rptr))
3957                return ENOENT;
3958
3959        return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
3960}
3961
3962int
3963xfs_btree_change_owner(
3964        struct xfs_btree_cur    *cur,
3965        __uint64_t              new_owner,
3966        struct list_head        *buffer_list)
3967{
3968        union xfs_btree_ptr     lptr;
3969        int                     level;
3970        struct xfs_btree_block  *block = NULL;
3971        int                     error = 0;
3972
3973        cur->bc_ops->init_ptr_from_cur(cur, &lptr);
3974
3975        /* for each level */
3976        for (level = cur->bc_nlevels - 1; level >= 0; level--) {
3977                /* grab the left hand block */
3978                error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
3979                if (error)
3980                        return error;
3981
3982                /* readahead the left most block for the next level down */
3983                if (level > 0) {
3984                        union xfs_btree_ptr     *ptr;
3985
3986                        ptr = xfs_btree_ptr_addr(cur, 1, block);
3987                        xfs_btree_readahead_ptr(cur, ptr, 1);
3988
3989                        /* save for the next iteration of the loop */
3990                        lptr = *ptr;
3991                }
3992
3993                /* for each buffer in the level */
3994                do {
3995                        error = xfs_btree_block_change_owner(cur, level,
3996                                                             new_owner,
3997                                                             buffer_list);
3998                } while (!error);
3999
4000                if (error != ENOENT)
4001                        return error;
4002        }
4003
4004        return 0;
4005}
4006