linux/fs/xfs/libxfs/xfs_btree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_mount.h"
  14#include "xfs_inode.h"
  15#include "xfs_trans.h"
  16#include "xfs_buf_item.h"
  17#include "xfs_btree.h"
  18#include "xfs_errortag.h"
  19#include "xfs_error.h"
  20#include "xfs_trace.h"
  21#include "xfs_alloc.h"
  22#include "xfs_log.h"
  23#include "xfs_btree_staging.h"
  24#include "xfs_ag.h"
  25#include "xfs_alloc_btree.h"
  26#include "xfs_ialloc_btree.h"
  27#include "xfs_bmap_btree.h"
  28#include "xfs_rmap_btree.h"
  29#include "xfs_refcount_btree.h"
  30
  31/*
  32 * Btree magic numbers.
  33 */
  34static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
  35        { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
  36          XFS_FIBT_MAGIC, 0 },
  37        { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
  38          XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
  39          XFS_REFC_CRC_MAGIC }
  40};
  41
  42uint32_t
  43xfs_btree_magic(
  44        int                     crc,
  45        xfs_btnum_t             btnum)
  46{
  47        uint32_t                magic = xfs_magics[crc][btnum];
  48
  49        /* Ensure we asked for crc for crc-only magics. */
  50        ASSERT(magic != 0);
  51        return magic;
  52}
  53
  54/*
  55 * These sibling pointer checks are optimised for null sibling pointers. This
  56 * happens a lot, and we don't need to byte swap at runtime if the sibling
  57 * pointer is NULL.
  58 *
  59 * These are explicitly marked at inline because the cost of calling them as
  60 * functions instead of inlining them is about 36 bytes extra code per call site
  61 * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
  62 * two sibling check functions reduces the compiled code size by over 300
  63 * bytes.
  64 */
  65static inline xfs_failaddr_t
  66xfs_btree_check_lblock_siblings(
  67        struct xfs_mount        *mp,
  68        struct xfs_btree_cur    *cur,
  69        int                     level,
  70        xfs_fsblock_t           fsb,
  71        __be64                  dsibling)
  72{
  73        xfs_fsblock_t           sibling;
  74
  75        if (dsibling == cpu_to_be64(NULLFSBLOCK))
  76                return NULL;
  77
  78        sibling = be64_to_cpu(dsibling);
  79        if (sibling == fsb)
  80                return __this_address;
  81        if (level >= 0) {
  82                if (!xfs_btree_check_lptr(cur, sibling, level + 1))
  83                        return __this_address;
  84        } else {
  85                if (!xfs_verify_fsbno(mp, sibling))
  86                        return __this_address;
  87        }
  88
  89        return NULL;
  90}
  91
  92static inline xfs_failaddr_t
  93xfs_btree_check_sblock_siblings(
  94        struct xfs_mount        *mp,
  95        struct xfs_btree_cur    *cur,
  96        int                     level,
  97        xfs_agnumber_t          agno,
  98        xfs_agblock_t           agbno,
  99        __be32                  dsibling)
 100{
 101        xfs_agblock_t           sibling;
 102
 103        if (dsibling == cpu_to_be32(NULLAGBLOCK))
 104                return NULL;
 105
 106        sibling = be32_to_cpu(dsibling);
 107        if (sibling == agbno)
 108                return __this_address;
 109        if (level >= 0) {
 110                if (!xfs_btree_check_sptr(cur, sibling, level + 1))
 111                        return __this_address;
 112        } else {
 113                if (!xfs_verify_agbno(mp, agno, sibling))
 114                        return __this_address;
 115        }
 116        return NULL;
 117}
 118
 119/*
 120 * Check a long btree block header.  Return the address of the failing check,
 121 * or NULL if everything is ok.
 122 */
 123xfs_failaddr_t
 124__xfs_btree_check_lblock(
 125        struct xfs_btree_cur    *cur,
 126        struct xfs_btree_block  *block,
 127        int                     level,
 128        struct xfs_buf          *bp)
 129{
 130        struct xfs_mount        *mp = cur->bc_mp;
 131        xfs_btnum_t             btnum = cur->bc_btnum;
 132        int                     crc = xfs_has_crc(mp);
 133        xfs_failaddr_t          fa;
 134        xfs_fsblock_t           fsb = NULLFSBLOCK;
 135
 136        if (crc) {
 137                if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
 138                        return __this_address;
 139                if (block->bb_u.l.bb_blkno !=
 140                    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
 141                        return __this_address;
 142                if (block->bb_u.l.bb_pad != cpu_to_be32(0))
 143                        return __this_address;
 144        }
 145
 146        if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
 147                return __this_address;
 148        if (be16_to_cpu(block->bb_level) != level)
 149                return __this_address;
 150        if (be16_to_cpu(block->bb_numrecs) >
 151            cur->bc_ops->get_maxrecs(cur, level))
 152                return __this_address;
 153
 154        if (bp)
 155                fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
 156
 157        fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
 158                        block->bb_u.l.bb_leftsib);
 159        if (!fa)
 160                fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
 161                                block->bb_u.l.bb_rightsib);
 162        return fa;
 163}
 164
 165/* Check a long btree block header. */
 166static int
 167xfs_btree_check_lblock(
 168        struct xfs_btree_cur    *cur,
 169        struct xfs_btree_block  *block,
 170        int                     level,
 171        struct xfs_buf          *bp)
 172{
 173        struct xfs_mount        *mp = cur->bc_mp;
 174        xfs_failaddr_t          fa;
 175
 176        fa = __xfs_btree_check_lblock(cur, block, level, bp);
 177        if (XFS_IS_CORRUPT(mp, fa != NULL) ||
 178            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
 179                if (bp)
 180                        trace_xfs_btree_corrupt(bp, _RET_IP_);
 181                return -EFSCORRUPTED;
 182        }
 183        return 0;
 184}
 185
 186/*
 187 * Check a short btree block header.  Return the address of the failing check,
 188 * or NULL if everything is ok.
 189 */
 190xfs_failaddr_t
 191__xfs_btree_check_sblock(
 192        struct xfs_btree_cur    *cur,
 193        struct xfs_btree_block  *block,
 194        int                     level,
 195        struct xfs_buf          *bp)
 196{
 197        struct xfs_mount        *mp = cur->bc_mp;
 198        xfs_btnum_t             btnum = cur->bc_btnum;
 199        int                     crc = xfs_has_crc(mp);
 200        xfs_failaddr_t          fa;
 201        xfs_agblock_t           agbno = NULLAGBLOCK;
 202        xfs_agnumber_t          agno = NULLAGNUMBER;
 203
 204        if (crc) {
 205                if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
 206                        return __this_address;
 207                if (block->bb_u.s.bb_blkno !=
 208                    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
 209                        return __this_address;
 210        }
 211
 212        if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
 213                return __this_address;
 214        if (be16_to_cpu(block->bb_level) != level)
 215                return __this_address;
 216        if (be16_to_cpu(block->bb_numrecs) >
 217            cur->bc_ops->get_maxrecs(cur, level))
 218                return __this_address;
 219
 220        if (bp) {
 221                agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
 222                agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
 223        }
 224
 225        fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
 226                        block->bb_u.s.bb_leftsib);
 227        if (!fa)
 228                fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
 229                                 agbno, block->bb_u.s.bb_rightsib);
 230        return fa;
 231}
 232
 233/* Check a short btree block header. */
 234STATIC int
 235xfs_btree_check_sblock(
 236        struct xfs_btree_cur    *cur,
 237        struct xfs_btree_block  *block,
 238        int                     level,
 239        struct xfs_buf          *bp)
 240{
 241        struct xfs_mount        *mp = cur->bc_mp;
 242        xfs_failaddr_t          fa;
 243
 244        fa = __xfs_btree_check_sblock(cur, block, level, bp);
 245        if (XFS_IS_CORRUPT(mp, fa != NULL) ||
 246            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
 247                if (bp)
 248                        trace_xfs_btree_corrupt(bp, _RET_IP_);
 249                return -EFSCORRUPTED;
 250        }
 251        return 0;
 252}
 253
 254/*
 255 * Debug routine: check that block header is ok.
 256 */
 257int
 258xfs_btree_check_block(
 259        struct xfs_btree_cur    *cur,   /* btree cursor */
 260        struct xfs_btree_block  *block, /* generic btree block pointer */
 261        int                     level,  /* level of the btree block */
 262        struct xfs_buf          *bp)    /* buffer containing block, if any */
 263{
 264        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 265                return xfs_btree_check_lblock(cur, block, level, bp);
 266        else
 267                return xfs_btree_check_sblock(cur, block, level, bp);
 268}
 269
 270/* Check that this long pointer is valid and points within the fs. */
 271bool
 272xfs_btree_check_lptr(
 273        struct xfs_btree_cur    *cur,
 274        xfs_fsblock_t           fsbno,
 275        int                     level)
 276{
 277        if (level <= 0)
 278                return false;
 279        return xfs_verify_fsbno(cur->bc_mp, fsbno);
 280}
 281
 282/* Check that this short pointer is valid and points within the AG. */
 283bool
 284xfs_btree_check_sptr(
 285        struct xfs_btree_cur    *cur,
 286        xfs_agblock_t           agbno,
 287        int                     level)
 288{
 289        if (level <= 0)
 290                return false;
 291        return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno);
 292}
 293
 294/*
 295 * Check that a given (indexed) btree pointer at a certain level of a
 296 * btree is valid and doesn't point past where it should.
 297 */
 298static int
 299xfs_btree_check_ptr(
 300        struct xfs_btree_cur            *cur,
 301        const union xfs_btree_ptr       *ptr,
 302        int                             index,
 303        int                             level)
 304{
 305        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 306                if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
 307                                level))
 308                        return 0;
 309                xfs_err(cur->bc_mp,
 310"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
 311                                cur->bc_ino.ip->i_ino,
 312                                cur->bc_ino.whichfork, cur->bc_btnum,
 313                                level, index);
 314        } else {
 315                if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
 316                                level))
 317                        return 0;
 318                xfs_err(cur->bc_mp,
 319"AG %u: Corrupt btree %d pointer at level %d index %d.",
 320                                cur->bc_ag.pag->pag_agno, cur->bc_btnum,
 321                                level, index);
 322        }
 323
 324        return -EFSCORRUPTED;
 325}
 326
 327#ifdef DEBUG
 328# define xfs_btree_debug_check_ptr      xfs_btree_check_ptr
 329#else
 330# define xfs_btree_debug_check_ptr(...) (0)
 331#endif
 332
 333/*
 334 * Calculate CRC on the whole btree block and stuff it into the
 335 * long-form btree header.
 336 *
 337 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
 338 * it into the buffer so recovery knows what the last modification was that made
 339 * it to disk.
 340 */
 341void
 342xfs_btree_lblock_calc_crc(
 343        struct xfs_buf          *bp)
 344{
 345        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 346        struct xfs_buf_log_item *bip = bp->b_log_item;
 347
 348        if (!xfs_has_crc(bp->b_mount))
 349                return;
 350        if (bip)
 351                block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 352        xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
 353}
 354
 355bool
 356xfs_btree_lblock_verify_crc(
 357        struct xfs_buf          *bp)
 358{
 359        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 360        struct xfs_mount        *mp = bp->b_mount;
 361
 362        if (xfs_has_crc(mp)) {
 363                if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
 364                        return false;
 365                return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
 366        }
 367
 368        return true;
 369}
 370
 371/*
 372 * Calculate CRC on the whole btree block and stuff it into the
 373 * short-form btree header.
 374 *
 375 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
 376 * it into the buffer so recovery knows what the last modification was that made
 377 * it to disk.
 378 */
 379void
 380xfs_btree_sblock_calc_crc(
 381        struct xfs_buf          *bp)
 382{
 383        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 384        struct xfs_buf_log_item *bip = bp->b_log_item;
 385
 386        if (!xfs_has_crc(bp->b_mount))
 387                return;
 388        if (bip)
 389                block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 390        xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
 391}
 392
 393bool
 394xfs_btree_sblock_verify_crc(
 395        struct xfs_buf          *bp)
 396{
 397        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 398        struct xfs_mount        *mp = bp->b_mount;
 399
 400        if (xfs_has_crc(mp)) {
 401                if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
 402                        return false;
 403                return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
 404        }
 405
 406        return true;
 407}
 408
 409static int
 410xfs_btree_free_block(
 411        struct xfs_btree_cur    *cur,
 412        struct xfs_buf          *bp)
 413{
 414        int                     error;
 415
 416        error = cur->bc_ops->free_block(cur, bp);
 417        if (!error) {
 418                xfs_trans_binval(cur->bc_tp, bp);
 419                XFS_BTREE_STATS_INC(cur, free);
 420        }
 421        return error;
 422}
 423
 424/*
 425 * Delete the btree cursor.
 426 */
 427void
 428xfs_btree_del_cursor(
 429        struct xfs_btree_cur    *cur,           /* btree cursor */
 430        int                     error)          /* del because of error */
 431{
 432        int                     i;              /* btree level */
 433
 434        /*
 435         * Clear the buffer pointers and release the buffers. If we're doing
 436         * this because of an error, inspect all of the entries in the bc_bufs
 437         * array for buffers to be unlocked. This is because some of the btree
 438         * code works from level n down to 0, and if we get an error along the
 439         * way we won't have initialized all the entries down to 0.
 440         */
 441        for (i = 0; i < cur->bc_nlevels; i++) {
 442                if (cur->bc_levels[i].bp)
 443                        xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
 444                else if (!error)
 445                        break;
 446        }
 447
 448        /*
 449         * If we are doing a BMBT update, the number of unaccounted blocks
 450         * allocated during this cursor life time should be zero. If it's not
 451         * zero, then we should be shut down or on our way to shutdown due to
 452         * cancelling a dirty transaction on error.
 453         */
 454        ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
 455               xfs_is_shutdown(cur->bc_mp) || error != 0);
 456        if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
 457                kmem_free(cur->bc_ops);
 458        if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
 459                xfs_perag_put(cur->bc_ag.pag);
 460        kmem_cache_free(cur->bc_cache, cur);
 461}
 462
 463/*
 464 * Duplicate the btree cursor.
 465 * Allocate a new one, copy the record, re-get the buffers.
 466 */
 467int                                     /* error */
 468xfs_btree_dup_cursor(
 469        struct xfs_btree_cur *cur,              /* input cursor */
 470        struct xfs_btree_cur **ncur)            /* output cursor */
 471{
 472        struct xfs_buf  *bp;            /* btree block's buffer pointer */
 473        int             error;          /* error return value */
 474        int             i;              /* level number of btree block */
 475        xfs_mount_t     *mp;            /* mount structure for filesystem */
 476        struct xfs_btree_cur *new;              /* new cursor value */
 477        xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
 478
 479        tp = cur->bc_tp;
 480        mp = cur->bc_mp;
 481
 482        /*
 483         * Allocate a new cursor like the old one.
 484         */
 485        new = cur->bc_ops->dup_cursor(cur);
 486
 487        /*
 488         * Copy the record currently in the cursor.
 489         */
 490        new->bc_rec = cur->bc_rec;
 491
 492        /*
 493         * For each level current, re-get the buffer and copy the ptr value.
 494         */
 495        for (i = 0; i < new->bc_nlevels; i++) {
 496                new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
 497                new->bc_levels[i].ra = cur->bc_levels[i].ra;
 498                bp = cur->bc_levels[i].bp;
 499                if (bp) {
 500                        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 501                                                   xfs_buf_daddr(bp), mp->m_bsize,
 502                                                   0, &bp,
 503                                                   cur->bc_ops->buf_ops);
 504                        if (error) {
 505                                xfs_btree_del_cursor(new, error);
 506                                *ncur = NULL;
 507                                return error;
 508                        }
 509                }
 510                new->bc_levels[i].bp = bp;
 511        }
 512        *ncur = new;
 513        return 0;
 514}
 515
 516/*
 517 * XFS btree block layout and addressing:
 518 *
 519 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 520 *
 521 * The leaf record start with a header then followed by records containing
 522 * the values.  A non-leaf block also starts with the same header, and
 523 * then first contains lookup keys followed by an equal number of pointers
 524 * to the btree blocks at the previous level.
 525 *
 526 *              +--------+-------+-------+-------+-------+-------+-------+
 527 * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 528 *              +--------+-------+-------+-------+-------+-------+-------+
 529 *
 530 *              +--------+-------+-------+-------+-------+-------+-------+
 531 * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 532 *              +--------+-------+-------+-------+-------+-------+-------+
 533 *
 534 * The header is called struct xfs_btree_block for reasons better left unknown
 535 * and comes in different versions for short (32bit) and long (64bit) block
 536 * pointers.  The record and key structures are defined by the btree instances
 537 * and opaque to the btree core.  The block pointers are simple disk endian
 538 * integers, available in a short (32bit) and long (64bit) variant.
 539 *
 540 * The helpers below calculate the offset of a given record, key or pointer
 541 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 542 * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
 543 * inside the btree block is done using indices starting at one, not zero!
 544 *
 545 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
 546 * overlapping intervals.  In such a tree, records are still sorted lowest to
 547 * highest and indexed by the smallest key value that refers to the record.
 548 * However, nodes are different: each pointer has two associated keys -- one
 549 * indexing the lowest key available in the block(s) below (the same behavior
 550 * as the key in a regular btree) and another indexing the highest key
 551 * available in the block(s) below.  Because records are /not/ sorted by the
 552 * highest key, all leaf block updates require us to compute the highest key
 553 * that matches any record in the leaf and to recursively update the high keys
 554 * in the nodes going further up in the tree, if necessary.  Nodes look like
 555 * this:
 556 *
 557 *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
 558 * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
 559 *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
 560 *
 561 * To perform an interval query on an overlapped tree, perform the usual
 562 * depth-first search and use the low and high keys to decide if we can skip
 563 * that particular node.  If a leaf node is reached, return the records that
 564 * intersect the interval.  Note that an interval query may return numerous
 565 * entries.  For a non-overlapped tree, simply search for the record associated
 566 * with the lowest key and iterate forward until a non-matching record is
 567 * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
 568 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
 569 * more detail.
 570 *
 571 * Why do we care about overlapping intervals?  Let's say you have a bunch of
 572 * reverse mapping records on a reflink filesystem:
 573 *
 574 * 1: +- file A startblock B offset C length D -----------+
 575 * 2:      +- file E startblock F offset G length H --------------+
 576 * 3:      +- file I startblock F offset J length K --+
 577 * 4:                                                        +- file L... --+
 578 *
 579 * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
 580 * we'd simply increment the length of record 1.  But how do we find the record
 581 * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
 582 * record 3 because the keys are ordered first by startblock.  An interval
 583 * query would return records 1 and 2 because they both overlap (B+D-1), and
 584 * from that we can pick out record 1 as the appropriate left neighbor.
 585 *
 586 * In the non-overlapped case you can do a LE lookup and decrement the cursor
 587 * because a record's interval must end before the next record.
 588 */
 589
 590/*
 591 * Return size of the btree block header for this btree instance.
 592 */
 593static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
 594{
 595        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 596                if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
 597                        return XFS_BTREE_LBLOCK_CRC_LEN;
 598                return XFS_BTREE_LBLOCK_LEN;
 599        }
 600        if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
 601                return XFS_BTREE_SBLOCK_CRC_LEN;
 602        return XFS_BTREE_SBLOCK_LEN;
 603}
 604
 605/*
 606 * Return size of btree block pointers for this btree instance.
 607 */
 608static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
 609{
 610        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 611                sizeof(__be64) : sizeof(__be32);
 612}
 613
 614/*
 615 * Calculate offset of the n-th record in a btree block.
 616 */
 617STATIC size_t
 618xfs_btree_rec_offset(
 619        struct xfs_btree_cur    *cur,
 620        int                     n)
 621{
 622        return xfs_btree_block_len(cur) +
 623                (n - 1) * cur->bc_ops->rec_len;
 624}
 625
 626/*
 627 * Calculate offset of the n-th key in a btree block.
 628 */
 629STATIC size_t
 630xfs_btree_key_offset(
 631        struct xfs_btree_cur    *cur,
 632        int                     n)
 633{
 634        return xfs_btree_block_len(cur) +
 635                (n - 1) * cur->bc_ops->key_len;
 636}
 637
 638/*
 639 * Calculate offset of the n-th high key in a btree block.
 640 */
 641STATIC size_t
 642xfs_btree_high_key_offset(
 643        struct xfs_btree_cur    *cur,
 644        int                     n)
 645{
 646        return xfs_btree_block_len(cur) +
 647                (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
 648}
 649
 650/*
 651 * Calculate offset of the n-th block pointer in a btree block.
 652 */
 653STATIC size_t
 654xfs_btree_ptr_offset(
 655        struct xfs_btree_cur    *cur,
 656        int                     n,
 657        int                     level)
 658{
 659        return xfs_btree_block_len(cur) +
 660                cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
 661                (n - 1) * xfs_btree_ptr_len(cur);
 662}
 663
 664/*
 665 * Return a pointer to the n-th record in the btree block.
 666 */
 667union xfs_btree_rec *
 668xfs_btree_rec_addr(
 669        struct xfs_btree_cur    *cur,
 670        int                     n,
 671        struct xfs_btree_block  *block)
 672{
 673        return (union xfs_btree_rec *)
 674                ((char *)block + xfs_btree_rec_offset(cur, n));
 675}
 676
 677/*
 678 * Return a pointer to the n-th key in the btree block.
 679 */
 680union xfs_btree_key *
 681xfs_btree_key_addr(
 682        struct xfs_btree_cur    *cur,
 683        int                     n,
 684        struct xfs_btree_block  *block)
 685{
 686        return (union xfs_btree_key *)
 687                ((char *)block + xfs_btree_key_offset(cur, n));
 688}
 689
 690/*
 691 * Return a pointer to the n-th high key in the btree block.
 692 */
 693union xfs_btree_key *
 694xfs_btree_high_key_addr(
 695        struct xfs_btree_cur    *cur,
 696        int                     n,
 697        struct xfs_btree_block  *block)
 698{
 699        return (union xfs_btree_key *)
 700                ((char *)block + xfs_btree_high_key_offset(cur, n));
 701}
 702
 703/*
 704 * Return a pointer to the n-th block pointer in the btree block.
 705 */
 706union xfs_btree_ptr *
 707xfs_btree_ptr_addr(
 708        struct xfs_btree_cur    *cur,
 709        int                     n,
 710        struct xfs_btree_block  *block)
 711{
 712        int                     level = xfs_btree_get_level(block);
 713
 714        ASSERT(block->bb_level != 0);
 715
 716        return (union xfs_btree_ptr *)
 717                ((char *)block + xfs_btree_ptr_offset(cur, n, level));
 718}
 719
 720struct xfs_ifork *
 721xfs_btree_ifork_ptr(
 722        struct xfs_btree_cur    *cur)
 723{
 724        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
 725
 726        if (cur->bc_flags & XFS_BTREE_STAGING)
 727                return cur->bc_ino.ifake->if_fork;
 728        return XFS_IFORK_PTR(cur->bc_ino.ip, cur->bc_ino.whichfork);
 729}
 730
 731/*
 732 * Get the root block which is stored in the inode.
 733 *
 734 * For now this btree implementation assumes the btree root is always
 735 * stored in the if_broot field of an inode fork.
 736 */
 737STATIC struct xfs_btree_block *
 738xfs_btree_get_iroot(
 739        struct xfs_btree_cur    *cur)
 740{
 741        struct xfs_ifork        *ifp = xfs_btree_ifork_ptr(cur);
 742
 743        return (struct xfs_btree_block *)ifp->if_broot;
 744}
 745
 746/*
 747 * Retrieve the block pointer from the cursor at the given level.
 748 * This may be an inode btree root or from a buffer.
 749 */
 750struct xfs_btree_block *                /* generic btree block pointer */
 751xfs_btree_get_block(
 752        struct xfs_btree_cur    *cur,   /* btree cursor */
 753        int                     level,  /* level in btree */
 754        struct xfs_buf          **bpp)  /* buffer containing the block */
 755{
 756        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 757            (level == cur->bc_nlevels - 1)) {
 758                *bpp = NULL;
 759                return xfs_btree_get_iroot(cur);
 760        }
 761
 762        *bpp = cur->bc_levels[level].bp;
 763        return XFS_BUF_TO_BLOCK(*bpp);
 764}
 765
 766/*
 767 * Change the cursor to point to the first record at the given level.
 768 * Other levels are unaffected.
 769 */
 770STATIC int                              /* success=1, failure=0 */
 771xfs_btree_firstrec(
 772        struct xfs_btree_cur    *cur,   /* btree cursor */
 773        int                     level)  /* level to change */
 774{
 775        struct xfs_btree_block  *block; /* generic btree block pointer */
 776        struct xfs_buf          *bp;    /* buffer containing block */
 777
 778        /*
 779         * Get the block pointer for this level.
 780         */
 781        block = xfs_btree_get_block(cur, level, &bp);
 782        if (xfs_btree_check_block(cur, block, level, bp))
 783                return 0;
 784        /*
 785         * It's empty, there is no such record.
 786         */
 787        if (!block->bb_numrecs)
 788                return 0;
 789        /*
 790         * Set the ptr value to 1, that's the first record/key.
 791         */
 792        cur->bc_levels[level].ptr = 1;
 793        return 1;
 794}
 795
 796/*
 797 * Change the cursor to point to the last record in the current block
 798 * at the given level.  Other levels are unaffected.
 799 */
 800STATIC int                              /* success=1, failure=0 */
 801xfs_btree_lastrec(
 802        struct xfs_btree_cur    *cur,   /* btree cursor */
 803        int                     level)  /* level to change */
 804{
 805        struct xfs_btree_block  *block; /* generic btree block pointer */
 806        struct xfs_buf          *bp;    /* buffer containing block */
 807
 808        /*
 809         * Get the block pointer for this level.
 810         */
 811        block = xfs_btree_get_block(cur, level, &bp);
 812        if (xfs_btree_check_block(cur, block, level, bp))
 813                return 0;
 814        /*
 815         * It's empty, there is no such record.
 816         */
 817        if (!block->bb_numrecs)
 818                return 0;
 819        /*
 820         * Set the ptr value to numrecs, that's the last record/key.
 821         */
 822        cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
 823        return 1;
 824}
 825
 826/*
 827 * Compute first and last byte offsets for the fields given.
 828 * Interprets the offsets table, which contains struct field offsets.
 829 */
 830void
 831xfs_btree_offsets(
 832        uint32_t        fields,         /* bitmask of fields */
 833        const short     *offsets,       /* table of field offsets */
 834        int             nbits,          /* number of bits to inspect */
 835        int             *first,         /* output: first byte offset */
 836        int             *last)          /* output: last byte offset */
 837{
 838        int             i;              /* current bit number */
 839        uint32_t        imask;          /* mask for current bit number */
 840
 841        ASSERT(fields != 0);
 842        /*
 843         * Find the lowest bit, so the first byte offset.
 844         */
 845        for (i = 0, imask = 1u; ; i++, imask <<= 1) {
 846                if (imask & fields) {
 847                        *first = offsets[i];
 848                        break;
 849                }
 850        }
 851        /*
 852         * Find the highest bit, so the last byte offset.
 853         */
 854        for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
 855                if (imask & fields) {
 856                        *last = offsets[i + 1] - 1;
 857                        break;
 858                }
 859        }
 860}
 861
 862/*
 863 * Get a buffer for the block, return it read in.
 864 * Long-form addressing.
 865 */
 866int
 867xfs_btree_read_bufl(
 868        struct xfs_mount        *mp,            /* file system mount point */
 869        struct xfs_trans        *tp,            /* transaction pointer */
 870        xfs_fsblock_t           fsbno,          /* file system block number */
 871        struct xfs_buf          **bpp,          /* buffer for fsbno */
 872        int                     refval,         /* ref count value for buffer */
 873        const struct xfs_buf_ops *ops)
 874{
 875        struct xfs_buf          *bp;            /* return value */
 876        xfs_daddr_t             d;              /* real disk block address */
 877        int                     error;
 878
 879        if (!xfs_verify_fsbno(mp, fsbno))
 880                return -EFSCORRUPTED;
 881        d = XFS_FSB_TO_DADDR(mp, fsbno);
 882        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
 883                                   mp->m_bsize, 0, &bp, ops);
 884        if (error)
 885                return error;
 886        if (bp)
 887                xfs_buf_set_ref(bp, refval);
 888        *bpp = bp;
 889        return 0;
 890}
 891
 892/*
 893 * Read-ahead the block, don't wait for it, don't return a buffer.
 894 * Long-form addressing.
 895 */
 896/* ARGSUSED */
 897void
 898xfs_btree_reada_bufl(
 899        struct xfs_mount        *mp,            /* file system mount point */
 900        xfs_fsblock_t           fsbno,          /* file system block number */
 901        xfs_extlen_t            count,          /* count of filesystem blocks */
 902        const struct xfs_buf_ops *ops)
 903{
 904        xfs_daddr_t             d;
 905
 906        ASSERT(fsbno != NULLFSBLOCK);
 907        d = XFS_FSB_TO_DADDR(mp, fsbno);
 908        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 909}
 910
 911/*
 912 * Read-ahead the block, don't wait for it, don't return a buffer.
 913 * Short-form addressing.
 914 */
 915/* ARGSUSED */
 916void
 917xfs_btree_reada_bufs(
 918        struct xfs_mount        *mp,            /* file system mount point */
 919        xfs_agnumber_t          agno,           /* allocation group number */
 920        xfs_agblock_t           agbno,          /* allocation group block number */
 921        xfs_extlen_t            count,          /* count of filesystem blocks */
 922        const struct xfs_buf_ops *ops)
 923{
 924        xfs_daddr_t             d;
 925
 926        ASSERT(agno != NULLAGNUMBER);
 927        ASSERT(agbno != NULLAGBLOCK);
 928        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 929        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 930}
 931
 932STATIC int
 933xfs_btree_readahead_lblock(
 934        struct xfs_btree_cur    *cur,
 935        int                     lr,
 936        struct xfs_btree_block  *block)
 937{
 938        int                     rval = 0;
 939        xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
 940        xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
 941
 942        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
 943                xfs_btree_reada_bufl(cur->bc_mp, left, 1,
 944                                     cur->bc_ops->buf_ops);
 945                rval++;
 946        }
 947
 948        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
 949                xfs_btree_reada_bufl(cur->bc_mp, right, 1,
 950                                     cur->bc_ops->buf_ops);
 951                rval++;
 952        }
 953
 954        return rval;
 955}
 956
 957STATIC int
 958xfs_btree_readahead_sblock(
 959        struct xfs_btree_cur    *cur,
 960        int                     lr,
 961        struct xfs_btree_block *block)
 962{
 963        int                     rval = 0;
 964        xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
 965        xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
 966
 967
 968        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
 969                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
 970                                     left, 1, cur->bc_ops->buf_ops);
 971                rval++;
 972        }
 973
 974        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
 975                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
 976                                     right, 1, cur->bc_ops->buf_ops);
 977                rval++;
 978        }
 979
 980        return rval;
 981}
 982
 983/*
 984 * Read-ahead btree blocks, at the given level.
 985 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 986 */
 987STATIC int
 988xfs_btree_readahead(
 989        struct xfs_btree_cur    *cur,           /* btree cursor */
 990        int                     lev,            /* level in btree */
 991        int                     lr)             /* left/right bits */
 992{
 993        struct xfs_btree_block  *block;
 994
 995        /*
 996         * No readahead needed if we are at the root level and the
 997         * btree root is stored in the inode.
 998         */
 999        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1000            (lev == cur->bc_nlevels - 1))
1001                return 0;
1002
1003        if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
1004                return 0;
1005
1006        cur->bc_levels[lev].ra |= lr;
1007        block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
1008
1009        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1010                return xfs_btree_readahead_lblock(cur, lr, block);
1011        return xfs_btree_readahead_sblock(cur, lr, block);
1012}
1013
1014STATIC int
1015xfs_btree_ptr_to_daddr(
1016        struct xfs_btree_cur            *cur,
1017        const union xfs_btree_ptr       *ptr,
1018        xfs_daddr_t                     *daddr)
1019{
1020        xfs_fsblock_t           fsbno;
1021        xfs_agblock_t           agbno;
1022        int                     error;
1023
1024        error = xfs_btree_check_ptr(cur, ptr, 0, 1);
1025        if (error)
1026                return error;
1027
1028        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1029                fsbno = be64_to_cpu(ptr->l);
1030                *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
1031        } else {
1032                agbno = be32_to_cpu(ptr->s);
1033                *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1034                                agbno);
1035        }
1036
1037        return 0;
1038}
1039
1040/*
1041 * Readahead @count btree blocks at the given @ptr location.
1042 *
1043 * We don't need to care about long or short form btrees here as we have a
1044 * method of converting the ptr directly to a daddr available to us.
1045 */
1046STATIC void
1047xfs_btree_readahead_ptr(
1048        struct xfs_btree_cur    *cur,
1049        union xfs_btree_ptr     *ptr,
1050        xfs_extlen_t            count)
1051{
1052        xfs_daddr_t             daddr;
1053
1054        if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
1055                return;
1056        xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
1057                          cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
1058}
1059
1060/*
1061 * Set the buffer for level "lev" in the cursor to bp, releasing
1062 * any previous buffer.
1063 */
1064STATIC void
1065xfs_btree_setbuf(
1066        struct xfs_btree_cur    *cur,   /* btree cursor */
1067        int                     lev,    /* level in btree */
1068        struct xfs_buf          *bp)    /* new buffer to set */
1069{
1070        struct xfs_btree_block  *b;     /* btree block */
1071
1072        if (cur->bc_levels[lev].bp)
1073                xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
1074        cur->bc_levels[lev].bp = bp;
1075        cur->bc_levels[lev].ra = 0;
1076
1077        b = XFS_BUF_TO_BLOCK(bp);
1078        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1079                if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1080                        cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1081                if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1082                        cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1083        } else {
1084                if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1085                        cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1086                if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1087                        cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1088        }
1089}
1090
1091bool
1092xfs_btree_ptr_is_null(
1093        struct xfs_btree_cur            *cur,
1094        const union xfs_btree_ptr       *ptr)
1095{
1096        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1097                return ptr->l == cpu_to_be64(NULLFSBLOCK);
1098        else
1099                return ptr->s == cpu_to_be32(NULLAGBLOCK);
1100}
1101
1102void
1103xfs_btree_set_ptr_null(
1104        struct xfs_btree_cur    *cur,
1105        union xfs_btree_ptr     *ptr)
1106{
1107        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1108                ptr->l = cpu_to_be64(NULLFSBLOCK);
1109        else
1110                ptr->s = cpu_to_be32(NULLAGBLOCK);
1111}
1112
1113/*
1114 * Get/set/init sibling pointers
1115 */
1116void
1117xfs_btree_get_sibling(
1118        struct xfs_btree_cur    *cur,
1119        struct xfs_btree_block  *block,
1120        union xfs_btree_ptr     *ptr,
1121        int                     lr)
1122{
1123        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1124
1125        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1126                if (lr == XFS_BB_RIGHTSIB)
1127                        ptr->l = block->bb_u.l.bb_rightsib;
1128                else
1129                        ptr->l = block->bb_u.l.bb_leftsib;
1130        } else {
1131                if (lr == XFS_BB_RIGHTSIB)
1132                        ptr->s = block->bb_u.s.bb_rightsib;
1133                else
1134                        ptr->s = block->bb_u.s.bb_leftsib;
1135        }
1136}
1137
1138void
1139xfs_btree_set_sibling(
1140        struct xfs_btree_cur            *cur,
1141        struct xfs_btree_block          *block,
1142        const union xfs_btree_ptr       *ptr,
1143        int                             lr)
1144{
1145        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1146
1147        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1148                if (lr == XFS_BB_RIGHTSIB)
1149                        block->bb_u.l.bb_rightsib = ptr->l;
1150                else
1151                        block->bb_u.l.bb_leftsib = ptr->l;
1152        } else {
1153                if (lr == XFS_BB_RIGHTSIB)
1154                        block->bb_u.s.bb_rightsib = ptr->s;
1155                else
1156                        block->bb_u.s.bb_leftsib = ptr->s;
1157        }
1158}
1159
1160void
1161xfs_btree_init_block_int(
1162        struct xfs_mount        *mp,
1163        struct xfs_btree_block  *buf,
1164        xfs_daddr_t             blkno,
1165        xfs_btnum_t             btnum,
1166        __u16                   level,
1167        __u16                   numrecs,
1168        __u64                   owner,
1169        unsigned int            flags)
1170{
1171        int                     crc = xfs_has_crc(mp);
1172        __u32                   magic = xfs_btree_magic(crc, btnum);
1173
1174        buf->bb_magic = cpu_to_be32(magic);
1175        buf->bb_level = cpu_to_be16(level);
1176        buf->bb_numrecs = cpu_to_be16(numrecs);
1177
1178        if (flags & XFS_BTREE_LONG_PTRS) {
1179                buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1180                buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1181                if (crc) {
1182                        buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1183                        buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1184                        uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1185                        buf->bb_u.l.bb_pad = 0;
1186                        buf->bb_u.l.bb_lsn = 0;
1187                }
1188        } else {
1189                /* owner is a 32 bit value on short blocks */
1190                __u32 __owner = (__u32)owner;
1191
1192                buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1193                buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1194                if (crc) {
1195                        buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1196                        buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1197                        uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1198                        buf->bb_u.s.bb_lsn = 0;
1199                }
1200        }
1201}
1202
1203void
1204xfs_btree_init_block(
1205        struct xfs_mount *mp,
1206        struct xfs_buf  *bp,
1207        xfs_btnum_t     btnum,
1208        __u16           level,
1209        __u16           numrecs,
1210        __u64           owner)
1211{
1212        xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
1213                                 btnum, level, numrecs, owner, 0);
1214}
1215
1216void
1217xfs_btree_init_block_cur(
1218        struct xfs_btree_cur    *cur,
1219        struct xfs_buf          *bp,
1220        int                     level,
1221        int                     numrecs)
1222{
1223        __u64                   owner;
1224
1225        /*
1226         * we can pull the owner from the cursor right now as the different
1227         * owners align directly with the pointer size of the btree. This may
1228         * change in future, but is safe for current users of the generic btree
1229         * code.
1230         */
1231        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1232                owner = cur->bc_ino.ip->i_ino;
1233        else
1234                owner = cur->bc_ag.pag->pag_agno;
1235
1236        xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
1237                                xfs_buf_daddr(bp), cur->bc_btnum, level,
1238                                numrecs, owner, cur->bc_flags);
1239}
1240
1241/*
1242 * Return true if ptr is the last record in the btree and
1243 * we need to track updates to this record.  The decision
1244 * will be further refined in the update_lastrec method.
1245 */
1246STATIC int
1247xfs_btree_is_lastrec(
1248        struct xfs_btree_cur    *cur,
1249        struct xfs_btree_block  *block,
1250        int                     level)
1251{
1252        union xfs_btree_ptr     ptr;
1253
1254        if (level > 0)
1255                return 0;
1256        if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1257                return 0;
1258
1259        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1260        if (!xfs_btree_ptr_is_null(cur, &ptr))
1261                return 0;
1262        return 1;
1263}
1264
1265STATIC void
1266xfs_btree_buf_to_ptr(
1267        struct xfs_btree_cur    *cur,
1268        struct xfs_buf          *bp,
1269        union xfs_btree_ptr     *ptr)
1270{
1271        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1272                ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1273                                        xfs_buf_daddr(bp)));
1274        else {
1275                ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1276                                        xfs_buf_daddr(bp)));
1277        }
1278}
1279
1280STATIC void
1281xfs_btree_set_refs(
1282        struct xfs_btree_cur    *cur,
1283        struct xfs_buf          *bp)
1284{
1285        switch (cur->bc_btnum) {
1286        case XFS_BTNUM_BNO:
1287        case XFS_BTNUM_CNT:
1288                xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1289                break;
1290        case XFS_BTNUM_INO:
1291        case XFS_BTNUM_FINO:
1292                xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1293                break;
1294        case XFS_BTNUM_BMAP:
1295                xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1296                break;
1297        case XFS_BTNUM_RMAP:
1298                xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1299                break;
1300        case XFS_BTNUM_REFC:
1301                xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
1302                break;
1303        default:
1304                ASSERT(0);
1305        }
1306}
1307
1308int
1309xfs_btree_get_buf_block(
1310        struct xfs_btree_cur            *cur,
1311        const union xfs_btree_ptr       *ptr,
1312        struct xfs_btree_block          **block,
1313        struct xfs_buf                  **bpp)
1314{
1315        struct xfs_mount        *mp = cur->bc_mp;
1316        xfs_daddr_t             d;
1317        int                     error;
1318
1319        error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1320        if (error)
1321                return error;
1322        error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
1323                        0, bpp);
1324        if (error)
1325                return error;
1326
1327        (*bpp)->b_ops = cur->bc_ops->buf_ops;
1328        *block = XFS_BUF_TO_BLOCK(*bpp);
1329        return 0;
1330}
1331
1332/*
1333 * Read in the buffer at the given ptr and return the buffer and
1334 * the block pointer within the buffer.
1335 */
1336STATIC int
1337xfs_btree_read_buf_block(
1338        struct xfs_btree_cur            *cur,
1339        const union xfs_btree_ptr       *ptr,
1340        int                             flags,
1341        struct xfs_btree_block          **block,
1342        struct xfs_buf                  **bpp)
1343{
1344        struct xfs_mount        *mp = cur->bc_mp;
1345        xfs_daddr_t             d;
1346        int                     error;
1347
1348        /* need to sort out how callers deal with failures first */
1349        ASSERT(!(flags & XBF_TRYLOCK));
1350
1351        error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1352        if (error)
1353                return error;
1354        error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1355                                   mp->m_bsize, flags, bpp,
1356                                   cur->bc_ops->buf_ops);
1357        if (error)
1358                return error;
1359
1360        xfs_btree_set_refs(cur, *bpp);
1361        *block = XFS_BUF_TO_BLOCK(*bpp);
1362        return 0;
1363}
1364
1365/*
1366 * Copy keys from one btree block to another.
1367 */
1368void
1369xfs_btree_copy_keys(
1370        struct xfs_btree_cur            *cur,
1371        union xfs_btree_key             *dst_key,
1372        const union xfs_btree_key       *src_key,
1373        int                             numkeys)
1374{
1375        ASSERT(numkeys >= 0);
1376        memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1377}
1378
1379/*
1380 * Copy records from one btree block to another.
1381 */
1382STATIC void
1383xfs_btree_copy_recs(
1384        struct xfs_btree_cur    *cur,
1385        union xfs_btree_rec     *dst_rec,
1386        union xfs_btree_rec     *src_rec,
1387        int                     numrecs)
1388{
1389        ASSERT(numrecs >= 0);
1390        memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1391}
1392
1393/*
1394 * Copy block pointers from one btree block to another.
1395 */
1396void
1397xfs_btree_copy_ptrs(
1398        struct xfs_btree_cur    *cur,
1399        union xfs_btree_ptr     *dst_ptr,
1400        const union xfs_btree_ptr *src_ptr,
1401        int                     numptrs)
1402{
1403        ASSERT(numptrs >= 0);
1404        memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1405}
1406
1407/*
1408 * Shift keys one index left/right inside a single btree block.
1409 */
1410STATIC void
1411xfs_btree_shift_keys(
1412        struct xfs_btree_cur    *cur,
1413        union xfs_btree_key     *key,
1414        int                     dir,
1415        int                     numkeys)
1416{
1417        char                    *dst_key;
1418
1419        ASSERT(numkeys >= 0);
1420        ASSERT(dir == 1 || dir == -1);
1421
1422        dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1423        memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1424}
1425
1426/*
1427 * Shift records one index left/right inside a single btree block.
1428 */
1429STATIC void
1430xfs_btree_shift_recs(
1431        struct xfs_btree_cur    *cur,
1432        union xfs_btree_rec     *rec,
1433        int                     dir,
1434        int                     numrecs)
1435{
1436        char                    *dst_rec;
1437
1438        ASSERT(numrecs >= 0);
1439        ASSERT(dir == 1 || dir == -1);
1440
1441        dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1442        memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1443}
1444
1445/*
1446 * Shift block pointers one index left/right inside a single btree block.
1447 */
1448STATIC void
1449xfs_btree_shift_ptrs(
1450        struct xfs_btree_cur    *cur,
1451        union xfs_btree_ptr     *ptr,
1452        int                     dir,
1453        int                     numptrs)
1454{
1455        char                    *dst_ptr;
1456
1457        ASSERT(numptrs >= 0);
1458        ASSERT(dir == 1 || dir == -1);
1459
1460        dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1461        memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1462}
1463
1464/*
1465 * Log key values from the btree block.
1466 */
1467STATIC void
1468xfs_btree_log_keys(
1469        struct xfs_btree_cur    *cur,
1470        struct xfs_buf          *bp,
1471        int                     first,
1472        int                     last)
1473{
1474
1475        if (bp) {
1476                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1477                xfs_trans_log_buf(cur->bc_tp, bp,
1478                                  xfs_btree_key_offset(cur, first),
1479                                  xfs_btree_key_offset(cur, last + 1) - 1);
1480        } else {
1481                xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1482                                xfs_ilog_fbroot(cur->bc_ino.whichfork));
1483        }
1484}
1485
1486/*
1487 * Log record values from the btree block.
1488 */
1489void
1490xfs_btree_log_recs(
1491        struct xfs_btree_cur    *cur,
1492        struct xfs_buf          *bp,
1493        int                     first,
1494        int                     last)
1495{
1496
1497        xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1498        xfs_trans_log_buf(cur->bc_tp, bp,
1499                          xfs_btree_rec_offset(cur, first),
1500                          xfs_btree_rec_offset(cur, last + 1) - 1);
1501
1502}
1503
1504/*
1505 * Log block pointer fields from a btree block (nonleaf).
1506 */
1507STATIC void
1508xfs_btree_log_ptrs(
1509        struct xfs_btree_cur    *cur,   /* btree cursor */
1510        struct xfs_buf          *bp,    /* buffer containing btree block */
1511        int                     first,  /* index of first pointer to log */
1512        int                     last)   /* index of last pointer to log */
1513{
1514
1515        if (bp) {
1516                struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1517                int                     level = xfs_btree_get_level(block);
1518
1519                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1520                xfs_trans_log_buf(cur->bc_tp, bp,
1521                                xfs_btree_ptr_offset(cur, first, level),
1522                                xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1523        } else {
1524                xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1525                        xfs_ilog_fbroot(cur->bc_ino.whichfork));
1526        }
1527
1528}
1529
1530/*
1531 * Log fields from a btree block header.
1532 */
1533void
1534xfs_btree_log_block(
1535        struct xfs_btree_cur    *cur,   /* btree cursor */
1536        struct xfs_buf          *bp,    /* buffer containing btree block */
1537        uint32_t                fields) /* mask of fields: XFS_BB_... */
1538{
1539        int                     first;  /* first byte offset logged */
1540        int                     last;   /* last byte offset logged */
1541        static const short      soffsets[] = {  /* table of offsets (short) */
1542                offsetof(struct xfs_btree_block, bb_magic),
1543                offsetof(struct xfs_btree_block, bb_level),
1544                offsetof(struct xfs_btree_block, bb_numrecs),
1545                offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1546                offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1547                offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1548                offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1549                offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1550                offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1551                offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1552                XFS_BTREE_SBLOCK_CRC_LEN
1553        };
1554        static const short      loffsets[] = {  /* table of offsets (long) */
1555                offsetof(struct xfs_btree_block, bb_magic),
1556                offsetof(struct xfs_btree_block, bb_level),
1557                offsetof(struct xfs_btree_block, bb_numrecs),
1558                offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1559                offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1560                offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1561                offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1562                offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1563                offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1564                offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1565                offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1566                XFS_BTREE_LBLOCK_CRC_LEN
1567        };
1568
1569        if (bp) {
1570                int nbits;
1571
1572                if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1573                        /*
1574                         * We don't log the CRC when updating a btree
1575                         * block but instead recreate it during log
1576                         * recovery.  As the log buffers have checksums
1577                         * of their own this is safe and avoids logging a crc
1578                         * update in a lot of places.
1579                         */
1580                        if (fields == XFS_BB_ALL_BITS)
1581                                fields = XFS_BB_ALL_BITS_CRC;
1582                        nbits = XFS_BB_NUM_BITS_CRC;
1583                } else {
1584                        nbits = XFS_BB_NUM_BITS;
1585                }
1586                xfs_btree_offsets(fields,
1587                                  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1588                                        loffsets : soffsets,
1589                                  nbits, &first, &last);
1590                xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1591                xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1592        } else {
1593                xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1594                        xfs_ilog_fbroot(cur->bc_ino.whichfork));
1595        }
1596}
1597
1598/*
1599 * Increment cursor by one record at the level.
1600 * For nonzero levels the leaf-ward information is untouched.
1601 */
1602int                                             /* error */
1603xfs_btree_increment(
1604        struct xfs_btree_cur    *cur,
1605        int                     level,
1606        int                     *stat)          /* success/failure */
1607{
1608        struct xfs_btree_block  *block;
1609        union xfs_btree_ptr     ptr;
1610        struct xfs_buf          *bp;
1611        int                     error;          /* error return value */
1612        int                     lev;
1613
1614        ASSERT(level < cur->bc_nlevels);
1615
1616        /* Read-ahead to the right at this level. */
1617        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1618
1619        /* Get a pointer to the btree block. */
1620        block = xfs_btree_get_block(cur, level, &bp);
1621
1622#ifdef DEBUG
1623        error = xfs_btree_check_block(cur, block, level, bp);
1624        if (error)
1625                goto error0;
1626#endif
1627
1628        /* We're done if we remain in the block after the increment. */
1629        if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
1630                goto out1;
1631
1632        /* Fail if we just went off the right edge of the tree. */
1633        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1634        if (xfs_btree_ptr_is_null(cur, &ptr))
1635                goto out0;
1636
1637        XFS_BTREE_STATS_INC(cur, increment);
1638
1639        /*
1640         * March up the tree incrementing pointers.
1641         * Stop when we don't go off the right edge of a block.
1642         */
1643        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1644                block = xfs_btree_get_block(cur, lev, &bp);
1645
1646#ifdef DEBUG
1647                error = xfs_btree_check_block(cur, block, lev, bp);
1648                if (error)
1649                        goto error0;
1650#endif
1651
1652                if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
1653                        break;
1654
1655                /* Read-ahead the right block for the next loop. */
1656                xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1657        }
1658
1659        /*
1660         * If we went off the root then we are either seriously
1661         * confused or have the tree root in an inode.
1662         */
1663        if (lev == cur->bc_nlevels) {
1664                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1665                        goto out0;
1666                ASSERT(0);
1667                error = -EFSCORRUPTED;
1668                goto error0;
1669        }
1670        ASSERT(lev < cur->bc_nlevels);
1671
1672        /*
1673         * Now walk back down the tree, fixing up the cursor's buffer
1674         * pointers and key numbers.
1675         */
1676        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1677                union xfs_btree_ptr     *ptrp;
1678
1679                ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1680                --lev;
1681                error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1682                if (error)
1683                        goto error0;
1684
1685                xfs_btree_setbuf(cur, lev, bp);
1686                cur->bc_levels[lev].ptr = 1;
1687        }
1688out1:
1689        *stat = 1;
1690        return 0;
1691
1692out0:
1693        *stat = 0;
1694        return 0;
1695
1696error0:
1697        return error;
1698}
1699
1700/*
1701 * Decrement cursor by one record at the level.
1702 * For nonzero levels the leaf-ward information is untouched.
1703 */
1704int                                             /* error */
1705xfs_btree_decrement(
1706        struct xfs_btree_cur    *cur,
1707        int                     level,
1708        int                     *stat)          /* success/failure */
1709{
1710        struct xfs_btree_block  *block;
1711        struct xfs_buf          *bp;
1712        int                     error;          /* error return value */
1713        int                     lev;
1714        union xfs_btree_ptr     ptr;
1715
1716        ASSERT(level < cur->bc_nlevels);
1717
1718        /* Read-ahead to the left at this level. */
1719        xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1720
1721        /* We're done if we remain in the block after the decrement. */
1722        if (--cur->bc_levels[level].ptr > 0)
1723                goto out1;
1724
1725        /* Get a pointer to the btree block. */
1726        block = xfs_btree_get_block(cur, level, &bp);
1727
1728#ifdef DEBUG
1729        error = xfs_btree_check_block(cur, block, level, bp);
1730        if (error)
1731                goto error0;
1732#endif
1733
1734        /* Fail if we just went off the left edge of the tree. */
1735        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1736        if (xfs_btree_ptr_is_null(cur, &ptr))
1737                goto out0;
1738
1739        XFS_BTREE_STATS_INC(cur, decrement);
1740
1741        /*
1742         * March up the tree decrementing pointers.
1743         * Stop when we don't go off the left edge of a block.
1744         */
1745        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1746                if (--cur->bc_levels[lev].ptr > 0)
1747                        break;
1748                /* Read-ahead the left block for the next loop. */
1749                xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1750        }
1751
1752        /*
1753         * If we went off the root then we are seriously confused.
1754         * or the root of the tree is in an inode.
1755         */
1756        if (lev == cur->bc_nlevels) {
1757                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1758                        goto out0;
1759                ASSERT(0);
1760                error = -EFSCORRUPTED;
1761                goto error0;
1762        }
1763        ASSERT(lev < cur->bc_nlevels);
1764
1765        /*
1766         * Now walk back down the tree, fixing up the cursor's buffer
1767         * pointers and key numbers.
1768         */
1769        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1770                union xfs_btree_ptr     *ptrp;
1771
1772                ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1773                --lev;
1774                error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1775                if (error)
1776                        goto error0;
1777                xfs_btree_setbuf(cur, lev, bp);
1778                cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
1779        }
1780out1:
1781        *stat = 1;
1782        return 0;
1783
1784out0:
1785        *stat = 0;
1786        return 0;
1787
1788error0:
1789        return error;
1790}
1791
1792int
1793xfs_btree_lookup_get_block(
1794        struct xfs_btree_cur            *cur,   /* btree cursor */
1795        int                             level,  /* level in the btree */
1796        const union xfs_btree_ptr       *pp,    /* ptr to btree block */
1797        struct xfs_btree_block          **blkp) /* return btree block */
1798{
1799        struct xfs_buf          *bp;    /* buffer pointer for btree block */
1800        xfs_daddr_t             daddr;
1801        int                     error = 0;
1802
1803        /* special case the root block if in an inode */
1804        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1805            (level == cur->bc_nlevels - 1)) {
1806                *blkp = xfs_btree_get_iroot(cur);
1807                return 0;
1808        }
1809
1810        /*
1811         * If the old buffer at this level for the disk address we are
1812         * looking for re-use it.
1813         *
1814         * Otherwise throw it away and get a new one.
1815         */
1816        bp = cur->bc_levels[level].bp;
1817        error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
1818        if (error)
1819                return error;
1820        if (bp && xfs_buf_daddr(bp) == daddr) {
1821                *blkp = XFS_BUF_TO_BLOCK(bp);
1822                return 0;
1823        }
1824
1825        error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1826        if (error)
1827                return error;
1828
1829        /* Check the inode owner since the verifiers don't. */
1830        if (xfs_has_crc(cur->bc_mp) &&
1831            !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
1832            (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
1833            be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
1834                        cur->bc_ino.ip->i_ino)
1835                goto out_bad;
1836
1837        /* Did we get the level we were looking for? */
1838        if (be16_to_cpu((*blkp)->bb_level) != level)
1839                goto out_bad;
1840
1841        /* Check that internal nodes have at least one record. */
1842        if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1843                goto out_bad;
1844
1845        xfs_btree_setbuf(cur, level, bp);
1846        return 0;
1847
1848out_bad:
1849        *blkp = NULL;
1850        xfs_buf_mark_corrupt(bp);
1851        xfs_trans_brelse(cur->bc_tp, bp);
1852        return -EFSCORRUPTED;
1853}
1854
1855/*
1856 * Get current search key.  For level 0 we don't actually have a key
1857 * structure so we make one up from the record.  For all other levels
1858 * we just return the right key.
1859 */
1860STATIC union xfs_btree_key *
1861xfs_lookup_get_search_key(
1862        struct xfs_btree_cur    *cur,
1863        int                     level,
1864        int                     keyno,
1865        struct xfs_btree_block  *block,
1866        union xfs_btree_key     *kp)
1867{
1868        if (level == 0) {
1869                cur->bc_ops->init_key_from_rec(kp,
1870                                xfs_btree_rec_addr(cur, keyno, block));
1871                return kp;
1872        }
1873
1874        return xfs_btree_key_addr(cur, keyno, block);
1875}
1876
1877/*
1878 * Lookup the record.  The cursor is made to point to it, based on dir.
1879 * stat is set to 0 if can't find any such record, 1 for success.
1880 */
1881int                                     /* error */
1882xfs_btree_lookup(
1883        struct xfs_btree_cur    *cur,   /* btree cursor */
1884        xfs_lookup_t            dir,    /* <=, ==, or >= */
1885        int                     *stat)  /* success/failure */
1886{
1887        struct xfs_btree_block  *block; /* current btree block */
1888        int64_t                 diff;   /* difference for the current key */
1889        int                     error;  /* error return value */
1890        int                     keyno;  /* current key number */
1891        int                     level;  /* level in the btree */
1892        union xfs_btree_ptr     *pp;    /* ptr to btree block */
1893        union xfs_btree_ptr     ptr;    /* ptr to btree block */
1894
1895        XFS_BTREE_STATS_INC(cur, lookup);
1896
1897        /* No such thing as a zero-level tree. */
1898        if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
1899                return -EFSCORRUPTED;
1900
1901        block = NULL;
1902        keyno = 0;
1903
1904        /* initialise start pointer from cursor */
1905        cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1906        pp = &ptr;
1907
1908        /*
1909         * Iterate over each level in the btree, starting at the root.
1910         * For each level above the leaves, find the key we need, based
1911         * on the lookup record, then follow the corresponding block
1912         * pointer down to the next level.
1913         */
1914        for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1915                /* Get the block we need to do the lookup on. */
1916                error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1917                if (error)
1918                        goto error0;
1919
1920                if (diff == 0) {
1921                        /*
1922                         * If we already had a key match at a higher level, we
1923                         * know we need to use the first entry in this block.
1924                         */
1925                        keyno = 1;
1926                } else {
1927                        /* Otherwise search this block. Do a binary search. */
1928
1929                        int     high;   /* high entry number */
1930                        int     low;    /* low entry number */
1931
1932                        /* Set low and high entry numbers, 1-based. */
1933                        low = 1;
1934                        high = xfs_btree_get_numrecs(block);
1935                        if (!high) {
1936                                /* Block is empty, must be an empty leaf. */
1937                                if (level != 0 || cur->bc_nlevels != 1) {
1938                                        XFS_CORRUPTION_ERROR(__func__,
1939                                                        XFS_ERRLEVEL_LOW,
1940                                                        cur->bc_mp, block,
1941                                                        sizeof(*block));
1942                                        return -EFSCORRUPTED;
1943                                }
1944
1945                                cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
1946                                *stat = 0;
1947                                return 0;
1948                        }
1949
1950                        /* Binary search the block. */
1951                        while (low <= high) {
1952                                union xfs_btree_key     key;
1953                                union xfs_btree_key     *kp;
1954
1955                                XFS_BTREE_STATS_INC(cur, compare);
1956
1957                                /* keyno is average of low and high. */
1958                                keyno = (low + high) >> 1;
1959
1960                                /* Get current search key */
1961                                kp = xfs_lookup_get_search_key(cur, level,
1962                                                keyno, block, &key);
1963
1964                                /*
1965                                 * Compute difference to get next direction:
1966                                 *  - less than, move right
1967                                 *  - greater than, move left
1968                                 *  - equal, we're done
1969                                 */
1970                                diff = cur->bc_ops->key_diff(cur, kp);
1971                                if (diff < 0)
1972                                        low = keyno + 1;
1973                                else if (diff > 0)
1974                                        high = keyno - 1;
1975                                else
1976                                        break;
1977                        }
1978                }
1979
1980                /*
1981                 * If there are more levels, set up for the next level
1982                 * by getting the block number and filling in the cursor.
1983                 */
1984                if (level > 0) {
1985                        /*
1986                         * If we moved left, need the previous key number,
1987                         * unless there isn't one.
1988                         */
1989                        if (diff > 0 && --keyno < 1)
1990                                keyno = 1;
1991                        pp = xfs_btree_ptr_addr(cur, keyno, block);
1992
1993                        error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
1994                        if (error)
1995                                goto error0;
1996
1997                        cur->bc_levels[level].ptr = keyno;
1998                }
1999        }
2000
2001        /* Done with the search. See if we need to adjust the results. */
2002        if (dir != XFS_LOOKUP_LE && diff < 0) {
2003                keyno++;
2004                /*
2005                 * If ge search and we went off the end of the block, but it's
2006                 * not the last block, we're in the wrong block.
2007                 */
2008                xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2009                if (dir == XFS_LOOKUP_GE &&
2010                    keyno > xfs_btree_get_numrecs(block) &&
2011                    !xfs_btree_ptr_is_null(cur, &ptr)) {
2012                        int     i;
2013
2014                        cur->bc_levels[0].ptr = keyno;
2015                        error = xfs_btree_increment(cur, 0, &i);
2016                        if (error)
2017                                goto error0;
2018                        if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
2019                                return -EFSCORRUPTED;
2020                        *stat = 1;
2021                        return 0;
2022                }
2023        } else if (dir == XFS_LOOKUP_LE && diff > 0)
2024                keyno--;
2025        cur->bc_levels[0].ptr = keyno;
2026
2027        /* Return if we succeeded or not. */
2028        if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
2029                *stat = 0;
2030        else if (dir != XFS_LOOKUP_EQ || diff == 0)
2031                *stat = 1;
2032        else
2033                *stat = 0;
2034        return 0;
2035
2036error0:
2037        return error;
2038}
2039
2040/* Find the high key storage area from a regular key. */
2041union xfs_btree_key *
2042xfs_btree_high_key_from_key(
2043        struct xfs_btree_cur    *cur,
2044        union xfs_btree_key     *key)
2045{
2046        ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2047        return (union xfs_btree_key *)((char *)key +
2048                        (cur->bc_ops->key_len / 2));
2049}
2050
2051/* Determine the low (and high if overlapped) keys of a leaf block */
2052STATIC void
2053xfs_btree_get_leaf_keys(
2054        struct xfs_btree_cur    *cur,
2055        struct xfs_btree_block  *block,
2056        union xfs_btree_key     *key)
2057{
2058        union xfs_btree_key     max_hkey;
2059        union xfs_btree_key     hkey;
2060        union xfs_btree_rec     *rec;
2061        union xfs_btree_key     *high;
2062        int                     n;
2063
2064        rec = xfs_btree_rec_addr(cur, 1, block);
2065        cur->bc_ops->init_key_from_rec(key, rec);
2066
2067        if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2068
2069                cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2070                for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2071                        rec = xfs_btree_rec_addr(cur, n, block);
2072                        cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2073                        if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
2074                                        > 0)
2075                                max_hkey = hkey;
2076                }
2077
2078                high = xfs_btree_high_key_from_key(cur, key);
2079                memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2080        }
2081}
2082
2083/* Determine the low (and high if overlapped) keys of a node block */
2084STATIC void
2085xfs_btree_get_node_keys(
2086        struct xfs_btree_cur    *cur,
2087        struct xfs_btree_block  *block,
2088        union xfs_btree_key     *key)
2089{
2090        union xfs_btree_key     *hkey;
2091        union xfs_btree_key     *max_hkey;
2092        union xfs_btree_key     *high;
2093        int                     n;
2094
2095        if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2096                memcpy(key, xfs_btree_key_addr(cur, 1, block),
2097                                cur->bc_ops->key_len / 2);
2098
2099                max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2100                for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2101                        hkey = xfs_btree_high_key_addr(cur, n, block);
2102                        if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2103                                max_hkey = hkey;
2104                }
2105
2106                high = xfs_btree_high_key_from_key(cur, key);
2107                memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2108        } else {
2109                memcpy(key, xfs_btree_key_addr(cur, 1, block),
2110                                cur->bc_ops->key_len);
2111        }
2112}
2113
2114/* Derive the keys for any btree block. */
2115void
2116xfs_btree_get_keys(
2117        struct xfs_btree_cur    *cur,
2118        struct xfs_btree_block  *block,
2119        union xfs_btree_key     *key)
2120{
2121        if (be16_to_cpu(block->bb_level) == 0)
2122                xfs_btree_get_leaf_keys(cur, block, key);
2123        else
2124                xfs_btree_get_node_keys(cur, block, key);
2125}
2126
2127/*
2128 * Decide if we need to update the parent keys of a btree block.  For
2129 * a standard btree this is only necessary if we're updating the first
2130 * record/key.  For an overlapping btree, we must always update the
2131 * keys because the highest key can be in any of the records or keys
2132 * in the block.
2133 */
2134static inline bool
2135xfs_btree_needs_key_update(
2136        struct xfs_btree_cur    *cur,
2137        int                     ptr)
2138{
2139        return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2140}
2141
2142/*
2143 * Update the low and high parent keys of the given level, progressing
2144 * towards the root.  If force_all is false, stop if the keys for a given
2145 * level do not need updating.
2146 */
2147STATIC int
2148__xfs_btree_updkeys(
2149        struct xfs_btree_cur    *cur,
2150        int                     level,
2151        struct xfs_btree_block  *block,
2152        struct xfs_buf          *bp0,
2153        bool                    force_all)
2154{
2155        union xfs_btree_key     key;    /* keys from current level */
2156        union xfs_btree_key     *lkey;  /* keys from the next level up */
2157        union xfs_btree_key     *hkey;
2158        union xfs_btree_key     *nlkey; /* keys from the next level up */
2159        union xfs_btree_key     *nhkey;
2160        struct xfs_buf          *bp;
2161        int                     ptr;
2162
2163        ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2164
2165        /* Exit if there aren't any parent levels to update. */
2166        if (level + 1 >= cur->bc_nlevels)
2167                return 0;
2168
2169        trace_xfs_btree_updkeys(cur, level, bp0);
2170
2171        lkey = &key;
2172        hkey = xfs_btree_high_key_from_key(cur, lkey);
2173        xfs_btree_get_keys(cur, block, lkey);
2174        for (level++; level < cur->bc_nlevels; level++) {
2175#ifdef DEBUG
2176                int             error;
2177#endif
2178                block = xfs_btree_get_block(cur, level, &bp);
2179                trace_xfs_btree_updkeys(cur, level, bp);
2180#ifdef DEBUG
2181                error = xfs_btree_check_block(cur, block, level, bp);
2182                if (error)
2183                        return error;
2184#endif
2185                ptr = cur->bc_levels[level].ptr;
2186                nlkey = xfs_btree_key_addr(cur, ptr, block);
2187                nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2188                if (!force_all &&
2189                    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
2190                      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
2191                        break;
2192                xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2193                xfs_btree_log_keys(cur, bp, ptr, ptr);
2194                if (level + 1 >= cur->bc_nlevels)
2195                        break;
2196                xfs_btree_get_node_keys(cur, block, lkey);
2197        }
2198
2199        return 0;
2200}
2201
2202/* Update all the keys from some level in cursor back to the root. */
2203STATIC int
2204xfs_btree_updkeys_force(
2205        struct xfs_btree_cur    *cur,
2206        int                     level)
2207{
2208        struct xfs_buf          *bp;
2209        struct xfs_btree_block  *block;
2210
2211        block = xfs_btree_get_block(cur, level, &bp);
2212        return __xfs_btree_updkeys(cur, level, block, bp, true);
2213}
2214
2215/*
2216 * Update the parent keys of the given level, progressing towards the root.
2217 */
2218STATIC int
2219xfs_btree_update_keys(
2220        struct xfs_btree_cur    *cur,
2221        int                     level)
2222{
2223        struct xfs_btree_block  *block;
2224        struct xfs_buf          *bp;
2225        union xfs_btree_key     *kp;
2226        union xfs_btree_key     key;
2227        int                     ptr;
2228
2229        ASSERT(level >= 0);
2230
2231        block = xfs_btree_get_block(cur, level, &bp);
2232        if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2233                return __xfs_btree_updkeys(cur, level, block, bp, false);
2234
2235        /*
2236         * Go up the tree from this level toward the root.
2237         * At each level, update the key value to the value input.
2238         * Stop when we reach a level where the cursor isn't pointing
2239         * at the first entry in the block.
2240         */
2241        xfs_btree_get_keys(cur, block, &key);
2242        for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2243#ifdef DEBUG
2244                int             error;
2245#endif
2246                block = xfs_btree_get_block(cur, level, &bp);
2247#ifdef DEBUG
2248                error = xfs_btree_check_block(cur, block, level, bp);
2249                if (error)
2250                        return error;
2251#endif
2252                ptr = cur->bc_levels[level].ptr;
2253                kp = xfs_btree_key_addr(cur, ptr, block);
2254                xfs_btree_copy_keys(cur, kp, &key, 1);
2255                xfs_btree_log_keys(cur, bp, ptr, ptr);
2256        }
2257
2258        return 0;
2259}
2260
2261/*
2262 * Update the record referred to by cur to the value in the
2263 * given record. This either works (return 0) or gets an
2264 * EFSCORRUPTED error.
2265 */
2266int
2267xfs_btree_update(
2268        struct xfs_btree_cur    *cur,
2269        union xfs_btree_rec     *rec)
2270{
2271        struct xfs_btree_block  *block;
2272        struct xfs_buf          *bp;
2273        int                     error;
2274        int                     ptr;
2275        union xfs_btree_rec     *rp;
2276
2277        /* Pick up the current block. */
2278        block = xfs_btree_get_block(cur, 0, &bp);
2279
2280#ifdef DEBUG
2281        error = xfs_btree_check_block(cur, block, 0, bp);
2282        if (error)
2283                goto error0;
2284#endif
2285        /* Get the address of the rec to be updated. */
2286        ptr = cur->bc_levels[0].ptr;
2287        rp = xfs_btree_rec_addr(cur, ptr, block);
2288
2289        /* Fill in the new contents and log them. */
2290        xfs_btree_copy_recs(cur, rp, rec, 1);
2291        xfs_btree_log_recs(cur, bp, ptr, ptr);
2292
2293        /*
2294         * If we are tracking the last record in the tree and
2295         * we are at the far right edge of the tree, update it.
2296         */
2297        if (xfs_btree_is_lastrec(cur, block, 0)) {
2298                cur->bc_ops->update_lastrec(cur, block, rec,
2299                                            ptr, LASTREC_UPDATE);
2300        }
2301
2302        /* Pass new key value up to our parent. */
2303        if (xfs_btree_needs_key_update(cur, ptr)) {
2304                error = xfs_btree_update_keys(cur, 0);
2305                if (error)
2306                        goto error0;
2307        }
2308
2309        return 0;
2310
2311error0:
2312        return error;
2313}
2314
2315/*
2316 * Move 1 record left from cur/level if possible.
2317 * Update cur to reflect the new path.
2318 */
2319STATIC int                                      /* error */
2320xfs_btree_lshift(
2321        struct xfs_btree_cur    *cur,
2322        int                     level,
2323        int                     *stat)          /* success/failure */
2324{
2325        struct xfs_buf          *lbp;           /* left buffer pointer */
2326        struct xfs_btree_block  *left;          /* left btree block */
2327        int                     lrecs;          /* left record count */
2328        struct xfs_buf          *rbp;           /* right buffer pointer */
2329        struct xfs_btree_block  *right;         /* right btree block */
2330        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2331        int                     rrecs;          /* right record count */
2332        union xfs_btree_ptr     lptr;           /* left btree pointer */
2333        union xfs_btree_key     *rkp = NULL;    /* right btree key */
2334        union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
2335        union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
2336        int                     error;          /* error return value */
2337        int                     i;
2338
2339        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2340            level == cur->bc_nlevels - 1)
2341                goto out0;
2342
2343        /* Set up variables for this block as "right". */
2344        right = xfs_btree_get_block(cur, level, &rbp);
2345
2346#ifdef DEBUG
2347        error = xfs_btree_check_block(cur, right, level, rbp);
2348        if (error)
2349                goto error0;
2350#endif
2351
2352        /* If we've got no left sibling then we can't shift an entry left. */
2353        xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2354        if (xfs_btree_ptr_is_null(cur, &lptr))
2355                goto out0;
2356
2357        /*
2358         * If the cursor entry is the one that would be moved, don't
2359         * do it... it's too complicated.
2360         */
2361        if (cur->bc_levels[level].ptr <= 1)
2362                goto out0;
2363
2364        /* Set up the left neighbor as "left". */
2365        error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2366        if (error)
2367                goto error0;
2368
2369        /* If it's full, it can't take another entry. */
2370        lrecs = xfs_btree_get_numrecs(left);
2371        if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2372                goto out0;
2373
2374        rrecs = xfs_btree_get_numrecs(right);
2375
2376        /*
2377         * We add one entry to the left side and remove one for the right side.
2378         * Account for it here, the changes will be updated on disk and logged
2379         * later.
2380         */
2381        lrecs++;
2382        rrecs--;
2383
2384        XFS_BTREE_STATS_INC(cur, lshift);
2385        XFS_BTREE_STATS_ADD(cur, moves, 1);
2386
2387        /*
2388         * If non-leaf, copy a key and a ptr to the left block.
2389         * Log the changes to the left block.
2390         */
2391        if (level > 0) {
2392                /* It's a non-leaf.  Move keys and pointers. */
2393                union xfs_btree_key     *lkp;   /* left btree key */
2394                union xfs_btree_ptr     *lpp;   /* left address pointer */
2395
2396                lkp = xfs_btree_key_addr(cur, lrecs, left);
2397                rkp = xfs_btree_key_addr(cur, 1, right);
2398
2399                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2400                rpp = xfs_btree_ptr_addr(cur, 1, right);
2401
2402                error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
2403                if (error)
2404                        goto error0;
2405
2406                xfs_btree_copy_keys(cur, lkp, rkp, 1);
2407                xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2408
2409                xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2410                xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2411
2412                ASSERT(cur->bc_ops->keys_inorder(cur,
2413                        xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2414        } else {
2415                /* It's a leaf.  Move records.  */
2416                union xfs_btree_rec     *lrp;   /* left record pointer */
2417
2418                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2419                rrp = xfs_btree_rec_addr(cur, 1, right);
2420
2421                xfs_btree_copy_recs(cur, lrp, rrp, 1);
2422                xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2423
2424                ASSERT(cur->bc_ops->recs_inorder(cur,
2425                        xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2426        }
2427
2428        xfs_btree_set_numrecs(left, lrecs);
2429        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2430
2431        xfs_btree_set_numrecs(right, rrecs);
2432        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2433
2434        /*
2435         * Slide the contents of right down one entry.
2436         */
2437        XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2438        if (level > 0) {
2439                /* It's a nonleaf. operate on keys and ptrs */
2440                for (i = 0; i < rrecs; i++) {
2441                        error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
2442                        if (error)
2443                                goto error0;
2444                }
2445
2446                xfs_btree_shift_keys(cur,
2447                                xfs_btree_key_addr(cur, 2, right),
2448                                -1, rrecs);
2449                xfs_btree_shift_ptrs(cur,
2450                                xfs_btree_ptr_addr(cur, 2, right),
2451                                -1, rrecs);
2452
2453                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2454                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2455        } else {
2456                /* It's a leaf. operate on records */
2457                xfs_btree_shift_recs(cur,
2458                        xfs_btree_rec_addr(cur, 2, right),
2459                        -1, rrecs);
2460                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2461        }
2462
2463        /*
2464         * Using a temporary cursor, update the parent key values of the
2465         * block on the left.
2466         */
2467        if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2468                error = xfs_btree_dup_cursor(cur, &tcur);
2469                if (error)
2470                        goto error0;
2471                i = xfs_btree_firstrec(tcur, level);
2472                if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2473                        error = -EFSCORRUPTED;
2474                        goto error0;
2475                }
2476
2477                error = xfs_btree_decrement(tcur, level, &i);
2478                if (error)
2479                        goto error1;
2480
2481                /* Update the parent high keys of the left block, if needed. */
2482                error = xfs_btree_update_keys(tcur, level);
2483                if (error)
2484                        goto error1;
2485
2486                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2487        }
2488
2489        /* Update the parent keys of the right block. */
2490        error = xfs_btree_update_keys(cur, level);
2491        if (error)
2492                goto error0;
2493
2494        /* Slide the cursor value left one. */
2495        cur->bc_levels[level].ptr--;
2496
2497        *stat = 1;
2498        return 0;
2499
2500out0:
2501        *stat = 0;
2502        return 0;
2503
2504error0:
2505        return error;
2506
2507error1:
2508        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2509        return error;
2510}
2511
2512/*
2513 * Move 1 record right from cur/level if possible.
2514 * Update cur to reflect the new path.
2515 */
2516STATIC int                                      /* error */
2517xfs_btree_rshift(
2518        struct xfs_btree_cur    *cur,
2519        int                     level,
2520        int                     *stat)          /* success/failure */
2521{
2522        struct xfs_buf          *lbp;           /* left buffer pointer */
2523        struct xfs_btree_block  *left;          /* left btree block */
2524        struct xfs_buf          *rbp;           /* right buffer pointer */
2525        struct xfs_btree_block  *right;         /* right btree block */
2526        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2527        union xfs_btree_ptr     rptr;           /* right block pointer */
2528        union xfs_btree_key     *rkp;           /* right btree key */
2529        int                     rrecs;          /* right record count */
2530        int                     lrecs;          /* left record count */
2531        int                     error;          /* error return value */
2532        int                     i;              /* loop counter */
2533
2534        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2535            (level == cur->bc_nlevels - 1))
2536                goto out0;
2537
2538        /* Set up variables for this block as "left". */
2539        left = xfs_btree_get_block(cur, level, &lbp);
2540
2541#ifdef DEBUG
2542        error = xfs_btree_check_block(cur, left, level, lbp);
2543        if (error)
2544                goto error0;
2545#endif
2546
2547        /* If we've got no right sibling then we can't shift an entry right. */
2548        xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2549        if (xfs_btree_ptr_is_null(cur, &rptr))
2550                goto out0;
2551
2552        /*
2553         * If the cursor entry is the one that would be moved, don't
2554         * do it... it's too complicated.
2555         */
2556        lrecs = xfs_btree_get_numrecs(left);
2557        if (cur->bc_levels[level].ptr >= lrecs)
2558                goto out0;
2559
2560        /* Set up the right neighbor as "right". */
2561        error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2562        if (error)
2563                goto error0;
2564
2565        /* If it's full, it can't take another entry. */
2566        rrecs = xfs_btree_get_numrecs(right);
2567        if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2568                goto out0;
2569
2570        XFS_BTREE_STATS_INC(cur, rshift);
2571        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2572
2573        /*
2574         * Make a hole at the start of the right neighbor block, then
2575         * copy the last left block entry to the hole.
2576         */
2577        if (level > 0) {
2578                /* It's a nonleaf. make a hole in the keys and ptrs */
2579                union xfs_btree_key     *lkp;
2580                union xfs_btree_ptr     *lpp;
2581                union xfs_btree_ptr     *rpp;
2582
2583                lkp = xfs_btree_key_addr(cur, lrecs, left);
2584                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2585                rkp = xfs_btree_key_addr(cur, 1, right);
2586                rpp = xfs_btree_ptr_addr(cur, 1, right);
2587
2588                for (i = rrecs - 1; i >= 0; i--) {
2589                        error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
2590                        if (error)
2591                                goto error0;
2592                }
2593
2594                xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2595                xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2596
2597                error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
2598                if (error)
2599                        goto error0;
2600
2601                /* Now put the new data in, and log it. */
2602                xfs_btree_copy_keys(cur, rkp, lkp, 1);
2603                xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2604
2605                xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2606                xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2607
2608                ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2609                        xfs_btree_key_addr(cur, 2, right)));
2610        } else {
2611                /* It's a leaf. make a hole in the records */
2612                union xfs_btree_rec     *lrp;
2613                union xfs_btree_rec     *rrp;
2614
2615                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2616                rrp = xfs_btree_rec_addr(cur, 1, right);
2617
2618                xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2619
2620                /* Now put the new data in, and log it. */
2621                xfs_btree_copy_recs(cur, rrp, lrp, 1);
2622                xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2623        }
2624
2625        /*
2626         * Decrement and log left's numrecs, bump and log right's numrecs.
2627         */
2628        xfs_btree_set_numrecs(left, --lrecs);
2629        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2630
2631        xfs_btree_set_numrecs(right, ++rrecs);
2632        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2633
2634        /*
2635         * Using a temporary cursor, update the parent key values of the
2636         * block on the right.
2637         */
2638        error = xfs_btree_dup_cursor(cur, &tcur);
2639        if (error)
2640                goto error0;
2641        i = xfs_btree_lastrec(tcur, level);
2642        if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2643                error = -EFSCORRUPTED;
2644                goto error0;
2645        }
2646
2647        error = xfs_btree_increment(tcur, level, &i);
2648        if (error)
2649                goto error1;
2650
2651        /* Update the parent high keys of the left block, if needed. */
2652        if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2653                error = xfs_btree_update_keys(cur, level);
2654                if (error)
2655                        goto error1;
2656        }
2657
2658        /* Update the parent keys of the right block. */
2659        error = xfs_btree_update_keys(tcur, level);
2660        if (error)
2661                goto error1;
2662
2663        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2664
2665        *stat = 1;
2666        return 0;
2667
2668out0:
2669        *stat = 0;
2670        return 0;
2671
2672error0:
2673        return error;
2674
2675error1:
2676        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2677        return error;
2678}
2679
2680/*
2681 * Split cur/level block in half.
2682 * Return new block number and the key to its first
2683 * record (to be inserted into parent).
2684 */
2685STATIC int                                      /* error */
2686__xfs_btree_split(
2687        struct xfs_btree_cur    *cur,
2688        int                     level,
2689        union xfs_btree_ptr     *ptrp,
2690        union xfs_btree_key     *key,
2691        struct xfs_btree_cur    **curp,
2692        int                     *stat)          /* success/failure */
2693{
2694        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2695        struct xfs_buf          *lbp;           /* left buffer pointer */
2696        struct xfs_btree_block  *left;          /* left btree block */
2697        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2698        struct xfs_buf          *rbp;           /* right buffer pointer */
2699        struct xfs_btree_block  *right;         /* right btree block */
2700        union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2701        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2702        struct xfs_btree_block  *rrblock;       /* right-right btree block */
2703        int                     lrecs;
2704        int                     rrecs;
2705        int                     src_index;
2706        int                     error;          /* error return value */
2707        int                     i;
2708
2709        XFS_BTREE_STATS_INC(cur, split);
2710
2711        /* Set up left block (current one). */
2712        left = xfs_btree_get_block(cur, level, &lbp);
2713
2714#ifdef DEBUG
2715        error = xfs_btree_check_block(cur, left, level, lbp);
2716        if (error)
2717                goto error0;
2718#endif
2719
2720        xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2721
2722        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2723        error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
2724        if (error)
2725                goto error0;
2726        if (*stat == 0)
2727                goto out0;
2728        XFS_BTREE_STATS_INC(cur, alloc);
2729
2730        /* Set up the new block as "right". */
2731        error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
2732        if (error)
2733                goto error0;
2734
2735        /* Fill in the btree header for the new right block. */
2736        xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2737
2738        /*
2739         * Split the entries between the old and the new block evenly.
2740         * Make sure that if there's an odd number of entries now, that
2741         * each new block will have the same number of entries.
2742         */
2743        lrecs = xfs_btree_get_numrecs(left);
2744        rrecs = lrecs / 2;
2745        if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
2746                rrecs++;
2747        src_index = (lrecs - rrecs + 1);
2748
2749        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2750
2751        /* Adjust numrecs for the later get_*_keys() calls. */
2752        lrecs -= rrecs;
2753        xfs_btree_set_numrecs(left, lrecs);
2754        xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2755
2756        /*
2757         * Copy btree block entries from the left block over to the
2758         * new block, the right. Update the right block and log the
2759         * changes.
2760         */
2761        if (level > 0) {
2762                /* It's a non-leaf.  Move keys and pointers. */
2763                union xfs_btree_key     *lkp;   /* left btree key */
2764                union xfs_btree_ptr     *lpp;   /* left address pointer */
2765                union xfs_btree_key     *rkp;   /* right btree key */
2766                union xfs_btree_ptr     *rpp;   /* right address pointer */
2767
2768                lkp = xfs_btree_key_addr(cur, src_index, left);
2769                lpp = xfs_btree_ptr_addr(cur, src_index, left);
2770                rkp = xfs_btree_key_addr(cur, 1, right);
2771                rpp = xfs_btree_ptr_addr(cur, 1, right);
2772
2773                for (i = src_index; i < rrecs; i++) {
2774                        error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
2775                        if (error)
2776                                goto error0;
2777                }
2778
2779                /* Copy the keys & pointers to the new block. */
2780                xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2781                xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2782
2783                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2784                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2785
2786                /* Stash the keys of the new block for later insertion. */
2787                xfs_btree_get_node_keys(cur, right, key);
2788        } else {
2789                /* It's a leaf.  Move records.  */
2790                union xfs_btree_rec     *lrp;   /* left record pointer */
2791                union xfs_btree_rec     *rrp;   /* right record pointer */
2792
2793                lrp = xfs_btree_rec_addr(cur, src_index, left);
2794                rrp = xfs_btree_rec_addr(cur, 1, right);
2795
2796                /* Copy records to the new block. */
2797                xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2798                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2799
2800                /* Stash the keys of the new block for later insertion. */
2801                xfs_btree_get_leaf_keys(cur, right, key);
2802        }
2803
2804        /*
2805         * Find the left block number by looking in the buffer.
2806         * Adjust sibling pointers.
2807         */
2808        xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2809        xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2810        xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2811        xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2812
2813        xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2814        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2815
2816        /*
2817         * If there's a block to the new block's right, make that block
2818         * point back to right instead of to left.
2819         */
2820        if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2821                error = xfs_btree_read_buf_block(cur, &rrptr,
2822                                                        0, &rrblock, &rrbp);
2823                if (error)
2824                        goto error0;
2825                xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2826                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2827        }
2828
2829        /* Update the parent high keys of the left block, if needed. */
2830        if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2831                error = xfs_btree_update_keys(cur, level);
2832                if (error)
2833                        goto error0;
2834        }
2835
2836        /*
2837         * If the cursor is really in the right block, move it there.
2838         * If it's just pointing past the last entry in left, then we'll
2839         * insert there, so don't change anything in that case.
2840         */
2841        if (cur->bc_levels[level].ptr > lrecs + 1) {
2842                xfs_btree_setbuf(cur, level, rbp);
2843                cur->bc_levels[level].ptr -= lrecs;
2844        }
2845        /*
2846         * If there are more levels, we'll need another cursor which refers
2847         * the right block, no matter where this cursor was.
2848         */
2849        if (level + 1 < cur->bc_nlevels) {
2850                error = xfs_btree_dup_cursor(cur, curp);
2851                if (error)
2852                        goto error0;
2853                (*curp)->bc_levels[level + 1].ptr++;
2854        }
2855        *ptrp = rptr;
2856        *stat = 1;
2857        return 0;
2858out0:
2859        *stat = 0;
2860        return 0;
2861
2862error0:
2863        return error;
2864}
2865
2866#ifdef __KERNEL__
2867struct xfs_btree_split_args {
2868        struct xfs_btree_cur    *cur;
2869        int                     level;
2870        union xfs_btree_ptr     *ptrp;
2871        union xfs_btree_key     *key;
2872        struct xfs_btree_cur    **curp;
2873        int                     *stat;          /* success/failure */
2874        int                     result;
2875        bool                    kswapd; /* allocation in kswapd context */
2876        struct completion       *done;
2877        struct work_struct      work;
2878};
2879
2880/*
2881 * Stack switching interfaces for allocation
2882 */
2883static void
2884xfs_btree_split_worker(
2885        struct work_struct      *work)
2886{
2887        struct xfs_btree_split_args     *args = container_of(work,
2888                                                struct xfs_btree_split_args, work);
2889        unsigned long           pflags;
2890        unsigned long           new_pflags = 0;
2891
2892        /*
2893         * we are in a transaction context here, but may also be doing work
2894         * in kswapd context, and hence we may need to inherit that state
2895         * temporarily to ensure that we don't block waiting for memory reclaim
2896         * in any way.
2897         */
2898        if (args->kswapd)
2899                new_pflags |= PF_MEMALLOC | PF_KSWAPD;
2900
2901        current_set_flags_nested(&pflags, new_pflags);
2902        xfs_trans_set_context(args->cur->bc_tp);
2903
2904        args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2905                                         args->key, args->curp, args->stat);
2906
2907        xfs_trans_clear_context(args->cur->bc_tp);
2908        current_restore_flags_nested(&pflags, new_pflags);
2909
2910        /*
2911         * Do not access args after complete() has run here. We don't own args
2912         * and the owner may run and free args before we return here.
2913         */
2914        complete(args->done);
2915
2916}
2917
2918/*
2919 * BMBT split requests often come in with little stack to work on. Push
2920 * them off to a worker thread so there is lots of stack to use. For the other
2921 * btree types, just call directly to avoid the context switch overhead here.
2922 */
2923STATIC int                                      /* error */
2924xfs_btree_split(
2925        struct xfs_btree_cur    *cur,
2926        int                     level,
2927        union xfs_btree_ptr     *ptrp,
2928        union xfs_btree_key     *key,
2929        struct xfs_btree_cur    **curp,
2930        int                     *stat)          /* success/failure */
2931{
2932        struct xfs_btree_split_args     args;
2933        DECLARE_COMPLETION_ONSTACK(done);
2934
2935        if (cur->bc_btnum != XFS_BTNUM_BMAP)
2936                return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2937
2938        args.cur = cur;
2939        args.level = level;
2940        args.ptrp = ptrp;
2941        args.key = key;
2942        args.curp = curp;
2943        args.stat = stat;
2944        args.done = &done;
2945        args.kswapd = current_is_kswapd();
2946        INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2947        queue_work(xfs_alloc_wq, &args.work);
2948        wait_for_completion(&done);
2949        destroy_work_on_stack(&args.work);
2950        return args.result;
2951}
2952#else
2953#define xfs_btree_split __xfs_btree_split
2954#endif /* __KERNEL__ */
2955
2956
2957/*
2958 * Copy the old inode root contents into a real block and make the
2959 * broot point to it.
2960 */
2961int                                             /* error */
2962xfs_btree_new_iroot(
2963        struct xfs_btree_cur    *cur,           /* btree cursor */
2964        int                     *logflags,      /* logging flags for inode */
2965        int                     *stat)          /* return status - 0 fail */
2966{
2967        struct xfs_buf          *cbp;           /* buffer for cblock */
2968        struct xfs_btree_block  *block;         /* btree block */
2969        struct xfs_btree_block  *cblock;        /* child btree block */
2970        union xfs_btree_key     *ckp;           /* child key pointer */
2971        union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2972        union xfs_btree_key     *kp;            /* pointer to btree key */
2973        union xfs_btree_ptr     *pp;            /* pointer to block addr */
2974        union xfs_btree_ptr     nptr;           /* new block addr */
2975        int                     level;          /* btree level */
2976        int                     error;          /* error return code */
2977        int                     i;              /* loop counter */
2978
2979        XFS_BTREE_STATS_INC(cur, newroot);
2980
2981        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2982
2983        level = cur->bc_nlevels - 1;
2984
2985        block = xfs_btree_get_iroot(cur);
2986        pp = xfs_btree_ptr_addr(cur, 1, block);
2987
2988        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2989        error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
2990        if (error)
2991                goto error0;
2992        if (*stat == 0)
2993                return 0;
2994
2995        XFS_BTREE_STATS_INC(cur, alloc);
2996
2997        /* Copy the root into a real block. */
2998        error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
2999        if (error)
3000                goto error0;
3001
3002        /*
3003         * we can't just memcpy() the root in for CRC enabled btree blocks.
3004         * In that case have to also ensure the blkno remains correct
3005         */
3006        memcpy(cblock, block, xfs_btree_block_len(cur));
3007        if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3008                __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
3009                if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3010                        cblock->bb_u.l.bb_blkno = bno;
3011                else
3012                        cblock->bb_u.s.bb_blkno = bno;
3013        }
3014
3015        be16_add_cpu(&block->bb_level, 1);
3016        xfs_btree_set_numrecs(block, 1);
3017        cur->bc_nlevels++;
3018        ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3019        cur->bc_levels[level + 1].ptr = 1;
3020
3021        kp = xfs_btree_key_addr(cur, 1, block);
3022        ckp = xfs_btree_key_addr(cur, 1, cblock);
3023        xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
3024
3025        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3026        for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
3027                error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3028                if (error)
3029                        goto error0;
3030        }
3031
3032        xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
3033
3034        error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
3035        if (error)
3036                goto error0;
3037
3038        xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
3039
3040        xfs_iroot_realloc(cur->bc_ino.ip,
3041                          1 - xfs_btree_get_numrecs(cblock),
3042                          cur->bc_ino.whichfork);
3043
3044        xfs_btree_setbuf(cur, level, cbp);
3045
3046        /*
3047         * Do all this logging at the end so that
3048         * the root is at the right level.
3049         */
3050        xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3051        xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3052        xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3053
3054        *logflags |=
3055                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
3056        *stat = 1;
3057        return 0;
3058error0:
3059        return error;
3060}
3061
3062/*
3063 * Allocate a new root block, fill it in.
3064 */
3065STATIC int                              /* error */
3066xfs_btree_new_root(
3067        struct xfs_btree_cur    *cur,   /* btree cursor */
3068        int                     *stat)  /* success/failure */
3069{
3070        struct xfs_btree_block  *block; /* one half of the old root block */
3071        struct xfs_buf          *bp;    /* buffer containing block */
3072        int                     error;  /* error return value */
3073        struct xfs_buf          *lbp;   /* left buffer pointer */
3074        struct xfs_btree_block  *left;  /* left btree block */
3075        struct xfs_buf          *nbp;   /* new (root) buffer */
3076        struct xfs_btree_block  *new;   /* new (root) btree block */
3077        int                     nptr;   /* new value for key index, 1 or 2 */
3078        struct xfs_buf          *rbp;   /* right buffer pointer */
3079        struct xfs_btree_block  *right; /* right btree block */
3080        union xfs_btree_ptr     rptr;
3081        union xfs_btree_ptr     lptr;
3082
3083        XFS_BTREE_STATS_INC(cur, newroot);
3084
3085        /* initialise our start point from the cursor */
3086        cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3087
3088        /* Allocate the new block. If we can't do it, we're toast. Give up. */
3089        error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
3090        if (error)
3091                goto error0;
3092        if (*stat == 0)
3093                goto out0;
3094        XFS_BTREE_STATS_INC(cur, alloc);
3095
3096        /* Set up the new block. */
3097        error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
3098        if (error)
3099                goto error0;
3100
3101        /* Set the root in the holding structure  increasing the level by 1. */
3102        cur->bc_ops->set_root(cur, &lptr, 1);
3103
3104        /*
3105         * At the previous root level there are now two blocks: the old root,
3106         * and the new block generated when it was split.  We don't know which
3107         * one the cursor is pointing at, so we set up variables "left" and
3108         * "right" for each case.
3109         */
3110        block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3111
3112#ifdef DEBUG
3113        error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3114        if (error)
3115                goto error0;
3116#endif
3117
3118        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3119        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3120                /* Our block is left, pick up the right block. */
3121                lbp = bp;
3122                xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3123                left = block;
3124                error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3125                if (error)
3126                        goto error0;
3127                bp = rbp;
3128                nptr = 1;
3129        } else {
3130                /* Our block is right, pick up the left block. */
3131                rbp = bp;
3132                xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3133                right = block;
3134                xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3135                error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3136                if (error)
3137                        goto error0;
3138                bp = lbp;
3139                nptr = 2;
3140        }
3141
3142        /* Fill in the new block's btree header and log it. */
3143        xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3144        xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3145        ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3146                        !xfs_btree_ptr_is_null(cur, &rptr));
3147
3148        /* Fill in the key data in the new root. */
3149        if (xfs_btree_get_level(left) > 0) {
3150                /*
3151                 * Get the keys for the left block's keys and put them directly
3152                 * in the parent block.  Do the same for the right block.
3153                 */
3154                xfs_btree_get_node_keys(cur, left,
3155                                xfs_btree_key_addr(cur, 1, new));
3156                xfs_btree_get_node_keys(cur, right,
3157                                xfs_btree_key_addr(cur, 2, new));
3158        } else {
3159                /*
3160                 * Get the keys for the left block's records and put them
3161                 * directly in the parent block.  Do the same for the right
3162                 * block.
3163                 */
3164                xfs_btree_get_leaf_keys(cur, left,
3165                        xfs_btree_key_addr(cur, 1, new));
3166                xfs_btree_get_leaf_keys(cur, right,
3167                        xfs_btree_key_addr(cur, 2, new));
3168        }
3169        xfs_btree_log_keys(cur, nbp, 1, 2);
3170
3171        /* Fill in the pointer data in the new root. */
3172        xfs_btree_copy_ptrs(cur,
3173                xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3174        xfs_btree_copy_ptrs(cur,
3175                xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3176        xfs_btree_log_ptrs(cur, nbp, 1, 2);
3177
3178        /* Fix up the cursor. */
3179        xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3180        cur->bc_levels[cur->bc_nlevels].ptr = nptr;
3181        cur->bc_nlevels++;
3182        ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3183        *stat = 1;
3184        return 0;
3185error0:
3186        return error;
3187out0:
3188        *stat = 0;
3189        return 0;
3190}
3191
3192STATIC int
3193xfs_btree_make_block_unfull(
3194        struct xfs_btree_cur    *cur,   /* btree cursor */
3195        int                     level,  /* btree level */
3196        int                     numrecs,/* # of recs in block */
3197        int                     *oindex,/* old tree index */
3198        int                     *index, /* new tree index */
3199        union xfs_btree_ptr     *nptr,  /* new btree ptr */
3200        struct xfs_btree_cur    **ncur, /* new btree cursor */
3201        union xfs_btree_key     *key,   /* key of new block */
3202        int                     *stat)
3203{
3204        int                     error = 0;
3205
3206        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3207            level == cur->bc_nlevels - 1) {
3208                struct xfs_inode *ip = cur->bc_ino.ip;
3209
3210                if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3211                        /* A root block that can be made bigger. */
3212                        xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
3213                        *stat = 1;
3214                } else {
3215                        /* A root block that needs replacing */
3216                        int     logflags = 0;
3217
3218                        error = xfs_btree_new_iroot(cur, &logflags, stat);
3219                        if (error || *stat == 0)
3220                                return error;
3221
3222                        xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3223                }
3224
3225                return 0;
3226        }
3227
3228        /* First, try shifting an entry to the right neighbor. */
3229        error = xfs_btree_rshift(cur, level, stat);
3230        if (error || *stat)
3231                return error;
3232
3233        /* Next, try shifting an entry to the left neighbor. */
3234        error = xfs_btree_lshift(cur, level, stat);
3235        if (error)
3236                return error;
3237
3238        if (*stat) {
3239                *oindex = *index = cur->bc_levels[level].ptr;
3240                return 0;
3241        }
3242
3243        /*
3244         * Next, try splitting the current block in half.
3245         *
3246         * If this works we have to re-set our variables because we
3247         * could be in a different block now.
3248         */
3249        error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3250        if (error || *stat == 0)
3251                return error;
3252
3253
3254        *index = cur->bc_levels[level].ptr;
3255        return 0;
3256}
3257
3258/*
3259 * Insert one record/level.  Return information to the caller
3260 * allowing the next level up to proceed if necessary.
3261 */
3262STATIC int
3263xfs_btree_insrec(
3264        struct xfs_btree_cur    *cur,   /* btree cursor */
3265        int                     level,  /* level to insert record at */
3266        union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
3267        union xfs_btree_rec     *rec,   /* record to insert */
3268        union xfs_btree_key     *key,   /* i/o: block key for ptrp */
3269        struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
3270        int                     *stat)  /* success/failure */
3271{
3272        struct xfs_btree_block  *block; /* btree block */
3273        struct xfs_buf          *bp;    /* buffer for block */
3274        union xfs_btree_ptr     nptr;   /* new block ptr */
3275        struct xfs_btree_cur    *ncur = NULL;   /* new btree cursor */
3276        union xfs_btree_key     nkey;   /* new block key */
3277        union xfs_btree_key     *lkey;
3278        int                     optr;   /* old key/record index */
3279        int                     ptr;    /* key/record index */
3280        int                     numrecs;/* number of records */
3281        int                     error;  /* error return value */
3282        int                     i;
3283        xfs_daddr_t             old_bn;
3284
3285        ncur = NULL;
3286        lkey = &nkey;
3287
3288        /*
3289         * If we have an external root pointer, and we've made it to the
3290         * root level, allocate a new root block and we're done.
3291         */
3292        if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3293            (level >= cur->bc_nlevels)) {
3294                error = xfs_btree_new_root(cur, stat);
3295                xfs_btree_set_ptr_null(cur, ptrp);
3296
3297                return error;
3298        }
3299
3300        /* If we're off the left edge, return failure. */
3301        ptr = cur->bc_levels[level].ptr;
3302        if (ptr == 0) {
3303                *stat = 0;
3304                return 0;
3305        }
3306
3307        optr = ptr;
3308
3309        XFS_BTREE_STATS_INC(cur, insrec);
3310
3311        /* Get pointers to the btree buffer and block. */
3312        block = xfs_btree_get_block(cur, level, &bp);
3313        old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
3314        numrecs = xfs_btree_get_numrecs(block);
3315
3316#ifdef DEBUG
3317        error = xfs_btree_check_block(cur, block, level, bp);
3318        if (error)
3319                goto error0;
3320
3321        /* Check that the new entry is being inserted in the right place. */
3322        if (ptr <= numrecs) {
3323                if (level == 0) {
3324                        ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3325                                xfs_btree_rec_addr(cur, ptr, block)));
3326                } else {
3327                        ASSERT(cur->bc_ops->keys_inorder(cur, key,
3328                                xfs_btree_key_addr(cur, ptr, block)));
3329                }
3330        }
3331#endif
3332
3333        /*
3334         * If the block is full, we can't insert the new entry until we
3335         * make the block un-full.
3336         */
3337        xfs_btree_set_ptr_null(cur, &nptr);
3338        if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3339                error = xfs_btree_make_block_unfull(cur, level, numrecs,
3340                                        &optr, &ptr, &nptr, &ncur, lkey, stat);
3341                if (error || *stat == 0)
3342                        goto error0;
3343        }
3344
3345        /*
3346         * The current block may have changed if the block was
3347         * previously full and we have just made space in it.
3348         */
3349        block = xfs_btree_get_block(cur, level, &bp);
3350        numrecs = xfs_btree_get_numrecs(block);
3351
3352#ifdef DEBUG
3353        error = xfs_btree_check_block(cur, block, level, bp);
3354        if (error)
3355                goto error0;
3356#endif
3357
3358        /*
3359         * At this point we know there's room for our new entry in the block
3360         * we're pointing at.
3361         */
3362        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3363
3364        if (level > 0) {
3365                /* It's a nonleaf. make a hole in the keys and ptrs */
3366                union xfs_btree_key     *kp;
3367                union xfs_btree_ptr     *pp;
3368
3369                kp = xfs_btree_key_addr(cur, ptr, block);
3370                pp = xfs_btree_ptr_addr(cur, ptr, block);
3371
3372                for (i = numrecs - ptr; i >= 0; i--) {
3373                        error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3374                        if (error)
3375                                goto error0;
3376                }
3377
3378                xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3379                xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3380
3381                error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
3382                if (error)
3383                        goto error0;
3384
3385                /* Now put the new data in, bump numrecs and log it. */
3386                xfs_btree_copy_keys(cur, kp, key, 1);
3387                xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3388                numrecs++;
3389                xfs_btree_set_numrecs(block, numrecs);
3390                xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3391                xfs_btree_log_keys(cur, bp, ptr, numrecs);
3392#ifdef DEBUG
3393                if (ptr < numrecs) {
3394                        ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3395                                xfs_btree_key_addr(cur, ptr + 1, block)));
3396                }
3397#endif
3398        } else {
3399                /* It's a leaf. make a hole in the records */
3400                union xfs_btree_rec             *rp;
3401
3402                rp = xfs_btree_rec_addr(cur, ptr, block);
3403
3404                xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3405
3406                /* Now put the new data in, bump numrecs and log it. */
3407                xfs_btree_copy_recs(cur, rp, rec, 1);
3408                xfs_btree_set_numrecs(block, ++numrecs);
3409                xfs_btree_log_recs(cur, bp, ptr, numrecs);
3410#ifdef DEBUG
3411                if (ptr < numrecs) {
3412                        ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3413                                xfs_btree_rec_addr(cur, ptr + 1, block)));
3414                }
3415#endif
3416        }
3417
3418        /* Log the new number of records in the btree header. */
3419        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3420
3421        /*
3422         * If we just inserted into a new tree block, we have to
3423         * recalculate nkey here because nkey is out of date.
3424         *
3425         * Otherwise we're just updating an existing block (having shoved
3426         * some records into the new tree block), so use the regular key
3427         * update mechanism.
3428         */
3429        if (bp && xfs_buf_daddr(bp) != old_bn) {
3430                xfs_btree_get_keys(cur, block, lkey);
3431        } else if (xfs_btree_needs_key_update(cur, optr)) {
3432                error = xfs_btree_update_keys(cur, level);
3433                if (error)
3434                        goto error0;
3435        }
3436
3437        /*
3438         * If we are tracking the last record in the tree and
3439         * we are at the far right edge of the tree, update it.
3440         */
3441        if (xfs_btree_is_lastrec(cur, block, level)) {
3442                cur->bc_ops->update_lastrec(cur, block, rec,
3443                                            ptr, LASTREC_INSREC);
3444        }
3445
3446        /*
3447         * Return the new block number, if any.
3448         * If there is one, give back a record value and a cursor too.
3449         */
3450        *ptrp = nptr;
3451        if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3452                xfs_btree_copy_keys(cur, key, lkey, 1);
3453                *curp = ncur;
3454        }
3455
3456        *stat = 1;
3457        return 0;
3458
3459error0:
3460        if (ncur)
3461                xfs_btree_del_cursor(ncur, error);
3462        return error;
3463}
3464
3465/*
3466 * Insert the record at the point referenced by cur.
3467 *
3468 * A multi-level split of the tree on insert will invalidate the original
3469 * cursor.  All callers of this function should assume that the cursor is
3470 * no longer valid and revalidate it.
3471 */
3472int
3473xfs_btree_insert(
3474        struct xfs_btree_cur    *cur,
3475        int                     *stat)
3476{
3477        int                     error;  /* error return value */
3478        int                     i;      /* result value, 0 for failure */
3479        int                     level;  /* current level number in btree */
3480        union xfs_btree_ptr     nptr;   /* new block number (split result) */
3481        struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
3482        struct xfs_btree_cur    *pcur;  /* previous level's cursor */
3483        union xfs_btree_key     bkey;   /* key of block to insert */
3484        union xfs_btree_key     *key;
3485        union xfs_btree_rec     rec;    /* record to insert */
3486
3487        level = 0;
3488        ncur = NULL;
3489        pcur = cur;
3490        key = &bkey;
3491
3492        xfs_btree_set_ptr_null(cur, &nptr);
3493
3494        /* Make a key out of the record data to be inserted, and save it. */
3495        cur->bc_ops->init_rec_from_cur(cur, &rec);
3496        cur->bc_ops->init_key_from_rec(key, &rec);
3497
3498        /*
3499         * Loop going up the tree, starting at the leaf level.
3500         * Stop when we don't get a split block, that must mean that
3501         * the insert is finished with this level.
3502         */
3503        do {
3504                /*
3505                 * Insert nrec/nptr into this level of the tree.
3506                 * Note if we fail, nptr will be null.
3507                 */
3508                error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3509                                &ncur, &i);
3510                if (error) {
3511                        if (pcur != cur)
3512                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3513                        goto error0;
3514                }
3515
3516                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3517                        error = -EFSCORRUPTED;
3518                        goto error0;
3519                }
3520                level++;
3521
3522                /*
3523                 * See if the cursor we just used is trash.
3524                 * Can't trash the caller's cursor, but otherwise we should
3525                 * if ncur is a new cursor or we're about to be done.
3526                 */
3527                if (pcur != cur &&
3528                    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3529                        /* Save the state from the cursor before we trash it */
3530                        if (cur->bc_ops->update_cursor)
3531                                cur->bc_ops->update_cursor(pcur, cur);
3532                        cur->bc_nlevels = pcur->bc_nlevels;
3533                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3534                }
3535                /* If we got a new cursor, switch to it. */
3536                if (ncur) {
3537                        pcur = ncur;
3538                        ncur = NULL;
3539                }
3540        } while (!xfs_btree_ptr_is_null(cur, &nptr));
3541
3542        *stat = i;
3543        return 0;
3544error0:
3545        return error;
3546}
3547
3548/*
3549 * Try to merge a non-leaf block back into the inode root.
3550 *
3551 * Note: the killroot names comes from the fact that we're effectively
3552 * killing the old root block.  But because we can't just delete the
3553 * inode we have to copy the single block it was pointing to into the
3554 * inode.
3555 */
3556STATIC int
3557xfs_btree_kill_iroot(
3558        struct xfs_btree_cur    *cur)
3559{
3560        int                     whichfork = cur->bc_ino.whichfork;
3561        struct xfs_inode        *ip = cur->bc_ino.ip;
3562        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3563        struct xfs_btree_block  *block;
3564        struct xfs_btree_block  *cblock;
3565        union xfs_btree_key     *kp;
3566        union xfs_btree_key     *ckp;
3567        union xfs_btree_ptr     *pp;
3568        union xfs_btree_ptr     *cpp;
3569        struct xfs_buf          *cbp;
3570        int                     level;
3571        int                     index;
3572        int                     numrecs;
3573        int                     error;
3574#ifdef DEBUG
3575        union xfs_btree_ptr     ptr;
3576#endif
3577        int                     i;
3578
3579        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3580        ASSERT(cur->bc_nlevels > 1);
3581
3582        /*
3583         * Don't deal with the root block needs to be a leaf case.
3584         * We're just going to turn the thing back into extents anyway.
3585         */
3586        level = cur->bc_nlevels - 1;
3587        if (level == 1)
3588                goto out0;
3589
3590        /*
3591         * Give up if the root has multiple children.
3592         */
3593        block = xfs_btree_get_iroot(cur);
3594        if (xfs_btree_get_numrecs(block) != 1)
3595                goto out0;
3596
3597        cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3598        numrecs = xfs_btree_get_numrecs(cblock);
3599
3600        /*
3601         * Only do this if the next level will fit.
3602         * Then the data must be copied up to the inode,
3603         * instead of freeing the root you free the next level.
3604         */
3605        if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3606                goto out0;
3607
3608        XFS_BTREE_STATS_INC(cur, killroot);
3609
3610#ifdef DEBUG
3611        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3612        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3613        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3614        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3615#endif
3616
3617        index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3618        if (index) {
3619                xfs_iroot_realloc(cur->bc_ino.ip, index,
3620                                  cur->bc_ino.whichfork);
3621                block = ifp->if_broot;
3622        }
3623
3624        be16_add_cpu(&block->bb_numrecs, index);
3625        ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3626
3627        kp = xfs_btree_key_addr(cur, 1, block);
3628        ckp = xfs_btree_key_addr(cur, 1, cblock);
3629        xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3630
3631        pp = xfs_btree_ptr_addr(cur, 1, block);
3632        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3633
3634        for (i = 0; i < numrecs; i++) {
3635                error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
3636                if (error)
3637                        return error;
3638        }
3639
3640        xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3641
3642        error = xfs_btree_free_block(cur, cbp);
3643        if (error)
3644                return error;
3645
3646        cur->bc_levels[level - 1].bp = NULL;
3647        be16_add_cpu(&block->bb_level, -1);
3648        xfs_trans_log_inode(cur->bc_tp, ip,
3649                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
3650        cur->bc_nlevels--;
3651out0:
3652        return 0;
3653}
3654
3655/*
3656 * Kill the current root node, and replace it with it's only child node.
3657 */
3658STATIC int
3659xfs_btree_kill_root(
3660        struct xfs_btree_cur    *cur,
3661        struct xfs_buf          *bp,
3662        int                     level,
3663        union xfs_btree_ptr     *newroot)
3664{
3665        int                     error;
3666
3667        XFS_BTREE_STATS_INC(cur, killroot);
3668
3669        /*
3670         * Update the root pointer, decreasing the level by 1 and then
3671         * free the old root.
3672         */
3673        cur->bc_ops->set_root(cur, newroot, -1);
3674
3675        error = xfs_btree_free_block(cur, bp);
3676        if (error)
3677                return error;
3678
3679        cur->bc_levels[level].bp = NULL;
3680        cur->bc_levels[level].ra = 0;
3681        cur->bc_nlevels--;
3682
3683        return 0;
3684}
3685
3686STATIC int
3687xfs_btree_dec_cursor(
3688        struct xfs_btree_cur    *cur,
3689        int                     level,
3690        int                     *stat)
3691{
3692        int                     error;
3693        int                     i;
3694
3695        if (level > 0) {
3696                error = xfs_btree_decrement(cur, level, &i);
3697                if (error)
3698                        return error;
3699        }
3700
3701        *stat = 1;
3702        return 0;
3703}
3704
3705/*
3706 * Single level of the btree record deletion routine.
3707 * Delete record pointed to by cur/level.
3708 * Remove the record from its block then rebalance the tree.
3709 * Return 0 for error, 1 for done, 2 to go on to the next level.
3710 */
3711STATIC int                                      /* error */
3712xfs_btree_delrec(
3713        struct xfs_btree_cur    *cur,           /* btree cursor */
3714        int                     level,          /* level removing record from */
3715        int                     *stat)          /* fail/done/go-on */
3716{
3717        struct xfs_btree_block  *block;         /* btree block */
3718        union xfs_btree_ptr     cptr;           /* current block ptr */
3719        struct xfs_buf          *bp;            /* buffer for block */
3720        int                     error;          /* error return value */
3721        int                     i;              /* loop counter */
3722        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3723        struct xfs_buf          *lbp;           /* left buffer pointer */
3724        struct xfs_btree_block  *left;          /* left btree block */
3725        int                     lrecs = 0;      /* left record count */
3726        int                     ptr;            /* key/record index */
3727        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3728        struct xfs_buf          *rbp;           /* right buffer pointer */
3729        struct xfs_btree_block  *right;         /* right btree block */
3730        struct xfs_btree_block  *rrblock;       /* right-right btree block */
3731        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3732        int                     rrecs = 0;      /* right record count */
3733        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3734        int                     numrecs;        /* temporary numrec count */
3735
3736        tcur = NULL;
3737
3738        /* Get the index of the entry being deleted, check for nothing there. */
3739        ptr = cur->bc_levels[level].ptr;
3740        if (ptr == 0) {
3741                *stat = 0;
3742                return 0;
3743        }
3744
3745        /* Get the buffer & block containing the record or key/ptr. */
3746        block = xfs_btree_get_block(cur, level, &bp);
3747        numrecs = xfs_btree_get_numrecs(block);
3748
3749#ifdef DEBUG
3750        error = xfs_btree_check_block(cur, block, level, bp);
3751        if (error)
3752                goto error0;
3753#endif
3754
3755        /* Fail if we're off the end of the block. */
3756        if (ptr > numrecs) {
3757                *stat = 0;
3758                return 0;
3759        }
3760
3761        XFS_BTREE_STATS_INC(cur, delrec);
3762        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3763
3764        /* Excise the entries being deleted. */
3765        if (level > 0) {
3766                /* It's a nonleaf. operate on keys and ptrs */
3767                union xfs_btree_key     *lkp;
3768                union xfs_btree_ptr     *lpp;
3769
3770                lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3771                lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3772
3773                for (i = 0; i < numrecs - ptr; i++) {
3774                        error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
3775                        if (error)
3776                                goto error0;
3777                }
3778
3779                if (ptr < numrecs) {
3780                        xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3781                        xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3782                        xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3783                        xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3784                }
3785        } else {
3786                /* It's a leaf. operate on records */
3787                if (ptr < numrecs) {
3788                        xfs_btree_shift_recs(cur,
3789                                xfs_btree_rec_addr(cur, ptr + 1, block),
3790                                -1, numrecs - ptr);
3791                        xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3792                }
3793        }
3794
3795        /*
3796         * Decrement and log the number of entries in the block.
3797         */
3798        xfs_btree_set_numrecs(block, --numrecs);
3799        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3800
3801        /*
3802         * If we are tracking the last record in the tree and
3803         * we are at the far right edge of the tree, update it.
3804         */
3805        if (xfs_btree_is_lastrec(cur, block, level)) {
3806                cur->bc_ops->update_lastrec(cur, block, NULL,
3807                                            ptr, LASTREC_DELREC);
3808        }
3809
3810        /*
3811         * We're at the root level.  First, shrink the root block in-memory.
3812         * Try to get rid of the next level down.  If we can't then there's
3813         * nothing left to do.
3814         */
3815        if (level == cur->bc_nlevels - 1) {
3816                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3817                        xfs_iroot_realloc(cur->bc_ino.ip, -1,
3818                                          cur->bc_ino.whichfork);
3819
3820                        error = xfs_btree_kill_iroot(cur);
3821                        if (error)
3822                                goto error0;
3823
3824                        error = xfs_btree_dec_cursor(cur, level, stat);
3825                        if (error)
3826                                goto error0;
3827                        *stat = 1;
3828                        return 0;
3829                }
3830
3831                /*
3832                 * If this is the root level, and there's only one entry left,
3833                 * and it's NOT the leaf level, then we can get rid of this
3834                 * level.
3835                 */
3836                if (numrecs == 1 && level > 0) {
3837                        union xfs_btree_ptr     *pp;
3838                        /*
3839                         * pp is still set to the first pointer in the block.
3840                         * Make it the new root of the btree.
3841                         */
3842                        pp = xfs_btree_ptr_addr(cur, 1, block);
3843                        error = xfs_btree_kill_root(cur, bp, level, pp);
3844                        if (error)
3845                                goto error0;
3846                } else if (level > 0) {
3847                        error = xfs_btree_dec_cursor(cur, level, stat);
3848                        if (error)
3849                                goto error0;
3850                }
3851                *stat = 1;
3852                return 0;
3853        }
3854
3855        /*
3856         * If we deleted the leftmost entry in the block, update the
3857         * key values above us in the tree.
3858         */
3859        if (xfs_btree_needs_key_update(cur, ptr)) {
3860                error = xfs_btree_update_keys(cur, level);
3861                if (error)
3862                        goto error0;
3863        }
3864
3865        /*
3866         * If the number of records remaining in the block is at least
3867         * the minimum, we're done.
3868         */
3869        if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3870                error = xfs_btree_dec_cursor(cur, level, stat);
3871                if (error)
3872                        goto error0;
3873                return 0;
3874        }
3875
3876        /*
3877         * Otherwise, we have to move some records around to keep the
3878         * tree balanced.  Look at the left and right sibling blocks to
3879         * see if we can re-balance by moving only one record.
3880         */
3881        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3882        xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3883
3884        if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3885                /*
3886                 * One child of root, need to get a chance to copy its contents
3887                 * into the root and delete it. Can't go up to next level,
3888                 * there's nothing to delete there.
3889                 */
3890                if (xfs_btree_ptr_is_null(cur, &rptr) &&
3891                    xfs_btree_ptr_is_null(cur, &lptr) &&
3892                    level == cur->bc_nlevels - 2) {
3893                        error = xfs_btree_kill_iroot(cur);
3894                        if (!error)
3895                                error = xfs_btree_dec_cursor(cur, level, stat);
3896                        if (error)
3897                                goto error0;
3898                        return 0;
3899                }
3900        }
3901
3902        ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3903               !xfs_btree_ptr_is_null(cur, &lptr));
3904
3905        /*
3906         * Duplicate the cursor so our btree manipulations here won't
3907         * disrupt the next level up.
3908         */
3909        error = xfs_btree_dup_cursor(cur, &tcur);
3910        if (error)
3911                goto error0;
3912
3913        /*
3914         * If there's a right sibling, see if it's ok to shift an entry
3915         * out of it.
3916         */
3917        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3918                /*
3919                 * Move the temp cursor to the last entry in the next block.
3920                 * Actually any entry but the first would suffice.
3921                 */
3922                i = xfs_btree_lastrec(tcur, level);
3923                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3924                        error = -EFSCORRUPTED;
3925                        goto error0;
3926                }
3927
3928                error = xfs_btree_increment(tcur, level, &i);
3929                if (error)
3930                        goto error0;
3931                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3932                        error = -EFSCORRUPTED;
3933                        goto error0;
3934                }
3935
3936                i = xfs_btree_lastrec(tcur, level);
3937                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3938                        error = -EFSCORRUPTED;
3939                        goto error0;
3940                }
3941
3942                /* Grab a pointer to the block. */
3943                right = xfs_btree_get_block(tcur, level, &rbp);
3944#ifdef DEBUG
3945                error = xfs_btree_check_block(tcur, right, level, rbp);
3946                if (error)
3947                        goto error0;
3948#endif
3949                /* Grab the current block number, for future use. */
3950                xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3951
3952                /*
3953                 * If right block is full enough so that removing one entry
3954                 * won't make it too empty, and left-shifting an entry out
3955                 * of right to us works, we're done.
3956                 */
3957                if (xfs_btree_get_numrecs(right) - 1 >=
3958                    cur->bc_ops->get_minrecs(tcur, level)) {
3959                        error = xfs_btree_lshift(tcur, level, &i);
3960                        if (error)
3961                                goto error0;
3962                        if (i) {
3963                                ASSERT(xfs_btree_get_numrecs(block) >=
3964                                       cur->bc_ops->get_minrecs(tcur, level));
3965
3966                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3967                                tcur = NULL;
3968
3969                                error = xfs_btree_dec_cursor(cur, level, stat);
3970                                if (error)
3971                                        goto error0;
3972                                return 0;
3973                        }
3974                }
3975
3976                /*
3977                 * Otherwise, grab the number of records in right for
3978                 * future reference, and fix up the temp cursor to point
3979                 * to our block again (last record).
3980                 */
3981                rrecs = xfs_btree_get_numrecs(right);
3982                if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3983                        i = xfs_btree_firstrec(tcur, level);
3984                        if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3985                                error = -EFSCORRUPTED;
3986                                goto error0;
3987                        }
3988
3989                        error = xfs_btree_decrement(tcur, level, &i);
3990                        if (error)
3991                                goto error0;
3992                        if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3993                                error = -EFSCORRUPTED;
3994                                goto error0;
3995                        }
3996                }
3997        }
3998
3999        /*
4000         * If there's a left sibling, see if it's ok to shift an entry
4001         * out of it.
4002         */
4003        if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4004                /*
4005                 * Move the temp cursor to the first entry in the
4006                 * previous block.
4007                 */
4008                i = xfs_btree_firstrec(tcur, level);
4009                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4010                        error = -EFSCORRUPTED;
4011                        goto error0;
4012                }
4013
4014                error = xfs_btree_decrement(tcur, level, &i);
4015                if (error)
4016                        goto error0;
4017                i = xfs_btree_firstrec(tcur, level);
4018                if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4019                        error = -EFSCORRUPTED;
4020                        goto error0;
4021                }
4022
4023                /* Grab a pointer to the block. */
4024                left = xfs_btree_get_block(tcur, level, &lbp);
4025#ifdef DEBUG
4026                error = xfs_btree_check_block(cur, left, level, lbp);
4027                if (error)
4028                        goto error0;
4029#endif
4030                /* Grab the current block number, for future use. */
4031                xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4032
4033                /*
4034                 * If left block is full enough so that removing one entry
4035                 * won't make it too empty, and right-shifting an entry out
4036                 * of left to us works, we're done.
4037                 */
4038                if (xfs_btree_get_numrecs(left) - 1 >=
4039                    cur->bc_ops->get_minrecs(tcur, level)) {
4040                        error = xfs_btree_rshift(tcur, level, &i);
4041                        if (error)
4042                                goto error0;
4043                        if (i) {
4044                                ASSERT(xfs_btree_get_numrecs(block) >=
4045                                       cur->bc_ops->get_minrecs(tcur, level));
4046                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4047                                tcur = NULL;
4048                                if (level == 0)
4049                                        cur->bc_levels[0].ptr++;
4050
4051                                *stat = 1;
4052                                return 0;
4053                        }
4054                }
4055
4056                /*
4057                 * Otherwise, grab the number of records in right for
4058                 * future reference.
4059                 */
4060                lrecs = xfs_btree_get_numrecs(left);
4061        }
4062
4063        /* Delete the temp cursor, we're done with it. */
4064        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4065        tcur = NULL;
4066
4067        /* If here, we need to do a join to keep the tree balanced. */
4068        ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4069
4070        if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4071            lrecs + xfs_btree_get_numrecs(block) <=
4072                        cur->bc_ops->get_maxrecs(cur, level)) {
4073                /*
4074                 * Set "right" to be the starting block,
4075                 * "left" to be the left neighbor.
4076                 */
4077                rptr = cptr;
4078                right = block;
4079                rbp = bp;
4080                error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4081                if (error)
4082                        goto error0;
4083
4084        /*
4085         * If that won't work, see if we can join with the right neighbor block.
4086         */
4087        } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4088                   rrecs + xfs_btree_get_numrecs(block) <=
4089                        cur->bc_ops->get_maxrecs(cur, level)) {
4090                /*
4091                 * Set "left" to be the starting block,
4092                 * "right" to be the right neighbor.
4093                 */
4094                lptr = cptr;
4095                left = block;
4096                lbp = bp;
4097                error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4098                if (error)
4099                        goto error0;
4100
4101        /*
4102         * Otherwise, we can't fix the imbalance.
4103         * Just return.  This is probably a logic error, but it's not fatal.
4104         */
4105        } else {
4106                error = xfs_btree_dec_cursor(cur, level, stat);
4107                if (error)
4108                        goto error0;
4109                return 0;
4110        }
4111
4112        rrecs = xfs_btree_get_numrecs(right);
4113        lrecs = xfs_btree_get_numrecs(left);
4114
4115        /*
4116         * We're now going to join "left" and "right" by moving all the stuff
4117         * in "right" to "left" and deleting "right".
4118         */
4119        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4120        if (level > 0) {
4121                /* It's a non-leaf.  Move keys and pointers. */
4122                union xfs_btree_key     *lkp;   /* left btree key */
4123                union xfs_btree_ptr     *lpp;   /* left address pointer */
4124                union xfs_btree_key     *rkp;   /* right btree key */
4125                union xfs_btree_ptr     *rpp;   /* right address pointer */
4126
4127                lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4128                lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4129                rkp = xfs_btree_key_addr(cur, 1, right);
4130                rpp = xfs_btree_ptr_addr(cur, 1, right);
4131
4132                for (i = 1; i < rrecs; i++) {
4133                        error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
4134                        if (error)
4135                                goto error0;
4136                }
4137
4138                xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4139                xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4140
4141                xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4142                xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4143        } else {
4144                /* It's a leaf.  Move records.  */
4145                union xfs_btree_rec     *lrp;   /* left record pointer */
4146                union xfs_btree_rec     *rrp;   /* right record pointer */
4147
4148                lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4149                rrp = xfs_btree_rec_addr(cur, 1, right);
4150
4151                xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4152                xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4153        }
4154
4155        XFS_BTREE_STATS_INC(cur, join);
4156
4157        /*
4158         * Fix up the number of records and right block pointer in the
4159         * surviving block, and log it.
4160         */
4161        xfs_btree_set_numrecs(left, lrecs + rrecs);
4162        xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
4163        xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4164        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4165
4166        /* If there is a right sibling, point it to the remaining block. */
4167        xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4168        if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4169                error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4170                if (error)
4171                        goto error0;
4172                xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4173                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4174        }
4175
4176        /* Free the deleted block. */
4177        error = xfs_btree_free_block(cur, rbp);
4178        if (error)
4179                goto error0;
4180
4181        /*
4182         * If we joined with the left neighbor, set the buffer in the
4183         * cursor to the left block, and fix up the index.
4184         */
4185        if (bp != lbp) {
4186                cur->bc_levels[level].bp = lbp;
4187                cur->bc_levels[level].ptr += lrecs;
4188                cur->bc_levels[level].ra = 0;
4189        }
4190        /*
4191         * If we joined with the right neighbor and there's a level above
4192         * us, increment the cursor at that level.
4193         */
4194        else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4195                   (level + 1 < cur->bc_nlevels)) {
4196                error = xfs_btree_increment(cur, level + 1, &i);
4197                if (error)
4198                        goto error0;
4199        }
4200
4201        /*
4202         * Readjust the ptr at this level if it's not a leaf, since it's
4203         * still pointing at the deletion point, which makes the cursor
4204         * inconsistent.  If this makes the ptr 0, the caller fixes it up.
4205         * We can't use decrement because it would change the next level up.
4206         */
4207        if (level > 0)
4208                cur->bc_levels[level].ptr--;
4209
4210        /*
4211         * We combined blocks, so we have to update the parent keys if the
4212         * btree supports overlapped intervals.  However,
4213         * bc_levels[level + 1].ptr points to the old block so that the caller
4214         * knows which record to delete.  Therefore, the caller must be savvy
4215         * enough to call updkeys for us if we return stat == 2.  The other
4216         * exit points from this function don't require deletions further up
4217         * the tree, so they can call updkeys directly.
4218         */
4219
4220        /* Return value means the next level up has something to do. */
4221        *stat = 2;
4222        return 0;
4223
4224error0:
4225        if (tcur)
4226                xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4227        return error;
4228}
4229
4230/*
4231 * Delete the record pointed to by cur.
4232 * The cursor refers to the place where the record was (could be inserted)
4233 * when the operation returns.
4234 */
4235int                                     /* error */
4236xfs_btree_delete(
4237        struct xfs_btree_cur    *cur,
4238        int                     *stat)  /* success/failure */
4239{
4240        int                     error;  /* error return value */
4241        int                     level;
4242        int                     i;
4243        bool                    joined = false;
4244
4245        /*
4246         * Go up the tree, starting at leaf level.
4247         *
4248         * If 2 is returned then a join was done; go to the next level.
4249         * Otherwise we are done.
4250         */
4251        for (level = 0, i = 2; i == 2; level++) {
4252                error = xfs_btree_delrec(cur, level, &i);
4253                if (error)
4254                        goto error0;
4255                if (i == 2)
4256                        joined = true;
4257        }
4258
4259        /*
4260         * If we combined blocks as part of deleting the record, delrec won't
4261         * have updated the parent high keys so we have to do that here.
4262         */
4263        if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4264                error = xfs_btree_updkeys_force(cur, 0);
4265                if (error)
4266                        goto error0;
4267        }
4268
4269        if (i == 0) {
4270                for (level = 1; level < cur->bc_nlevels; level++) {
4271                        if (cur->bc_levels[level].ptr == 0) {
4272                                error = xfs_btree_decrement(cur, level, &i);
4273                                if (error)
4274                                        goto error0;
4275                                break;
4276                        }
4277                }
4278        }
4279
4280        *stat = i;
4281        return 0;
4282error0:
4283        return error;
4284}
4285
4286/*
4287 * Get the data from the pointed-to record.
4288 */
4289int                                     /* error */
4290xfs_btree_get_rec(
4291        struct xfs_btree_cur    *cur,   /* btree cursor */
4292        union xfs_btree_rec     **recp, /* output: btree record */
4293        int                     *stat)  /* output: success/failure */
4294{
4295        struct xfs_btree_block  *block; /* btree block */
4296        struct xfs_buf          *bp;    /* buffer pointer */
4297        int                     ptr;    /* record number */
4298#ifdef DEBUG
4299        int                     error;  /* error return value */
4300#endif
4301
4302        ptr = cur->bc_levels[0].ptr;
4303        block = xfs_btree_get_block(cur, 0, &bp);
4304
4305#ifdef DEBUG
4306        error = xfs_btree_check_block(cur, block, 0, bp);
4307        if (error)
4308                return error;
4309#endif
4310
4311        /*
4312         * Off the right end or left end, return failure.
4313         */
4314        if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4315                *stat = 0;
4316                return 0;
4317        }
4318
4319        /*
4320         * Point to the record and extract its data.
4321         */
4322        *recp = xfs_btree_rec_addr(cur, ptr, block);
4323        *stat = 1;
4324        return 0;
4325}
4326
4327/* Visit a block in a btree. */
4328STATIC int
4329xfs_btree_visit_block(
4330        struct xfs_btree_cur            *cur,
4331        int                             level,
4332        xfs_btree_visit_blocks_fn       fn,
4333        void                            *data)
4334{
4335        struct xfs_btree_block          *block;
4336        struct xfs_buf                  *bp;
4337        union xfs_btree_ptr             rptr;
4338        int                             error;
4339
4340        /* do right sibling readahead */
4341        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4342        block = xfs_btree_get_block(cur, level, &bp);
4343
4344        /* process the block */
4345        error = fn(cur, level, data);
4346        if (error)
4347                return error;
4348
4349        /* now read rh sibling block for next iteration */
4350        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4351        if (xfs_btree_ptr_is_null(cur, &rptr))
4352                return -ENOENT;
4353
4354        /*
4355         * We only visit blocks once in this walk, so we have to avoid the
4356         * internal xfs_btree_lookup_get_block() optimisation where it will
4357         * return the same block without checking if the right sibling points
4358         * back to us and creates a cyclic reference in the btree.
4359         */
4360        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4361                if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
4362                                                        xfs_buf_daddr(bp)))
4363                        return -EFSCORRUPTED;
4364        } else {
4365                if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
4366                                                        xfs_buf_daddr(bp)))
4367                        return -EFSCORRUPTED;
4368        }
4369        return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4370}
4371
4372
4373/* Visit every block in a btree. */
4374int
4375xfs_btree_visit_blocks(
4376        struct xfs_btree_cur            *cur,
4377        xfs_btree_visit_blocks_fn       fn,
4378        unsigned int                    flags,
4379        void                            *data)
4380{
4381        union xfs_btree_ptr             lptr;
4382        int                             level;
4383        struct xfs_btree_block          *block = NULL;
4384        int                             error = 0;
4385
4386        cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4387
4388        /* for each level */
4389        for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4390                /* grab the left hand block */
4391                error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4392                if (error)
4393                        return error;
4394
4395                /* readahead the left most block for the next level down */
4396                if (level > 0) {
4397                        union xfs_btree_ptr     *ptr;
4398
4399                        ptr = xfs_btree_ptr_addr(cur, 1, block);
4400                        xfs_btree_readahead_ptr(cur, ptr, 1);
4401
4402                        /* save for the next iteration of the loop */
4403                        xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4404
4405                        if (!(flags & XFS_BTREE_VISIT_LEAVES))
4406                                continue;
4407                } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
4408                        continue;
4409                }
4410
4411                /* for each buffer in the level */
4412                do {
4413                        error = xfs_btree_visit_block(cur, level, fn, data);
4414                } while (!error);
4415
4416                if (error != -ENOENT)
4417                        return error;
4418        }
4419
4420        return 0;
4421}
4422
4423/*
4424 * Change the owner of a btree.
4425 *
4426 * The mechanism we use here is ordered buffer logging. Because we don't know
4427 * how many buffers were are going to need to modify, we don't really want to
4428 * have to make transaction reservations for the worst case of every buffer in a
4429 * full size btree as that may be more space that we can fit in the log....
4430 *
4431 * We do the btree walk in the most optimal manner possible - we have sibling
4432 * pointers so we can just walk all the blocks on each level from left to right
4433 * in a single pass, and then move to the next level and do the same. We can
4434 * also do readahead on the sibling pointers to get IO moving more quickly,
4435 * though for slow disks this is unlikely to make much difference to performance
4436 * as the amount of CPU work we have to do before moving to the next block is
4437 * relatively small.
4438 *
4439 * For each btree block that we load, modify the owner appropriately, set the
4440 * buffer as an ordered buffer and log it appropriately. We need to ensure that
4441 * we mark the region we change dirty so that if the buffer is relogged in
4442 * a subsequent transaction the changes we make here as an ordered buffer are
4443 * correctly relogged in that transaction.  If we are in recovery context, then
4444 * just queue the modified buffer as delayed write buffer so the transaction
4445 * recovery completion writes the changes to disk.
4446 */
4447struct xfs_btree_block_change_owner_info {
4448        uint64_t                new_owner;
4449        struct list_head        *buffer_list;
4450};
4451
4452static int
4453xfs_btree_block_change_owner(
4454        struct xfs_btree_cur    *cur,
4455        int                     level,
4456        void                    *data)
4457{
4458        struct xfs_btree_block_change_owner_info        *bbcoi = data;
4459        struct xfs_btree_block  *block;
4460        struct xfs_buf          *bp;
4461
4462        /* modify the owner */
4463        block = xfs_btree_get_block(cur, level, &bp);
4464        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4465                if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4466                        return 0;
4467                block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4468        } else {
4469                if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4470                        return 0;
4471                block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4472        }
4473
4474        /*
4475         * If the block is a root block hosted in an inode, we might not have a
4476         * buffer pointer here and we shouldn't attempt to log the change as the
4477         * information is already held in the inode and discarded when the root
4478         * block is formatted into the on-disk inode fork. We still change it,
4479         * though, so everything is consistent in memory.
4480         */
4481        if (!bp) {
4482                ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4483                ASSERT(level == cur->bc_nlevels - 1);
4484                return 0;
4485        }
4486
4487        if (cur->bc_tp) {
4488                if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4489                        xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4490                        return -EAGAIN;
4491                }
4492        } else {
4493                xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4494        }
4495
4496        return 0;
4497}
4498
4499int
4500xfs_btree_change_owner(
4501        struct xfs_btree_cur    *cur,
4502        uint64_t                new_owner,
4503        struct list_head        *buffer_list)
4504{
4505        struct xfs_btree_block_change_owner_info        bbcoi;
4506
4507        bbcoi.new_owner = new_owner;
4508        bbcoi.buffer_list = buffer_list;
4509
4510        return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4511                        XFS_BTREE_VISIT_ALL, &bbcoi);
4512}
4513
4514/* Verify the v5 fields of a long-format btree block. */
4515xfs_failaddr_t
4516xfs_btree_lblock_v5hdr_verify(
4517        struct xfs_buf          *bp,
4518        uint64_t                owner)
4519{
4520        struct xfs_mount        *mp = bp->b_mount;
4521        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4522
4523        if (!xfs_has_crc(mp))
4524                return __this_address;
4525        if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4526                return __this_address;
4527        if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4528                return __this_address;
4529        if (owner != XFS_RMAP_OWN_UNKNOWN &&
4530            be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4531                return __this_address;
4532        return NULL;
4533}
4534
4535/* Verify a long-format btree block. */
4536xfs_failaddr_t
4537xfs_btree_lblock_verify(
4538        struct xfs_buf          *bp,
4539        unsigned int            max_recs)
4540{
4541        struct xfs_mount        *mp = bp->b_mount;
4542        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4543        xfs_fsblock_t           fsb;
4544        xfs_failaddr_t          fa;
4545
4546        /* numrecs verification */
4547        if (be16_to_cpu(block->bb_numrecs) > max_recs)
4548                return __this_address;
4549
4550        /* sibling pointer verification */
4551        fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
4552        fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4553                        block->bb_u.l.bb_leftsib);
4554        if (!fa)
4555                fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4556                                block->bb_u.l.bb_rightsib);
4557        return fa;
4558}
4559
4560/**
4561 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4562 *                                    btree block
4563 *
4564 * @bp: buffer containing the btree block
4565 */
4566xfs_failaddr_t
4567xfs_btree_sblock_v5hdr_verify(
4568        struct xfs_buf          *bp)
4569{
4570        struct xfs_mount        *mp = bp->b_mount;
4571        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4572        struct xfs_perag        *pag = bp->b_pag;
4573
4574        if (!xfs_has_crc(mp))
4575                return __this_address;
4576        if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4577                return __this_address;
4578        if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4579                return __this_address;
4580        if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4581                return __this_address;
4582        return NULL;
4583}
4584
4585/**
4586 * xfs_btree_sblock_verify() -- verify a short-format btree block
4587 *
4588 * @bp: buffer containing the btree block
4589 * @max_recs: maximum records allowed in this btree node
4590 */
4591xfs_failaddr_t
4592xfs_btree_sblock_verify(
4593        struct xfs_buf          *bp,
4594        unsigned int            max_recs)
4595{
4596        struct xfs_mount        *mp = bp->b_mount;
4597        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4598        xfs_agnumber_t          agno;
4599        xfs_agblock_t           agbno;
4600        xfs_failaddr_t          fa;
4601
4602        /* numrecs verification */
4603        if (be16_to_cpu(block->bb_numrecs) > max_recs)
4604                return __this_address;
4605
4606        /* sibling pointer verification */
4607        agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
4608        agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
4609        fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
4610                        block->bb_u.s.bb_leftsib);
4611        if (!fa)
4612                fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
4613                                block->bb_u.s.bb_rightsib);
4614        return fa;
4615}
4616
4617/*
4618 * For the given limits on leaf and keyptr records per block, calculate the
4619 * height of the tree needed to index the number of leaf records.
4620 */
4621unsigned int
4622xfs_btree_compute_maxlevels(
4623        const unsigned int      *limits,
4624        unsigned long long      records)
4625{
4626        unsigned long long      level_blocks = howmany_64(records, limits[0]);
4627        unsigned int            height = 1;
4628
4629        while (level_blocks > 1) {
4630                level_blocks = howmany_64(level_blocks, limits[1]);
4631                height++;
4632        }
4633
4634        return height;
4635}
4636
4637/*
4638 * For the given limits on leaf and keyptr records per block, calculate the
4639 * number of blocks needed to index the given number of leaf records.
4640 */
4641unsigned long long
4642xfs_btree_calc_size(
4643        const unsigned int      *limits,
4644        unsigned long long      records)
4645{
4646        unsigned long long      level_blocks = howmany_64(records, limits[0]);
4647        unsigned long long      blocks = level_blocks;
4648
4649        while (level_blocks > 1) {
4650                level_blocks = howmany_64(level_blocks, limits[1]);
4651                blocks += level_blocks;
4652        }
4653
4654        return blocks;
4655}
4656
4657/*
4658 * Given a number of available blocks for the btree to consume with records and
4659 * pointers, calculate the height of the tree needed to index all the records
4660 * that space can hold based on the number of pointers each interior node
4661 * holds.
4662 *
4663 * We start by assuming a single level tree consumes a single block, then track
4664 * the number of blocks each node level consumes until we no longer have space
4665 * to store the next node level. At this point, we are indexing all the leaf
4666 * blocks in the space, and there's no more free space to split the tree any
4667 * further. That's our maximum btree height.
4668 */
4669unsigned int
4670xfs_btree_space_to_height(
4671        const unsigned int      *limits,
4672        unsigned long long      leaf_blocks)
4673{
4674        unsigned long long      node_blocks = limits[1];
4675        unsigned long long      blocks_left = leaf_blocks - 1;
4676        unsigned int            height = 1;
4677
4678        if (leaf_blocks < 1)
4679                return 0;
4680
4681        while (node_blocks < blocks_left) {
4682                blocks_left -= node_blocks;
4683                node_blocks *= limits[1];
4684                height++;
4685        }
4686
4687        return height;
4688}
4689
4690/*
4691 * Query a regular btree for all records overlapping a given interval.
4692 * Start with a LE lookup of the key of low_rec and return all records
4693 * until we find a record with a key greater than the key of high_rec.
4694 */
4695STATIC int
4696xfs_btree_simple_query_range(
4697        struct xfs_btree_cur            *cur,
4698        const union xfs_btree_key       *low_key,
4699        const union xfs_btree_key       *high_key,
4700        xfs_btree_query_range_fn        fn,
4701        void                            *priv)
4702{
4703        union xfs_btree_rec             *recp;
4704        union xfs_btree_key             rec_key;
4705        int64_t                         diff;
4706        int                             stat;
4707        bool                            firstrec = true;
4708        int                             error;
4709
4710        ASSERT(cur->bc_ops->init_high_key_from_rec);
4711        ASSERT(cur->bc_ops->diff_two_keys);
4712
4713        /*
4714         * Find the leftmost record.  The btree cursor must be set
4715         * to the low record used to generate low_key.
4716         */
4717        stat = 0;
4718        error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4719        if (error)
4720                goto out;
4721
4722        /* Nothing?  See if there's anything to the right. */
4723        if (!stat) {
4724                error = xfs_btree_increment(cur, 0, &stat);
4725                if (error)
4726                        goto out;
4727        }
4728
4729        while (stat) {
4730                /* Find the record. */
4731                error = xfs_btree_get_rec(cur, &recp, &stat);
4732                if (error || !stat)
4733                        break;
4734
4735                /* Skip if high_key(rec) < low_key. */
4736                if (firstrec) {
4737                        cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4738                        firstrec = false;
4739                        diff = cur->bc_ops->diff_two_keys(cur, low_key,
4740                                        &rec_key);
4741                        if (diff > 0)
4742                                goto advloop;
4743                }
4744
4745                /* Stop if high_key < low_key(rec). */
4746                cur->bc_ops->init_key_from_rec(&rec_key, recp);
4747                diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4748                if (diff > 0)
4749                        break;
4750
4751                /* Callback */
4752                error = fn(cur, recp, priv);
4753                if (error)
4754                        break;
4755
4756advloop:
4757                /* Move on to the next record. */
4758                error = xfs_btree_increment(cur, 0, &stat);
4759                if (error)
4760                        break;
4761        }
4762
4763out:
4764        return error;
4765}
4766
4767/*
4768 * Query an overlapped interval btree for all records overlapping a given
4769 * interval.  This function roughly follows the algorithm given in
4770 * "Interval Trees" of _Introduction to Algorithms_, which is section
4771 * 14.3 in the 2nd and 3rd editions.
4772 *
4773 * First, generate keys for the low and high records passed in.
4774 *
4775 * For any leaf node, generate the high and low keys for the record.
4776 * If the record keys overlap with the query low/high keys, pass the
4777 * record to the function iterator.
4778 *
4779 * For any internal node, compare the low and high keys of each
4780 * pointer against the query low/high keys.  If there's an overlap,
4781 * follow the pointer.
4782 *
4783 * As an optimization, we stop scanning a block when we find a low key
4784 * that is greater than the query's high key.
4785 */
4786STATIC int
4787xfs_btree_overlapped_query_range(
4788        struct xfs_btree_cur            *cur,
4789        const union xfs_btree_key       *low_key,
4790        const union xfs_btree_key       *high_key,
4791        xfs_btree_query_range_fn        fn,
4792        void                            *priv)
4793{
4794        union xfs_btree_ptr             ptr;
4795        union xfs_btree_ptr             *pp;
4796        union xfs_btree_key             rec_key;
4797        union xfs_btree_key             rec_hkey;
4798        union xfs_btree_key             *lkp;
4799        union xfs_btree_key             *hkp;
4800        union xfs_btree_rec             *recp;
4801        struct xfs_btree_block          *block;
4802        int64_t                         ldiff;
4803        int64_t                         hdiff;
4804        int                             level;
4805        struct xfs_buf                  *bp;
4806        int                             i;
4807        int                             error;
4808
4809        /* Load the root of the btree. */
4810        level = cur->bc_nlevels - 1;
4811        cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4812        error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4813        if (error)
4814                return error;
4815        xfs_btree_get_block(cur, level, &bp);
4816        trace_xfs_btree_overlapped_query_range(cur, level, bp);
4817#ifdef DEBUG
4818        error = xfs_btree_check_block(cur, block, level, bp);
4819        if (error)
4820                goto out;
4821#endif
4822        cur->bc_levels[level].ptr = 1;
4823
4824        while (level < cur->bc_nlevels) {
4825                block = xfs_btree_get_block(cur, level, &bp);
4826
4827                /* End of node, pop back towards the root. */
4828                if (cur->bc_levels[level].ptr >
4829                                        be16_to_cpu(block->bb_numrecs)) {
4830pop_up:
4831                        if (level < cur->bc_nlevels - 1)
4832                                cur->bc_levels[level + 1].ptr++;
4833                        level++;
4834                        continue;
4835                }
4836
4837                if (level == 0) {
4838                        /* Handle a leaf node. */
4839                        recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
4840                                        block);
4841
4842                        cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4843                        ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
4844                                        low_key);
4845
4846                        cur->bc_ops->init_key_from_rec(&rec_key, recp);
4847                        hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
4848                                        &rec_key);
4849
4850                        /*
4851                         * If (record's high key >= query's low key) and
4852                         *    (query's high key >= record's low key), then
4853                         * this record overlaps the query range; callback.
4854                         */
4855                        if (ldiff >= 0 && hdiff >= 0) {
4856                                error = fn(cur, recp, priv);
4857                                if (error)
4858                                        break;
4859                        } else if (hdiff < 0) {
4860                                /* Record is larger than high key; pop. */
4861                                goto pop_up;
4862                        }
4863                        cur->bc_levels[level].ptr++;
4864                        continue;
4865                }
4866
4867                /* Handle an internal node. */
4868                lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
4869                hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
4870                                block);
4871                pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
4872
4873                ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
4874                hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
4875
4876                /*
4877                 * If (pointer's high key >= query's low key) and
4878                 *    (query's high key >= pointer's low key), then
4879                 * this record overlaps the query range; follow pointer.
4880                 */
4881                if (ldiff >= 0 && hdiff >= 0) {
4882                        level--;
4883                        error = xfs_btree_lookup_get_block(cur, level, pp,
4884                                        &block);
4885                        if (error)
4886                                goto out;
4887                        xfs_btree_get_block(cur, level, &bp);
4888                        trace_xfs_btree_overlapped_query_range(cur, level, bp);
4889#ifdef DEBUG
4890                        error = xfs_btree_check_block(cur, block, level, bp);
4891                        if (error)
4892                                goto out;
4893#endif
4894                        cur->bc_levels[level].ptr = 1;
4895                        continue;
4896                } else if (hdiff < 0) {
4897                        /* The low key is larger than the upper range; pop. */
4898                        goto pop_up;
4899                }
4900                cur->bc_levels[level].ptr++;
4901        }
4902
4903out:
4904        /*
4905         * If we don't end this function with the cursor pointing at a record
4906         * block, a subsequent non-error cursor deletion will not release
4907         * node-level buffers, causing a buffer leak.  This is quite possible
4908         * with a zero-results range query, so release the buffers if we
4909         * failed to return any results.
4910         */
4911        if (cur->bc_levels[0].bp == NULL) {
4912                for (i = 0; i < cur->bc_nlevels; i++) {
4913                        if (cur->bc_levels[i].bp) {
4914                                xfs_trans_brelse(cur->bc_tp,
4915                                                cur->bc_levels[i].bp);
4916                                cur->bc_levels[i].bp = NULL;
4917                                cur->bc_levels[i].ptr = 0;
4918                                cur->bc_levels[i].ra = 0;
4919                        }
4920                }
4921        }
4922
4923        return error;
4924}
4925
4926/*
4927 * Query a btree for all records overlapping a given interval of keys.  The
4928 * supplied function will be called with each record found; return one of the
4929 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4930 * code.  This function returns -ECANCELED, zero, or a negative error code.
4931 */
4932int
4933xfs_btree_query_range(
4934        struct xfs_btree_cur            *cur,
4935        const union xfs_btree_irec      *low_rec,
4936        const union xfs_btree_irec      *high_rec,
4937        xfs_btree_query_range_fn        fn,
4938        void                            *priv)
4939{
4940        union xfs_btree_rec             rec;
4941        union xfs_btree_key             low_key;
4942        union xfs_btree_key             high_key;
4943
4944        /* Find the keys of both ends of the interval. */
4945        cur->bc_rec = *high_rec;
4946        cur->bc_ops->init_rec_from_cur(cur, &rec);
4947        cur->bc_ops->init_key_from_rec(&high_key, &rec);
4948
4949        cur->bc_rec = *low_rec;
4950        cur->bc_ops->init_rec_from_cur(cur, &rec);
4951        cur->bc_ops->init_key_from_rec(&low_key, &rec);
4952
4953        /* Enforce low key < high key. */
4954        if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4955                return -EINVAL;
4956
4957        if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4958                return xfs_btree_simple_query_range(cur, &low_key,
4959                                &high_key, fn, priv);
4960        return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4961                        fn, priv);
4962}
4963
4964/* Query a btree for all records. */
4965int
4966xfs_btree_query_all(
4967        struct xfs_btree_cur            *cur,
4968        xfs_btree_query_range_fn        fn,
4969        void                            *priv)
4970{
4971        union xfs_btree_key             low_key;
4972        union xfs_btree_key             high_key;
4973
4974        memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
4975        memset(&low_key, 0, sizeof(low_key));
4976        memset(&high_key, 0xFF, sizeof(high_key));
4977
4978        return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
4979}
4980
4981static int
4982xfs_btree_count_blocks_helper(
4983        struct xfs_btree_cur    *cur,
4984        int                     level,
4985        void                    *data)
4986{
4987        xfs_extlen_t            *blocks = data;
4988        (*blocks)++;
4989
4990        return 0;
4991}
4992
4993/* Count the blocks in a btree and return the result in *blocks. */
4994int
4995xfs_btree_count_blocks(
4996        struct xfs_btree_cur    *cur,
4997        xfs_extlen_t            *blocks)
4998{
4999        *blocks = 0;
5000        return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
5001                        XFS_BTREE_VISIT_ALL, blocks);
5002}
5003
5004/* Compare two btree pointers. */
5005int64_t
5006xfs_btree_diff_two_ptrs(
5007        struct xfs_btree_cur            *cur,
5008        const union xfs_btree_ptr       *a,
5009        const union xfs_btree_ptr       *b)
5010{
5011        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5012                return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
5013        return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
5014}
5015
5016/* If there's an extent, we're done. */
5017STATIC int
5018xfs_btree_has_record_helper(
5019        struct xfs_btree_cur            *cur,
5020        const union xfs_btree_rec       *rec,
5021        void                            *priv)
5022{
5023        return -ECANCELED;
5024}
5025
5026/* Is there a record covering a given range of keys? */
5027int
5028xfs_btree_has_record(
5029        struct xfs_btree_cur            *cur,
5030        const union xfs_btree_irec      *low,
5031        const union xfs_btree_irec      *high,
5032        bool                            *exists)
5033{
5034        int                             error;
5035
5036        error = xfs_btree_query_range(cur, low, high,
5037                        &xfs_btree_has_record_helper, NULL);
5038        if (error == -ECANCELED) {
5039                *exists = true;
5040                return 0;
5041        }
5042        *exists = false;
5043        return error;
5044}
5045
5046/* Are there more records in this btree? */
5047bool
5048xfs_btree_has_more_records(
5049        struct xfs_btree_cur    *cur)
5050{
5051        struct xfs_btree_block  *block;
5052        struct xfs_buf          *bp;
5053
5054        block = xfs_btree_get_block(cur, 0, &bp);
5055
5056        /* There are still records in this block. */
5057        if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
5058                return true;
5059
5060        /* There are more record blocks. */
5061        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5062                return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
5063        else
5064                return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
5065}
5066
5067/* Set up all the btree cursor caches. */
5068int __init
5069xfs_btree_init_cur_caches(void)
5070{
5071        int             error;
5072
5073        error = xfs_allocbt_init_cur_cache();
5074        if (error)
5075                return error;
5076        error = xfs_inobt_init_cur_cache();
5077        if (error)
5078                goto err;
5079        error = xfs_bmbt_init_cur_cache();
5080        if (error)
5081                goto err;
5082        error = xfs_rmapbt_init_cur_cache();
5083        if (error)
5084                goto err;
5085        error = xfs_refcountbt_init_cur_cache();
5086        if (error)
5087                goto err;
5088
5089        return 0;
5090err:
5091        xfs_btree_destroy_cur_caches();
5092        return error;
5093}
5094
5095/* Destroy all the btree cursor caches, if they've been allocated. */
5096void
5097xfs_btree_destroy_cur_caches(void)
5098{
5099        xfs_allocbt_destroy_cur_cache();
5100        xfs_inobt_destroy_cur_cache();
5101        xfs_bmbt_destroy_cur_cache();
5102        xfs_rmapbt_destroy_cur_cache();
5103        xfs_refcountbt_destroy_cur_cache();
5104}
5105