linux/fs/xfs/xfs_btree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_inum.h"
  24#include "xfs_trans.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_mount.h"
  28#include "xfs_bmap_btree.h"
  29#include "xfs_alloc_btree.h"
  30#include "xfs_ialloc_btree.h"
  31#include "xfs_dinode.h"
  32#include "xfs_inode.h"
  33#include "xfs_inode_item.h"
  34#include "xfs_btree.h"
  35#include "xfs_error.h"
  36#include "xfs_trace.h"
  37
  38/*
  39 * Cursor allocation zone.
  40 */
  41kmem_zone_t     *xfs_btree_cur_zone;
  42
  43/*
  44 * Btree magic numbers.
  45 */
  46const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
  47        XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
  48};
  49
  50
  51STATIC int                              /* error (0 or EFSCORRUPTED) */
  52xfs_btree_check_lblock(
  53        struct xfs_btree_cur    *cur,   /* btree cursor */
  54        struct xfs_btree_block  *block, /* btree long form block pointer */
  55        int                     level,  /* level of the btree block */
  56        struct xfs_buf          *bp)    /* buffer for block, if any */
  57{
  58        int                     lblock_ok; /* block passes checks */
  59        struct xfs_mount        *mp;    /* file system mount point */
  60
  61        mp = cur->bc_mp;
  62        lblock_ok =
  63                be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
  64                be16_to_cpu(block->bb_level) == level &&
  65                be16_to_cpu(block->bb_numrecs) <=
  66                        cur->bc_ops->get_maxrecs(cur, level) &&
  67                block->bb_u.l.bb_leftsib &&
  68                (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
  69                 XFS_FSB_SANITY_CHECK(mp,
  70                        be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
  71                block->bb_u.l.bb_rightsib &&
  72                (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
  73                 XFS_FSB_SANITY_CHECK(mp,
  74                        be64_to_cpu(block->bb_u.l.bb_rightsib)));
  75        if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
  76                        XFS_ERRTAG_BTREE_CHECK_LBLOCK,
  77                        XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
  78                if (bp)
  79                        trace_xfs_btree_corrupt(bp, _RET_IP_);
  80                XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
  81                                 mp);
  82                return XFS_ERROR(EFSCORRUPTED);
  83        }
  84        return 0;
  85}
  86
  87STATIC int                              /* error (0 or EFSCORRUPTED) */
  88xfs_btree_check_sblock(
  89        struct xfs_btree_cur    *cur,   /* btree cursor */
  90        struct xfs_btree_block  *block, /* btree short form block pointer */
  91        int                     level,  /* level of the btree block */
  92        struct xfs_buf          *bp)    /* buffer containing block */
  93{
  94        struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
  95        struct xfs_agf          *agf;   /* ag. freespace structure */
  96        xfs_agblock_t           agflen; /* native ag. freespace length */
  97        int                     sblock_ok; /* block passes checks */
  98
  99        agbp = cur->bc_private.a.agbp;
 100        agf = XFS_BUF_TO_AGF(agbp);
 101        agflen = be32_to_cpu(agf->agf_length);
 102        sblock_ok =
 103                be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
 104                be16_to_cpu(block->bb_level) == level &&
 105                be16_to_cpu(block->bb_numrecs) <=
 106                        cur->bc_ops->get_maxrecs(cur, level) &&
 107                (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
 108                 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
 109                block->bb_u.s.bb_leftsib &&
 110                (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
 111                 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
 112                block->bb_u.s.bb_rightsib;
 113        if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
 114                        XFS_ERRTAG_BTREE_CHECK_SBLOCK,
 115                        XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
 116                if (bp)
 117                        trace_xfs_btree_corrupt(bp, _RET_IP_);
 118                XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
 119                        XFS_ERRLEVEL_LOW, cur->bc_mp, block);
 120                return XFS_ERROR(EFSCORRUPTED);
 121        }
 122        return 0;
 123}
 124
 125/*
 126 * Debug routine: check that block header is ok.
 127 */
 128int
 129xfs_btree_check_block(
 130        struct xfs_btree_cur    *cur,   /* btree cursor */
 131        struct xfs_btree_block  *block, /* generic btree block pointer */
 132        int                     level,  /* level of the btree block */
 133        struct xfs_buf          *bp)    /* buffer containing block, if any */
 134{
 135        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 136                return xfs_btree_check_lblock(cur, block, level, bp);
 137        else
 138                return xfs_btree_check_sblock(cur, block, level, bp);
 139}
 140
 141/*
 142 * Check that (long) pointer is ok.
 143 */
 144int                                     /* error (0 or EFSCORRUPTED) */
 145xfs_btree_check_lptr(
 146        struct xfs_btree_cur    *cur,   /* btree cursor */
 147        xfs_dfsbno_t            bno,    /* btree block disk address */
 148        int                     level)  /* btree block level */
 149{
 150        XFS_WANT_CORRUPTED_RETURN(
 151                level > 0 &&
 152                bno != NULLDFSBNO &&
 153                XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
 154        return 0;
 155}
 156
 157#ifdef DEBUG
 158/*
 159 * Check that (short) pointer is ok.
 160 */
 161STATIC int                              /* error (0 or EFSCORRUPTED) */
 162xfs_btree_check_sptr(
 163        struct xfs_btree_cur    *cur,   /* btree cursor */
 164        xfs_agblock_t           bno,    /* btree block disk address */
 165        int                     level)  /* btree block level */
 166{
 167        xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
 168
 169        XFS_WANT_CORRUPTED_RETURN(
 170                level > 0 &&
 171                bno != NULLAGBLOCK &&
 172                bno != 0 &&
 173                bno < agblocks);
 174        return 0;
 175}
 176
 177/*
 178 * Check that block ptr is ok.
 179 */
 180STATIC int                              /* error (0 or EFSCORRUPTED) */
 181xfs_btree_check_ptr(
 182        struct xfs_btree_cur    *cur,   /* btree cursor */
 183        union xfs_btree_ptr     *ptr,   /* btree block disk address */
 184        int                     index,  /* offset from ptr to check */
 185        int                     level)  /* btree block level */
 186{
 187        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 188                return xfs_btree_check_lptr(cur,
 189                                be64_to_cpu((&ptr->l)[index]), level);
 190        } else {
 191                return xfs_btree_check_sptr(cur,
 192                                be32_to_cpu((&ptr->s)[index]), level);
 193        }
 194}
 195#endif
 196
 197/*
 198 * Delete the btree cursor.
 199 */
 200void
 201xfs_btree_del_cursor(
 202        xfs_btree_cur_t *cur,           /* btree cursor */
 203        int             error)          /* del because of error */
 204{
 205        int             i;              /* btree level */
 206
 207        /*
 208         * Clear the buffer pointers, and release the buffers.
 209         * If we're doing this in the face of an error, we
 210         * need to make sure to inspect all of the entries
 211         * in the bc_bufs array for buffers to be unlocked.
 212         * This is because some of the btree code works from
 213         * level n down to 0, and if we get an error along
 214         * the way we won't have initialized all the entries
 215         * down to 0.
 216         */
 217        for (i = 0; i < cur->bc_nlevels; i++) {
 218                if (cur->bc_bufs[i])
 219                        xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
 220                else if (!error)
 221                        break;
 222        }
 223        /*
 224         * Can't free a bmap cursor without having dealt with the
 225         * allocated indirect blocks' accounting.
 226         */
 227        ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
 228               cur->bc_private.b.allocated == 0);
 229        /*
 230         * Free the cursor.
 231         */
 232        kmem_zone_free(xfs_btree_cur_zone, cur);
 233}
 234
 235/*
 236 * Duplicate the btree cursor.
 237 * Allocate a new one, copy the record, re-get the buffers.
 238 */
 239int                                     /* error */
 240xfs_btree_dup_cursor(
 241        xfs_btree_cur_t *cur,           /* input cursor */
 242        xfs_btree_cur_t **ncur)         /* output cursor */
 243{
 244        xfs_buf_t       *bp;            /* btree block's buffer pointer */
 245        int             error;          /* error return value */
 246        int             i;              /* level number of btree block */
 247        xfs_mount_t     *mp;            /* mount structure for filesystem */
 248        xfs_btree_cur_t *new;           /* new cursor value */
 249        xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
 250
 251        tp = cur->bc_tp;
 252        mp = cur->bc_mp;
 253
 254        /*
 255         * Allocate a new cursor like the old one.
 256         */
 257        new = cur->bc_ops->dup_cursor(cur);
 258
 259        /*
 260         * Copy the record currently in the cursor.
 261         */
 262        new->bc_rec = cur->bc_rec;
 263
 264        /*
 265         * For each level current, re-get the buffer and copy the ptr value.
 266         */
 267        for (i = 0; i < new->bc_nlevels; i++) {
 268                new->bc_ptrs[i] = cur->bc_ptrs[i];
 269                new->bc_ra[i] = cur->bc_ra[i];
 270                if ((bp = cur->bc_bufs[i])) {
 271                        if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 272                                XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
 273                                xfs_btree_del_cursor(new, error);
 274                                *ncur = NULL;
 275                                return error;
 276                        }
 277                        new->bc_bufs[i] = bp;
 278                        ASSERT(!xfs_buf_geterror(bp));
 279                } else
 280                        new->bc_bufs[i] = NULL;
 281        }
 282        *ncur = new;
 283        return 0;
 284}
 285
 286/*
 287 * XFS btree block layout and addressing:
 288 *
 289 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 290 *
 291 * The leaf record start with a header then followed by records containing
 292 * the values.  A non-leaf block also starts with the same header, and
 293 * then first contains lookup keys followed by an equal number of pointers
 294 * to the btree blocks at the previous level.
 295 *
 296 *              +--------+-------+-------+-------+-------+-------+-------+
 297 * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 298 *              +--------+-------+-------+-------+-------+-------+-------+
 299 *
 300 *              +--------+-------+-------+-------+-------+-------+-------+
 301 * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 302 *              +--------+-------+-------+-------+-------+-------+-------+
 303 *
 304 * The header is called struct xfs_btree_block for reasons better left unknown
 305 * and comes in different versions for short (32bit) and long (64bit) block
 306 * pointers.  The record and key structures are defined by the btree instances
 307 * and opaque to the btree core.  The block pointers are simple disk endian
 308 * integers, available in a short (32bit) and long (64bit) variant.
 309 *
 310 * The helpers below calculate the offset of a given record, key or pointer
 311 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 312 * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
 313 * inside the btree block is done using indices starting at one, not zero!
 314 */
 315
 316/*
 317 * Return size of the btree block header for this btree instance.
 318 */
 319static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
 320{
 321        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 322                XFS_BTREE_LBLOCK_LEN :
 323                XFS_BTREE_SBLOCK_LEN;
 324}
 325
 326/*
 327 * Return size of btree block pointers for this btree instance.
 328 */
 329static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
 330{
 331        return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 332                sizeof(__be64) : sizeof(__be32);
 333}
 334
 335/*
 336 * Calculate offset of the n-th record in a btree block.
 337 */
 338STATIC size_t
 339xfs_btree_rec_offset(
 340        struct xfs_btree_cur    *cur,
 341        int                     n)
 342{
 343        return xfs_btree_block_len(cur) +
 344                (n - 1) * cur->bc_ops->rec_len;
 345}
 346
 347/*
 348 * Calculate offset of the n-th key in a btree block.
 349 */
 350STATIC size_t
 351xfs_btree_key_offset(
 352        struct xfs_btree_cur    *cur,
 353        int                     n)
 354{
 355        return xfs_btree_block_len(cur) +
 356                (n - 1) * cur->bc_ops->key_len;
 357}
 358
 359/*
 360 * Calculate offset of the n-th block pointer in a btree block.
 361 */
 362STATIC size_t
 363xfs_btree_ptr_offset(
 364        struct xfs_btree_cur    *cur,
 365        int                     n,
 366        int                     level)
 367{
 368        return xfs_btree_block_len(cur) +
 369                cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
 370                (n - 1) * xfs_btree_ptr_len(cur);
 371}
 372
 373/*
 374 * Return a pointer to the n-th record in the btree block.
 375 */
 376STATIC union xfs_btree_rec *
 377xfs_btree_rec_addr(
 378        struct xfs_btree_cur    *cur,
 379        int                     n,
 380        struct xfs_btree_block  *block)
 381{
 382        return (union xfs_btree_rec *)
 383                ((char *)block + xfs_btree_rec_offset(cur, n));
 384}
 385
 386/*
 387 * Return a pointer to the n-th key in the btree block.
 388 */
 389STATIC union xfs_btree_key *
 390xfs_btree_key_addr(
 391        struct xfs_btree_cur    *cur,
 392        int                     n,
 393        struct xfs_btree_block  *block)
 394{
 395        return (union xfs_btree_key *)
 396                ((char *)block + xfs_btree_key_offset(cur, n));
 397}
 398
 399/*
 400 * Return a pointer to the n-th block pointer in the btree block.
 401 */
 402STATIC union xfs_btree_ptr *
 403xfs_btree_ptr_addr(
 404        struct xfs_btree_cur    *cur,
 405        int                     n,
 406        struct xfs_btree_block  *block)
 407{
 408        int                     level = xfs_btree_get_level(block);
 409
 410        ASSERT(block->bb_level != 0);
 411
 412        return (union xfs_btree_ptr *)
 413                ((char *)block + xfs_btree_ptr_offset(cur, n, level));
 414}
 415
 416/*
 417 * Get a the root block which is stored in the inode.
 418 *
 419 * For now this btree implementation assumes the btree root is always
 420 * stored in the if_broot field of an inode fork.
 421 */
 422STATIC struct xfs_btree_block *
 423xfs_btree_get_iroot(
 424       struct xfs_btree_cur    *cur)
 425{
 426       struct xfs_ifork        *ifp;
 427
 428       ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
 429       return (struct xfs_btree_block *)ifp->if_broot;
 430}
 431
 432/*
 433 * Retrieve the block pointer from the cursor at the given level.
 434 * This may be an inode btree root or from a buffer.
 435 */
 436STATIC struct xfs_btree_block *         /* generic btree block pointer */
 437xfs_btree_get_block(
 438        struct xfs_btree_cur    *cur,   /* btree cursor */
 439        int                     level,  /* level in btree */
 440        struct xfs_buf          **bpp)  /* buffer containing the block */
 441{
 442        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 443            (level == cur->bc_nlevels - 1)) {
 444                *bpp = NULL;
 445                return xfs_btree_get_iroot(cur);
 446        }
 447
 448        *bpp = cur->bc_bufs[level];
 449        return XFS_BUF_TO_BLOCK(*bpp);
 450}
 451
 452/*
 453 * Get a buffer for the block, return it with no data read.
 454 * Long-form addressing.
 455 */
 456xfs_buf_t *                             /* buffer for fsbno */
 457xfs_btree_get_bufl(
 458        xfs_mount_t     *mp,            /* file system mount point */
 459        xfs_trans_t     *tp,            /* transaction pointer */
 460        xfs_fsblock_t   fsbno,          /* file system block number */
 461        uint            lock)           /* lock flags for get_buf */
 462{
 463        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 464        xfs_daddr_t             d;              /* real disk block address */
 465
 466        ASSERT(fsbno != NULLFSBLOCK);
 467        d = XFS_FSB_TO_DADDR(mp, fsbno);
 468        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 469        ASSERT(!xfs_buf_geterror(bp));
 470        return bp;
 471}
 472
 473/*
 474 * Get a buffer for the block, return it with no data read.
 475 * Short-form addressing.
 476 */
 477xfs_buf_t *                             /* buffer for agno/agbno */
 478xfs_btree_get_bufs(
 479        xfs_mount_t     *mp,            /* file system mount point */
 480        xfs_trans_t     *tp,            /* transaction pointer */
 481        xfs_agnumber_t  agno,           /* allocation group number */
 482        xfs_agblock_t   agbno,          /* allocation group block number */
 483        uint            lock)           /* lock flags for get_buf */
 484{
 485        xfs_buf_t       *bp;            /* buffer pointer (return value) */
 486        xfs_daddr_t             d;              /* real disk block address */
 487
 488        ASSERT(agno != NULLAGNUMBER);
 489        ASSERT(agbno != NULLAGBLOCK);
 490        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 491        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 492        ASSERT(!xfs_buf_geterror(bp));
 493        return bp;
 494}
 495
 496/*
 497 * Check for the cursor referring to the last block at the given level.
 498 */
 499int                                     /* 1=is last block, 0=not last block */
 500xfs_btree_islastblock(
 501        xfs_btree_cur_t         *cur,   /* btree cursor */
 502        int                     level)  /* level to check */
 503{
 504        struct xfs_btree_block  *block; /* generic btree block pointer */
 505        xfs_buf_t               *bp;    /* buffer containing block */
 506
 507        block = xfs_btree_get_block(cur, level, &bp);
 508        xfs_btree_check_block(cur, block, level, bp);
 509        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 510                return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
 511        else
 512                return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
 513}
 514
 515/*
 516 * Change the cursor to point to the first record at the given level.
 517 * Other levels are unaffected.
 518 */
 519STATIC int                              /* success=1, failure=0 */
 520xfs_btree_firstrec(
 521        xfs_btree_cur_t         *cur,   /* btree cursor */
 522        int                     level)  /* level to change */
 523{
 524        struct xfs_btree_block  *block; /* generic btree block pointer */
 525        xfs_buf_t               *bp;    /* buffer containing block */
 526
 527        /*
 528         * Get the block pointer for this level.
 529         */
 530        block = xfs_btree_get_block(cur, level, &bp);
 531        xfs_btree_check_block(cur, block, level, bp);
 532        /*
 533         * It's empty, there is no such record.
 534         */
 535        if (!block->bb_numrecs)
 536                return 0;
 537        /*
 538         * Set the ptr value to 1, that's the first record/key.
 539         */
 540        cur->bc_ptrs[level] = 1;
 541        return 1;
 542}
 543
 544/*
 545 * Change the cursor to point to the last record in the current block
 546 * at the given level.  Other levels are unaffected.
 547 */
 548STATIC int                              /* success=1, failure=0 */
 549xfs_btree_lastrec(
 550        xfs_btree_cur_t         *cur,   /* btree cursor */
 551        int                     level)  /* level to change */
 552{
 553        struct xfs_btree_block  *block; /* generic btree block pointer */
 554        xfs_buf_t               *bp;    /* buffer containing block */
 555
 556        /*
 557         * Get the block pointer for this level.
 558         */
 559        block = xfs_btree_get_block(cur, level, &bp);
 560        xfs_btree_check_block(cur, block, level, bp);
 561        /*
 562         * It's empty, there is no such record.
 563         */
 564        if (!block->bb_numrecs)
 565                return 0;
 566        /*
 567         * Set the ptr value to numrecs, that's the last record/key.
 568         */
 569        cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
 570        return 1;
 571}
 572
 573/*
 574 * Compute first and last byte offsets for the fields given.
 575 * Interprets the offsets table, which contains struct field offsets.
 576 */
 577void
 578xfs_btree_offsets(
 579        __int64_t       fields,         /* bitmask of fields */
 580        const short     *offsets,       /* table of field offsets */
 581        int             nbits,          /* number of bits to inspect */
 582        int             *first,         /* output: first byte offset */
 583        int             *last)          /* output: last byte offset */
 584{
 585        int             i;              /* current bit number */
 586        __int64_t       imask;          /* mask for current bit number */
 587
 588        ASSERT(fields != 0);
 589        /*
 590         * Find the lowest bit, so the first byte offset.
 591         */
 592        for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
 593                if (imask & fields) {
 594                        *first = offsets[i];
 595                        break;
 596                }
 597        }
 598        /*
 599         * Find the highest bit, so the last byte offset.
 600         */
 601        for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
 602                if (imask & fields) {
 603                        *last = offsets[i + 1] - 1;
 604                        break;
 605                }
 606        }
 607}
 608
 609/*
 610 * Get a buffer for the block, return it read in.
 611 * Long-form addressing.
 612 */
 613int                                     /* error */
 614xfs_btree_read_bufl(
 615        xfs_mount_t     *mp,            /* file system mount point */
 616        xfs_trans_t     *tp,            /* transaction pointer */
 617        xfs_fsblock_t   fsbno,          /* file system block number */
 618        uint            lock,           /* lock flags for read_buf */
 619        xfs_buf_t       **bpp,          /* buffer for fsbno */
 620        int             refval)         /* ref count value for buffer */
 621{
 622        xfs_buf_t       *bp;            /* return value */
 623        xfs_daddr_t             d;              /* real disk block address */
 624        int             error;
 625
 626        ASSERT(fsbno != NULLFSBLOCK);
 627        d = XFS_FSB_TO_DADDR(mp, fsbno);
 628        if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
 629                        mp->m_bsize, lock, &bp))) {
 630                return error;
 631        }
 632        ASSERT(!xfs_buf_geterror(bp));
 633        if (bp)
 634                xfs_buf_set_ref(bp, refval);
 635        *bpp = bp;
 636        return 0;
 637}
 638
 639/*
 640 * Read-ahead the block, don't wait for it, don't return a buffer.
 641 * Long-form addressing.
 642 */
 643/* ARGSUSED */
 644void
 645xfs_btree_reada_bufl(
 646        xfs_mount_t     *mp,            /* file system mount point */
 647        xfs_fsblock_t   fsbno,          /* file system block number */
 648        xfs_extlen_t    count)          /* count of filesystem blocks */
 649{
 650        xfs_daddr_t             d;
 651
 652        ASSERT(fsbno != NULLFSBLOCK);
 653        d = XFS_FSB_TO_DADDR(mp, fsbno);
 654        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
 655}
 656
 657/*
 658 * Read-ahead the block, don't wait for it, don't return a buffer.
 659 * Short-form addressing.
 660 */
 661/* ARGSUSED */
 662void
 663xfs_btree_reada_bufs(
 664        xfs_mount_t     *mp,            /* file system mount point */
 665        xfs_agnumber_t  agno,           /* allocation group number */
 666        xfs_agblock_t   agbno,          /* allocation group block number */
 667        xfs_extlen_t    count)          /* count of filesystem blocks */
 668{
 669        xfs_daddr_t             d;
 670
 671        ASSERT(agno != NULLAGNUMBER);
 672        ASSERT(agbno != NULLAGBLOCK);
 673        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
 674        xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
 675}
 676
 677STATIC int
 678xfs_btree_readahead_lblock(
 679        struct xfs_btree_cur    *cur,
 680        int                     lr,
 681        struct xfs_btree_block  *block)
 682{
 683        int                     rval = 0;
 684        xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
 685        xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
 686
 687        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
 688                xfs_btree_reada_bufl(cur->bc_mp, left, 1);
 689                rval++;
 690        }
 691
 692        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
 693                xfs_btree_reada_bufl(cur->bc_mp, right, 1);
 694                rval++;
 695        }
 696
 697        return rval;
 698}
 699
 700STATIC int
 701xfs_btree_readahead_sblock(
 702        struct xfs_btree_cur    *cur,
 703        int                     lr,
 704        struct xfs_btree_block *block)
 705{
 706        int                     rval = 0;
 707        xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
 708        xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
 709
 710
 711        if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
 712                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 713                                     left, 1);
 714                rval++;
 715        }
 716
 717        if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
 718                xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
 719                                     right, 1);
 720                rval++;
 721        }
 722
 723        return rval;
 724}
 725
 726/*
 727 * Read-ahead btree blocks, at the given level.
 728 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 729 */
 730STATIC int
 731xfs_btree_readahead(
 732        struct xfs_btree_cur    *cur,           /* btree cursor */
 733        int                     lev,            /* level in btree */
 734        int                     lr)             /* left/right bits */
 735{
 736        struct xfs_btree_block  *block;
 737
 738        /*
 739         * No readahead needed if we are at the root level and the
 740         * btree root is stored in the inode.
 741         */
 742        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 743            (lev == cur->bc_nlevels - 1))
 744                return 0;
 745
 746        if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
 747                return 0;
 748
 749        cur->bc_ra[lev] |= lr;
 750        block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
 751
 752        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 753                return xfs_btree_readahead_lblock(cur, lr, block);
 754        return xfs_btree_readahead_sblock(cur, lr, block);
 755}
 756
 757/*
 758 * Set the buffer for level "lev" in the cursor to bp, releasing
 759 * any previous buffer.
 760 */
 761STATIC void
 762xfs_btree_setbuf(
 763        xfs_btree_cur_t         *cur,   /* btree cursor */
 764        int                     lev,    /* level in btree */
 765        xfs_buf_t               *bp)    /* new buffer to set */
 766{
 767        struct xfs_btree_block  *b;     /* btree block */
 768
 769        if (cur->bc_bufs[lev])
 770                xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
 771        cur->bc_bufs[lev] = bp;
 772        cur->bc_ra[lev] = 0;
 773
 774        b = XFS_BUF_TO_BLOCK(bp);
 775        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 776                if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
 777                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 778                if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
 779                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 780        } else {
 781                if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
 782                        cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 783                if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
 784                        cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 785        }
 786}
 787
 788STATIC int
 789xfs_btree_ptr_is_null(
 790        struct xfs_btree_cur    *cur,
 791        union xfs_btree_ptr     *ptr)
 792{
 793        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 794                return ptr->l == cpu_to_be64(NULLDFSBNO);
 795        else
 796                return ptr->s == cpu_to_be32(NULLAGBLOCK);
 797}
 798
 799STATIC void
 800xfs_btree_set_ptr_null(
 801        struct xfs_btree_cur    *cur,
 802        union xfs_btree_ptr     *ptr)
 803{
 804        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 805                ptr->l = cpu_to_be64(NULLDFSBNO);
 806        else
 807                ptr->s = cpu_to_be32(NULLAGBLOCK);
 808}
 809
 810/*
 811 * Get/set/init sibling pointers
 812 */
 813STATIC void
 814xfs_btree_get_sibling(
 815        struct xfs_btree_cur    *cur,
 816        struct xfs_btree_block  *block,
 817        union xfs_btree_ptr     *ptr,
 818        int                     lr)
 819{
 820        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 821
 822        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 823                if (lr == XFS_BB_RIGHTSIB)
 824                        ptr->l = block->bb_u.l.bb_rightsib;
 825                else
 826                        ptr->l = block->bb_u.l.bb_leftsib;
 827        } else {
 828                if (lr == XFS_BB_RIGHTSIB)
 829                        ptr->s = block->bb_u.s.bb_rightsib;
 830                else
 831                        ptr->s = block->bb_u.s.bb_leftsib;
 832        }
 833}
 834
 835STATIC void
 836xfs_btree_set_sibling(
 837        struct xfs_btree_cur    *cur,
 838        struct xfs_btree_block  *block,
 839        union xfs_btree_ptr     *ptr,
 840        int                     lr)
 841{
 842        ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 843
 844        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 845                if (lr == XFS_BB_RIGHTSIB)
 846                        block->bb_u.l.bb_rightsib = ptr->l;
 847                else
 848                        block->bb_u.l.bb_leftsib = ptr->l;
 849        } else {
 850                if (lr == XFS_BB_RIGHTSIB)
 851                        block->bb_u.s.bb_rightsib = ptr->s;
 852                else
 853                        block->bb_u.s.bb_leftsib = ptr->s;
 854        }
 855}
 856
 857STATIC void
 858xfs_btree_init_block(
 859        struct xfs_btree_cur    *cur,
 860        int                     level,
 861        int                     numrecs,
 862        struct xfs_btree_block  *new)   /* new block */
 863{
 864        new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
 865        new->bb_level = cpu_to_be16(level);
 866        new->bb_numrecs = cpu_to_be16(numrecs);
 867
 868        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 869                new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
 870                new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
 871        } else {
 872                new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
 873                new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
 874        }
 875}
 876
 877/*
 878 * Return true if ptr is the last record in the btree and
 879 * we need to track updateѕ to this record.  The decision
 880 * will be further refined in the update_lastrec method.
 881 */
 882STATIC int
 883xfs_btree_is_lastrec(
 884        struct xfs_btree_cur    *cur,
 885        struct xfs_btree_block  *block,
 886        int                     level)
 887{
 888        union xfs_btree_ptr     ptr;
 889
 890        if (level > 0)
 891                return 0;
 892        if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
 893                return 0;
 894
 895        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
 896        if (!xfs_btree_ptr_is_null(cur, &ptr))
 897                return 0;
 898        return 1;
 899}
 900
 901STATIC void
 902xfs_btree_buf_to_ptr(
 903        struct xfs_btree_cur    *cur,
 904        struct xfs_buf          *bp,
 905        union xfs_btree_ptr     *ptr)
 906{
 907        if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 908                ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
 909                                        XFS_BUF_ADDR(bp)));
 910        else {
 911                ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
 912                                        XFS_BUF_ADDR(bp)));
 913        }
 914}
 915
 916STATIC xfs_daddr_t
 917xfs_btree_ptr_to_daddr(
 918        struct xfs_btree_cur    *cur,
 919        union xfs_btree_ptr     *ptr)
 920{
 921        if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 922                ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
 923
 924                return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
 925        } else {
 926                ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
 927                ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
 928
 929                return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
 930                                        be32_to_cpu(ptr->s));
 931        }
 932}
 933
 934STATIC void
 935xfs_btree_set_refs(
 936        struct xfs_btree_cur    *cur,
 937        struct xfs_buf          *bp)
 938{
 939        switch (cur->bc_btnum) {
 940        case XFS_BTNUM_BNO:
 941        case XFS_BTNUM_CNT:
 942                xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
 943                break;
 944        case XFS_BTNUM_INO:
 945                xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
 946                break;
 947        case XFS_BTNUM_BMAP:
 948                xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
 949                break;
 950        default:
 951                ASSERT(0);
 952        }
 953}
 954
 955STATIC int
 956xfs_btree_get_buf_block(
 957        struct xfs_btree_cur    *cur,
 958        union xfs_btree_ptr     *ptr,
 959        int                     flags,
 960        struct xfs_btree_block  **block,
 961        struct xfs_buf          **bpp)
 962{
 963        struct xfs_mount        *mp = cur->bc_mp;
 964        xfs_daddr_t             d;
 965
 966        /* need to sort out how callers deal with failures first */
 967        ASSERT(!(flags & XBF_TRYLOCK));
 968
 969        d = xfs_btree_ptr_to_daddr(cur, ptr);
 970        *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
 971                                 mp->m_bsize, flags);
 972
 973        if (!*bpp)
 974                return ENOMEM;
 975
 976        *block = XFS_BUF_TO_BLOCK(*bpp);
 977        return 0;
 978}
 979
 980/*
 981 * Read in the buffer at the given ptr and return the buffer and
 982 * the block pointer within the buffer.
 983 */
 984STATIC int
 985xfs_btree_read_buf_block(
 986        struct xfs_btree_cur    *cur,
 987        union xfs_btree_ptr     *ptr,
 988        int                     level,
 989        int                     flags,
 990        struct xfs_btree_block  **block,
 991        struct xfs_buf          **bpp)
 992{
 993        struct xfs_mount        *mp = cur->bc_mp;
 994        xfs_daddr_t             d;
 995        int                     error;
 996
 997        /* need to sort out how callers deal with failures first */
 998        ASSERT(!(flags & XBF_TRYLOCK));
 999
1000        d = xfs_btree_ptr_to_daddr(cur, ptr);
1001        error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1002                                   mp->m_bsize, flags, bpp);
1003        if (error)
1004                return error;
1005
1006        ASSERT(!xfs_buf_geterror(*bpp));
1007
1008        xfs_btree_set_refs(cur, *bpp);
1009        *block = XFS_BUF_TO_BLOCK(*bpp);
1010
1011        error = xfs_btree_check_block(cur, *block, level, *bpp);
1012        if (error)
1013                xfs_trans_brelse(cur->bc_tp, *bpp);
1014        return error;
1015}
1016
1017/*
1018 * Copy keys from one btree block to another.
1019 */
1020STATIC void
1021xfs_btree_copy_keys(
1022        struct xfs_btree_cur    *cur,
1023        union xfs_btree_key     *dst_key,
1024        union xfs_btree_key     *src_key,
1025        int                     numkeys)
1026{
1027        ASSERT(numkeys >= 0);
1028        memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1029}
1030
1031/*
1032 * Copy records from one btree block to another.
1033 */
1034STATIC void
1035xfs_btree_copy_recs(
1036        struct xfs_btree_cur    *cur,
1037        union xfs_btree_rec     *dst_rec,
1038        union xfs_btree_rec     *src_rec,
1039        int                     numrecs)
1040{
1041        ASSERT(numrecs >= 0);
1042        memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1043}
1044
1045/*
1046 * Copy block pointers from one btree block to another.
1047 */
1048STATIC void
1049xfs_btree_copy_ptrs(
1050        struct xfs_btree_cur    *cur,
1051        union xfs_btree_ptr     *dst_ptr,
1052        union xfs_btree_ptr     *src_ptr,
1053        int                     numptrs)
1054{
1055        ASSERT(numptrs >= 0);
1056        memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1057}
1058
1059/*
1060 * Shift keys one index left/right inside a single btree block.
1061 */
1062STATIC void
1063xfs_btree_shift_keys(
1064        struct xfs_btree_cur    *cur,
1065        union xfs_btree_key     *key,
1066        int                     dir,
1067        int                     numkeys)
1068{
1069        char                    *dst_key;
1070
1071        ASSERT(numkeys >= 0);
1072        ASSERT(dir == 1 || dir == -1);
1073
1074        dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1075        memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1076}
1077
1078/*
1079 * Shift records one index left/right inside a single btree block.
1080 */
1081STATIC void
1082xfs_btree_shift_recs(
1083        struct xfs_btree_cur    *cur,
1084        union xfs_btree_rec     *rec,
1085        int                     dir,
1086        int                     numrecs)
1087{
1088        char                    *dst_rec;
1089
1090        ASSERT(numrecs >= 0);
1091        ASSERT(dir == 1 || dir == -1);
1092
1093        dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1094        memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1095}
1096
1097/*
1098 * Shift block pointers one index left/right inside a single btree block.
1099 */
1100STATIC void
1101xfs_btree_shift_ptrs(
1102        struct xfs_btree_cur    *cur,
1103        union xfs_btree_ptr     *ptr,
1104        int                     dir,
1105        int                     numptrs)
1106{
1107        char                    *dst_ptr;
1108
1109        ASSERT(numptrs >= 0);
1110        ASSERT(dir == 1 || dir == -1);
1111
1112        dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1113        memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1114}
1115
1116/*
1117 * Log key values from the btree block.
1118 */
1119STATIC void
1120xfs_btree_log_keys(
1121        struct xfs_btree_cur    *cur,
1122        struct xfs_buf          *bp,
1123        int                     first,
1124        int                     last)
1125{
1126        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1127        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1128
1129        if (bp) {
1130                xfs_trans_log_buf(cur->bc_tp, bp,
1131                                  xfs_btree_key_offset(cur, first),
1132                                  xfs_btree_key_offset(cur, last + 1) - 1);
1133        } else {
1134                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1135                                xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1136        }
1137
1138        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1139}
1140
1141/*
1142 * Log record values from the btree block.
1143 */
1144void
1145xfs_btree_log_recs(
1146        struct xfs_btree_cur    *cur,
1147        struct xfs_buf          *bp,
1148        int                     first,
1149        int                     last)
1150{
1151        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1152        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1153
1154        xfs_trans_log_buf(cur->bc_tp, bp,
1155                          xfs_btree_rec_offset(cur, first),
1156                          xfs_btree_rec_offset(cur, last + 1) - 1);
1157
1158        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1159}
1160
1161/*
1162 * Log block pointer fields from a btree block (nonleaf).
1163 */
1164STATIC void
1165xfs_btree_log_ptrs(
1166        struct xfs_btree_cur    *cur,   /* btree cursor */
1167        struct xfs_buf          *bp,    /* buffer containing btree block */
1168        int                     first,  /* index of first pointer to log */
1169        int                     last)   /* index of last pointer to log */
1170{
1171        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1172        XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1173
1174        if (bp) {
1175                struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1176                int                     level = xfs_btree_get_level(block);
1177
1178                xfs_trans_log_buf(cur->bc_tp, bp,
1179                                xfs_btree_ptr_offset(cur, first, level),
1180                                xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1181        } else {
1182                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1183                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1184        }
1185
1186        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1187}
1188
1189/*
1190 * Log fields from a btree block header.
1191 */
1192void
1193xfs_btree_log_block(
1194        struct xfs_btree_cur    *cur,   /* btree cursor */
1195        struct xfs_buf          *bp,    /* buffer containing btree block */
1196        int                     fields) /* mask of fields: XFS_BB_... */
1197{
1198        int                     first;  /* first byte offset logged */
1199        int                     last;   /* last byte offset logged */
1200        static const short      soffsets[] = {  /* table of offsets (short) */
1201                offsetof(struct xfs_btree_block, bb_magic),
1202                offsetof(struct xfs_btree_block, bb_level),
1203                offsetof(struct xfs_btree_block, bb_numrecs),
1204                offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1205                offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1206                XFS_BTREE_SBLOCK_LEN
1207        };
1208        static const short      loffsets[] = {  /* table of offsets (long) */
1209                offsetof(struct xfs_btree_block, bb_magic),
1210                offsetof(struct xfs_btree_block, bb_level),
1211                offsetof(struct xfs_btree_block, bb_numrecs),
1212                offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1213                offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1214                XFS_BTREE_LBLOCK_LEN
1215        };
1216
1217        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1218        XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1219
1220        if (bp) {
1221                xfs_btree_offsets(fields,
1222                                  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1223                                        loffsets : soffsets,
1224                                  XFS_BB_NUM_BITS, &first, &last);
1225                xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1226        } else {
1227                xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1228                        xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1229        }
1230
1231        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1232}
1233
1234/*
1235 * Increment cursor by one record at the level.
1236 * For nonzero levels the leaf-ward information is untouched.
1237 */
1238int                                             /* error */
1239xfs_btree_increment(
1240        struct xfs_btree_cur    *cur,
1241        int                     level,
1242        int                     *stat)          /* success/failure */
1243{
1244        struct xfs_btree_block  *block;
1245        union xfs_btree_ptr     ptr;
1246        struct xfs_buf          *bp;
1247        int                     error;          /* error return value */
1248        int                     lev;
1249
1250        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1251        XFS_BTREE_TRACE_ARGI(cur, level);
1252
1253        ASSERT(level < cur->bc_nlevels);
1254
1255        /* Read-ahead to the right at this level. */
1256        xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1257
1258        /* Get a pointer to the btree block. */
1259        block = xfs_btree_get_block(cur, level, &bp);
1260
1261#ifdef DEBUG
1262        error = xfs_btree_check_block(cur, block, level, bp);
1263        if (error)
1264                goto error0;
1265#endif
1266
1267        /* We're done if we remain in the block after the increment. */
1268        if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1269                goto out1;
1270
1271        /* Fail if we just went off the right edge of the tree. */
1272        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1273        if (xfs_btree_ptr_is_null(cur, &ptr))
1274                goto out0;
1275
1276        XFS_BTREE_STATS_INC(cur, increment);
1277
1278        /*
1279         * March up the tree incrementing pointers.
1280         * Stop when we don't go off the right edge of a block.
1281         */
1282        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1283                block = xfs_btree_get_block(cur, lev, &bp);
1284
1285#ifdef DEBUG
1286                error = xfs_btree_check_block(cur, block, lev, bp);
1287                if (error)
1288                        goto error0;
1289#endif
1290
1291                if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1292                        break;
1293
1294                /* Read-ahead the right block for the next loop. */
1295                xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1296        }
1297
1298        /*
1299         * If we went off the root then we are either seriously
1300         * confused or have the tree root in an inode.
1301         */
1302        if (lev == cur->bc_nlevels) {
1303                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1304                        goto out0;
1305                ASSERT(0);
1306                error = EFSCORRUPTED;
1307                goto error0;
1308        }
1309        ASSERT(lev < cur->bc_nlevels);
1310
1311        /*
1312         * Now walk back down the tree, fixing up the cursor's buffer
1313         * pointers and key numbers.
1314         */
1315        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1316                union xfs_btree_ptr     *ptrp;
1317
1318                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1319                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1320                                                        0, &block, &bp);
1321                if (error)
1322                        goto error0;
1323
1324                xfs_btree_setbuf(cur, lev, bp);
1325                cur->bc_ptrs[lev] = 1;
1326        }
1327out1:
1328        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1329        *stat = 1;
1330        return 0;
1331
1332out0:
1333        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1334        *stat = 0;
1335        return 0;
1336
1337error0:
1338        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1339        return error;
1340}
1341
1342/*
1343 * Decrement cursor by one record at the level.
1344 * For nonzero levels the leaf-ward information is untouched.
1345 */
1346int                                             /* error */
1347xfs_btree_decrement(
1348        struct xfs_btree_cur    *cur,
1349        int                     level,
1350        int                     *stat)          /* success/failure */
1351{
1352        struct xfs_btree_block  *block;
1353        xfs_buf_t               *bp;
1354        int                     error;          /* error return value */
1355        int                     lev;
1356        union xfs_btree_ptr     ptr;
1357
1358        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1359        XFS_BTREE_TRACE_ARGI(cur, level);
1360
1361        ASSERT(level < cur->bc_nlevels);
1362
1363        /* Read-ahead to the left at this level. */
1364        xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1365
1366        /* We're done if we remain in the block after the decrement. */
1367        if (--cur->bc_ptrs[level] > 0)
1368                goto out1;
1369
1370        /* Get a pointer to the btree block. */
1371        block = xfs_btree_get_block(cur, level, &bp);
1372
1373#ifdef DEBUG
1374        error = xfs_btree_check_block(cur, block, level, bp);
1375        if (error)
1376                goto error0;
1377#endif
1378
1379        /* Fail if we just went off the left edge of the tree. */
1380        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1381        if (xfs_btree_ptr_is_null(cur, &ptr))
1382                goto out0;
1383
1384        XFS_BTREE_STATS_INC(cur, decrement);
1385
1386        /*
1387         * March up the tree decrementing pointers.
1388         * Stop when we don't go off the left edge of a block.
1389         */
1390        for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1391                if (--cur->bc_ptrs[lev] > 0)
1392                        break;
1393                /* Read-ahead the left block for the next loop. */
1394                xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1395        }
1396
1397        /*
1398         * If we went off the root then we are seriously confused.
1399         * or the root of the tree is in an inode.
1400         */
1401        if (lev == cur->bc_nlevels) {
1402                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1403                        goto out0;
1404                ASSERT(0);
1405                error = EFSCORRUPTED;
1406                goto error0;
1407        }
1408        ASSERT(lev < cur->bc_nlevels);
1409
1410        /*
1411         * Now walk back down the tree, fixing up the cursor's buffer
1412         * pointers and key numbers.
1413         */
1414        for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1415                union xfs_btree_ptr     *ptrp;
1416
1417                ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1418                error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1419                                                        0, &block, &bp);
1420                if (error)
1421                        goto error0;
1422                xfs_btree_setbuf(cur, lev, bp);
1423                cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1424        }
1425out1:
1426        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1427        *stat = 1;
1428        return 0;
1429
1430out0:
1431        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1432        *stat = 0;
1433        return 0;
1434
1435error0:
1436        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1437        return error;
1438}
1439
1440STATIC int
1441xfs_btree_lookup_get_block(
1442        struct xfs_btree_cur    *cur,   /* btree cursor */
1443        int                     level,  /* level in the btree */
1444        union xfs_btree_ptr     *pp,    /* ptr to btree block */
1445        struct xfs_btree_block  **blkp) /* return btree block */
1446{
1447        struct xfs_buf          *bp;    /* buffer pointer for btree block */
1448        int                     error = 0;
1449
1450        /* special case the root block if in an inode */
1451        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1452            (level == cur->bc_nlevels - 1)) {
1453                *blkp = xfs_btree_get_iroot(cur);
1454                return 0;
1455        }
1456
1457        /*
1458         * If the old buffer at this level for the disk address we are
1459         * looking for re-use it.
1460         *
1461         * Otherwise throw it away and get a new one.
1462         */
1463        bp = cur->bc_bufs[level];
1464        if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1465                *blkp = XFS_BUF_TO_BLOCK(bp);
1466                return 0;
1467        }
1468
1469        error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1470        if (error)
1471                return error;
1472
1473        xfs_btree_setbuf(cur, level, bp);
1474        return 0;
1475}
1476
1477/*
1478 * Get current search key.  For level 0 we don't actually have a key
1479 * structure so we make one up from the record.  For all other levels
1480 * we just return the right key.
1481 */
1482STATIC union xfs_btree_key *
1483xfs_lookup_get_search_key(
1484        struct xfs_btree_cur    *cur,
1485        int                     level,
1486        int                     keyno,
1487        struct xfs_btree_block  *block,
1488        union xfs_btree_key     *kp)
1489{
1490        if (level == 0) {
1491                cur->bc_ops->init_key_from_rec(kp,
1492                                xfs_btree_rec_addr(cur, keyno, block));
1493                return kp;
1494        }
1495
1496        return xfs_btree_key_addr(cur, keyno, block);
1497}
1498
1499/*
1500 * Lookup the record.  The cursor is made to point to it, based on dir.
1501 * Return 0 if can't find any such record, 1 for success.
1502 */
1503int                                     /* error */
1504xfs_btree_lookup(
1505        struct xfs_btree_cur    *cur,   /* btree cursor */
1506        xfs_lookup_t            dir,    /* <=, ==, or >= */
1507        int                     *stat)  /* success/failure */
1508{
1509        struct xfs_btree_block  *block; /* current btree block */
1510        __int64_t               diff;   /* difference for the current key */
1511        int                     error;  /* error return value */
1512        int                     keyno;  /* current key number */
1513        int                     level;  /* level in the btree */
1514        union xfs_btree_ptr     *pp;    /* ptr to btree block */
1515        union xfs_btree_ptr     ptr;    /* ptr to btree block */
1516
1517        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1518        XFS_BTREE_TRACE_ARGI(cur, dir);
1519
1520        XFS_BTREE_STATS_INC(cur, lookup);
1521
1522        block = NULL;
1523        keyno = 0;
1524
1525        /* initialise start pointer from cursor */
1526        cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1527        pp = &ptr;
1528
1529        /*
1530         * Iterate over each level in the btree, starting at the root.
1531         * For each level above the leaves, find the key we need, based
1532         * on the lookup record, then follow the corresponding block
1533         * pointer down to the next level.
1534         */
1535        for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1536                /* Get the block we need to do the lookup on. */
1537                error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1538                if (error)
1539                        goto error0;
1540
1541                if (diff == 0) {
1542                        /*
1543                         * If we already had a key match at a higher level, we
1544                         * know we need to use the first entry in this block.
1545                         */
1546                        keyno = 1;
1547                } else {
1548                        /* Otherwise search this block. Do a binary search. */
1549
1550                        int     high;   /* high entry number */
1551                        int     low;    /* low entry number */
1552
1553                        /* Set low and high entry numbers, 1-based. */
1554                        low = 1;
1555                        high = xfs_btree_get_numrecs(block);
1556                        if (!high) {
1557                                /* Block is empty, must be an empty leaf. */
1558                                ASSERT(level == 0 && cur->bc_nlevels == 1);
1559
1560                                cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1561                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1562                                *stat = 0;
1563                                return 0;
1564                        }
1565
1566                        /* Binary search the block. */
1567                        while (low <= high) {
1568                                union xfs_btree_key     key;
1569                                union xfs_btree_key     *kp;
1570
1571                                XFS_BTREE_STATS_INC(cur, compare);
1572
1573                                /* keyno is average of low and high. */
1574                                keyno = (low + high) >> 1;
1575
1576                                /* Get current search key */
1577                                kp = xfs_lookup_get_search_key(cur, level,
1578                                                keyno, block, &key);
1579
1580                                /*
1581                                 * Compute difference to get next direction:
1582                                 *  - less than, move right
1583                                 *  - greater than, move left
1584                                 *  - equal, we're done
1585                                 */
1586                                diff = cur->bc_ops->key_diff(cur, kp);
1587                                if (diff < 0)
1588                                        low = keyno + 1;
1589                                else if (diff > 0)
1590                                        high = keyno - 1;
1591                                else
1592                                        break;
1593                        }
1594                }
1595
1596                /*
1597                 * If there are more levels, set up for the next level
1598                 * by getting the block number and filling in the cursor.
1599                 */
1600                if (level > 0) {
1601                        /*
1602                         * If we moved left, need the previous key number,
1603                         * unless there isn't one.
1604                         */
1605                        if (diff > 0 && --keyno < 1)
1606                                keyno = 1;
1607                        pp = xfs_btree_ptr_addr(cur, keyno, block);
1608
1609#ifdef DEBUG
1610                        error = xfs_btree_check_ptr(cur, pp, 0, level);
1611                        if (error)
1612                                goto error0;
1613#endif
1614                        cur->bc_ptrs[level] = keyno;
1615                }
1616        }
1617
1618        /* Done with the search. See if we need to adjust the results. */
1619        if (dir != XFS_LOOKUP_LE && diff < 0) {
1620                keyno++;
1621                /*
1622                 * If ge search and we went off the end of the block, but it's
1623                 * not the last block, we're in the wrong block.
1624                 */
1625                xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1626                if (dir == XFS_LOOKUP_GE &&
1627                    keyno > xfs_btree_get_numrecs(block) &&
1628                    !xfs_btree_ptr_is_null(cur, &ptr)) {
1629                        int     i;
1630
1631                        cur->bc_ptrs[0] = keyno;
1632                        error = xfs_btree_increment(cur, 0, &i);
1633                        if (error)
1634                                goto error0;
1635                        XFS_WANT_CORRUPTED_RETURN(i == 1);
1636                        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1637                        *stat = 1;
1638                        return 0;
1639                }
1640        } else if (dir == XFS_LOOKUP_LE && diff > 0)
1641                keyno--;
1642        cur->bc_ptrs[0] = keyno;
1643
1644        /* Return if we succeeded or not. */
1645        if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1646                *stat = 0;
1647        else if (dir != XFS_LOOKUP_EQ || diff == 0)
1648                *stat = 1;
1649        else
1650                *stat = 0;
1651        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1652        return 0;
1653
1654error0:
1655        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1656        return error;
1657}
1658
1659/*
1660 * Update keys at all levels from here to the root along the cursor's path.
1661 */
1662STATIC int
1663xfs_btree_updkey(
1664        struct xfs_btree_cur    *cur,
1665        union xfs_btree_key     *keyp,
1666        int                     level)
1667{
1668        struct xfs_btree_block  *block;
1669        struct xfs_buf          *bp;
1670        union xfs_btree_key     *kp;
1671        int                     ptr;
1672
1673        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1674        XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1675
1676        ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1677
1678        /*
1679         * Go up the tree from this level toward the root.
1680         * At each level, update the key value to the value input.
1681         * Stop when we reach a level where the cursor isn't pointing
1682         * at the first entry in the block.
1683         */
1684        for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1685#ifdef DEBUG
1686                int             error;
1687#endif
1688                block = xfs_btree_get_block(cur, level, &bp);
1689#ifdef DEBUG
1690                error = xfs_btree_check_block(cur, block, level, bp);
1691                if (error) {
1692                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1693                        return error;
1694                }
1695#endif
1696                ptr = cur->bc_ptrs[level];
1697                kp = xfs_btree_key_addr(cur, ptr, block);
1698                xfs_btree_copy_keys(cur, kp, keyp, 1);
1699                xfs_btree_log_keys(cur, bp, ptr, ptr);
1700        }
1701
1702        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1703        return 0;
1704}
1705
1706/*
1707 * Update the record referred to by cur to the value in the
1708 * given record. This either works (return 0) or gets an
1709 * EFSCORRUPTED error.
1710 */
1711int
1712xfs_btree_update(
1713        struct xfs_btree_cur    *cur,
1714        union xfs_btree_rec     *rec)
1715{
1716        struct xfs_btree_block  *block;
1717        struct xfs_buf          *bp;
1718        int                     error;
1719        int                     ptr;
1720        union xfs_btree_rec     *rp;
1721
1722        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1723        XFS_BTREE_TRACE_ARGR(cur, rec);
1724
1725        /* Pick up the current block. */
1726        block = xfs_btree_get_block(cur, 0, &bp);
1727
1728#ifdef DEBUG
1729        error = xfs_btree_check_block(cur, block, 0, bp);
1730        if (error)
1731                goto error0;
1732#endif
1733        /* Get the address of the rec to be updated. */
1734        ptr = cur->bc_ptrs[0];
1735        rp = xfs_btree_rec_addr(cur, ptr, block);
1736
1737        /* Fill in the new contents and log them. */
1738        xfs_btree_copy_recs(cur, rp, rec, 1);
1739        xfs_btree_log_recs(cur, bp, ptr, ptr);
1740
1741        /*
1742         * If we are tracking the last record in the tree and
1743         * we are at the far right edge of the tree, update it.
1744         */
1745        if (xfs_btree_is_lastrec(cur, block, 0)) {
1746                cur->bc_ops->update_lastrec(cur, block, rec,
1747                                            ptr, LASTREC_UPDATE);
1748        }
1749
1750        /* Updating first rec in leaf. Pass new key value up to our parent. */
1751        if (ptr == 1) {
1752                union xfs_btree_key     key;
1753
1754                cur->bc_ops->init_key_from_rec(&key, rec);
1755                error = xfs_btree_updkey(cur, &key, 1);
1756                if (error)
1757                        goto error0;
1758        }
1759
1760        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1761        return 0;
1762
1763error0:
1764        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1765        return error;
1766}
1767
1768/*
1769 * Move 1 record left from cur/level if possible.
1770 * Update cur to reflect the new path.
1771 */
1772STATIC int                                      /* error */
1773xfs_btree_lshift(
1774        struct xfs_btree_cur    *cur,
1775        int                     level,
1776        int                     *stat)          /* success/failure */
1777{
1778        union xfs_btree_key     key;            /* btree key */
1779        struct xfs_buf          *lbp;           /* left buffer pointer */
1780        struct xfs_btree_block  *left;          /* left btree block */
1781        int                     lrecs;          /* left record count */
1782        struct xfs_buf          *rbp;           /* right buffer pointer */
1783        struct xfs_btree_block  *right;         /* right btree block */
1784        int                     rrecs;          /* right record count */
1785        union xfs_btree_ptr     lptr;           /* left btree pointer */
1786        union xfs_btree_key     *rkp = NULL;    /* right btree key */
1787        union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1788        union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1789        int                     error;          /* error return value */
1790
1791        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1792        XFS_BTREE_TRACE_ARGI(cur, level);
1793
1794        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1795            level == cur->bc_nlevels - 1)
1796                goto out0;
1797
1798        /* Set up variables for this block as "right". */
1799        right = xfs_btree_get_block(cur, level, &rbp);
1800
1801#ifdef DEBUG
1802        error = xfs_btree_check_block(cur, right, level, rbp);
1803        if (error)
1804                goto error0;
1805#endif
1806
1807        /* If we've got no left sibling then we can't shift an entry left. */
1808        xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1809        if (xfs_btree_ptr_is_null(cur, &lptr))
1810                goto out0;
1811
1812        /*
1813         * If the cursor entry is the one that would be moved, don't
1814         * do it... it's too complicated.
1815         */
1816        if (cur->bc_ptrs[level] <= 1)
1817                goto out0;
1818
1819        /* Set up the left neighbor as "left". */
1820        error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1821        if (error)
1822                goto error0;
1823
1824        /* If it's full, it can't take another entry. */
1825        lrecs = xfs_btree_get_numrecs(left);
1826        if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1827                goto out0;
1828
1829        rrecs = xfs_btree_get_numrecs(right);
1830
1831        /*
1832         * We add one entry to the left side and remove one for the right side.
1833         * Account for it here, the changes will be updated on disk and logged
1834         * later.
1835         */
1836        lrecs++;
1837        rrecs--;
1838
1839        XFS_BTREE_STATS_INC(cur, lshift);
1840        XFS_BTREE_STATS_ADD(cur, moves, 1);
1841
1842        /*
1843         * If non-leaf, copy a key and a ptr to the left block.
1844         * Log the changes to the left block.
1845         */
1846        if (level > 0) {
1847                /* It's a non-leaf.  Move keys and pointers. */
1848                union xfs_btree_key     *lkp;   /* left btree key */
1849                union xfs_btree_ptr     *lpp;   /* left address pointer */
1850
1851                lkp = xfs_btree_key_addr(cur, lrecs, left);
1852                rkp = xfs_btree_key_addr(cur, 1, right);
1853
1854                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1855                rpp = xfs_btree_ptr_addr(cur, 1, right);
1856#ifdef DEBUG
1857                error = xfs_btree_check_ptr(cur, rpp, 0, level);
1858                if (error)
1859                        goto error0;
1860#endif
1861                xfs_btree_copy_keys(cur, lkp, rkp, 1);
1862                xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1863
1864                xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1865                xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1866
1867                ASSERT(cur->bc_ops->keys_inorder(cur,
1868                        xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1869        } else {
1870                /* It's a leaf.  Move records.  */
1871                union xfs_btree_rec     *lrp;   /* left record pointer */
1872
1873                lrp = xfs_btree_rec_addr(cur, lrecs, left);
1874                rrp = xfs_btree_rec_addr(cur, 1, right);
1875
1876                xfs_btree_copy_recs(cur, lrp, rrp, 1);
1877                xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1878
1879                ASSERT(cur->bc_ops->recs_inorder(cur,
1880                        xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1881        }
1882
1883        xfs_btree_set_numrecs(left, lrecs);
1884        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1885
1886        xfs_btree_set_numrecs(right, rrecs);
1887        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1888
1889        /*
1890         * Slide the contents of right down one entry.
1891         */
1892        XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1893        if (level > 0) {
1894                /* It's a nonleaf. operate on keys and ptrs */
1895#ifdef DEBUG
1896                int                     i;              /* loop index */
1897
1898                for (i = 0; i < rrecs; i++) {
1899                        error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1900                        if (error)
1901                                goto error0;
1902                }
1903#endif
1904                xfs_btree_shift_keys(cur,
1905                                xfs_btree_key_addr(cur, 2, right),
1906                                -1, rrecs);
1907                xfs_btree_shift_ptrs(cur,
1908                                xfs_btree_ptr_addr(cur, 2, right),
1909                                -1, rrecs);
1910
1911                xfs_btree_log_keys(cur, rbp, 1, rrecs);
1912                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1913        } else {
1914                /* It's a leaf. operate on records */
1915                xfs_btree_shift_recs(cur,
1916                        xfs_btree_rec_addr(cur, 2, right),
1917                        -1, rrecs);
1918                xfs_btree_log_recs(cur, rbp, 1, rrecs);
1919
1920                /*
1921                 * If it's the first record in the block, we'll need a key
1922                 * structure to pass up to the next level (updkey).
1923                 */
1924                cur->bc_ops->init_key_from_rec(&key,
1925                        xfs_btree_rec_addr(cur, 1, right));
1926                rkp = &key;
1927        }
1928
1929        /* Update the parent key values of right. */
1930        error = xfs_btree_updkey(cur, rkp, level + 1);
1931        if (error)
1932                goto error0;
1933
1934        /* Slide the cursor value left one. */
1935        cur->bc_ptrs[level]--;
1936
1937        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1938        *stat = 1;
1939        return 0;
1940
1941out0:
1942        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1943        *stat = 0;
1944        return 0;
1945
1946error0:
1947        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1948        return error;
1949}
1950
1951/*
1952 * Move 1 record right from cur/level if possible.
1953 * Update cur to reflect the new path.
1954 */
1955STATIC int                                      /* error */
1956xfs_btree_rshift(
1957        struct xfs_btree_cur    *cur,
1958        int                     level,
1959        int                     *stat)          /* success/failure */
1960{
1961        union xfs_btree_key     key;            /* btree key */
1962        struct xfs_buf          *lbp;           /* left buffer pointer */
1963        struct xfs_btree_block  *left;          /* left btree block */
1964        struct xfs_buf          *rbp;           /* right buffer pointer */
1965        struct xfs_btree_block  *right;         /* right btree block */
1966        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
1967        union xfs_btree_ptr     rptr;           /* right block pointer */
1968        union xfs_btree_key     *rkp;           /* right btree key */
1969        int                     rrecs;          /* right record count */
1970        int                     lrecs;          /* left record count */
1971        int                     error;          /* error return value */
1972        int                     i;              /* loop counter */
1973
1974        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1975        XFS_BTREE_TRACE_ARGI(cur, level);
1976
1977        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1978            (level == cur->bc_nlevels - 1))
1979                goto out0;
1980
1981        /* Set up variables for this block as "left". */
1982        left = xfs_btree_get_block(cur, level, &lbp);
1983
1984#ifdef DEBUG
1985        error = xfs_btree_check_block(cur, left, level, lbp);
1986        if (error)
1987                goto error0;
1988#endif
1989
1990        /* If we've got no right sibling then we can't shift an entry right. */
1991        xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
1992        if (xfs_btree_ptr_is_null(cur, &rptr))
1993                goto out0;
1994
1995        /*
1996         * If the cursor entry is the one that would be moved, don't
1997         * do it... it's too complicated.
1998         */
1999        lrecs = xfs_btree_get_numrecs(left);
2000        if (cur->bc_ptrs[level] >= lrecs)
2001                goto out0;
2002
2003        /* Set up the right neighbor as "right". */
2004        error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2005        if (error)
2006                goto error0;
2007
2008        /* If it's full, it can't take another entry. */
2009        rrecs = xfs_btree_get_numrecs(right);
2010        if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2011                goto out0;
2012
2013        XFS_BTREE_STATS_INC(cur, rshift);
2014        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2015
2016        /*
2017         * Make a hole at the start of the right neighbor block, then
2018         * copy the last left block entry to the hole.
2019         */
2020        if (level > 0) {
2021                /* It's a nonleaf. make a hole in the keys and ptrs */
2022                union xfs_btree_key     *lkp;
2023                union xfs_btree_ptr     *lpp;
2024                union xfs_btree_ptr     *rpp;
2025
2026                lkp = xfs_btree_key_addr(cur, lrecs, left);
2027                lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2028                rkp = xfs_btree_key_addr(cur, 1, right);
2029                rpp = xfs_btree_ptr_addr(cur, 1, right);
2030
2031#ifdef DEBUG
2032                for (i = rrecs - 1; i >= 0; i--) {
2033                        error = xfs_btree_check_ptr(cur, rpp, i, level);
2034                        if (error)
2035                                goto error0;
2036                }
2037#endif
2038
2039                xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2040                xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2041
2042#ifdef DEBUG
2043                error = xfs_btree_check_ptr(cur, lpp, 0, level);
2044                if (error)
2045                        goto error0;
2046#endif
2047
2048                /* Now put the new data in, and log it. */
2049                xfs_btree_copy_keys(cur, rkp, lkp, 1);
2050                xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2051
2052                xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2053                xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2054
2055                ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2056                        xfs_btree_key_addr(cur, 2, right)));
2057        } else {
2058                /* It's a leaf. make a hole in the records */
2059                union xfs_btree_rec     *lrp;
2060                union xfs_btree_rec     *rrp;
2061
2062                lrp = xfs_btree_rec_addr(cur, lrecs, left);
2063                rrp = xfs_btree_rec_addr(cur, 1, right);
2064
2065                xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2066
2067                /* Now put the new data in, and log it. */
2068                xfs_btree_copy_recs(cur, rrp, lrp, 1);
2069                xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2070
2071                cur->bc_ops->init_key_from_rec(&key, rrp);
2072                rkp = &key;
2073
2074                ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2075                        xfs_btree_rec_addr(cur, 2, right)));
2076        }
2077
2078        /*
2079         * Decrement and log left's numrecs, bump and log right's numrecs.
2080         */
2081        xfs_btree_set_numrecs(left, --lrecs);
2082        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2083
2084        xfs_btree_set_numrecs(right, ++rrecs);
2085        xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2086
2087        /*
2088         * Using a temporary cursor, update the parent key values of the
2089         * block on the right.
2090         */
2091        error = xfs_btree_dup_cursor(cur, &tcur);
2092        if (error)
2093                goto error0;
2094        i = xfs_btree_lastrec(tcur, level);
2095        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2096
2097        error = xfs_btree_increment(tcur, level, &i);
2098        if (error)
2099                goto error1;
2100
2101        error = xfs_btree_updkey(tcur, rkp, level + 1);
2102        if (error)
2103                goto error1;
2104
2105        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2106
2107        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2108        *stat = 1;
2109        return 0;
2110
2111out0:
2112        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2113        *stat = 0;
2114        return 0;
2115
2116error0:
2117        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2118        return error;
2119
2120error1:
2121        XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2122        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2123        return error;
2124}
2125
2126/*
2127 * Split cur/level block in half.
2128 * Return new block number and the key to its first
2129 * record (to be inserted into parent).
2130 */
2131STATIC int                                      /* error */
2132xfs_btree_split(
2133        struct xfs_btree_cur    *cur,
2134        int                     level,
2135        union xfs_btree_ptr     *ptrp,
2136        union xfs_btree_key     *key,
2137        struct xfs_btree_cur    **curp,
2138        int                     *stat)          /* success/failure */
2139{
2140        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2141        struct xfs_buf          *lbp;           /* left buffer pointer */
2142        struct xfs_btree_block  *left;          /* left btree block */
2143        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2144        struct xfs_buf          *rbp;           /* right buffer pointer */
2145        struct xfs_btree_block  *right;         /* right btree block */
2146        union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2147        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2148        struct xfs_btree_block  *rrblock;       /* right-right btree block */
2149        int                     lrecs;
2150        int                     rrecs;
2151        int                     src_index;
2152        int                     error;          /* error return value */
2153#ifdef DEBUG
2154        int                     i;
2155#endif
2156
2157        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2158        XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2159
2160        XFS_BTREE_STATS_INC(cur, split);
2161
2162        /* Set up left block (current one). */
2163        left = xfs_btree_get_block(cur, level, &lbp);
2164
2165#ifdef DEBUG
2166        error = xfs_btree_check_block(cur, left, level, lbp);
2167        if (error)
2168                goto error0;
2169#endif
2170
2171        xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2172
2173        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2174        error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2175        if (error)
2176                goto error0;
2177        if (*stat == 0)
2178                goto out0;
2179        XFS_BTREE_STATS_INC(cur, alloc);
2180
2181        /* Set up the new block as "right". */
2182        error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2183        if (error)
2184                goto error0;
2185
2186        /* Fill in the btree header for the new right block. */
2187        xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2188
2189        /*
2190         * Split the entries between the old and the new block evenly.
2191         * Make sure that if there's an odd number of entries now, that
2192         * each new block will have the same number of entries.
2193         */
2194        lrecs = xfs_btree_get_numrecs(left);
2195        rrecs = lrecs / 2;
2196        if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2197                rrecs++;
2198        src_index = (lrecs - rrecs + 1);
2199
2200        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2201
2202        /*
2203         * Copy btree block entries from the left block over to the
2204         * new block, the right. Update the right block and log the
2205         * changes.
2206         */
2207        if (level > 0) {
2208                /* It's a non-leaf.  Move keys and pointers. */
2209                union xfs_btree_key     *lkp;   /* left btree key */
2210                union xfs_btree_ptr     *lpp;   /* left address pointer */
2211                union xfs_btree_key     *rkp;   /* right btree key */
2212                union xfs_btree_ptr     *rpp;   /* right address pointer */
2213
2214                lkp = xfs_btree_key_addr(cur, src_index, left);
2215                lpp = xfs_btree_ptr_addr(cur, src_index, left);
2216                rkp = xfs_btree_key_addr(cur, 1, right);
2217                rpp = xfs_btree_ptr_addr(cur, 1, right);
2218
2219#ifdef DEBUG
2220                for (i = src_index; i < rrecs; i++) {
2221                        error = xfs_btree_check_ptr(cur, lpp, i, level);
2222                        if (error)
2223                                goto error0;
2224                }
2225#endif
2226
2227                xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2228                xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2229
2230                xfs_btree_log_keys(cur, rbp, 1, rrecs);
2231                xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2232
2233                /* Grab the keys to the entries moved to the right block */
2234                xfs_btree_copy_keys(cur, key, rkp, 1);
2235        } else {
2236                /* It's a leaf.  Move records.  */
2237                union xfs_btree_rec     *lrp;   /* left record pointer */
2238                union xfs_btree_rec     *rrp;   /* right record pointer */
2239
2240                lrp = xfs_btree_rec_addr(cur, src_index, left);
2241                rrp = xfs_btree_rec_addr(cur, 1, right);
2242
2243                xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2244                xfs_btree_log_recs(cur, rbp, 1, rrecs);
2245
2246                cur->bc_ops->init_key_from_rec(key,
2247                        xfs_btree_rec_addr(cur, 1, right));
2248        }
2249
2250
2251        /*
2252         * Find the left block number by looking in the buffer.
2253         * Adjust numrecs, sibling pointers.
2254         */
2255        xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2256        xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2257        xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2258        xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2259
2260        lrecs -= rrecs;
2261        xfs_btree_set_numrecs(left, lrecs);
2262        xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2263
2264        xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2265        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2266
2267        /*
2268         * If there's a block to the new block's right, make that block
2269         * point back to right instead of to left.
2270         */
2271        if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2272                error = xfs_btree_read_buf_block(cur, &rrptr, level,
2273                                                        0, &rrblock, &rrbp);
2274                if (error)
2275                        goto error0;
2276                xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2277                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2278        }
2279        /*
2280         * If the cursor is really in the right block, move it there.
2281         * If it's just pointing past the last entry in left, then we'll
2282         * insert there, so don't change anything in that case.
2283         */
2284        if (cur->bc_ptrs[level] > lrecs + 1) {
2285                xfs_btree_setbuf(cur, level, rbp);
2286                cur->bc_ptrs[level] -= lrecs;
2287        }
2288        /*
2289         * If there are more levels, we'll need another cursor which refers
2290         * the right block, no matter where this cursor was.
2291         */
2292        if (level + 1 < cur->bc_nlevels) {
2293                error = xfs_btree_dup_cursor(cur, curp);
2294                if (error)
2295                        goto error0;
2296                (*curp)->bc_ptrs[level + 1]++;
2297        }
2298        *ptrp = rptr;
2299        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2300        *stat = 1;
2301        return 0;
2302out0:
2303        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2304        *stat = 0;
2305        return 0;
2306
2307error0:
2308        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2309        return error;
2310}
2311
2312/*
2313 * Copy the old inode root contents into a real block and make the
2314 * broot point to it.
2315 */
2316int                                             /* error */
2317xfs_btree_new_iroot(
2318        struct xfs_btree_cur    *cur,           /* btree cursor */
2319        int                     *logflags,      /* logging flags for inode */
2320        int                     *stat)          /* return status - 0 fail */
2321{
2322        struct xfs_buf          *cbp;           /* buffer for cblock */
2323        struct xfs_btree_block  *block;         /* btree block */
2324        struct xfs_btree_block  *cblock;        /* child btree block */
2325        union xfs_btree_key     *ckp;           /* child key pointer */
2326        union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2327        union xfs_btree_key     *kp;            /* pointer to btree key */
2328        union xfs_btree_ptr     *pp;            /* pointer to block addr */
2329        union xfs_btree_ptr     nptr;           /* new block addr */
2330        int                     level;          /* btree level */
2331        int                     error;          /* error return code */
2332#ifdef DEBUG
2333        int                     i;              /* loop counter */
2334#endif
2335
2336        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2337        XFS_BTREE_STATS_INC(cur, newroot);
2338
2339        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2340
2341        level = cur->bc_nlevels - 1;
2342
2343        block = xfs_btree_get_iroot(cur);
2344        pp = xfs_btree_ptr_addr(cur, 1, block);
2345
2346        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2347        error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2348        if (error)
2349                goto error0;
2350        if (*stat == 0) {
2351                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2352                return 0;
2353        }
2354        XFS_BTREE_STATS_INC(cur, alloc);
2355
2356        /* Copy the root into a real block. */
2357        error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2358        if (error)
2359                goto error0;
2360
2361        memcpy(cblock, block, xfs_btree_block_len(cur));
2362
2363        be16_add_cpu(&block->bb_level, 1);
2364        xfs_btree_set_numrecs(block, 1);
2365        cur->bc_nlevels++;
2366        cur->bc_ptrs[level + 1] = 1;
2367
2368        kp = xfs_btree_key_addr(cur, 1, block);
2369        ckp = xfs_btree_key_addr(cur, 1, cblock);
2370        xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2371
2372        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2373#ifdef DEBUG
2374        for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2375                error = xfs_btree_check_ptr(cur, pp, i, level);
2376                if (error)
2377                        goto error0;
2378        }
2379#endif
2380        xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2381
2382#ifdef DEBUG
2383        error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2384        if (error)
2385                goto error0;
2386#endif
2387        xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2388
2389        xfs_iroot_realloc(cur->bc_private.b.ip,
2390                          1 - xfs_btree_get_numrecs(cblock),
2391                          cur->bc_private.b.whichfork);
2392
2393        xfs_btree_setbuf(cur, level, cbp);
2394
2395        /*
2396         * Do all this logging at the end so that
2397         * the root is at the right level.
2398         */
2399        xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2400        xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2401        xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2402
2403        *logflags |=
2404                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2405        *stat = 1;
2406        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2407        return 0;
2408error0:
2409        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2410        return error;
2411}
2412
2413/*
2414 * Allocate a new root block, fill it in.
2415 */
2416STATIC int                              /* error */
2417xfs_btree_new_root(
2418        struct xfs_btree_cur    *cur,   /* btree cursor */
2419        int                     *stat)  /* success/failure */
2420{
2421        struct xfs_btree_block  *block; /* one half of the old root block */
2422        struct xfs_buf          *bp;    /* buffer containing block */
2423        int                     error;  /* error return value */
2424        struct xfs_buf          *lbp;   /* left buffer pointer */
2425        struct xfs_btree_block  *left;  /* left btree block */
2426        struct xfs_buf          *nbp;   /* new (root) buffer */
2427        struct xfs_btree_block  *new;   /* new (root) btree block */
2428        int                     nptr;   /* new value for key index, 1 or 2 */
2429        struct xfs_buf          *rbp;   /* right buffer pointer */
2430        struct xfs_btree_block  *right; /* right btree block */
2431        union xfs_btree_ptr     rptr;
2432        union xfs_btree_ptr     lptr;
2433
2434        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2435        XFS_BTREE_STATS_INC(cur, newroot);
2436
2437        /* initialise our start point from the cursor */
2438        cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2439
2440        /* Allocate the new block. If we can't do it, we're toast. Give up. */
2441        error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2442        if (error)
2443                goto error0;
2444        if (*stat == 0)
2445                goto out0;
2446        XFS_BTREE_STATS_INC(cur, alloc);
2447
2448        /* Set up the new block. */
2449        error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2450        if (error)
2451                goto error0;
2452
2453        /* Set the root in the holding structure  increasing the level by 1. */
2454        cur->bc_ops->set_root(cur, &lptr, 1);
2455
2456        /*
2457         * At the previous root level there are now two blocks: the old root,
2458         * and the new block generated when it was split.  We don't know which
2459         * one the cursor is pointing at, so we set up variables "left" and
2460         * "right" for each case.
2461         */
2462        block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2463
2464#ifdef DEBUG
2465        error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2466        if (error)
2467                goto error0;
2468#endif
2469
2470        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2471        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2472                /* Our block is left, pick up the right block. */
2473                lbp = bp;
2474                xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2475                left = block;
2476                error = xfs_btree_read_buf_block(cur, &rptr,
2477                                        cur->bc_nlevels - 1, 0, &right, &rbp);
2478                if (error)
2479                        goto error0;
2480                bp = rbp;
2481                nptr = 1;
2482        } else {
2483                /* Our block is right, pick up the left block. */
2484                rbp = bp;
2485                xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2486                right = block;
2487                xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2488                error = xfs_btree_read_buf_block(cur, &lptr,
2489                                        cur->bc_nlevels - 1, 0, &left, &lbp);
2490                if (error)
2491                        goto error0;
2492                bp = lbp;
2493                nptr = 2;
2494        }
2495        /* Fill in the new block's btree header and log it. */
2496        xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2497        xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2498        ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2499                        !xfs_btree_ptr_is_null(cur, &rptr));
2500
2501        /* Fill in the key data in the new root. */
2502        if (xfs_btree_get_level(left) > 0) {
2503                xfs_btree_copy_keys(cur,
2504                                xfs_btree_key_addr(cur, 1, new),
2505                                xfs_btree_key_addr(cur, 1, left), 1);
2506                xfs_btree_copy_keys(cur,
2507                                xfs_btree_key_addr(cur, 2, new),
2508                                xfs_btree_key_addr(cur, 1, right), 1);
2509        } else {
2510                cur->bc_ops->init_key_from_rec(
2511                                xfs_btree_key_addr(cur, 1, new),
2512                                xfs_btree_rec_addr(cur, 1, left));
2513                cur->bc_ops->init_key_from_rec(
2514                                xfs_btree_key_addr(cur, 2, new),
2515                                xfs_btree_rec_addr(cur, 1, right));
2516        }
2517        xfs_btree_log_keys(cur, nbp, 1, 2);
2518
2519        /* Fill in the pointer data in the new root. */
2520        xfs_btree_copy_ptrs(cur,
2521                xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2522        xfs_btree_copy_ptrs(cur,
2523                xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2524        xfs_btree_log_ptrs(cur, nbp, 1, 2);
2525
2526        /* Fix up the cursor. */
2527        xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2528        cur->bc_ptrs[cur->bc_nlevels] = nptr;
2529        cur->bc_nlevels++;
2530        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2531        *stat = 1;
2532        return 0;
2533error0:
2534        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2535        return error;
2536out0:
2537        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2538        *stat = 0;
2539        return 0;
2540}
2541
2542STATIC int
2543xfs_btree_make_block_unfull(
2544        struct xfs_btree_cur    *cur,   /* btree cursor */
2545        int                     level,  /* btree level */
2546        int                     numrecs,/* # of recs in block */
2547        int                     *oindex,/* old tree index */
2548        int                     *index, /* new tree index */
2549        union xfs_btree_ptr     *nptr,  /* new btree ptr */
2550        struct xfs_btree_cur    **ncur, /* new btree cursor */
2551        union xfs_btree_rec     *nrec,  /* new record */
2552        int                     *stat)
2553{
2554        union xfs_btree_key     key;    /* new btree key value */
2555        int                     error = 0;
2556
2557        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2558            level == cur->bc_nlevels - 1) {
2559                struct xfs_inode *ip = cur->bc_private.b.ip;
2560
2561                if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2562                        /* A root block that can be made bigger. */
2563
2564                        xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2565                } else {
2566                        /* A root block that needs replacing */
2567                        int     logflags = 0;
2568
2569                        error = xfs_btree_new_iroot(cur, &logflags, stat);
2570                        if (error || *stat == 0)
2571                                return error;
2572
2573                        xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2574                }
2575
2576                return 0;
2577        }
2578
2579        /* First, try shifting an entry to the right neighbor. */
2580        error = xfs_btree_rshift(cur, level, stat);
2581        if (error || *stat)
2582                return error;
2583
2584        /* Next, try shifting an entry to the left neighbor. */
2585        error = xfs_btree_lshift(cur, level, stat);
2586        if (error)
2587                return error;
2588
2589        if (*stat) {
2590                *oindex = *index = cur->bc_ptrs[level];
2591                return 0;
2592        }
2593
2594        /*
2595         * Next, try splitting the current block in half.
2596         *
2597         * If this works we have to re-set our variables because we
2598         * could be in a different block now.
2599         */
2600        error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2601        if (error || *stat == 0)
2602                return error;
2603
2604
2605        *index = cur->bc_ptrs[level];
2606        cur->bc_ops->init_rec_from_key(&key, nrec);
2607        return 0;
2608}
2609
2610/*
2611 * Insert one record/level.  Return information to the caller
2612 * allowing the next level up to proceed if necessary.
2613 */
2614STATIC int
2615xfs_btree_insrec(
2616        struct xfs_btree_cur    *cur,   /* btree cursor */
2617        int                     level,  /* level to insert record at */
2618        union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2619        union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2620        struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2621        int                     *stat)  /* success/failure */
2622{
2623        struct xfs_btree_block  *block; /* btree block */
2624        struct xfs_buf          *bp;    /* buffer for block */
2625        union xfs_btree_key     key;    /* btree key */
2626        union xfs_btree_ptr     nptr;   /* new block ptr */
2627        struct xfs_btree_cur    *ncur;  /* new btree cursor */
2628        union xfs_btree_rec     nrec;   /* new record count */
2629        int                     optr;   /* old key/record index */
2630        int                     ptr;    /* key/record index */
2631        int                     numrecs;/* number of records */
2632        int                     error;  /* error return value */
2633#ifdef DEBUG
2634        int                     i;
2635#endif
2636
2637        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2638        XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2639
2640        ncur = NULL;
2641
2642        /*
2643         * If we have an external root pointer, and we've made it to the
2644         * root level, allocate a new root block and we're done.
2645         */
2646        if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2647            (level >= cur->bc_nlevels)) {
2648                error = xfs_btree_new_root(cur, stat);
2649                xfs_btree_set_ptr_null(cur, ptrp);
2650
2651                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2652                return error;
2653        }
2654
2655        /* If we're off the left edge, return failure. */
2656        ptr = cur->bc_ptrs[level];
2657        if (ptr == 0) {
2658                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2659                *stat = 0;
2660                return 0;
2661        }
2662
2663        /* Make a key out of the record data to be inserted, and save it. */
2664        cur->bc_ops->init_key_from_rec(&key, recp);
2665
2666        optr = ptr;
2667
2668        XFS_BTREE_STATS_INC(cur, insrec);
2669
2670        /* Get pointers to the btree buffer and block. */
2671        block = xfs_btree_get_block(cur, level, &bp);
2672        numrecs = xfs_btree_get_numrecs(block);
2673
2674#ifdef DEBUG
2675        error = xfs_btree_check_block(cur, block, level, bp);
2676        if (error)
2677                goto error0;
2678
2679        /* Check that the new entry is being inserted in the right place. */
2680        if (ptr <= numrecs) {
2681                if (level == 0) {
2682                        ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2683                                xfs_btree_rec_addr(cur, ptr, block)));
2684                } else {
2685                        ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2686                                xfs_btree_key_addr(cur, ptr, block)));
2687                }
2688        }
2689#endif
2690
2691        /*
2692         * If the block is full, we can't insert the new entry until we
2693         * make the block un-full.
2694         */
2695        xfs_btree_set_ptr_null(cur, &nptr);
2696        if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2697                error = xfs_btree_make_block_unfull(cur, level, numrecs,
2698                                        &optr, &ptr, &nptr, &ncur, &nrec, stat);
2699                if (error || *stat == 0)
2700                        goto error0;
2701        }
2702
2703        /*
2704         * The current block may have changed if the block was
2705         * previously full and we have just made space in it.
2706         */
2707        block = xfs_btree_get_block(cur, level, &bp);
2708        numrecs = xfs_btree_get_numrecs(block);
2709
2710#ifdef DEBUG
2711        error = xfs_btree_check_block(cur, block, level, bp);
2712        if (error)
2713                return error;
2714#endif
2715
2716        /*
2717         * At this point we know there's room for our new entry in the block
2718         * we're pointing at.
2719         */
2720        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2721
2722        if (level > 0) {
2723                /* It's a nonleaf. make a hole in the keys and ptrs */
2724                union xfs_btree_key     *kp;
2725                union xfs_btree_ptr     *pp;
2726
2727                kp = xfs_btree_key_addr(cur, ptr, block);
2728                pp = xfs_btree_ptr_addr(cur, ptr, block);
2729
2730#ifdef DEBUG
2731                for (i = numrecs - ptr; i >= 0; i--) {
2732                        error = xfs_btree_check_ptr(cur, pp, i, level);
2733                        if (error)
2734                                return error;
2735                }
2736#endif
2737
2738                xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2739                xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2740
2741#ifdef DEBUG
2742                error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2743                if (error)
2744                        goto error0;
2745#endif
2746
2747                /* Now put the new data in, bump numrecs and log it. */
2748                xfs_btree_copy_keys(cur, kp, &key, 1);
2749                xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2750                numrecs++;
2751                xfs_btree_set_numrecs(block, numrecs);
2752                xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2753                xfs_btree_log_keys(cur, bp, ptr, numrecs);
2754#ifdef DEBUG
2755                if (ptr < numrecs) {
2756                        ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2757                                xfs_btree_key_addr(cur, ptr + 1, block)));
2758                }
2759#endif
2760        } else {
2761                /* It's a leaf. make a hole in the records */
2762                union xfs_btree_rec             *rp;
2763
2764                rp = xfs_btree_rec_addr(cur, ptr, block);
2765
2766                xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2767
2768                /* Now put the new data in, bump numrecs and log it. */
2769                xfs_btree_copy_recs(cur, rp, recp, 1);
2770                xfs_btree_set_numrecs(block, ++numrecs);
2771                xfs_btree_log_recs(cur, bp, ptr, numrecs);
2772#ifdef DEBUG
2773                if (ptr < numrecs) {
2774                        ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2775                                xfs_btree_rec_addr(cur, ptr + 1, block)));
2776                }
2777#endif
2778        }
2779
2780        /* Log the new number of records in the btree header. */
2781        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2782
2783        /* If we inserted at the start of a block, update the parents' keys. */
2784        if (optr == 1) {
2785                error = xfs_btree_updkey(cur, &key, level + 1);
2786                if (error)
2787                        goto error0;
2788        }
2789
2790        /*
2791         * If we are tracking the last record in the tree and
2792         * we are at the far right edge of the tree, update it.
2793         */
2794        if (xfs_btree_is_lastrec(cur, block, level)) {
2795                cur->bc_ops->update_lastrec(cur, block, recp,
2796                                            ptr, LASTREC_INSREC);
2797        }
2798
2799        /*
2800         * Return the new block number, if any.
2801         * If there is one, give back a record value and a cursor too.
2802         */
2803        *ptrp = nptr;
2804        if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2805                *recp = nrec;
2806                *curp = ncur;
2807        }
2808
2809        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2810        *stat = 1;
2811        return 0;
2812
2813error0:
2814        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2815        return error;
2816}
2817
2818/*
2819 * Insert the record at the point referenced by cur.
2820 *
2821 * A multi-level split of the tree on insert will invalidate the original
2822 * cursor.  All callers of this function should assume that the cursor is
2823 * no longer valid and revalidate it.
2824 */
2825int
2826xfs_btree_insert(
2827        struct xfs_btree_cur    *cur,
2828        int                     *stat)
2829{
2830        int                     error;  /* error return value */
2831        int                     i;      /* result value, 0 for failure */
2832        int                     level;  /* current level number in btree */
2833        union xfs_btree_ptr     nptr;   /* new block number (split result) */
2834        struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2835        struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2836        union xfs_btree_rec     rec;    /* record to insert */
2837
2838        level = 0;
2839        ncur = NULL;
2840        pcur = cur;
2841
2842        xfs_btree_set_ptr_null(cur, &nptr);
2843        cur->bc_ops->init_rec_from_cur(cur, &rec);
2844
2845        /*
2846         * Loop going up the tree, starting at the leaf level.
2847         * Stop when we don't get a split block, that must mean that
2848         * the insert is finished with this level.
2849         */
2850        do {
2851                /*
2852                 * Insert nrec/nptr into this level of the tree.
2853                 * Note if we fail, nptr will be null.
2854                 */
2855                error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2856                if (error) {
2857                        if (pcur != cur)
2858                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2859                        goto error0;
2860                }
2861
2862                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2863                level++;
2864
2865                /*
2866                 * See if the cursor we just used is trash.
2867                 * Can't trash the caller's cursor, but otherwise we should
2868                 * if ncur is a new cursor or we're about to be done.
2869                 */
2870                if (pcur != cur &&
2871                    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2872                        /* Save the state from the cursor before we trash it */
2873                        if (cur->bc_ops->update_cursor)
2874                                cur->bc_ops->update_cursor(pcur, cur);
2875                        cur->bc_nlevels = pcur->bc_nlevels;
2876                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2877                }
2878                /* If we got a new cursor, switch to it. */
2879                if (ncur) {
2880                        pcur = ncur;
2881                        ncur = NULL;
2882                }
2883        } while (!xfs_btree_ptr_is_null(cur, &nptr));
2884
2885        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2886        *stat = i;
2887        return 0;
2888error0:
2889        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2890        return error;
2891}
2892
2893/*
2894 * Try to merge a non-leaf block back into the inode root.
2895 *
2896 * Note: the killroot names comes from the fact that we're effectively
2897 * killing the old root block.  But because we can't just delete the
2898 * inode we have to copy the single block it was pointing to into the
2899 * inode.
2900 */
2901STATIC int
2902xfs_btree_kill_iroot(
2903        struct xfs_btree_cur    *cur)
2904{
2905        int                     whichfork = cur->bc_private.b.whichfork;
2906        struct xfs_inode        *ip = cur->bc_private.b.ip;
2907        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2908        struct xfs_btree_block  *block;
2909        struct xfs_btree_block  *cblock;
2910        union xfs_btree_key     *kp;
2911        union xfs_btree_key     *ckp;
2912        union xfs_btree_ptr     *pp;
2913        union xfs_btree_ptr     *cpp;
2914        struct xfs_buf          *cbp;
2915        int                     level;
2916        int                     index;
2917        int                     numrecs;
2918#ifdef DEBUG
2919        union xfs_btree_ptr     ptr;
2920        int                     i;
2921#endif
2922
2923        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2924
2925        ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2926        ASSERT(cur->bc_nlevels > 1);
2927
2928        /*
2929         * Don't deal with the root block needs to be a leaf case.
2930         * We're just going to turn the thing back into extents anyway.
2931         */
2932        level = cur->bc_nlevels - 1;
2933        if (level == 1)
2934                goto out0;
2935
2936        /*
2937         * Give up if the root has multiple children.
2938         */
2939        block = xfs_btree_get_iroot(cur);
2940        if (xfs_btree_get_numrecs(block) != 1)
2941                goto out0;
2942
2943        cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2944        numrecs = xfs_btree_get_numrecs(cblock);
2945
2946        /*
2947         * Only do this if the next level will fit.
2948         * Then the data must be copied up to the inode,
2949         * instead of freeing the root you free the next level.
2950         */
2951        if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
2952                goto out0;
2953
2954        XFS_BTREE_STATS_INC(cur, killroot);
2955
2956#ifdef DEBUG
2957        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
2958        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2959        xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2960        ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2961#endif
2962
2963        index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
2964        if (index) {
2965                xfs_iroot_realloc(cur->bc_private.b.ip, index,
2966                                  cur->bc_private.b.whichfork);
2967                block = ifp->if_broot;
2968        }
2969
2970        be16_add_cpu(&block->bb_numrecs, index);
2971        ASSERT(block->bb_numrecs == cblock->bb_numrecs);
2972
2973        kp = xfs_btree_key_addr(cur, 1, block);
2974        ckp = xfs_btree_key_addr(cur, 1, cblock);
2975        xfs_btree_copy_keys(cur, kp, ckp, numrecs);
2976
2977        pp = xfs_btree_ptr_addr(cur, 1, block);
2978        cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2979#ifdef DEBUG
2980        for (i = 0; i < numrecs; i++) {
2981                int             error;
2982
2983                error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
2984                if (error) {
2985                        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2986                        return error;
2987                }
2988        }
2989#endif
2990        xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
2991
2992        cur->bc_ops->free_block(cur, cbp);
2993        XFS_BTREE_STATS_INC(cur, free);
2994
2995        cur->bc_bufs[level - 1] = NULL;
2996        be16_add_cpu(&block->bb_level, -1);
2997        xfs_trans_log_inode(cur->bc_tp, ip,
2998                XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
2999        cur->bc_nlevels--;
3000out0:
3001        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3002        return 0;
3003}
3004
3005/*
3006 * Kill the current root node, and replace it with it's only child node.
3007 */
3008STATIC int
3009xfs_btree_kill_root(
3010        struct xfs_btree_cur    *cur,
3011        struct xfs_buf          *bp,
3012        int                     level,
3013        union xfs_btree_ptr     *newroot)
3014{
3015        int                     error;
3016
3017        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3018        XFS_BTREE_STATS_INC(cur, killroot);
3019
3020        /*
3021         * Update the root pointer, decreasing the level by 1 and then
3022         * free the old root.
3023         */
3024        cur->bc_ops->set_root(cur, newroot, -1);
3025
3026        error = cur->bc_ops->free_block(cur, bp);
3027        if (error) {
3028                XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3029                return error;
3030        }
3031
3032        XFS_BTREE_STATS_INC(cur, free);
3033
3034        cur->bc_bufs[level] = NULL;
3035        cur->bc_ra[level] = 0;
3036        cur->bc_nlevels--;
3037
3038        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3039        return 0;
3040}
3041
3042STATIC int
3043xfs_btree_dec_cursor(
3044        struct xfs_btree_cur    *cur,
3045        int                     level,
3046        int                     *stat)
3047{
3048        int                     error;
3049        int                     i;
3050
3051        if (level > 0) {
3052                error = xfs_btree_decrement(cur, level, &i);
3053                if (error)
3054                        return error;
3055        }
3056
3057        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3058        *stat = 1;
3059        return 0;
3060}
3061
3062/*
3063 * Single level of the btree record deletion routine.
3064 * Delete record pointed to by cur/level.
3065 * Remove the record from its block then rebalance the tree.
3066 * Return 0 for error, 1 for done, 2 to go on to the next level.
3067 */
3068STATIC int                                      /* error */
3069xfs_btree_delrec(
3070        struct xfs_btree_cur    *cur,           /* btree cursor */
3071        int                     level,          /* level removing record from */
3072        int                     *stat)          /* fail/done/go-on */
3073{
3074        struct xfs_btree_block  *block;         /* btree block */
3075        union xfs_btree_ptr     cptr;           /* current block ptr */
3076        struct xfs_buf          *bp;            /* buffer for block */
3077        int                     error;          /* error return value */
3078        int                     i;              /* loop counter */
3079        union xfs_btree_key     key;            /* storage for keyp */
3080        union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3081        union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3082        struct xfs_buf          *lbp;           /* left buffer pointer */
3083        struct xfs_btree_block  *left;          /* left btree block */
3084        int                     lrecs = 0;      /* left record count */
3085        int                     ptr;            /* key/record index */
3086        union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3087        struct xfs_buf          *rbp;           /* right buffer pointer */
3088        struct xfs_btree_block  *right;         /* right btree block */
3089        struct xfs_btree_block  *rrblock;       /* right-right btree block */
3090        struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3091        int                     rrecs = 0;      /* right record count */
3092        struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3093        int                     numrecs;        /* temporary numrec count */
3094
3095        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3096        XFS_BTREE_TRACE_ARGI(cur, level);
3097
3098        tcur = NULL;
3099
3100        /* Get the index of the entry being deleted, check for nothing there. */
3101        ptr = cur->bc_ptrs[level];
3102        if (ptr == 0) {
3103                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3104                *stat = 0;
3105                return 0;
3106        }
3107
3108        /* Get the buffer & block containing the record or key/ptr. */
3109        block = xfs_btree_get_block(cur, level, &bp);
3110        numrecs = xfs_btree_get_numrecs(block);
3111
3112#ifdef DEBUG
3113        error = xfs_btree_check_block(cur, block, level, bp);
3114        if (error)
3115                goto error0;
3116#endif
3117
3118        /* Fail if we're off the end of the block. */
3119        if (ptr > numrecs) {
3120                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3121                *stat = 0;
3122                return 0;
3123        }
3124
3125        XFS_BTREE_STATS_INC(cur, delrec);
3126        XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3127
3128        /* Excise the entries being deleted. */
3129        if (level > 0) {
3130                /* It's a nonleaf. operate on keys and ptrs */
3131                union xfs_btree_key     *lkp;
3132                union xfs_btree_ptr     *lpp;
3133
3134                lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3135                lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3136
3137#ifdef DEBUG
3138                for (i = 0; i < numrecs - ptr; i++) {
3139                        error = xfs_btree_check_ptr(cur, lpp, i, level);
3140                        if (error)
3141                                goto error0;
3142                }
3143#endif
3144
3145                if (ptr < numrecs) {
3146                        xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3147                        xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3148                        xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3149                        xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3150                }
3151
3152                /*
3153                 * If it's the first record in the block, we'll need to pass a
3154                 * key up to the next level (updkey).
3155                 */
3156                if (ptr == 1)
3157                        keyp = xfs_btree_key_addr(cur, 1, block);
3158        } else {
3159                /* It's a leaf. operate on records */
3160                if (ptr < numrecs) {
3161                        xfs_btree_shift_recs(cur,
3162                                xfs_btree_rec_addr(cur, ptr + 1, block),
3163                                -1, numrecs - ptr);
3164                        xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3165                }
3166
3167                /*
3168                 * If it's the first record in the block, we'll need a key
3169                 * structure to pass up to the next level (updkey).
3170                 */
3171                if (ptr == 1) {
3172                        cur->bc_ops->init_key_from_rec(&key,
3173                                        xfs_btree_rec_addr(cur, 1, block));
3174                        keyp = &key;
3175                }
3176        }
3177
3178        /*
3179         * Decrement and log the number of entries in the block.
3180         */
3181        xfs_btree_set_numrecs(block, --numrecs);
3182        xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3183
3184        /*
3185         * If we are tracking the last record in the tree and
3186         * we are at the far right edge of the tree, update it.
3187         */
3188        if (xfs_btree_is_lastrec(cur, block, level)) {
3189                cur->bc_ops->update_lastrec(cur, block, NULL,
3190                                            ptr, LASTREC_DELREC);
3191        }
3192
3193        /*
3194         * We're at the root level.  First, shrink the root block in-memory.
3195         * Try to get rid of the next level down.  If we can't then there's
3196         * nothing left to do.
3197         */
3198        if (level == cur->bc_nlevels - 1) {
3199                if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3200                        xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3201                                          cur->bc_private.b.whichfork);
3202
3203                        error = xfs_btree_kill_iroot(cur);
3204                        if (error)
3205                                goto error0;
3206
3207                        error = xfs_btree_dec_cursor(cur, level, stat);
3208                        if (error)
3209                                goto error0;
3210                        *stat = 1;
3211                        return 0;
3212                }
3213
3214                /*
3215                 * If this is the root level, and there's only one entry left,
3216                 * and it's NOT the leaf level, then we can get rid of this
3217                 * level.
3218                 */
3219                if (numrecs == 1 && level > 0) {
3220                        union xfs_btree_ptr     *pp;
3221                        /*
3222                         * pp is still set to the first pointer in the block.
3223                         * Make it the new root of the btree.
3224                         */
3225                        pp = xfs_btree_ptr_addr(cur, 1, block);
3226                        error = xfs_btree_kill_root(cur, bp, level, pp);
3227                        if (error)
3228                                goto error0;
3229                } else if (level > 0) {
3230                        error = xfs_btree_dec_cursor(cur, level, stat);
3231                        if (error)
3232                                goto error0;
3233                }
3234                *stat = 1;
3235                return 0;
3236        }
3237
3238        /*
3239         * If we deleted the leftmost entry in the block, update the
3240         * key values above us in the tree.
3241         */
3242        if (ptr == 1) {
3243                error = xfs_btree_updkey(cur, keyp, level + 1);
3244                if (error)
3245                        goto error0;
3246        }
3247
3248        /*
3249         * If the number of records remaining in the block is at least
3250         * the minimum, we're done.
3251         */
3252        if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3253                error = xfs_btree_dec_cursor(cur, level, stat);
3254                if (error)
3255                        goto error0;
3256                return 0;
3257        }
3258
3259        /*
3260         * Otherwise, we have to move some records around to keep the
3261         * tree balanced.  Look at the left and right sibling blocks to
3262         * see if we can re-balance by moving only one record.
3263         */
3264        xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3265        xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3266
3267        if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3268                /*
3269                 * One child of root, need to get a chance to copy its contents
3270                 * into the root and delete it. Can't go up to next level,
3271                 * there's nothing to delete there.
3272                 */
3273                if (xfs_btree_ptr_is_null(cur, &rptr) &&
3274                    xfs_btree_ptr_is_null(cur, &lptr) &&
3275                    level == cur->bc_nlevels - 2) {
3276                        error = xfs_btree_kill_iroot(cur);
3277                        if (!error)
3278                                error = xfs_btree_dec_cursor(cur, level, stat);
3279                        if (error)
3280                                goto error0;
3281                        return 0;
3282                }
3283        }
3284
3285        ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3286               !xfs_btree_ptr_is_null(cur, &lptr));
3287
3288        /*
3289         * Duplicate the cursor so our btree manipulations here won't
3290         * disrupt the next level up.
3291         */
3292        error = xfs_btree_dup_cursor(cur, &tcur);
3293        if (error)
3294                goto error0;
3295
3296        /*
3297         * If there's a right sibling, see if it's ok to shift an entry
3298         * out of it.
3299         */
3300        if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3301                /*
3302                 * Move the temp cursor to the last entry in the next block.
3303                 * Actually any entry but the first would suffice.
3304                 */
3305                i = xfs_btree_lastrec(tcur, level);
3306                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3307
3308                error = xfs_btree_increment(tcur, level, &i);
3309                if (error)
3310                        goto error0;
3311                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3312
3313                i = xfs_btree_lastrec(tcur, level);
3314                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3315
3316                /* Grab a pointer to the block. */
3317                right = xfs_btree_get_block(tcur, level, &rbp);
3318#ifdef DEBUG
3319                error = xfs_btree_check_block(tcur, right, level, rbp);
3320                if (error)
3321                        goto error0;
3322#endif
3323                /* Grab the current block number, for future use. */
3324                xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3325
3326                /*
3327                 * If right block is full enough so that removing one entry
3328                 * won't make it too empty, and left-shifting an entry out
3329                 * of right to us works, we're done.
3330                 */
3331                if (xfs_btree_get_numrecs(right) - 1 >=
3332                    cur->bc_ops->get_minrecs(tcur, level)) {
3333                        error = xfs_btree_lshift(tcur, level, &i);
3334                        if (error)
3335                                goto error0;
3336                        if (i) {
3337                                ASSERT(xfs_btree_get_numrecs(block) >=
3338                                       cur->bc_ops->get_minrecs(tcur, level));
3339
3340                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3341                                tcur = NULL;
3342
3343                                error = xfs_btree_dec_cursor(cur, level, stat);
3344                                if (error)
3345                                        goto error0;
3346                                return 0;
3347                        }
3348                }
3349
3350                /*
3351                 * Otherwise, grab the number of records in right for
3352                 * future reference, and fix up the temp cursor to point
3353                 * to our block again (last record).
3354                 */
3355                rrecs = xfs_btree_get_numrecs(right);
3356                if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3357                        i = xfs_btree_firstrec(tcur, level);
3358                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3359
3360                        error = xfs_btree_decrement(tcur, level, &i);
3361                        if (error)
3362                                goto error0;
3363                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3364                }
3365        }
3366
3367        /*
3368         * If there's a left sibling, see if it's ok to shift an entry
3369         * out of it.
3370         */
3371        if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3372                /*
3373                 * Move the temp cursor to the first entry in the
3374                 * previous block.
3375                 */
3376                i = xfs_btree_firstrec(tcur, level);
3377                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3378
3379                error = xfs_btree_decrement(tcur, level, &i);
3380                if (error)
3381                        goto error0;
3382                i = xfs_btree_firstrec(tcur, level);
3383                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3384
3385                /* Grab a pointer to the block. */
3386                left = xfs_btree_get_block(tcur, level, &lbp);
3387#ifdef DEBUG
3388                error = xfs_btree_check_block(cur, left, level, lbp);
3389                if (error)
3390                        goto error0;
3391#endif
3392                /* Grab the current block number, for future use. */
3393                xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3394
3395                /*
3396                 * If left block is full enough so that removing one entry
3397                 * won't make it too empty, and right-shifting an entry out
3398                 * of left to us works, we're done.
3399                 */
3400                if (xfs_btree_get_numrecs(left) - 1 >=
3401                    cur->bc_ops->get_minrecs(tcur, level)) {
3402                        error = xfs_btree_rshift(tcur, level, &i);
3403                        if (error)
3404                                goto error0;
3405                        if (i) {
3406                                ASSERT(xfs_btree_get_numrecs(block) >=
3407                                       cur->bc_ops->get_minrecs(tcur, level));
3408                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3409                                tcur = NULL;
3410                                if (level == 0)
3411                                        cur->bc_ptrs[0]++;
3412                                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3413                                *stat = 1;
3414                                return 0;
3415                        }
3416                }
3417
3418                /*
3419                 * Otherwise, grab the number of records in right for
3420                 * future reference.
3421                 */
3422                lrecs = xfs_btree_get_numrecs(left);
3423        }
3424
3425        /* Delete the temp cursor, we're done with it. */
3426        xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3427        tcur = NULL;
3428
3429        /* If here, we need to do a join to keep the tree balanced. */
3430        ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3431
3432        if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3433            lrecs + xfs_btree_get_numrecs(block) <=
3434                        cur->bc_ops->get_maxrecs(cur, level)) {
3435                /*
3436                 * Set "right" to be the starting block,
3437                 * "left" to be the left neighbor.
3438                 */
3439                rptr = cptr;
3440                right = block;
3441                rbp = bp;
3442                error = xfs_btree_read_buf_block(cur, &lptr, level,
3443                                                        0, &left, &lbp);
3444                if (error)
3445                        goto error0;
3446
3447        /*
3448         * If that won't work, see if we can join with the right neighbor block.
3449         */
3450        } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3451                   rrecs + xfs_btree_get_numrecs(block) <=
3452                        cur->bc_ops->get_maxrecs(cur, level)) {
3453                /*
3454                 * Set "left" to be the starting block,
3455                 * "right" to be the right neighbor.
3456                 */
3457                lptr = cptr;
3458                left = block;
3459                lbp = bp;
3460                error = xfs_btree_read_buf_block(cur, &rptr, level,
3461                                                        0, &right, &rbp);
3462                if (error)
3463                        goto error0;
3464
3465        /*
3466         * Otherwise, we can't fix the imbalance.
3467         * Just return.  This is probably a logic error, but it's not fatal.
3468         */
3469        } else {
3470                error = xfs_btree_dec_cursor(cur, level, stat);
3471                if (error)
3472                        goto error0;
3473                return 0;
3474        }
3475
3476        rrecs = xfs_btree_get_numrecs(right);
3477        lrecs = xfs_btree_get_numrecs(left);
3478
3479        /*
3480         * We're now going to join "left" and "right" by moving all the stuff
3481         * in "right" to "left" and deleting "right".
3482         */
3483        XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3484        if (level > 0) {
3485                /* It's a non-leaf.  Move keys and pointers. */
3486                union xfs_btree_key     *lkp;   /* left btree key */
3487                union xfs_btree_ptr     *lpp;   /* left address pointer */
3488                union xfs_btree_key     *rkp;   /* right btree key */
3489                union xfs_btree_ptr     *rpp;   /* right address pointer */
3490
3491                lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3492                lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3493                rkp = xfs_btree_key_addr(cur, 1, right);
3494                rpp = xfs_btree_ptr_addr(cur, 1, right);
3495#ifdef DEBUG
3496                for (i = 1; i < rrecs; i++) {
3497                        error = xfs_btree_check_ptr(cur, rpp, i, level);
3498                        if (error)
3499                                goto error0;
3500                }
3501#endif
3502                xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3503                xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3504
3505                xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3506                xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3507        } else {
3508                /* It's a leaf.  Move records.  */
3509                union xfs_btree_rec     *lrp;   /* left record pointer */
3510                union xfs_btree_rec     *rrp;   /* right record pointer */
3511
3512                lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3513                rrp = xfs_btree_rec_addr(cur, 1, right);
3514
3515                xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3516                xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3517        }
3518
3519        XFS_BTREE_STATS_INC(cur, join);
3520
3521        /*
3522         * Fix up the number of records and right block pointer in the
3523         * surviving block, and log it.
3524         */
3525        xfs_btree_set_numrecs(left, lrecs + rrecs);
3526        xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3527        xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3528        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3529
3530        /* If there is a right sibling, point it to the remaining block. */
3531        xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3532        if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3533                error = xfs_btree_read_buf_block(cur, &cptr, level,
3534                                                        0, &rrblock, &rrbp);
3535                if (error)
3536                        goto error0;
3537                xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3538                xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3539        }
3540
3541        /* Free the deleted block. */
3542        error = cur->bc_ops->free_block(cur, rbp);
3543        if (error)
3544                goto error0;
3545        XFS_BTREE_STATS_INC(cur, free);
3546
3547        /*
3548         * If we joined with the left neighbor, set the buffer in the
3549         * cursor to the left block, and fix up the index.
3550         */
3551        if (bp != lbp) {
3552                cur->bc_bufs[level] = lbp;
3553                cur->bc_ptrs[level] += lrecs;
3554                cur->bc_ra[level] = 0;
3555        }
3556        /*
3557         * If we joined with the right neighbor and there's a level above
3558         * us, increment the cursor at that level.
3559         */
3560        else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3561                   (level + 1 < cur->bc_nlevels)) {
3562                error = xfs_btree_increment(cur, level + 1, &i);
3563                if (error)
3564                        goto error0;
3565        }
3566
3567        /*
3568         * Readjust the ptr at this level if it's not a leaf, since it's
3569         * still pointing at the deletion point, which makes the cursor
3570         * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3571         * We can't use decrement because it would change the next level up.
3572         */
3573        if (level > 0)
3574                cur->bc_ptrs[level]--;
3575
3576        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3577        /* Return value means the next level up has something to do. */
3578        *stat = 2;
3579        return 0;
3580
3581error0:
3582        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3583        if (tcur)
3584                xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3585        return error;
3586}
3587
3588/*
3589 * Delete the record pointed to by cur.
3590 * The cursor refers to the place where the record was (could be inserted)
3591 * when the operation returns.
3592 */
3593int                                     /* error */
3594xfs_btree_delete(
3595        struct xfs_btree_cur    *cur,
3596        int                     *stat)  /* success/failure */
3597{
3598        int                     error;  /* error return value */
3599        int                     level;
3600        int                     i;
3601
3602        XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3603
3604        /*
3605         * Go up the tree, starting at leaf level.
3606         *
3607         * If 2 is returned then a join was done; go to the next level.
3608         * Otherwise we are done.
3609         */
3610        for (level = 0, i = 2; i == 2; level++) {
3611                error = xfs_btree_delrec(cur, level, &i);
3612                if (error)
3613                        goto error0;
3614        }
3615
3616        if (i == 0) {
3617                for (level = 1; level < cur->bc_nlevels; level++) {
3618                        if (cur->bc_ptrs[level] == 0) {
3619                                error = xfs_btree_decrement(cur, level, &i);
3620                                if (error)
3621                                        goto error0;
3622                                break;
3623                        }
3624                }
3625        }
3626
3627        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3628        *stat = i;
3629        return 0;
3630error0:
3631        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3632        return error;
3633}
3634
3635/*
3636 * Get the data from the pointed-to record.
3637 */
3638int                                     /* error */
3639xfs_btree_get_rec(
3640        struct xfs_btree_cur    *cur,   /* btree cursor */
3641        union xfs_btree_rec     **recp, /* output: btree record */
3642        int                     *stat)  /* output: success/failure */
3643{
3644        struct xfs_btree_block  *block; /* btree block */
3645        struct xfs_buf          *bp;    /* buffer pointer */
3646        int                     ptr;    /* record number */
3647#ifdef DEBUG
3648        int                     error;  /* error return value */
3649#endif
3650
3651        ptr = cur->bc_ptrs[0];
3652        block = xfs_btree_get_block(cur, 0, &bp);
3653
3654#ifdef DEBUG
3655        error = xfs_btree_check_block(cur, block, 0, bp);
3656        if (error)
3657                return error;
3658#endif
3659
3660        /*
3661         * Off the right end or left end, return failure.
3662         */
3663        if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3664                *stat = 0;
3665                return 0;
3666        }
3667
3668        /*
3669         * Point to the record and extract its data.
3670         */
3671        *recp = xfs_btree_rec_addr(cur, ptr, block);
3672        *stat = 1;
3673        return 0;
3674}
3675