linux/fs/xfs/libxfs/xfs_iext_tree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2017 Christoph Hellwig.
   4 */
   5
   6#include "xfs.h"
   7#include "xfs_shared.h"
   8#include "xfs_format.h"
   9#include "xfs_bit.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_inode.h"
  14#include "xfs_trace.h"
  15
  16/*
  17 * In-core extent record layout:
  18 *
  19 * +-------+----------------------------+
  20 * | 00:53 | all 54 bits of startoff    |
  21 * | 54:63 | low 10 bits of startblock  |
  22 * +-------+----------------------------+
  23 * | 00:20 | all 21 bits of length      |
  24 * |    21 | unwritten extent bit       |
  25 * | 22:63 | high 42 bits of startblock |
  26 * +-------+----------------------------+
  27 */
  28#define XFS_IEXT_STARTOFF_MASK          xfs_mask64lo(BMBT_STARTOFF_BITLEN)
  29#define XFS_IEXT_LENGTH_MASK            xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
  30#define XFS_IEXT_STARTBLOCK_MASK        xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
  31
  32struct xfs_iext_rec {
  33        uint64_t                        lo;
  34        uint64_t                        hi;
  35};
  36
  37/*
  38 * Given that the length can't be a zero, only an empty hi value indicates an
  39 * unused record.
  40 */
  41static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
  42{
  43        return rec->hi == 0;
  44}
  45
  46static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
  47{
  48        rec->lo = 0;
  49        rec->hi = 0;
  50}
  51
  52static void
  53xfs_iext_set(
  54        struct xfs_iext_rec     *rec,
  55        struct xfs_bmbt_irec    *irec)
  56{
  57        ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
  58        ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
  59        ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
  60
  61        rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
  62        rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
  63
  64        rec->lo |= (irec->br_startblock << 54);
  65        rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
  66
  67        if (irec->br_state == XFS_EXT_UNWRITTEN)
  68                rec->hi |= (1 << 21);
  69}
  70
  71static void
  72xfs_iext_get(
  73        struct xfs_bmbt_irec    *irec,
  74        struct xfs_iext_rec     *rec)
  75{
  76        irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
  77        irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
  78
  79        irec->br_startblock = rec->lo >> 54;
  80        irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
  81
  82        if (rec->hi & (1 << 21))
  83                irec->br_state = XFS_EXT_UNWRITTEN;
  84        else
  85                irec->br_state = XFS_EXT_NORM;
  86}
  87
  88enum {
  89        NODE_SIZE       = 256,
  90        KEYS_PER_NODE   = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
  91        RECS_PER_LEAF   = (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
  92                                sizeof(struct xfs_iext_rec),
  93};
  94
  95/*
  96 * In-core extent btree block layout:
  97 *
  98 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
  99 *
 100 * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
 101 * contain the startoffset, blockcount, startblock and unwritten extent flag.
 102 * See above for the exact format, followed by pointers to the previous and next
 103 * leaf blocks (if there are any).
 104 *
 105 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
 106 * by an equal number of pointers to the btree blocks at the next lower level.
 107 *
 108 *              +-------+-------+-------+-------+-------+----------+----------+
 109 * Leaf:        | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
 110 *              +-------+-------+-------+-------+-------+----------+----------+
 111 *
 112 *              +-------+-------+-------+-------+-------+-------+------+-------+
 113 * Inner:       | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
 114 *              +-------+-------+-------+-------+-------+-------+------+-------+
 115 */
 116struct xfs_iext_node {
 117        uint64_t                keys[KEYS_PER_NODE];
 118#define XFS_IEXT_KEY_INVALID    (1ULL << 63)
 119        void                    *ptrs[KEYS_PER_NODE];
 120};
 121
 122struct xfs_iext_leaf {
 123        struct xfs_iext_rec     recs[RECS_PER_LEAF];
 124        struct xfs_iext_leaf    *prev;
 125        struct xfs_iext_leaf    *next;
 126};
 127
 128inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
 129{
 130        return ifp->if_bytes / sizeof(struct xfs_iext_rec);
 131}
 132
 133static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
 134{
 135        if (ifp->if_height == 1)
 136                return xfs_iext_count(ifp);
 137        return RECS_PER_LEAF;
 138}
 139
 140static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
 141{
 142        return &cur->leaf->recs[cur->pos];
 143}
 144
 145static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
 146                struct xfs_iext_cursor *cur)
 147{
 148        if (!cur->leaf)
 149                return false;
 150        if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
 151                return false;
 152        if (xfs_iext_rec_is_empty(cur_rec(cur)))
 153                return false;
 154        return true;
 155}
 156
 157static void *
 158xfs_iext_find_first_leaf(
 159        struct xfs_ifork        *ifp)
 160{
 161        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 162        int                     height;
 163
 164        if (!ifp->if_height)
 165                return NULL;
 166
 167        for (height = ifp->if_height; height > 1; height--) {
 168                node = node->ptrs[0];
 169                ASSERT(node);
 170        }
 171
 172        return node;
 173}
 174
 175static void *
 176xfs_iext_find_last_leaf(
 177        struct xfs_ifork        *ifp)
 178{
 179        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 180        int                     height, i;
 181
 182        if (!ifp->if_height)
 183                return NULL;
 184
 185        for (height = ifp->if_height; height > 1; height--) {
 186                for (i = 1; i < KEYS_PER_NODE; i++)
 187                        if (!node->ptrs[i])
 188                                break;
 189                node = node->ptrs[i - 1];
 190                ASSERT(node);
 191        }
 192
 193        return node;
 194}
 195
 196void
 197xfs_iext_first(
 198        struct xfs_ifork        *ifp,
 199        struct xfs_iext_cursor  *cur)
 200{
 201        cur->pos = 0;
 202        cur->leaf = xfs_iext_find_first_leaf(ifp);
 203}
 204
 205void
 206xfs_iext_last(
 207        struct xfs_ifork        *ifp,
 208        struct xfs_iext_cursor  *cur)
 209{
 210        int                     i;
 211
 212        cur->leaf = xfs_iext_find_last_leaf(ifp);
 213        if (!cur->leaf) {
 214                cur->pos = 0;
 215                return;
 216        }
 217
 218        for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
 219                if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
 220                        break;
 221        }
 222        cur->pos = i - 1;
 223}
 224
 225void
 226xfs_iext_next(
 227        struct xfs_ifork        *ifp,
 228        struct xfs_iext_cursor  *cur)
 229{
 230        if (!cur->leaf) {
 231                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 232                xfs_iext_first(ifp, cur);
 233                return;
 234        }
 235
 236        ASSERT(cur->pos >= 0);
 237        ASSERT(cur->pos < xfs_iext_max_recs(ifp));
 238
 239        cur->pos++;
 240        if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
 241            cur->leaf->next) {
 242                cur->leaf = cur->leaf->next;
 243                cur->pos = 0;
 244        }
 245}
 246
 247void
 248xfs_iext_prev(
 249        struct xfs_ifork        *ifp,
 250        struct xfs_iext_cursor  *cur)
 251{
 252        if (!cur->leaf) {
 253                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 254                xfs_iext_last(ifp, cur);
 255                return;
 256        }
 257
 258        ASSERT(cur->pos >= 0);
 259        ASSERT(cur->pos <= RECS_PER_LEAF);
 260
 261recurse:
 262        do {
 263                cur->pos--;
 264                if (xfs_iext_valid(ifp, cur))
 265                        return;
 266        } while (cur->pos > 0);
 267
 268        if (ifp->if_height > 1 && cur->leaf->prev) {
 269                cur->leaf = cur->leaf->prev;
 270                cur->pos = RECS_PER_LEAF;
 271                goto recurse;
 272        }
 273}
 274
 275static inline int
 276xfs_iext_key_cmp(
 277        struct xfs_iext_node    *node,
 278        int                     n,
 279        xfs_fileoff_t           offset)
 280{
 281        if (node->keys[n] > offset)
 282                return 1;
 283        if (node->keys[n] < offset)
 284                return -1;
 285        return 0;
 286}
 287
 288static inline int
 289xfs_iext_rec_cmp(
 290        struct xfs_iext_rec     *rec,
 291        xfs_fileoff_t           offset)
 292{
 293        uint64_t                rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
 294        uint32_t                rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
 295
 296        if (rec_offset > offset)
 297                return 1;
 298        if (rec_offset + rec_len <= offset)
 299                return -1;
 300        return 0;
 301}
 302
 303static void *
 304xfs_iext_find_level(
 305        struct xfs_ifork        *ifp,
 306        xfs_fileoff_t           offset,
 307        int                     level)
 308{
 309        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 310        int                     height, i;
 311
 312        if (!ifp->if_height)
 313                return NULL;
 314
 315        for (height = ifp->if_height; height > level; height--) {
 316                for (i = 1; i < KEYS_PER_NODE; i++)
 317                        if (xfs_iext_key_cmp(node, i, offset) > 0)
 318                                break;
 319
 320                node = node->ptrs[i - 1];
 321                if (!node)
 322                        break;
 323        }
 324
 325        return node;
 326}
 327
 328static int
 329xfs_iext_node_pos(
 330        struct xfs_iext_node    *node,
 331        xfs_fileoff_t           offset)
 332{
 333        int                     i;
 334
 335        for (i = 1; i < KEYS_PER_NODE; i++) {
 336                if (xfs_iext_key_cmp(node, i, offset) > 0)
 337                        break;
 338        }
 339
 340        return i - 1;
 341}
 342
 343static int
 344xfs_iext_node_insert_pos(
 345        struct xfs_iext_node    *node,
 346        xfs_fileoff_t           offset)
 347{
 348        int                     i;
 349
 350        for (i = 0; i < KEYS_PER_NODE; i++) {
 351                if (xfs_iext_key_cmp(node, i, offset) > 0)
 352                        return i;
 353        }
 354
 355        return KEYS_PER_NODE;
 356}
 357
 358static int
 359xfs_iext_node_nr_entries(
 360        struct xfs_iext_node    *node,
 361        int                     start)
 362{
 363        int                     i;
 364
 365        for (i = start; i < KEYS_PER_NODE; i++) {
 366                if (node->keys[i] == XFS_IEXT_KEY_INVALID)
 367                        break;
 368        }
 369
 370        return i;
 371}
 372
 373static int
 374xfs_iext_leaf_nr_entries(
 375        struct xfs_ifork        *ifp,
 376        struct xfs_iext_leaf    *leaf,
 377        int                     start)
 378{
 379        int                     i;
 380
 381        for (i = start; i < xfs_iext_max_recs(ifp); i++) {
 382                if (xfs_iext_rec_is_empty(&leaf->recs[i]))
 383                        break;
 384        }
 385
 386        return i;
 387}
 388
 389static inline uint64_t
 390xfs_iext_leaf_key(
 391        struct xfs_iext_leaf    *leaf,
 392        int                     n)
 393{
 394        return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
 395}
 396
 397static void
 398xfs_iext_grow(
 399        struct xfs_ifork        *ifp)
 400{
 401        struct xfs_iext_node    *node = kmem_zalloc(NODE_SIZE, KM_NOFS);
 402        int                     i;
 403
 404        if (ifp->if_height == 1) {
 405                struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
 406
 407                node->keys[0] = xfs_iext_leaf_key(prev, 0);
 408                node->ptrs[0] = prev;
 409        } else  {
 410                struct xfs_iext_node *prev = ifp->if_u1.if_root;
 411
 412                ASSERT(ifp->if_height > 1);
 413
 414                node->keys[0] = prev->keys[0];
 415                node->ptrs[0] = prev;
 416        }
 417
 418        for (i = 1; i < KEYS_PER_NODE; i++)
 419                node->keys[i] = XFS_IEXT_KEY_INVALID;
 420
 421        ifp->if_u1.if_root = node;
 422        ifp->if_height++;
 423}
 424
 425static void
 426xfs_iext_update_node(
 427        struct xfs_ifork        *ifp,
 428        xfs_fileoff_t           old_offset,
 429        xfs_fileoff_t           new_offset,
 430        int                     level,
 431        void                    *ptr)
 432{
 433        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 434        int                     height, i;
 435
 436        for (height = ifp->if_height; height > level; height--) {
 437                for (i = 0; i < KEYS_PER_NODE; i++) {
 438                        if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
 439                                break;
 440                        if (node->keys[i] == old_offset)
 441                                node->keys[i] = new_offset;
 442                }
 443                node = node->ptrs[i - 1];
 444                ASSERT(node);
 445        }
 446
 447        ASSERT(node == ptr);
 448}
 449
 450static struct xfs_iext_node *
 451xfs_iext_split_node(
 452        struct xfs_iext_node    **nodep,
 453        int                     *pos,
 454        int                     *nr_entries)
 455{
 456        struct xfs_iext_node    *node = *nodep;
 457        struct xfs_iext_node    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 458        const int               nr_move = KEYS_PER_NODE / 2;
 459        int                     nr_keep = nr_move + (KEYS_PER_NODE & 1);
 460        int                     i = 0;
 461
 462        /* for sequential append operations just spill over into the new node */
 463        if (*pos == KEYS_PER_NODE) {
 464                *nodep = new;
 465                *pos = 0;
 466                *nr_entries = 0;
 467                goto done;
 468        }
 469
 470
 471        for (i = 0; i < nr_move; i++) {
 472                new->keys[i] = node->keys[nr_keep + i];
 473                new->ptrs[i] = node->ptrs[nr_keep + i];
 474
 475                node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
 476                node->ptrs[nr_keep + i] = NULL;
 477        }
 478
 479        if (*pos >= nr_keep) {
 480                *nodep = new;
 481                *pos -= nr_keep;
 482                *nr_entries = nr_move;
 483        } else {
 484                *nr_entries = nr_keep;
 485        }
 486done:
 487        for (; i < KEYS_PER_NODE; i++)
 488                new->keys[i] = XFS_IEXT_KEY_INVALID;
 489        return new;
 490}
 491
 492static void
 493xfs_iext_insert_node(
 494        struct xfs_ifork        *ifp,
 495        uint64_t                offset,
 496        void                    *ptr,
 497        int                     level)
 498{
 499        struct xfs_iext_node    *node, *new;
 500        int                     i, pos, nr_entries;
 501
 502again:
 503        if (ifp->if_height < level)
 504                xfs_iext_grow(ifp);
 505
 506        new = NULL;
 507        node = xfs_iext_find_level(ifp, offset, level);
 508        pos = xfs_iext_node_insert_pos(node, offset);
 509        nr_entries = xfs_iext_node_nr_entries(node, pos);
 510
 511        ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
 512        ASSERT(nr_entries <= KEYS_PER_NODE);
 513
 514        if (nr_entries == KEYS_PER_NODE)
 515                new = xfs_iext_split_node(&node, &pos, &nr_entries);
 516
 517        /*
 518         * Update the pointers in higher levels if the first entry changes
 519         * in an existing node.
 520         */
 521        if (node != new && pos == 0 && nr_entries > 0)
 522                xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
 523
 524        for (i = nr_entries; i > pos; i--) {
 525                node->keys[i] = node->keys[i - 1];
 526                node->ptrs[i] = node->ptrs[i - 1];
 527        }
 528        node->keys[pos] = offset;
 529        node->ptrs[pos] = ptr;
 530
 531        if (new) {
 532                offset = new->keys[0];
 533                ptr = new;
 534                level++;
 535                goto again;
 536        }
 537}
 538
 539static struct xfs_iext_leaf *
 540xfs_iext_split_leaf(
 541        struct xfs_iext_cursor  *cur,
 542        int                     *nr_entries)
 543{
 544        struct xfs_iext_leaf    *leaf = cur->leaf;
 545        struct xfs_iext_leaf    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 546        const int               nr_move = RECS_PER_LEAF / 2;
 547        int                     nr_keep = nr_move + (RECS_PER_LEAF & 1);
 548        int                     i;
 549
 550        /* for sequential append operations just spill over into the new node */
 551        if (cur->pos == RECS_PER_LEAF) {
 552                cur->leaf = new;
 553                cur->pos = 0;
 554                *nr_entries = 0;
 555                goto done;
 556        }
 557
 558        for (i = 0; i < nr_move; i++) {
 559                new->recs[i] = leaf->recs[nr_keep + i];
 560                xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
 561        }
 562
 563        if (cur->pos >= nr_keep) {
 564                cur->leaf = new;
 565                cur->pos -= nr_keep;
 566                *nr_entries = nr_move;
 567        } else {
 568                *nr_entries = nr_keep;
 569        }
 570done:
 571        if (leaf->next)
 572                leaf->next->prev = new;
 573        new->next = leaf->next;
 574        new->prev = leaf;
 575        leaf->next = new;
 576        return new;
 577}
 578
 579static void
 580xfs_iext_alloc_root(
 581        struct xfs_ifork        *ifp,
 582        struct xfs_iext_cursor  *cur)
 583{
 584        ASSERT(ifp->if_bytes == 0);
 585
 586        ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
 587        ifp->if_height = 1;
 588
 589        /* now that we have a node step into it */
 590        cur->leaf = ifp->if_u1.if_root;
 591        cur->pos = 0;
 592}
 593
 594static void
 595xfs_iext_realloc_root(
 596        struct xfs_ifork        *ifp,
 597        struct xfs_iext_cursor  *cur)
 598{
 599        int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
 600        void *new;
 601
 602        /* account for the prev/next pointers */
 603        if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
 604                new_size = NODE_SIZE;
 605
 606        new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
 607        memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
 608        ifp->if_u1.if_root = new;
 609        cur->leaf = new;
 610}
 611
 612/*
 613 * Increment the sequence counter on extent tree changes. If we are on a COW
 614 * fork, this allows the writeback code to skip looking for a COW extent if the
 615 * COW fork hasn't changed. We use WRITE_ONCE here to ensure the update to the
 616 * sequence counter is seen before the modifications to the extent tree itself
 617 * take effect.
 618 */
 619static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
 620{
 621        WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
 622}
 623
 624void
 625xfs_iext_insert(
 626        struct xfs_inode        *ip,
 627        struct xfs_iext_cursor  *cur,
 628        struct xfs_bmbt_irec    *irec,
 629        int                     state)
 630{
 631        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 632        xfs_fileoff_t           offset = irec->br_startoff;
 633        struct xfs_iext_leaf    *new = NULL;
 634        int                     nr_entries, i;
 635
 636        xfs_iext_inc_seq(ifp);
 637
 638        if (ifp->if_height == 0)
 639                xfs_iext_alloc_root(ifp, cur);
 640        else if (ifp->if_height == 1)
 641                xfs_iext_realloc_root(ifp, cur);
 642
 643        nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
 644        ASSERT(nr_entries <= RECS_PER_LEAF);
 645        ASSERT(cur->pos >= nr_entries ||
 646               xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
 647
 648        if (nr_entries == RECS_PER_LEAF)
 649                new = xfs_iext_split_leaf(cur, &nr_entries);
 650
 651        /*
 652         * Update the pointers in higher levels if the first entry changes
 653         * in an existing node.
 654         */
 655        if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
 656                xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
 657                                offset, 1, cur->leaf);
 658        }
 659
 660        for (i = nr_entries; i > cur->pos; i--)
 661                cur->leaf->recs[i] = cur->leaf->recs[i - 1];
 662        xfs_iext_set(cur_rec(cur), irec);
 663        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 664
 665        trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
 666
 667        if (new)
 668                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 669}
 670
 671static struct xfs_iext_node *
 672xfs_iext_rebalance_node(
 673        struct xfs_iext_node    *parent,
 674        int                     *pos,
 675        struct xfs_iext_node    *node,
 676        int                     nr_entries)
 677{
 678        /*
 679         * If the neighbouring nodes are completely full, or have different
 680         * parents, we might never be able to merge our node, and will only
 681         * delete it once the number of entries hits zero.
 682         */
 683        if (nr_entries == 0)
 684                return node;
 685
 686        if (*pos > 0) {
 687                struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
 688                int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
 689
 690                if (nr_prev + nr_entries <= KEYS_PER_NODE) {
 691                        for (i = 0; i < nr_entries; i++) {
 692                                prev->keys[nr_prev + i] = node->keys[i];
 693                                prev->ptrs[nr_prev + i] = node->ptrs[i];
 694                        }
 695                        return node;
 696                }
 697        }
 698
 699        if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
 700                struct xfs_iext_node *next = parent->ptrs[*pos + 1];
 701                int nr_next = xfs_iext_node_nr_entries(next, 0), i;
 702
 703                if (nr_entries + nr_next <= KEYS_PER_NODE) {
 704                        /*
 705                         * Merge the next node into this node so that we don't
 706                         * have to do an additional update of the keys in the
 707                         * higher levels.
 708                         */
 709                        for (i = 0; i < nr_next; i++) {
 710                                node->keys[nr_entries + i] = next->keys[i];
 711                                node->ptrs[nr_entries + i] = next->ptrs[i];
 712                        }
 713
 714                        ++*pos;
 715                        return next;
 716                }
 717        }
 718
 719        return NULL;
 720}
 721
 722static void
 723xfs_iext_remove_node(
 724        struct xfs_ifork        *ifp,
 725        xfs_fileoff_t           offset,
 726        void                    *victim)
 727{
 728        struct xfs_iext_node    *node, *parent;
 729        int                     level = 2, pos, nr_entries, i;
 730
 731        ASSERT(level <= ifp->if_height);
 732        node = xfs_iext_find_level(ifp, offset, level);
 733        pos = xfs_iext_node_pos(node, offset);
 734again:
 735        ASSERT(node->ptrs[pos]);
 736        ASSERT(node->ptrs[pos] == victim);
 737        kmem_free(victim);
 738
 739        nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
 740        offset = node->keys[0];
 741        for (i = pos; i < nr_entries; i++) {
 742                node->keys[i] = node->keys[i + 1];
 743                node->ptrs[i] = node->ptrs[i + 1];
 744        }
 745        node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
 746        node->ptrs[nr_entries] = NULL;
 747
 748        if (pos == 0 && nr_entries > 0) {
 749                xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
 750                offset = node->keys[0];
 751        }
 752
 753        if (nr_entries >= KEYS_PER_NODE / 2)
 754                return;
 755
 756        if (level < ifp->if_height) {
 757                /*
 758                 * If we aren't at the root yet try to find a neighbour node to
 759                 * merge with (or delete the node if it is empty), and then
 760                 * recurse up to the next level.
 761                 */
 762                level++;
 763                parent = xfs_iext_find_level(ifp, offset, level);
 764                pos = xfs_iext_node_pos(parent, offset);
 765
 766                ASSERT(pos != KEYS_PER_NODE);
 767                ASSERT(parent->ptrs[pos] == node);
 768
 769                node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
 770                if (node) {
 771                        victim = node;
 772                        node = parent;
 773                        goto again;
 774                }
 775        } else if (nr_entries == 1) {
 776                /*
 777                 * If we are at the root and only one entry is left we can just
 778                 * free this node and update the root pointer.
 779                 */
 780                ASSERT(node == ifp->if_u1.if_root);
 781                ifp->if_u1.if_root = node->ptrs[0];
 782                ifp->if_height--;
 783                kmem_free(node);
 784        }
 785}
 786
 787static void
 788xfs_iext_rebalance_leaf(
 789        struct xfs_ifork        *ifp,
 790        struct xfs_iext_cursor  *cur,
 791        struct xfs_iext_leaf    *leaf,
 792        xfs_fileoff_t           offset,
 793        int                     nr_entries)
 794{
 795        /*
 796         * If the neighbouring nodes are completely full we might never be able
 797         * to merge our node, and will only delete it once the number of
 798         * entries hits zero.
 799         */
 800        if (nr_entries == 0)
 801                goto remove_node;
 802
 803        if (leaf->prev) {
 804                int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
 805
 806                if (nr_prev + nr_entries <= RECS_PER_LEAF) {
 807                        for (i = 0; i < nr_entries; i++)
 808                                leaf->prev->recs[nr_prev + i] = leaf->recs[i];
 809
 810                        if (cur->leaf == leaf) {
 811                                cur->leaf = leaf->prev;
 812                                cur->pos += nr_prev;
 813                        }
 814                        goto remove_node;
 815                }
 816        }
 817
 818        if (leaf->next) {
 819                int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
 820
 821                if (nr_entries + nr_next <= RECS_PER_LEAF) {
 822                        /*
 823                         * Merge the next node into this node so that we don't
 824                         * have to do an additional update of the keys in the
 825                         * higher levels.
 826                         */
 827                        for (i = 0; i < nr_next; i++) {
 828                                leaf->recs[nr_entries + i] =
 829                                        leaf->next->recs[i];
 830                        }
 831
 832                        if (cur->leaf == leaf->next) {
 833                                cur->leaf = leaf;
 834                                cur->pos += nr_entries;
 835                        }
 836
 837                        offset = xfs_iext_leaf_key(leaf->next, 0);
 838                        leaf = leaf->next;
 839                        goto remove_node;
 840                }
 841        }
 842
 843        return;
 844remove_node:
 845        if (leaf->prev)
 846                leaf->prev->next = leaf->next;
 847        if (leaf->next)
 848                leaf->next->prev = leaf->prev;
 849        xfs_iext_remove_node(ifp, offset, leaf);
 850}
 851
 852static void
 853xfs_iext_free_last_leaf(
 854        struct xfs_ifork        *ifp)
 855{
 856        ifp->if_height--;
 857        kmem_free(ifp->if_u1.if_root);
 858        ifp->if_u1.if_root = NULL;
 859}
 860
 861void
 862xfs_iext_remove(
 863        struct xfs_inode        *ip,
 864        struct xfs_iext_cursor  *cur,
 865        int                     state)
 866{
 867        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 868        struct xfs_iext_leaf    *leaf = cur->leaf;
 869        xfs_fileoff_t           offset = xfs_iext_leaf_key(leaf, 0);
 870        int                     i, nr_entries;
 871
 872        trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
 873
 874        ASSERT(ifp->if_height > 0);
 875        ASSERT(ifp->if_u1.if_root != NULL);
 876        ASSERT(xfs_iext_valid(ifp, cur));
 877
 878        xfs_iext_inc_seq(ifp);
 879
 880        nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
 881        for (i = cur->pos; i < nr_entries; i++)
 882                leaf->recs[i] = leaf->recs[i + 1];
 883        xfs_iext_rec_clear(&leaf->recs[nr_entries]);
 884        ifp->if_bytes -= sizeof(struct xfs_iext_rec);
 885
 886        if (cur->pos == 0 && nr_entries > 0) {
 887                xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
 888                                leaf);
 889                offset = xfs_iext_leaf_key(leaf, 0);
 890        } else if (cur->pos == nr_entries) {
 891                if (ifp->if_height > 1 && leaf->next)
 892                        cur->leaf = leaf->next;
 893                else
 894                        cur->leaf = NULL;
 895                cur->pos = 0;
 896        }
 897
 898        if (nr_entries >= RECS_PER_LEAF / 2)
 899                return;
 900
 901        if (ifp->if_height > 1)
 902                xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
 903        else if (nr_entries == 0)
 904                xfs_iext_free_last_leaf(ifp);
 905}
 906
 907/*
 908 * Lookup the extent covering bno.
 909 *
 910 * If there is an extent covering bno return the extent index, and store the
 911 * expanded extent structure in *gotp, and the extent cursor in *cur.
 912 * If there is no extent covering bno, but there is an extent after it (e.g.
 913 * it lies in a hole) return that extent in *gotp and its cursor in *cur
 914 * instead.
 915 * If bno is beyond the last extent return false, and return an invalid
 916 * cursor value.
 917 */
 918bool
 919xfs_iext_lookup_extent(
 920        struct xfs_inode        *ip,
 921        struct xfs_ifork        *ifp,
 922        xfs_fileoff_t           offset,
 923        struct xfs_iext_cursor  *cur,
 924        struct xfs_bmbt_irec    *gotp)
 925{
 926        XFS_STATS_INC(ip->i_mount, xs_look_exlist);
 927
 928        cur->leaf = xfs_iext_find_level(ifp, offset, 1);
 929        if (!cur->leaf) {
 930                cur->pos = 0;
 931                return false;
 932        }
 933
 934        for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
 935                struct xfs_iext_rec *rec = cur_rec(cur);
 936
 937                if (xfs_iext_rec_is_empty(rec))
 938                        break;
 939                if (xfs_iext_rec_cmp(rec, offset) >= 0)
 940                        goto found;
 941        }
 942
 943        /* Try looking in the next node for an entry > offset */
 944        if (ifp->if_height == 1 || !cur->leaf->next)
 945                return false;
 946        cur->leaf = cur->leaf->next;
 947        cur->pos = 0;
 948        if (!xfs_iext_valid(ifp, cur))
 949                return false;
 950found:
 951        xfs_iext_get(gotp, cur_rec(cur));
 952        return true;
 953}
 954
 955/*
 956 * Returns the last extent before end, and if this extent doesn't cover
 957 * end, update end to the end of the extent.
 958 */
 959bool
 960xfs_iext_lookup_extent_before(
 961        struct xfs_inode        *ip,
 962        struct xfs_ifork        *ifp,
 963        xfs_fileoff_t           *end,
 964        struct xfs_iext_cursor  *cur,
 965        struct xfs_bmbt_irec    *gotp)
 966{
 967        /* could be optimized to not even look up the next on a match.. */
 968        if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
 969            gotp->br_startoff <= *end - 1)
 970                return true;
 971        if (!xfs_iext_prev_extent(ifp, cur, gotp))
 972                return false;
 973        *end = gotp->br_startoff + gotp->br_blockcount;
 974        return true;
 975}
 976
 977void
 978xfs_iext_update_extent(
 979        struct xfs_inode        *ip,
 980        int                     state,
 981        struct xfs_iext_cursor  *cur,
 982        struct xfs_bmbt_irec    *new)
 983{
 984        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 985
 986        xfs_iext_inc_seq(ifp);
 987
 988        if (cur->pos == 0) {
 989                struct xfs_bmbt_irec    old;
 990
 991                xfs_iext_get(&old, cur_rec(cur));
 992                if (new->br_startoff != old.br_startoff) {
 993                        xfs_iext_update_node(ifp, old.br_startoff,
 994                                        new->br_startoff, 1, cur->leaf);
 995                }
 996        }
 997
 998        trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
 999        xfs_iext_set(cur_rec(cur), new);
1000        trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
1001}
1002
1003/*
1004 * Return true if the cursor points at an extent and return the extent structure
1005 * in gotp.  Else return false.
1006 */
1007bool
1008xfs_iext_get_extent(
1009        struct xfs_ifork        *ifp,
1010        struct xfs_iext_cursor  *cur,
1011        struct xfs_bmbt_irec    *gotp)
1012{
1013        if (!xfs_iext_valid(ifp, cur))
1014                return false;
1015        xfs_iext_get(gotp, cur_rec(cur));
1016        return true;
1017}
1018
1019/*
1020 * This is a recursive function, because of that we need to be extremely
1021 * careful with stack usage.
1022 */
1023static void
1024xfs_iext_destroy_node(
1025        struct xfs_iext_node    *node,
1026        int                     level)
1027{
1028        int                     i;
1029
1030        if (level > 1) {
1031                for (i = 0; i < KEYS_PER_NODE; i++) {
1032                        if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1033                                break;
1034                        xfs_iext_destroy_node(node->ptrs[i], level - 1);
1035                }
1036        }
1037
1038        kmem_free(node);
1039}
1040
1041void
1042xfs_iext_destroy(
1043        struct xfs_ifork        *ifp)
1044{
1045        xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
1046
1047        ifp->if_bytes = 0;
1048        ifp->if_height = 0;
1049        ifp->if_u1.if_root = NULL;
1050}
1051