linux/fs/xfs/libxfs/xfs_iext_tree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2017 Christoph Hellwig.
   4 */
   5
   6#include <linux/cache.h>
   7#include <linux/kernel.h>
   8#include <linux/slab.h>
   9#include "xfs.h"
  10#include "xfs_format.h"
  11#include "xfs_bit.h"
  12#include "xfs_log_format.h"
  13#include "xfs_inode.h"
  14#include "xfs_inode_fork.h"
  15#include "xfs_trans_resv.h"
  16#include "xfs_mount.h"
  17#include "xfs_trace.h"
  18
  19/*
  20 * In-core extent record layout:
  21 *
  22 * +-------+----------------------------+
  23 * | 00:53 | all 54 bits of startoff    |
  24 * | 54:63 | low 10 bits of startblock  |
  25 * +-------+----------------------------+
  26 * | 00:20 | all 21 bits of length      |
  27 * |    21 | unwritten extent bit       |
  28 * | 22:63 | high 42 bits of startblock |
  29 * +-------+----------------------------+
  30 */
  31#define XFS_IEXT_STARTOFF_MASK          xfs_mask64lo(BMBT_STARTOFF_BITLEN)
  32#define XFS_IEXT_LENGTH_MASK            xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
  33#define XFS_IEXT_STARTBLOCK_MASK        xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
  34
  35struct xfs_iext_rec {
  36        uint64_t                        lo;
  37        uint64_t                        hi;
  38};
  39
  40/*
  41 * Given that the length can't be a zero, only an empty hi value indicates an
  42 * unused record.
  43 */
  44static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
  45{
  46        return rec->hi == 0;
  47}
  48
  49static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
  50{
  51        rec->lo = 0;
  52        rec->hi = 0;
  53}
  54
  55static void
  56xfs_iext_set(
  57        struct xfs_iext_rec     *rec,
  58        struct xfs_bmbt_irec    *irec)
  59{
  60        ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
  61        ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
  62        ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
  63
  64        rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
  65        rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
  66
  67        rec->lo |= (irec->br_startblock << 54);
  68        rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
  69
  70        if (irec->br_state == XFS_EXT_UNWRITTEN)
  71                rec->hi |= (1 << 21);
  72}
  73
  74static void
  75xfs_iext_get(
  76        struct xfs_bmbt_irec    *irec,
  77        struct xfs_iext_rec     *rec)
  78{
  79        irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
  80        irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
  81
  82        irec->br_startblock = rec->lo >> 54;
  83        irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
  84
  85        if (rec->hi & (1 << 21))
  86                irec->br_state = XFS_EXT_UNWRITTEN;
  87        else
  88                irec->br_state = XFS_EXT_NORM;
  89}
  90
  91enum {
  92        NODE_SIZE       = 256,
  93        KEYS_PER_NODE   = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
  94        RECS_PER_LEAF   = (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
  95                                sizeof(struct xfs_iext_rec),
  96};
  97
  98/*
  99 * In-core extent btree block layout:
 100 *
 101 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
 102 *
 103 * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each
 104 * contain the startoffset, blockcount, startblock and unwritten extent flag.
 105 * See above for the exact format, followed by pointers to the previous and next
 106 * leaf blocks (if there are any).
 107 *
 108 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
 109 * by an equal number of pointers to the btree blocks at the next lower level.
 110 *
 111 *              +-------+-------+-------+-------+-------+----------+----------+
 112 * Leaf:        | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
 113 *              +-------+-------+-------+-------+-------+----------+----------+
 114 *
 115 *              +-------+-------+-------+-------+-------+-------+------+-------+
 116 * Inner:       | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
 117 *              +-------+-------+-------+-------+-------+-------+------+-------+
 118 */
 119struct xfs_iext_node {
 120        uint64_t                keys[KEYS_PER_NODE];
 121#define XFS_IEXT_KEY_INVALID    (1ULL << 63)
 122        void                    *ptrs[KEYS_PER_NODE];
 123};
 124
 125struct xfs_iext_leaf {
 126        struct xfs_iext_rec     recs[RECS_PER_LEAF];
 127        struct xfs_iext_leaf    *prev;
 128        struct xfs_iext_leaf    *next;
 129};
 130
 131inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
 132{
 133        return ifp->if_bytes / sizeof(struct xfs_iext_rec);
 134}
 135
 136static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
 137{
 138        if (ifp->if_height == 1)
 139                return xfs_iext_count(ifp);
 140        return RECS_PER_LEAF;
 141}
 142
 143static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
 144{
 145        return &cur->leaf->recs[cur->pos];
 146}
 147
 148static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
 149                struct xfs_iext_cursor *cur)
 150{
 151        if (!cur->leaf)
 152                return false;
 153        if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
 154                return false;
 155        if (xfs_iext_rec_is_empty(cur_rec(cur)))
 156                return false;
 157        return true;
 158}
 159
 160static void *
 161xfs_iext_find_first_leaf(
 162        struct xfs_ifork        *ifp)
 163{
 164        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 165        int                     height;
 166
 167        if (!ifp->if_height)
 168                return NULL;
 169
 170        for (height = ifp->if_height; height > 1; height--) {
 171                node = node->ptrs[0];
 172                ASSERT(node);
 173        }
 174
 175        return node;
 176}
 177
 178static void *
 179xfs_iext_find_last_leaf(
 180        struct xfs_ifork        *ifp)
 181{
 182        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 183        int                     height, i;
 184
 185        if (!ifp->if_height)
 186                return NULL;
 187
 188        for (height = ifp->if_height; height > 1; height--) {
 189                for (i = 1; i < KEYS_PER_NODE; i++)
 190                        if (!node->ptrs[i])
 191                                break;
 192                node = node->ptrs[i - 1];
 193                ASSERT(node);
 194        }
 195
 196        return node;
 197}
 198
 199void
 200xfs_iext_first(
 201        struct xfs_ifork        *ifp,
 202        struct xfs_iext_cursor  *cur)
 203{
 204        cur->pos = 0;
 205        cur->leaf = xfs_iext_find_first_leaf(ifp);
 206}
 207
 208void
 209xfs_iext_last(
 210        struct xfs_ifork        *ifp,
 211        struct xfs_iext_cursor  *cur)
 212{
 213        int                     i;
 214
 215        cur->leaf = xfs_iext_find_last_leaf(ifp);
 216        if (!cur->leaf) {
 217                cur->pos = 0;
 218                return;
 219        }
 220
 221        for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
 222                if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
 223                        break;
 224        }
 225        cur->pos = i - 1;
 226}
 227
 228void
 229xfs_iext_next(
 230        struct xfs_ifork        *ifp,
 231        struct xfs_iext_cursor  *cur)
 232{
 233        if (!cur->leaf) {
 234                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 235                xfs_iext_first(ifp, cur);
 236                return;
 237        }
 238
 239        ASSERT(cur->pos >= 0);
 240        ASSERT(cur->pos < xfs_iext_max_recs(ifp));
 241
 242        cur->pos++;
 243        if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
 244            cur->leaf->next) {
 245                cur->leaf = cur->leaf->next;
 246                cur->pos = 0;
 247        }
 248}
 249
 250void
 251xfs_iext_prev(
 252        struct xfs_ifork        *ifp,
 253        struct xfs_iext_cursor  *cur)
 254{
 255        if (!cur->leaf) {
 256                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 257                xfs_iext_last(ifp, cur);
 258                return;
 259        }
 260
 261        ASSERT(cur->pos >= 0);
 262        ASSERT(cur->pos <= RECS_PER_LEAF);
 263
 264recurse:
 265        do {
 266                cur->pos--;
 267                if (xfs_iext_valid(ifp, cur))
 268                        return;
 269        } while (cur->pos > 0);
 270
 271        if (ifp->if_height > 1 && cur->leaf->prev) {
 272                cur->leaf = cur->leaf->prev;
 273                cur->pos = RECS_PER_LEAF;
 274                goto recurse;
 275        }
 276}
 277
 278static inline int
 279xfs_iext_key_cmp(
 280        struct xfs_iext_node    *node,
 281        int                     n,
 282        xfs_fileoff_t           offset)
 283{
 284        if (node->keys[n] > offset)
 285                return 1;
 286        if (node->keys[n] < offset)
 287                return -1;
 288        return 0;
 289}
 290
 291static inline int
 292xfs_iext_rec_cmp(
 293        struct xfs_iext_rec     *rec,
 294        xfs_fileoff_t           offset)
 295{
 296        uint64_t                rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
 297        uint32_t                rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
 298
 299        if (rec_offset > offset)
 300                return 1;
 301        if (rec_offset + rec_len <= offset)
 302                return -1;
 303        return 0;
 304}
 305
 306static void *
 307xfs_iext_find_level(
 308        struct xfs_ifork        *ifp,
 309        xfs_fileoff_t           offset,
 310        int                     level)
 311{
 312        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 313        int                     height, i;
 314
 315        if (!ifp->if_height)
 316                return NULL;
 317
 318        for (height = ifp->if_height; height > level; height--) {
 319                for (i = 1; i < KEYS_PER_NODE; i++)
 320                        if (xfs_iext_key_cmp(node, i, offset) > 0)
 321                                break;
 322
 323                node = node->ptrs[i - 1];
 324                if (!node)
 325                        break;
 326        }
 327
 328        return node;
 329}
 330
 331static int
 332xfs_iext_node_pos(
 333        struct xfs_iext_node    *node,
 334        xfs_fileoff_t           offset)
 335{
 336        int                     i;
 337
 338        for (i = 1; i < KEYS_PER_NODE; i++) {
 339                if (xfs_iext_key_cmp(node, i, offset) > 0)
 340                        break;
 341        }
 342
 343        return i - 1;
 344}
 345
 346static int
 347xfs_iext_node_insert_pos(
 348        struct xfs_iext_node    *node,
 349        xfs_fileoff_t           offset)
 350{
 351        int                     i;
 352
 353        for (i = 0; i < KEYS_PER_NODE; i++) {
 354                if (xfs_iext_key_cmp(node, i, offset) > 0)
 355                        return i;
 356        }
 357
 358        return KEYS_PER_NODE;
 359}
 360
 361static int
 362xfs_iext_node_nr_entries(
 363        struct xfs_iext_node    *node,
 364        int                     start)
 365{
 366        int                     i;
 367
 368        for (i = start; i < KEYS_PER_NODE; i++) {
 369                if (node->keys[i] == XFS_IEXT_KEY_INVALID)
 370                        break;
 371        }
 372
 373        return i;
 374}
 375
 376static int
 377xfs_iext_leaf_nr_entries(
 378        struct xfs_ifork        *ifp,
 379        struct xfs_iext_leaf    *leaf,
 380        int                     start)
 381{
 382        int                     i;
 383
 384        for (i = start; i < xfs_iext_max_recs(ifp); i++) {
 385                if (xfs_iext_rec_is_empty(&leaf->recs[i]))
 386                        break;
 387        }
 388
 389        return i;
 390}
 391
 392static inline uint64_t
 393xfs_iext_leaf_key(
 394        struct xfs_iext_leaf    *leaf,
 395        int                     n)
 396{
 397        return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
 398}
 399
 400static void
 401xfs_iext_grow(
 402        struct xfs_ifork        *ifp)
 403{
 404        struct xfs_iext_node    *node = kmem_zalloc(NODE_SIZE, KM_NOFS);
 405        int                     i;
 406
 407        if (ifp->if_height == 1) {
 408                struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
 409
 410                node->keys[0] = xfs_iext_leaf_key(prev, 0);
 411                node->ptrs[0] = prev;
 412        } else  {
 413                struct xfs_iext_node *prev = ifp->if_u1.if_root;
 414
 415                ASSERT(ifp->if_height > 1);
 416
 417                node->keys[0] = prev->keys[0];
 418                node->ptrs[0] = prev;
 419        }
 420
 421        for (i = 1; i < KEYS_PER_NODE; i++)
 422                node->keys[i] = XFS_IEXT_KEY_INVALID;
 423
 424        ifp->if_u1.if_root = node;
 425        ifp->if_height++;
 426}
 427
 428static void
 429xfs_iext_update_node(
 430        struct xfs_ifork        *ifp,
 431        xfs_fileoff_t           old_offset,
 432        xfs_fileoff_t           new_offset,
 433        int                     level,
 434        void                    *ptr)
 435{
 436        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 437        int                     height, i;
 438
 439        for (height = ifp->if_height; height > level; height--) {
 440                for (i = 0; i < KEYS_PER_NODE; i++) {
 441                        if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
 442                                break;
 443                        if (node->keys[i] == old_offset)
 444                                node->keys[i] = new_offset;
 445                }
 446                node = node->ptrs[i - 1];
 447                ASSERT(node);
 448        }
 449
 450        ASSERT(node == ptr);
 451}
 452
 453static struct xfs_iext_node *
 454xfs_iext_split_node(
 455        struct xfs_iext_node    **nodep,
 456        int                     *pos,
 457        int                     *nr_entries)
 458{
 459        struct xfs_iext_node    *node = *nodep;
 460        struct xfs_iext_node    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 461        const int               nr_move = KEYS_PER_NODE / 2;
 462        int                     nr_keep = nr_move + (KEYS_PER_NODE & 1);
 463        int                     i = 0;
 464
 465        /* for sequential append operations just spill over into the new node */
 466        if (*pos == KEYS_PER_NODE) {
 467                *nodep = new;
 468                *pos = 0;
 469                *nr_entries = 0;
 470                goto done;
 471        }
 472
 473
 474        for (i = 0; i < nr_move; i++) {
 475                new->keys[i] = node->keys[nr_keep + i];
 476                new->ptrs[i] = node->ptrs[nr_keep + i];
 477
 478                node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
 479                node->ptrs[nr_keep + i] = NULL;
 480        }
 481
 482        if (*pos >= nr_keep) {
 483                *nodep = new;
 484                *pos -= nr_keep;
 485                *nr_entries = nr_move;
 486        } else {
 487                *nr_entries = nr_keep;
 488        }
 489done:
 490        for (; i < KEYS_PER_NODE; i++)
 491                new->keys[i] = XFS_IEXT_KEY_INVALID;
 492        return new;
 493}
 494
 495static void
 496xfs_iext_insert_node(
 497        struct xfs_ifork        *ifp,
 498        uint64_t                offset,
 499        void                    *ptr,
 500        int                     level)
 501{
 502        struct xfs_iext_node    *node, *new;
 503        int                     i, pos, nr_entries;
 504
 505again:
 506        if (ifp->if_height < level)
 507                xfs_iext_grow(ifp);
 508
 509        new = NULL;
 510        node = xfs_iext_find_level(ifp, offset, level);
 511        pos = xfs_iext_node_insert_pos(node, offset);
 512        nr_entries = xfs_iext_node_nr_entries(node, pos);
 513
 514        ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
 515        ASSERT(nr_entries <= KEYS_PER_NODE);
 516
 517        if (nr_entries == KEYS_PER_NODE)
 518                new = xfs_iext_split_node(&node, &pos, &nr_entries);
 519
 520        /*
 521         * Update the pointers in higher levels if the first entry changes
 522         * in an existing node.
 523         */
 524        if (node != new && pos == 0 && nr_entries > 0)
 525                xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
 526
 527        for (i = nr_entries; i > pos; i--) {
 528                node->keys[i] = node->keys[i - 1];
 529                node->ptrs[i] = node->ptrs[i - 1];
 530        }
 531        node->keys[pos] = offset;
 532        node->ptrs[pos] = ptr;
 533
 534        if (new) {
 535                offset = new->keys[0];
 536                ptr = new;
 537                level++;
 538                goto again;
 539        }
 540}
 541
 542static struct xfs_iext_leaf *
 543xfs_iext_split_leaf(
 544        struct xfs_iext_cursor  *cur,
 545        int                     *nr_entries)
 546{
 547        struct xfs_iext_leaf    *leaf = cur->leaf;
 548        struct xfs_iext_leaf    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 549        const int               nr_move = RECS_PER_LEAF / 2;
 550        int                     nr_keep = nr_move + (RECS_PER_LEAF & 1);
 551        int                     i;
 552
 553        /* for sequential append operations just spill over into the new node */
 554        if (cur->pos == RECS_PER_LEAF) {
 555                cur->leaf = new;
 556                cur->pos = 0;
 557                *nr_entries = 0;
 558                goto done;
 559        }
 560
 561        for (i = 0; i < nr_move; i++) {
 562                new->recs[i] = leaf->recs[nr_keep + i];
 563                xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
 564        }
 565
 566        if (cur->pos >= nr_keep) {
 567                cur->leaf = new;
 568                cur->pos -= nr_keep;
 569                *nr_entries = nr_move;
 570        } else {
 571                *nr_entries = nr_keep;
 572        }
 573done:
 574        if (leaf->next)
 575                leaf->next->prev = new;
 576        new->next = leaf->next;
 577        new->prev = leaf;
 578        leaf->next = new;
 579        return new;
 580}
 581
 582static void
 583xfs_iext_alloc_root(
 584        struct xfs_ifork        *ifp,
 585        struct xfs_iext_cursor  *cur)
 586{
 587        ASSERT(ifp->if_bytes == 0);
 588
 589        ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
 590        ifp->if_height = 1;
 591
 592        /* now that we have a node step into it */
 593        cur->leaf = ifp->if_u1.if_root;
 594        cur->pos = 0;
 595}
 596
 597static void
 598xfs_iext_realloc_root(
 599        struct xfs_ifork        *ifp,
 600        struct xfs_iext_cursor  *cur)
 601{
 602        size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
 603        void *new;
 604
 605        /* account for the prev/next pointers */
 606        if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
 607                new_size = NODE_SIZE;
 608
 609        new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
 610        memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
 611        ifp->if_u1.if_root = new;
 612        cur->leaf = new;
 613}
 614
 615void
 616xfs_iext_insert(
 617        struct xfs_inode        *ip,
 618        struct xfs_iext_cursor  *cur,
 619        struct xfs_bmbt_irec    *irec,
 620        int                     state)
 621{
 622        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 623        xfs_fileoff_t           offset = irec->br_startoff;
 624        struct xfs_iext_leaf    *new = NULL;
 625        int                     nr_entries, i;
 626
 627        if (ifp->if_height == 0)
 628                xfs_iext_alloc_root(ifp, cur);
 629        else if (ifp->if_height == 1)
 630                xfs_iext_realloc_root(ifp, cur);
 631
 632        nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
 633        ASSERT(nr_entries <= RECS_PER_LEAF);
 634        ASSERT(cur->pos >= nr_entries ||
 635               xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);
 636
 637        if (nr_entries == RECS_PER_LEAF)
 638                new = xfs_iext_split_leaf(cur, &nr_entries);
 639
 640        /*
 641         * Update the pointers in higher levels if the first entry changes
 642         * in an existing node.
 643         */
 644        if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
 645                xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
 646                                offset, 1, cur->leaf);
 647        }
 648
 649        for (i = nr_entries; i > cur->pos; i--)
 650                cur->leaf->recs[i] = cur->leaf->recs[i - 1];
 651        xfs_iext_set(cur_rec(cur), irec);
 652        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 653
 654        trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
 655
 656        if (new)
 657                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 658}
 659
 660static struct xfs_iext_node *
 661xfs_iext_rebalance_node(
 662        struct xfs_iext_node    *parent,
 663        int                     *pos,
 664        struct xfs_iext_node    *node,
 665        int                     nr_entries)
 666{
 667        /*
 668         * If the neighbouring nodes are completely full, or have different
 669         * parents, we might never be able to merge our node, and will only
 670         * delete it once the number of entries hits zero.
 671         */
 672        if (nr_entries == 0)
 673                return node;
 674
 675        if (*pos > 0) {
 676                struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
 677                int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;
 678
 679                if (nr_prev + nr_entries <= KEYS_PER_NODE) {
 680                        for (i = 0; i < nr_entries; i++) {
 681                                prev->keys[nr_prev + i] = node->keys[i];
 682                                prev->ptrs[nr_prev + i] = node->ptrs[i];
 683                        }
 684                        return node;
 685                }
 686        }
 687
 688        if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
 689                struct xfs_iext_node *next = parent->ptrs[*pos + 1];
 690                int nr_next = xfs_iext_node_nr_entries(next, 0), i;
 691
 692                if (nr_entries + nr_next <= KEYS_PER_NODE) {
 693                        /*
 694                         * Merge the next node into this node so that we don't
 695                         * have to do an additional update of the keys in the
 696                         * higher levels.
 697                         */
 698                        for (i = 0; i < nr_next; i++) {
 699                                node->keys[nr_entries + i] = next->keys[i];
 700                                node->ptrs[nr_entries + i] = next->ptrs[i];
 701                        }
 702
 703                        ++*pos;
 704                        return next;
 705                }
 706        }
 707
 708        return NULL;
 709}
 710
 711static void
 712xfs_iext_remove_node(
 713        struct xfs_ifork        *ifp,
 714        xfs_fileoff_t           offset,
 715        void                    *victim)
 716{
 717        struct xfs_iext_node    *node, *parent;
 718        int                     level = 2, pos, nr_entries, i;
 719
 720        ASSERT(level <= ifp->if_height);
 721        node = xfs_iext_find_level(ifp, offset, level);
 722        pos = xfs_iext_node_pos(node, offset);
 723again:
 724        ASSERT(node->ptrs[pos]);
 725        ASSERT(node->ptrs[pos] == victim);
 726        kmem_free(victim);
 727
 728        nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
 729        offset = node->keys[0];
 730        for (i = pos; i < nr_entries; i++) {
 731                node->keys[i] = node->keys[i + 1];
 732                node->ptrs[i] = node->ptrs[i + 1];
 733        }
 734        node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
 735        node->ptrs[nr_entries] = NULL;
 736
 737        if (pos == 0 && nr_entries > 0) {
 738                xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
 739                offset = node->keys[0];
 740        }
 741
 742        if (nr_entries >= KEYS_PER_NODE / 2)
 743                return;
 744
 745        if (level < ifp->if_height) {
 746                /*
 747                 * If we aren't at the root yet try to find a neighbour node to
 748                 * merge with (or delete the node if it is empty), and then
 749                 * recurse up to the next level.
 750                 */
 751                level++;
 752                parent = xfs_iext_find_level(ifp, offset, level);
 753                pos = xfs_iext_node_pos(parent, offset);
 754
 755                ASSERT(pos != KEYS_PER_NODE);
 756                ASSERT(parent->ptrs[pos] == node);
 757
 758                node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
 759                if (node) {
 760                        victim = node;
 761                        node = parent;
 762                        goto again;
 763                }
 764        } else if (nr_entries == 1) {
 765                /*
 766                 * If we are at the root and only one entry is left we can just
 767                 * free this node and update the root pointer.
 768                 */
 769                ASSERT(node == ifp->if_u1.if_root);
 770                ifp->if_u1.if_root = node->ptrs[0];
 771                ifp->if_height--;
 772                kmem_free(node);
 773        }
 774}
 775
 776static void
 777xfs_iext_rebalance_leaf(
 778        struct xfs_ifork        *ifp,
 779        struct xfs_iext_cursor  *cur,
 780        struct xfs_iext_leaf    *leaf,
 781        xfs_fileoff_t           offset,
 782        int                     nr_entries)
 783{
 784        /*
 785         * If the neighbouring nodes are completely full we might never be able
 786         * to merge our node, and will only delete it once the number of
 787         * entries hits zero.
 788         */
 789        if (nr_entries == 0)
 790                goto remove_node;
 791
 792        if (leaf->prev) {
 793                int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;
 794
 795                if (nr_prev + nr_entries <= RECS_PER_LEAF) {
 796                        for (i = 0; i < nr_entries; i++)
 797                                leaf->prev->recs[nr_prev + i] = leaf->recs[i];
 798
 799                        if (cur->leaf == leaf) {
 800                                cur->leaf = leaf->prev;
 801                                cur->pos += nr_prev;
 802                        }
 803                        goto remove_node;
 804                }
 805        }
 806
 807        if (leaf->next) {
 808                int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;
 809
 810                if (nr_entries + nr_next <= RECS_PER_LEAF) {
 811                        /*
 812                         * Merge the next node into this node so that we don't
 813                         * have to do an additional update of the keys in the
 814                         * higher levels.
 815                         */
 816                        for (i = 0; i < nr_next; i++) {
 817                                leaf->recs[nr_entries + i] =
 818                                        leaf->next->recs[i];
 819                        }
 820
 821                        if (cur->leaf == leaf->next) {
 822                                cur->leaf = leaf;
 823                                cur->pos += nr_entries;
 824                        }
 825
 826                        offset = xfs_iext_leaf_key(leaf->next, 0);
 827                        leaf = leaf->next;
 828                        goto remove_node;
 829                }
 830        }
 831
 832        return;
 833remove_node:
 834        if (leaf->prev)
 835                leaf->prev->next = leaf->next;
 836        if (leaf->next)
 837                leaf->next->prev = leaf->prev;
 838        xfs_iext_remove_node(ifp, offset, leaf);
 839}
 840
 841static void
 842xfs_iext_free_last_leaf(
 843        struct xfs_ifork        *ifp)
 844{
 845        ifp->if_height--;
 846        kmem_free(ifp->if_u1.if_root);
 847        ifp->if_u1.if_root = NULL;
 848}
 849
 850void
 851xfs_iext_remove(
 852        struct xfs_inode        *ip,
 853        struct xfs_iext_cursor  *cur,
 854        int                     state)
 855{
 856        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 857        struct xfs_iext_leaf    *leaf = cur->leaf;
 858        xfs_fileoff_t           offset = xfs_iext_leaf_key(leaf, 0);
 859        int                     i, nr_entries;
 860
 861        trace_xfs_iext_remove(ip, cur, state, _RET_IP_);
 862
 863        ASSERT(ifp->if_height > 0);
 864        ASSERT(ifp->if_u1.if_root != NULL);
 865        ASSERT(xfs_iext_valid(ifp, cur));
 866
 867        nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
 868        for (i = cur->pos; i < nr_entries; i++)
 869                leaf->recs[i] = leaf->recs[i + 1];
 870        xfs_iext_rec_clear(&leaf->recs[nr_entries]);
 871        ifp->if_bytes -= sizeof(struct xfs_iext_rec);
 872
 873        if (cur->pos == 0 && nr_entries > 0) {
 874                xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
 875                                leaf);
 876                offset = xfs_iext_leaf_key(leaf, 0);
 877        } else if (cur->pos == nr_entries) {
 878                if (ifp->if_height > 1 && leaf->next)
 879                        cur->leaf = leaf->next;
 880                else
 881                        cur->leaf = NULL;
 882                cur->pos = 0;
 883        }
 884
 885        if (nr_entries >= RECS_PER_LEAF / 2)
 886                return;
 887
 888        if (ifp->if_height > 1)
 889                xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
 890        else if (nr_entries == 0)
 891                xfs_iext_free_last_leaf(ifp);
 892}
 893
 894/*
 895 * Lookup the extent covering bno.
 896 *
 897 * If there is an extent covering bno return the extent index, and store the
 898 * expanded extent structure in *gotp, and the extent cursor in *cur.
 899 * If there is no extent covering bno, but there is an extent after it (e.g.
 900 * it lies in a hole) return that extent in *gotp and its cursor in *cur
 901 * instead.
 902 * If bno is beyond the last extent return false, and return an invalid
 903 * cursor value.
 904 */
 905bool
 906xfs_iext_lookup_extent(
 907        struct xfs_inode        *ip,
 908        struct xfs_ifork        *ifp,
 909        xfs_fileoff_t           offset,
 910        struct xfs_iext_cursor  *cur,
 911        struct xfs_bmbt_irec    *gotp)
 912{
 913        XFS_STATS_INC(ip->i_mount, xs_look_exlist);
 914
 915        cur->leaf = xfs_iext_find_level(ifp, offset, 1);
 916        if (!cur->leaf) {
 917                cur->pos = 0;
 918                return false;
 919        }
 920
 921        for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
 922                struct xfs_iext_rec *rec = cur_rec(cur);
 923
 924                if (xfs_iext_rec_is_empty(rec))
 925                        break;
 926                if (xfs_iext_rec_cmp(rec, offset) >= 0)
 927                        goto found;
 928        }
 929
 930        /* Try looking in the next node for an entry > offset */
 931        if (ifp->if_height == 1 || !cur->leaf->next)
 932                return false;
 933        cur->leaf = cur->leaf->next;
 934        cur->pos = 0;
 935        if (!xfs_iext_valid(ifp, cur))
 936                return false;
 937found:
 938        xfs_iext_get(gotp, cur_rec(cur));
 939        return true;
 940}
 941
 942/*
 943 * Returns the last extent before end, and if this extent doesn't cover
 944 * end, update end to the end of the extent.
 945 */
 946bool
 947xfs_iext_lookup_extent_before(
 948        struct xfs_inode        *ip,
 949        struct xfs_ifork        *ifp,
 950        xfs_fileoff_t           *end,
 951        struct xfs_iext_cursor  *cur,
 952        struct xfs_bmbt_irec    *gotp)
 953{
 954        /* could be optimized to not even look up the next on a match.. */
 955        if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
 956            gotp->br_startoff <= *end - 1)
 957                return true;
 958        if (!xfs_iext_prev_extent(ifp, cur, gotp))
 959                return false;
 960        *end = gotp->br_startoff + gotp->br_blockcount;
 961        return true;
 962}
 963
 964void
 965xfs_iext_update_extent(
 966        struct xfs_inode        *ip,
 967        int                     state,
 968        struct xfs_iext_cursor  *cur,
 969        struct xfs_bmbt_irec    *new)
 970{
 971        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 972
 973        if (cur->pos == 0) {
 974                struct xfs_bmbt_irec    old;
 975
 976                xfs_iext_get(&old, cur_rec(cur));
 977                if (new->br_startoff != old.br_startoff) {
 978                        xfs_iext_update_node(ifp, old.br_startoff,
 979                                        new->br_startoff, 1, cur->leaf);
 980                }
 981        }
 982
 983        trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
 984        xfs_iext_set(cur_rec(cur), new);
 985        trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
 986}
 987
 988/*
 989 * Return true if the cursor points at an extent and return the extent structure
 990 * in gotp.  Else return false.
 991 */
 992bool
 993xfs_iext_get_extent(
 994        struct xfs_ifork        *ifp,
 995        struct xfs_iext_cursor  *cur,
 996        struct xfs_bmbt_irec    *gotp)
 997{
 998        if (!xfs_iext_valid(ifp, cur))
 999                return false;
1000        xfs_iext_get(gotp, cur_rec(cur));
1001        return true;
1002}
1003
1004/*
1005 * This is a recursive function, because of that we need to be extremely
1006 * careful with stack usage.
1007 */
1008static void
1009xfs_iext_destroy_node(
1010        struct xfs_iext_node    *node,
1011        int                     level)
1012{
1013        int                     i;
1014
1015        if (level > 1) {
1016                for (i = 0; i < KEYS_PER_NODE; i++) {
1017                        if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1018                                break;
1019                        xfs_iext_destroy_node(node->ptrs[i], level - 1);
1020                }
1021        }
1022
1023        kmem_free(node);
1024}
1025
1026void
1027xfs_iext_destroy(
1028        struct xfs_ifork        *ifp)
1029{
1030        xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
1031
1032        ifp->if_bytes = 0;
1033        ifp->if_height = 0;
1034        ifp->if_u1.if_root = NULL;
1035}
1036