linux/fs/xfs/libxfs/xfs_iext_tree.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2017 Christoph Hellwig.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 */
  13
  14#include <linux/cache.h>
  15#include <linux/kernel.h>
  16#include <linux/slab.h>
  17#include "xfs.h"
  18#include "xfs_format.h"
  19#include "xfs_bit.h"
  20#include "xfs_log_format.h"
  21#include "xfs_inode.h"
  22#include "xfs_inode_fork.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_mount.h"
  25#include "xfs_trace.h"
  26
  27/*
  28 * In-core extent record layout:
  29 *
  30 * +-------+----------------------------+
  31 * | 00:53 | all 54 bits of startoff    |
  32 * | 54:63 | low 10 bits of startblock  |
  33 * +-------+----------------------------+
  34 * | 00:20 | all 21 bits of length      |
  35 * |    21 | unwritten extent bit       |
  36 * | 22:63 | high 42 bits of startblock |
  37 * +-------+----------------------------+
  38 */
/*
 * Masks for the valid bits of the start offset, block count and start block
 * fields of an extent.  xfs_mask64lo() and the BMBT_*_BITLEN widths are
 * defined elsewhere (presumably a mask of the low N bits - confirm there).
 */
#define XFS_IEXT_STARTOFF_MASK          xfs_mask64lo(BMBT_STARTOFF_BITLEN)
#define XFS_IEXT_LENGTH_MASK            xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
#define XFS_IEXT_STARTBLOCK_MASK        xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
  42
/* Packed in-core extent record; see the layout table above. */
struct xfs_iext_rec {
        /* start offset in the low bits, low 10 bits of startblock on top */
        uint64_t                        lo;
        /* length, unwritten bit (bit 21), remaining startblock bits from 22 */
        uint64_t                        hi;
};
  47
  48/*
  49 * Given that the length can't be a zero, only an empty hi value indicates an
  50 * unused record.
  51 */
  52static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
  53{
  54        return rec->hi == 0;
  55}
  56
  57static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
  58{
  59        rec->lo = 0;
  60        rec->hi = 0;
  61}
  62
  63static void
  64xfs_iext_set(
  65        struct xfs_iext_rec     *rec,
  66        struct xfs_bmbt_irec    *irec)
  67{
  68        ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
  69        ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
  70        ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);
  71
  72        rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
  73        rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;
  74
  75        rec->lo |= (irec->br_startblock << 54);
  76        rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));
  77
  78        if (irec->br_state == XFS_EXT_UNWRITTEN)
  79                rec->hi |= (1 << 21);
  80}
  81
  82static void
  83xfs_iext_get(
  84        struct xfs_bmbt_irec    *irec,
  85        struct xfs_iext_rec     *rec)
  86{
  87        irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
  88        irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;
  89
  90        irec->br_startblock = rec->lo >> 54;
  91        irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);
  92
  93        if (rec->hi & (1 << 21))
  94                irec->br_state = XFS_EXT_UNWRITTEN;
  95        else
  96                irec->br_state = XFS_EXT_NORM;
  97}
  98
/* Sizing constants for the blocks of the in-core extent btree. */
enum {
        /* size in bytes used when allocating inner nodes and full leaves */
        NODE_SIZE       = 256,
        /* number of key/pointer pairs that fit into an inner node */
        KEYS_PER_NODE   = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
        /* number of records in a leaf once the two sibling pointers fit too */
        RECS_PER_LEAF   = (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
                                sizeof(struct xfs_iext_rec),
};
 105
 106/*
 107 * In-core extent btree block layout:
 108 *
 109 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
 110 *
 * The leaf blocks are made up by %RECS_PER_LEAF extent records, each of which
 * contains the startoffset, blockcount, startblock and unwritten extent flag
 * (see above for the exact format), followed by pointers to the previous and
 * next leaf blocks (if there are any).
 115 *
 116 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
 117 * by an equal number of pointers to the btree blocks at the next lower level.
 118 *
 119 *              +-------+-------+-------+-------+-------+----------+----------+
 120 * Leaf:        | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
 121 *              +-------+-------+-------+-------+-------+----------+----------+
 122 *
 123 *              +-------+-------+-------+-------+-------+-------+------+-------+
 124 * Inner:       | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
 125 *              +-------+-------+-------+-------+-------+-------+------+-------+
 126 */
/*
 * Inner (non-leaf) btree block: KEYS_PER_NODE lookup keys followed by the
 * same number of child pointers, where ptrs[i] belongs to keys[i].
 */
struct xfs_iext_node {
        /* start offset keys; unused slots hold XFS_IEXT_KEY_INVALID */
        uint64_t                keys[KEYS_PER_NODE];
#define XFS_IEXT_KEY_INVALID    (1ULL << 63)
        /* blocks at the next lower level; unused slots are NULL */
        void                    *ptrs[KEYS_PER_NODE];
};
 132
/*
 * Leaf btree block: RECS_PER_LEAF packed extent records followed by links to
 * the neighbouring leaves.
 */
struct xfs_iext_leaf {
        /* extent records; an unused slot has a zero hi word */
        struct xfs_iext_rec     recs[RECS_PER_LEAF];
        /* previous leaf, or NULL if there is none */
        struct xfs_iext_leaf    *prev;
        /* next leaf, or NULL if there is none */
        struct xfs_iext_leaf    *next;
};
 138
 139inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
 140{
 141        return ifp->if_bytes / sizeof(struct xfs_iext_rec);
 142}
 143
 144static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
 145{
 146        if (ifp->if_height == 1)
 147                return xfs_iext_count(ifp);
 148        return RECS_PER_LEAF;
 149}
 150
 151static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
 152{
 153        return &cur->leaf->recs[cur->pos];
 154}
 155
 156static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
 157                struct xfs_iext_cursor *cur)
 158{
 159        if (!cur->leaf)
 160                return false;
 161        if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
 162                return false;
 163        if (xfs_iext_rec_is_empty(cur_rec(cur)))
 164                return false;
 165        return true;
 166}
 167
 168static void *
 169xfs_iext_find_first_leaf(
 170        struct xfs_ifork        *ifp)
 171{
 172        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 173        int                     height;
 174
 175        if (!ifp->if_height)
 176                return NULL;
 177
 178        for (height = ifp->if_height; height > 1; height--) {
 179                node = node->ptrs[0];
 180                ASSERT(node);
 181        }
 182
 183        return node;
 184}
 185
 186static void *
 187xfs_iext_find_last_leaf(
 188        struct xfs_ifork        *ifp)
 189{
 190        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 191        int                     height, i;
 192
 193        if (!ifp->if_height)
 194                return NULL;
 195
 196        for (height = ifp->if_height; height > 1; height--) {
 197                for (i = 1; i < KEYS_PER_NODE; i++)
 198                        if (!node->ptrs[i])
 199                                break;
 200                node = node->ptrs[i - 1];
 201                ASSERT(node);
 202        }
 203
 204        return node;
 205}
 206
 207void
 208xfs_iext_first(
 209        struct xfs_ifork        *ifp,
 210        struct xfs_iext_cursor  *cur)
 211{
 212        cur->pos = 0;
 213        cur->leaf = xfs_iext_find_first_leaf(ifp);
 214}
 215
 216void
 217xfs_iext_last(
 218        struct xfs_ifork        *ifp,
 219        struct xfs_iext_cursor  *cur)
 220{
 221        int                     i;
 222
 223        cur->leaf = xfs_iext_find_last_leaf(ifp);
 224        if (!cur->leaf) {
 225                cur->pos = 0;
 226                return;
 227        }
 228
 229        for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
 230                if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
 231                        break;
 232        }
 233        cur->pos = i - 1;
 234}
 235
 236void
 237xfs_iext_next(
 238        struct xfs_ifork        *ifp,
 239        struct xfs_iext_cursor  *cur)
 240{
 241        if (!cur->leaf) {
 242                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 243                xfs_iext_first(ifp, cur);
 244                return;
 245        }
 246
 247        ASSERT(cur->pos >= 0);
 248        ASSERT(cur->pos < xfs_iext_max_recs(ifp));
 249
 250        cur->pos++;
 251        if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
 252            cur->leaf->next) {
 253                cur->leaf = cur->leaf->next;
 254                cur->pos = 0;
 255        }
 256}
 257
 258void
 259xfs_iext_prev(
 260        struct xfs_ifork        *ifp,
 261        struct xfs_iext_cursor  *cur)
 262{
 263        if (!cur->leaf) {
 264                ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
 265                xfs_iext_last(ifp, cur);
 266                return;
 267        }
 268
 269        ASSERT(cur->pos >= 0);
 270        ASSERT(cur->pos <= RECS_PER_LEAF);
 271
 272recurse:
 273        do {
 274                cur->pos--;
 275                if (xfs_iext_valid(ifp, cur))
 276                        return;
 277        } while (cur->pos > 0);
 278
 279        if (ifp->if_height > 1 && cur->leaf->prev) {
 280                cur->leaf = cur->leaf->prev;
 281                cur->pos = RECS_PER_LEAF;
 282                goto recurse;
 283        }
 284}
 285
 286static inline int
 287xfs_iext_key_cmp(
 288        struct xfs_iext_node    *node,
 289        int                     n,
 290        xfs_fileoff_t           offset)
 291{
 292        if (node->keys[n] > offset)
 293                return 1;
 294        if (node->keys[n] < offset)
 295                return -1;
 296        return 0;
 297}
 298
 299static inline int
 300xfs_iext_rec_cmp(
 301        struct xfs_iext_rec     *rec,
 302        xfs_fileoff_t           offset)
 303{
 304        uint64_t                rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
 305        uint32_t                rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
 306
 307        if (rec_offset > offset)
 308                return 1;
 309        if (rec_offset + rec_len <= offset)
 310                return -1;
 311        return 0;
 312}
 313
 314static void *
 315xfs_iext_find_level(
 316        struct xfs_ifork        *ifp,
 317        xfs_fileoff_t           offset,
 318        int                     level)
 319{
 320        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 321        int                     height, i;
 322
 323        if (!ifp->if_height)
 324                return NULL;
 325
 326        for (height = ifp->if_height; height > level; height--) {
 327                for (i = 1; i < KEYS_PER_NODE; i++)
 328                        if (xfs_iext_key_cmp(node, i, offset) > 0)
 329                                break;
 330
 331                node = node->ptrs[i - 1];
 332                if (!node)
 333                        break;
 334        }
 335
 336        return node;
 337}
 338
 339static int
 340xfs_iext_node_pos(
 341        struct xfs_iext_node    *node,
 342        xfs_fileoff_t           offset)
 343{
 344        int                     i;
 345
 346        for (i = 1; i < KEYS_PER_NODE; i++) {
 347                if (xfs_iext_key_cmp(node, i, offset) > 0)
 348                        break;
 349        }
 350
 351        return i - 1;
 352}
 353
 354static int
 355xfs_iext_node_insert_pos(
 356        struct xfs_iext_node    *node,
 357        xfs_fileoff_t           offset)
 358{
 359        int                     i;
 360
 361        for (i = 0; i < KEYS_PER_NODE; i++) {
 362                if (xfs_iext_key_cmp(node, i, offset) > 0)
 363                        return i;
 364        }
 365
 366        return KEYS_PER_NODE;
 367}
 368
 369static int
 370xfs_iext_node_nr_entries(
 371        struct xfs_iext_node    *node,
 372        int                     start)
 373{
 374        int                     i;
 375
 376        for (i = start; i < KEYS_PER_NODE; i++) {
 377                if (node->keys[i] == XFS_IEXT_KEY_INVALID)
 378                        break;
 379        }
 380
 381        return i;
 382}
 383
 384static int
 385xfs_iext_leaf_nr_entries(
 386        struct xfs_ifork        *ifp,
 387        struct xfs_iext_leaf    *leaf,
 388        int                     start)
 389{
 390        int                     i;
 391
 392        for (i = start; i < xfs_iext_max_recs(ifp); i++) {
 393                if (xfs_iext_rec_is_empty(&leaf->recs[i]))
 394                        break;
 395        }
 396
 397        return i;
 398}
 399
 400static inline uint64_t
 401xfs_iext_leaf_key(
 402        struct xfs_iext_leaf    *leaf,
 403        int                     n)
 404{
 405        return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
 406}
 407
 408static void
 409xfs_iext_grow(
 410        struct xfs_ifork        *ifp)
 411{
 412        struct xfs_iext_node    *node = kmem_zalloc(NODE_SIZE, KM_NOFS);
 413        int                     i;
 414
 415        if (ifp->if_height == 1) {
 416                struct xfs_iext_leaf *prev = ifp->if_u1.if_root;
 417
 418                node->keys[0] = xfs_iext_leaf_key(prev, 0);
 419                node->ptrs[0] = prev;
 420        } else  {
 421                struct xfs_iext_node *prev = ifp->if_u1.if_root;
 422
 423                ASSERT(ifp->if_height > 1);
 424
 425                node->keys[0] = prev->keys[0];
 426                node->ptrs[0] = prev;
 427        }
 428
 429        for (i = 1; i < KEYS_PER_NODE; i++)
 430                node->keys[i] = XFS_IEXT_KEY_INVALID;
 431
 432        ifp->if_u1.if_root = node;
 433        ifp->if_height++;
 434}
 435
 436static void
 437xfs_iext_update_node(
 438        struct xfs_ifork        *ifp,
 439        xfs_fileoff_t           old_offset,
 440        xfs_fileoff_t           new_offset,
 441        int                     level,
 442        void                    *ptr)
 443{
 444        struct xfs_iext_node    *node = ifp->if_u1.if_root;
 445        int                     height, i;
 446
 447        for (height = ifp->if_height; height > level; height--) {
 448                for (i = 0; i < KEYS_PER_NODE; i++) {
 449                        if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
 450                                break;
 451                        if (node->keys[i] == old_offset)
 452                                node->keys[i] = new_offset;
 453                }
 454                node = node->ptrs[i - 1];
 455                ASSERT(node);
 456        }
 457
 458        ASSERT(node == ptr);
 459}
 460
 461static struct xfs_iext_node *
 462xfs_iext_split_node(
 463        struct xfs_iext_node    **nodep,
 464        int                     *pos,
 465        int                     *nr_entries)
 466{
 467        struct xfs_iext_node    *node = *nodep;
 468        struct xfs_iext_node    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 469        const int               nr_move = KEYS_PER_NODE / 2;
 470        int                     nr_keep = nr_move + (KEYS_PER_NODE & 1);
 471        int                     i = 0;
 472
 473        /* for sequential append operations just spill over into the new node */
 474        if (*pos == KEYS_PER_NODE) {
 475                *nodep = new;
 476                *pos = 0;
 477                *nr_entries = 0;
 478                goto done;
 479        }
 480
 481
 482        for (i = 0; i < nr_move; i++) {
 483                new->keys[i] = node->keys[nr_keep + i];
 484                new->ptrs[i] = node->ptrs[nr_keep + i];
 485
 486                node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
 487                node->ptrs[nr_keep + i] = NULL;
 488        }
 489
 490        if (*pos >= nr_keep) {
 491                *nodep = new;
 492                *pos -= nr_keep;
 493                *nr_entries = nr_move;
 494        } else {
 495                *nr_entries = nr_keep;
 496        }
 497done:
 498        for (; i < KEYS_PER_NODE; i++)
 499                new->keys[i] = XFS_IEXT_KEY_INVALID;
 500        return new;
 501}
 502
 503static void
 504xfs_iext_insert_node(
 505        struct xfs_ifork        *ifp,
 506        uint64_t                offset,
 507        void                    *ptr,
 508        int                     level)
 509{
 510        struct xfs_iext_node    *node, *new;
 511        int                     i, pos, nr_entries;
 512
 513again:
 514        if (ifp->if_height < level)
 515                xfs_iext_grow(ifp);
 516
 517        new = NULL;
 518        node = xfs_iext_find_level(ifp, offset, level);
 519        pos = xfs_iext_node_insert_pos(node, offset);
 520        nr_entries = xfs_iext_node_nr_entries(node, pos);
 521
 522        ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
 523        ASSERT(nr_entries <= KEYS_PER_NODE);
 524
 525        if (nr_entries == KEYS_PER_NODE)
 526                new = xfs_iext_split_node(&node, &pos, &nr_entries);
 527
 528        /*
 529         * Update the pointers in higher levels if the first entry changes
 530         * in an existing node.
 531         */
 532        if (node != new && pos == 0 && nr_entries > 0)
 533                xfs_iext_update_node(ifp, node->keys[0], offset, level, node);
 534
 535        for (i = nr_entries; i > pos; i--) {
 536                node->keys[i] = node->keys[i - 1];
 537                node->ptrs[i] = node->ptrs[i - 1];
 538        }
 539        node->keys[pos] = offset;
 540        node->ptrs[pos] = ptr;
 541
 542        if (new) {
 543                offset = new->keys[0];
 544                ptr = new;
 545                level++;
 546                goto again;
 547        }
 548}
 549
 550static struct xfs_iext_leaf *
 551xfs_iext_split_leaf(
 552        struct xfs_iext_cursor  *cur,
 553        int                     *nr_entries)
 554{
 555        struct xfs_iext_leaf    *leaf = cur->leaf;
 556        struct xfs_iext_leaf    *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
 557        const int               nr_move = RECS_PER_LEAF / 2;
 558        int                     nr_keep = nr_move + (RECS_PER_LEAF & 1);
 559        int                     i;
 560
 561        /* for sequential append operations just spill over into the new node */
 562        if (cur->pos == RECS_PER_LEAF) {
 563                cur->leaf = new;
 564                cur->pos = 0;
 565                *nr_entries = 0;
 566                goto done;
 567        }
 568
 569        for (i = 0; i < nr_move; i++) {
 570                new->recs[i] = leaf->recs[nr_keep + i];
 571                xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
 572        }
 573
 574        if (cur->pos >= nr_keep) {
 575                cur->leaf = new;
 576                cur->pos -= nr_keep;
 577                *nr_entries = nr_move;
 578        } else {
 579                *nr_entries = nr_keep;
 580        }
 581done:
 582        if (leaf->next)
 583                leaf->next->prev = new;
 584        new->next = leaf->next;
 585        new->prev = leaf;
 586        leaf->next = new;
 587        return new;
 588}
 589
 590static void
 591xfs_iext_alloc_root(
 592        struct xfs_ifork        *ifp,
 593        struct xfs_iext_cursor  *cur)
 594{
 595        ASSERT(ifp->if_bytes == 0);
 596
 597        ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
 598        ifp->if_height = 1;
 599
 600        /* now that we have a node step into it */
 601        cur->leaf = ifp->if_u1.if_root;
 602        cur->pos = 0;
 603}
 604
 605static void
 606xfs_iext_realloc_root(
 607        struct xfs_ifork        *ifp,
 608        struct xfs_iext_cursor  *cur)
 609{
 610        size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
 611        void *new;
 612
 613        /* account for the prev/next pointers */
 614        if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
 615                new_size = NODE_SIZE;
 616
 617        new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
 618        memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
 619        ifp->if_u1.if_root = new;
 620        cur->leaf = new;
 621}
 622
/*
 * Insert the extent @irec at the position @cur points to, in the fork that
 * xfs_iext_state_to_fork() returns for @ip and @state.
 *
 * An empty fork gets a root leaf allocated, and a tree of height one has its
 * root leaf reallocated to make room for one more record.  If the target leaf
 * is full it is split first, and the leaf created by the split is linked into
 * the inner nodes after the record has been stored.
 */
void
xfs_iext_insert(
        struct xfs_inode        *ip,
        struct xfs_iext_cursor  *cur,
        struct xfs_bmbt_irec    *irec,
        int                     state)
{
        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
        xfs_fileoff_t           offset = irec->br_startoff;
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;

        /* both helpers also repoint cur->leaf at the (new) root leaf */
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
                xfs_iext_realloc_root(ifp, cur);

        /* index of the first unused slot at or after the insert position */
        nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
        ASSERT(nr_entries <= RECS_PER_LEAF);
        ASSERT(cur->pos >= nr_entries ||
               xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);

        /* a full leaf is split; this may move cur over to the new leaf */
        if (nr_entries == RECS_PER_LEAF)
                new = xfs_iext_split_leaf(cur, &nr_entries);

        /*
         * Update the pointers in higher levels if the first entry changes
         * in an existing node.
         */
        if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
                xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
                                offset, 1, cur->leaf);
        }

        /* shift the records from cur->pos on up by one and fill the gap */
        for (i = nr_entries; i > cur->pos; i--)
                cur->leaf->recs[i] = cur->leaf->recs[i - 1];
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);

        trace_xfs_iext_insert(ip, cur, state, _RET_IP_);

        /* link a leaf created by the split into level 2, keyed by its first record */
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
}
 667
/*
 * Try to merge @node, which holds @nr_entries entries and sits at index *@pos
 * in @parent, with one of its neighbours below the same parent.
 *
 * Returns the node that is no longer needed, with *@pos set to its index in
 * @parent: @node itself if it is empty or its entries were appended to the
 * previous node, or the next node if its entries were appended to @node.
 * Returns NULL if no merge was possible.
 */
static struct xfs_iext_node *
xfs_iext_rebalance_node(
        struct xfs_iext_node    *parent,
        int                     *pos,
        struct xfs_iext_node    *node,
        int                     nr_entries)
{
        /*
         * If the neighbouring nodes are completely full, or have different
         * parents, we might never be able to merge our node, and will only
         * delete it once the number of entries hits zero.
         */
        if (nr_entries == 0)
                return node;

        if (*pos > 0) {
                struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
                int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;

                /* append our entries to the previous node if they all fit */
                if (nr_prev + nr_entries <= KEYS_PER_NODE) {
                        for (i = 0; i < nr_entries; i++) {
                                prev->keys[nr_prev + i] = node->keys[i];
                                prev->ptrs[nr_prev + i] = node->ptrs[i];
                        }
                        return node;
                }
        }

        /* only look at the next slot if the parent has a valid entry there */
        if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
                struct xfs_iext_node *next = parent->ptrs[*pos + 1];
                int nr_next = xfs_iext_node_nr_entries(next, 0), i;

                if (nr_entries + nr_next <= KEYS_PER_NODE) {
                        /*
                         * Merge the next node into this node so that we don't
                         * have to do an additional update of the keys in the
                         * higher levels.
                         */
                        for (i = 0; i < nr_next; i++) {
                                node->keys[nr_entries + i] = next->keys[i];
                                node->ptrs[nr_entries + i] = next->ptrs[i];
                        }

                        /* the caller has to drop the next node's slot instead */
                        ++*pos;
                        return next;
                }
        }

        return NULL;
}
 718
/*
 * Free @victim and remove the entry pointing at it from the level 2 node
 * found for @offset.
 *
 * If that leaves the node less than half full, try to merge it with a
 * neighbour and repeat the removal one level up for the node that became
 * redundant.  If the root is reached with a single entry left, the root is
 * freed and the tree shrinks by one level.
 */
static void
xfs_iext_remove_node(
        struct xfs_ifork        *ifp,
        xfs_fileoff_t           offset,
        void                    *victim)
{
        struct xfs_iext_node    *node, *parent;
        int                     level = 2, pos, nr_entries, i;

        ASSERT(level <= ifp->if_height);
        node = xfs_iext_find_level(ifp, offset, level);
        pos = xfs_iext_node_pos(node, offset);
again:
        /* on every pass node->ptrs[pos] is the entry for the victim */
        ASSERT(node->ptrs[pos]);
        ASSERT(node->ptrs[pos] == victim);
        kmem_free(victim);

        /* close the gap at pos and mark the freed last slot as unused */
        nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
        offset = node->keys[0];
        for (i = pos; i < nr_entries; i++) {
                node->keys[i] = node->keys[i + 1];
                node->ptrs[i] = node->ptrs[i + 1];
        }
        node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
        node->ptrs[nr_entries] = NULL;

        /* the first key of the node changed, update the higher levels */
        if (pos == 0 && nr_entries > 0) {
                xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
                offset = node->keys[0];
        }

        /* still at least half full, nothing to rebalance */
        if (nr_entries >= KEYS_PER_NODE / 2)
                return;

        if (level < ifp->if_height) {
                /*
                 * If we aren't at the root yet try to find a neighbour node to
                 * merge with (or delete the node if it is empty), and then
                 * recurse up to the next level.
                 */
                level++;
                parent = xfs_iext_find_level(ifp, offset, level);
                pos = xfs_iext_node_pos(parent, offset);

                ASSERT(pos != KEYS_PER_NODE);
                ASSERT(parent->ptrs[pos] == node);

                /* a non-NULL result is the node to drop from the parent next */
                node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
                if (node) {
                        victim = node;
                        node = parent;
                        goto again;
                }
        } else if (nr_entries == 1) {
                /*
                 * If we are at the root and only one entry is left we can just
                 * free this node and update the root pointer.
                 */
                ASSERT(node == ifp->if_u1.if_root);
                ifp->if_u1.if_root = node->ptrs[0];
                ifp->if_height--;
                kmem_free(node);
        }
}
 783
/*
 * Try to get rid of a leaf that dropped below half full.
 *
 * @leaf holds @nr_entries records and @offset is the key it is known by in
 * the inner nodes.  An empty leaf is removed right away.  Otherwise the
 * records of @leaf are appended to the previous leaf, or the records of the
 * next leaf are appended to @leaf, if the combined records fit into one
 * leaf; the emptied leaf is then unlinked from the leaf list and removed
 * from the tree.  @cur is adjusted if it pointed into a leaf whose records
 * were moved.  If no merge is possible nothing is changed.
 */
static void
xfs_iext_rebalance_leaf(
        struct xfs_ifork        *ifp,
        struct xfs_iext_cursor  *cur,
        struct xfs_iext_leaf    *leaf,
        xfs_fileoff_t           offset,
        int                     nr_entries)
{
        /*
         * If the neighbouring nodes are completely full we might never be able
         * to merge our node, and will only delete it once the number of
         * entries hits zero.
         */
        if (nr_entries == 0)
                goto remove_node;

        if (leaf->prev) {
                int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;

                /* append our records to the previous leaf if they all fit */
                if (nr_prev + nr_entries <= RECS_PER_LEAF) {
                        for (i = 0; i < nr_entries; i++)
                                leaf->prev->recs[nr_prev + i] = leaf->recs[i];

                        /* keep the cursor on the same record after the move */
                        if (cur->leaf == leaf) {
                                cur->leaf = leaf->prev;
                                cur->pos += nr_prev;
                        }
                        goto remove_node;
                }
        }

        if (leaf->next) {
                int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;

                if (nr_entries + nr_next <= RECS_PER_LEAF) {
                        /*
                         * Merge the next node into this node so that we don't
                         * have to do an additional update of the keys in the
                         * higher levels.
                         */
                        for (i = 0; i < nr_next; i++) {
                                leaf->recs[nr_entries + i] =
                                        leaf->next->recs[i];
                        }

                        /* keep the cursor on the same record after the move */
                        if (cur->leaf == leaf->next) {
                                cur->leaf = leaf;
                                cur->pos += nr_entries;
                        }

                        /* the next leaf is the one to remove, under its own key */
                        offset = xfs_iext_leaf_key(leaf->next, 0);
                        leaf = leaf->next;
                        goto remove_node;
                }
        }

        return;
remove_node:
        /* unlink the leaf from the sibling list before it gets freed */
        if (leaf->prev)
                leaf->prev->next = leaf->next;
        if (leaf->next)
                leaf->next->prev = leaf->prev;
        xfs_iext_remove_node(ifp, offset, leaf);
}
 848
 849static void
 850xfs_iext_free_last_leaf(
 851        struct xfs_ifork        *ifp)
 852{
 853        ifp->if_height--;
 854        kmem_free(ifp->if_u1.if_root);
 855        ifp->if_u1.if_root = NULL;
 856}
 857
/*
 * Remove the record @cur points at from the fork that
 * xfs_iext_state_to_fork() returns for @ip and @state.
 *
 * The records behind it in the leaf move down by one slot.  If the first
 * record of the leaf changed, the keys in the higher levels are updated.  If
 * the removed record was the last one in the leaf, @cur moves to the start of
 * the next leaf, or is set to a NULL leaf if there is none.  A leaf that
 * drops below half full is rebalanced in a multi-level tree; in a tree of
 * height one the root leaf is freed once it is empty.
 */
void
xfs_iext_remove(
        struct xfs_inode        *ip,
        struct xfs_iext_cursor  *cur,
        int                     state)
{
        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
        struct xfs_iext_leaf    *leaf = cur->leaf;
        /* key of the leaf before anything is removed */
        xfs_fileoff_t           offset = xfs_iext_leaf_key(leaf, 0);
        int                     i, nr_entries;

        trace_xfs_iext_remove(ip, cur, state, _RET_IP_);

        ASSERT(ifp->if_height > 0);
        ASSERT(ifp->if_u1.if_root != NULL);
        ASSERT(xfs_iext_valid(ifp, cur));

        /* close the gap at cur->pos and clear the freed last slot */
        nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
        for (i = cur->pos; i < nr_entries; i++)
                leaf->recs[i] = leaf->recs[i + 1];
        xfs_iext_rec_clear(&leaf->recs[nr_entries]);
        ifp->if_bytes -= sizeof(struct xfs_iext_rec);

        if (cur->pos == 0 && nr_entries > 0) {
                /* the first record changed, update the keys pointing at us */
                xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
                                leaf);
                offset = xfs_iext_leaf_key(leaf, 0);
        } else if (cur->pos == nr_entries) {
                /* nothing left behind the cursor in this leaf, move on */
                if (ifp->if_height > 1 && leaf->next)
                        cur->leaf = leaf->next;
                else
                        cur->leaf = NULL;
                cur->pos = 0;
        }

        /* still at least half full, nothing to rebalance */
        if (nr_entries >= RECS_PER_LEAF / 2)
                return;

        if (ifp->if_height > 1)
                xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
        else if (nr_entries == 0)
                xfs_iext_free_last_leaf(ifp);
}
 901
 902/*
 903 * Lookup the extent covering bno.
 904 *
 905 * If there is an extent covering bno return the extent index, and store the
 906 * expanded extent structure in *gotp, and the extent cursor in *cur.
 907 * If there is no extent covering bno, but there is an extent after it (e.g.
 908 * it lies in a hole) return that extent in *gotp and its cursor in *cur
 909 * instead.
 910 * If bno is beyond the last extent return false, and return an invalid
 911 * cursor value.
 912 */
 913bool
 914xfs_iext_lookup_extent(
 915        struct xfs_inode        *ip,
 916        struct xfs_ifork        *ifp,
 917        xfs_fileoff_t           offset,
 918        struct xfs_iext_cursor  *cur,
 919        struct xfs_bmbt_irec    *gotp)
 920{
 921        XFS_STATS_INC(ip->i_mount, xs_look_exlist);
 922
 923        cur->leaf = xfs_iext_find_level(ifp, offset, 1);
 924        if (!cur->leaf) {
 925                cur->pos = 0;
 926                return false;
 927        }
 928
 929        for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
 930                struct xfs_iext_rec *rec = cur_rec(cur);
 931
 932                if (xfs_iext_rec_is_empty(rec))
 933                        break;
 934                if (xfs_iext_rec_cmp(rec, offset) >= 0)
 935                        goto found;
 936        }
 937
 938        /* Try looking in the next node for an entry > offset */
 939        if (ifp->if_height == 1 || !cur->leaf->next)
 940                return false;
 941        cur->leaf = cur->leaf->next;
 942        cur->pos = 0;
 943        if (!xfs_iext_valid(ifp, cur))
 944                return false;
 945found:
 946        xfs_iext_get(gotp, cur_rec(cur));
 947        return true;
 948}
 949
 950/*
 951 * Returns the last extent before end, and if this extent doesn't cover
 952 * end, update end to the end of the extent.
 953 */
 954bool
 955xfs_iext_lookup_extent_before(
 956        struct xfs_inode        *ip,
 957        struct xfs_ifork        *ifp,
 958        xfs_fileoff_t           *end,
 959        struct xfs_iext_cursor  *cur,
 960        struct xfs_bmbt_irec    *gotp)
 961{
 962        /* could be optimized to not even look up the next on a match.. */
 963        if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
 964            gotp->br_startoff <= *end - 1)
 965                return true;
 966        if (!xfs_iext_prev_extent(ifp, cur, gotp))
 967                return false;
 968        *end = gotp->br_startoff + gotp->br_blockcount;
 969        return true;
 970}
 971
 972void
 973xfs_iext_update_extent(
 974        struct xfs_inode        *ip,
 975        int                     state,
 976        struct xfs_iext_cursor  *cur,
 977        struct xfs_bmbt_irec    *new)
 978{
 979        struct xfs_ifork        *ifp = xfs_iext_state_to_fork(ip, state);
 980
 981        if (cur->pos == 0) {
 982                struct xfs_bmbt_irec    old;
 983
 984                xfs_iext_get(&old, cur_rec(cur));
 985                if (new->br_startoff != old.br_startoff) {
 986                        xfs_iext_update_node(ifp, old.br_startoff,
 987                                        new->br_startoff, 1, cur->leaf);
 988                }
 989        }
 990
 991        trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
 992        xfs_iext_set(cur_rec(cur), new);
 993        trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
 994}
 995
 996/*
 997 * Return true if the cursor points at an extent and return the extent structure
 998 * in gotp.  Else return false.
 999 */
1000bool
1001xfs_iext_get_extent(
1002        struct xfs_ifork        *ifp,
1003        struct xfs_iext_cursor  *cur,
1004        struct xfs_bmbt_irec    *gotp)
1005{
1006        if (!xfs_iext_valid(ifp, cur))
1007                return false;
1008        xfs_iext_get(gotp, cur_rec(cur));
1009        return true;
1010}
1011
1012/*
1013 * This is a recursive function, because of that we need to be extremely
1014 * careful with stack usage.
1015 */
1016static void
1017xfs_iext_destroy_node(
1018        struct xfs_iext_node    *node,
1019        int                     level)
1020{
1021        int                     i;
1022
1023        if (level > 1) {
1024                for (i = 0; i < KEYS_PER_NODE; i++) {
1025                        if (node->keys[i] == XFS_IEXT_KEY_INVALID)
1026                                break;
1027                        xfs_iext_destroy_node(node->ptrs[i], level - 1);
1028                }
1029        }
1030
1031        kmem_free(node);
1032}
1033
1034void
1035xfs_iext_destroy(
1036        struct xfs_ifork        *ifp)
1037{
1038        xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);
1039
1040        ifp->if_bytes = 0;
1041        ifp->if_height = 0;
1042        ifp->if_u1.if_root = NULL;
1043}
1044