linux/fs/nilfs2/btree.c
<<
>>
Prefs
   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19 *
  20 * Written by Koji Sato <koji@osrg.net>.
  21 */
  22
  23#include <linux/slab.h>
  24#include <linux/string.h>
  25#include <linux/errno.h>
  26#include <linux/pagevec.h>
  27#include "nilfs.h"
  28#include "page.h"
  29#include "btnode.h"
  30#include "btree.h"
  31#include "alloc.h"
  32#include "dat.h"
  33
  34/**
  35 * struct nilfs_btree_path - A path on which B-tree operations are executed
  36 * @bp_bh: buffer head of node block
  37 * @bp_sib_bh: buffer head of sibling node block
  38 * @bp_index: index of child node
  39 * @bp_oldreq: ptr end request for old ptr
  40 * @bp_newreq: ptr alloc request for new ptr
  41 * @bp_op: rebalance operation
  42 */
  43struct nilfs_btree_path {
  44        struct buffer_head *bp_bh;
  45        struct buffer_head *bp_sib_bh;
  46        int bp_index;
  47        union nilfs_bmap_ptr_req bp_oldreq;
  48        union nilfs_bmap_ptr_req bp_newreq;
  49        struct nilfs_btnode_chkey_ctxt bp_ctxt;
  50        void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
  51                      int, __u64 *, __u64 *);
  52};
  53
  54/*
  55 * B-tree path operations
  56 */
  57
  58static struct kmem_cache *nilfs_btree_path_cache;
  59
  60int __init nilfs_btree_path_cache_init(void)
  61{
  62        nilfs_btree_path_cache =
  63                kmem_cache_create("nilfs2_btree_path_cache",
  64                                  sizeof(struct nilfs_btree_path) *
  65                                  NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
  66        return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
  67}
  68
  69void nilfs_btree_path_cache_destroy(void)
  70{
  71        kmem_cache_destroy(nilfs_btree_path_cache);
  72}
  73
  74static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  75{
  76        return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  77}
  78
  79static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
  80{
  81        kmem_cache_free(nilfs_btree_path_cache, path);
  82}
  83
  84static void nilfs_btree_init_path(struct nilfs_btree_path *path)
  85{
  86        int level;
  87
  88        for (level = NILFS_BTREE_LEVEL_DATA;
  89             level < NILFS_BTREE_LEVEL_MAX;
  90             level++) {
  91                path[level].bp_bh = NULL;
  92                path[level].bp_sib_bh = NULL;
  93                path[level].bp_index = 0;
  94                path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  95                path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  96                path[level].bp_op = NULL;
  97        }
  98}
  99
 100static void nilfs_btree_release_path(struct nilfs_btree_path *path)
 101{
 102        int level;
 103
 104        for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
 105             level++)
 106                brelse(path[level].bp_bh);
 107}
 108
 109/*
 110 * B-tree node operations
 111 */
 112static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
 113                                 struct buffer_head **bhp)
 114{
 115        struct address_space *btnc =
 116                &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
 117        return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
 118}
 119
 120static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
 121                                     __u64 ptr, struct buffer_head **bhp)
 122{
 123        struct address_space *btnc =
 124                &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
 125        int ret;
 126
 127        ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
 128        if (!ret)
 129                set_buffer_nilfs_volatile(*bhp);
 130        return ret;
 131}
 132
 133static inline int
 134nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
 135{
 136        return node->bn_flags;
 137}
 138
 139static inline void
 140nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
 141{
 142        node->bn_flags = flags;
 143}
 144
 145static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
 146{
 147        return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
 148}
 149
 150static inline int
 151nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 152{
 153        return node->bn_level;
 154}
 155
 156static inline void
 157nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 158{
 159        node->bn_level = level;
 160}
 161
 162static inline int
 163nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 164{
 165        return le16_to_cpu(node->bn_nchildren);
 166}
 167
 168static inline void
 169nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 170{
 171        node->bn_nchildren = cpu_to_le16(nchildren);
 172}
 173
 174static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
 175{
 176        return 1 << btree->bt_bmap.b_inode->i_blkbits;
 177}
 178
 179static inline int
 180nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
 181                               const struct nilfs_btree *btree)
 182{
 183        return nilfs_btree_node_root(node) ?
 184                NILFS_BTREE_ROOT_NCHILDREN_MIN :
 185                NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
 186}
 187
 188static inline int
 189nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
 190                               const struct nilfs_btree *btree)
 191{
 192        return nilfs_btree_node_root(node) ?
 193                NILFS_BTREE_ROOT_NCHILDREN_MAX :
 194                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
 195}
 196
 197static inline __le64 *
 198nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 199{
 200        return (__le64 *)((char *)(node + 1) +
 201                          (nilfs_btree_node_root(node) ?
 202                           0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 203}
 204
 205static inline __le64 *
 206nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
 207                       const struct nilfs_btree *btree)
 208{
 209        return (__le64 *)(nilfs_btree_node_dkeys(node) +
 210                          nilfs_btree_node_nchildren_max(node, btree));
 211}
 212
 213static inline __u64
 214nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 215{
 216        return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
 217}
 218
 219static inline void
 220nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 221{
 222        *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
 223}
 224
 225static inline __u64
 226nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
 227                         const struct nilfs_btree_node *node, int index)
 228{
 229        return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
 230                                        index));
 231}
 232
 233static inline void
 234nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
 235                         struct nilfs_btree_node *node, int index, __u64 ptr)
 236{
 237        *(nilfs_btree_node_dptrs(node, btree) + index) =
 238                nilfs_bmap_ptr_to_dptr(ptr);
 239}
 240
 241static void nilfs_btree_node_init(struct nilfs_btree *btree,
 242                                  struct nilfs_btree_node *node,
 243                                  int flags, int level, int nchildren,
 244                                  const __u64 *keys, const __u64 *ptrs)
 245{
 246        __le64 *dkeys;
 247        __le64 *dptrs;
 248        int i;
 249
 250        nilfs_btree_node_set_flags(node, flags);
 251        nilfs_btree_node_set_level(node, level);
 252        nilfs_btree_node_set_nchildren(node, nchildren);
 253
 254        dkeys = nilfs_btree_node_dkeys(node);
 255        dptrs = nilfs_btree_node_dptrs(node, btree);
 256        for (i = 0; i < nchildren; i++) {
 257                dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
 258                dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
 259        }
 260}
 261
 262/* Assume the buffer heads corresponding to left and right are locked. */
 263static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
 264                                       struct nilfs_btree_node *left,
 265                                       struct nilfs_btree_node *right,
 266                                       int n)
 267{
 268        __le64 *ldkeys, *rdkeys;
 269        __le64 *ldptrs, *rdptrs;
 270        int lnchildren, rnchildren;
 271
 272        ldkeys = nilfs_btree_node_dkeys(left);
 273        ldptrs = nilfs_btree_node_dptrs(left, btree);
 274        lnchildren = nilfs_btree_node_get_nchildren(left);
 275
 276        rdkeys = nilfs_btree_node_dkeys(right);
 277        rdptrs = nilfs_btree_node_dptrs(right, btree);
 278        rnchildren = nilfs_btree_node_get_nchildren(right);
 279
 280        memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 281        memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 282        memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 283        memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 284
 285        lnchildren += n;
 286        rnchildren -= n;
 287        nilfs_btree_node_set_nchildren(left, lnchildren);
 288        nilfs_btree_node_set_nchildren(right, rnchildren);
 289}
 290
 291/* Assume that the buffer heads corresponding to left and right are locked. */
 292static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
 293                                        struct nilfs_btree_node *left,
 294                                        struct nilfs_btree_node *right,
 295                                        int n)
 296{
 297        __le64 *ldkeys, *rdkeys;
 298        __le64 *ldptrs, *rdptrs;
 299        int lnchildren, rnchildren;
 300
 301        ldkeys = nilfs_btree_node_dkeys(left);
 302        ldptrs = nilfs_btree_node_dptrs(left, btree);
 303        lnchildren = nilfs_btree_node_get_nchildren(left);
 304
 305        rdkeys = nilfs_btree_node_dkeys(right);
 306        rdptrs = nilfs_btree_node_dptrs(right, btree);
 307        rnchildren = nilfs_btree_node_get_nchildren(right);
 308
 309        memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 310        memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 311        memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 312        memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 313
 314        lnchildren -= n;
 315        rnchildren += n;
 316        nilfs_btree_node_set_nchildren(left, lnchildren);
 317        nilfs_btree_node_set_nchildren(right, rnchildren);
 318}
 319
 320/* Assume that the buffer head corresponding to node is locked. */
 321static void nilfs_btree_node_insert(struct nilfs_btree *btree,
 322                                    struct nilfs_btree_node *node,
 323                                    __u64 key, __u64 ptr, int index)
 324{
 325        __le64 *dkeys;
 326        __le64 *dptrs;
 327        int nchildren;
 328
 329        dkeys = nilfs_btree_node_dkeys(node);
 330        dptrs = nilfs_btree_node_dptrs(node, btree);
 331        nchildren = nilfs_btree_node_get_nchildren(node);
 332        if (index < nchildren) {
 333                memmove(dkeys + index + 1, dkeys + index,
 334                        (nchildren - index) * sizeof(*dkeys));
 335                memmove(dptrs + index + 1, dptrs + index,
 336                        (nchildren - index) * sizeof(*dptrs));
 337        }
 338        dkeys[index] = nilfs_bmap_key_to_dkey(key);
 339        dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
 340        nchildren++;
 341        nilfs_btree_node_set_nchildren(node, nchildren);
 342}
 343
 344/* Assume that the buffer head corresponding to node is locked. */
 345static void nilfs_btree_node_delete(struct nilfs_btree *btree,
 346                                    struct nilfs_btree_node *node,
 347                                    __u64 *keyp, __u64 *ptrp, int index)
 348{
 349        __u64 key;
 350        __u64 ptr;
 351        __le64 *dkeys;
 352        __le64 *dptrs;
 353        int nchildren;
 354
 355        dkeys = nilfs_btree_node_dkeys(node);
 356        dptrs = nilfs_btree_node_dptrs(node, btree);
 357        key = nilfs_bmap_dkey_to_key(dkeys[index]);
 358        ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
 359        nchildren = nilfs_btree_node_get_nchildren(node);
 360        if (keyp != NULL)
 361                *keyp = key;
 362        if (ptrp != NULL)
 363                *ptrp = ptr;
 364
 365        if (index < nchildren - 1) {
 366                memmove(dkeys + index, dkeys + index + 1,
 367                        (nchildren - index - 1) * sizeof(*dkeys));
 368                memmove(dptrs + index, dptrs + index + 1,
 369                        (nchildren - index - 1) * sizeof(*dptrs));
 370        }
 371        nchildren--;
 372        nilfs_btree_node_set_nchildren(node, nchildren);
 373}
 374
 375static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 376                                   __u64 key, int *indexp)
 377{
 378        __u64 nkey;
 379        int index, low, high, s;
 380
 381        /* binary search */
 382        low = 0;
 383        high = nilfs_btree_node_get_nchildren(node) - 1;
 384        index = 0;
 385        s = 0;
 386        while (low <= high) {
 387                index = (low + high) / 2;
 388                nkey = nilfs_btree_node_get_key(node, index);
 389                if (nkey == key) {
 390                        s = 0;
 391                        goto out;
 392                } else if (nkey < key) {
 393                        low = index + 1;
 394                        s = -1;
 395                } else {
 396                        high = index - 1;
 397                        s = 1;
 398                }
 399        }
 400
 401        /* adjust index */
 402        if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 403                if (s > 0 && index > 0)
 404                        index--;
 405        } else if (s < 0)
 406                index++;
 407
 408 out:
 409        *indexp = index;
 410
 411        return s == 0;
 412}
 413
 414static inline struct nilfs_btree_node *
 415nilfs_btree_get_root(const struct nilfs_btree *btree)
 416{
 417        return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
 418}
 419
 420static inline struct nilfs_btree_node *
 421nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 422{
 423        return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 424}
 425
 426static inline struct nilfs_btree_node *
 427nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 428{
 429        return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 430}
 431
 432static inline int nilfs_btree_height(const struct nilfs_btree *btree)
 433{
 434        return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 435}
 436
 437static inline struct nilfs_btree_node *
 438nilfs_btree_get_node(const struct nilfs_btree *btree,
 439                     const struct nilfs_btree_path *path,
 440                     int level)
 441{
 442        return (level == nilfs_btree_height(btree) - 1) ?
 443                nilfs_btree_get_root(btree) :
 444                nilfs_btree_get_nonroot_node(path, level);
 445}
 446
 447static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
 448                                 struct nilfs_btree_path *path,
 449                                 __u64 key, __u64 *ptrp, int minlevel)
 450{
 451        struct nilfs_btree_node *node;
 452        __u64 ptr;
 453        int level, index, found, ret;
 454
 455        node = nilfs_btree_get_root(btree);
 456        level = nilfs_btree_node_get_level(node);
 457        if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 458                return -ENOENT;
 459
 460        found = nilfs_btree_node_lookup(node, key, &index);
 461        ptr = nilfs_btree_node_get_ptr(btree, node, index);
 462        path[level].bp_bh = NULL;
 463        path[level].bp_index = index;
 464
 465        for (level--; level >= minlevel; level--) {
 466                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 467                if (ret < 0)
 468                        return ret;
 469                node = nilfs_btree_get_nonroot_node(path, level);
 470                BUG_ON(level != nilfs_btree_node_get_level(node));
 471                if (!found)
 472                        found = nilfs_btree_node_lookup(node, key, &index);
 473                else
 474                        index = 0;
 475                if (index < nilfs_btree_node_nchildren_max(node, btree))
 476                        ptr = nilfs_btree_node_get_ptr(btree, node, index);
 477                else {
 478                        WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 479                        /* insert */
 480                        ptr = NILFS_BMAP_INVALID_PTR;
 481                }
 482                path[level].bp_index = index;
 483        }
 484        if (!found)
 485                return -ENOENT;
 486
 487        if (ptrp != NULL)
 488                *ptrp = ptr;
 489
 490        return 0;
 491}
 492
 493static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
 494                                      struct nilfs_btree_path *path,
 495                                      __u64 *keyp, __u64 *ptrp)
 496{
 497        struct nilfs_btree_node *node;
 498        __u64 ptr;
 499        int index, level, ret;
 500
 501        node = nilfs_btree_get_root(btree);
 502        index = nilfs_btree_node_get_nchildren(node) - 1;
 503        if (index < 0)
 504                return -ENOENT;
 505        level = nilfs_btree_node_get_level(node);
 506        ptr = nilfs_btree_node_get_ptr(btree, node, index);
 507        path[level].bp_bh = NULL;
 508        path[level].bp_index = index;
 509
 510        for (level--; level > 0; level--) {
 511                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 512                if (ret < 0)
 513                        return ret;
 514                node = nilfs_btree_get_nonroot_node(path, level);
 515                BUG_ON(level != nilfs_btree_node_get_level(node));
 516                index = nilfs_btree_node_get_nchildren(node) - 1;
 517                ptr = nilfs_btree_node_get_ptr(btree, node, index);
 518                path[level].bp_index = index;
 519        }
 520
 521        if (keyp != NULL)
 522                *keyp = nilfs_btree_node_get_key(node, index);
 523        if (ptrp != NULL)
 524                *ptrp = ptr;
 525
 526        return 0;
 527}
 528
 529static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
 530                              __u64 key, int level, __u64 *ptrp)
 531{
 532        struct nilfs_btree *btree;
 533        struct nilfs_btree_path *path;
 534        __u64 ptr;
 535        int ret;
 536
 537        btree = (struct nilfs_btree *)bmap;
 538        path = nilfs_btree_alloc_path();
 539        if (path == NULL)
 540                return -ENOMEM;
 541        nilfs_btree_init_path(path);
 542
 543        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
 544
 545        if (ptrp != NULL)
 546                *ptrp = ptr;
 547
 548        nilfs_btree_release_path(path);
 549        nilfs_btree_free_path(path);
 550
 551        return ret;
 552}
 553
 554static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
 555                                     __u64 key, __u64 *ptrp, unsigned maxblocks)
 556{
 557        struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
 558        struct nilfs_btree_path *path;
 559        struct nilfs_btree_node *node;
 560        struct inode *dat = NULL;
 561        __u64 ptr, ptr2;
 562        sector_t blocknr;
 563        int level = NILFS_BTREE_LEVEL_NODE_MIN;
 564        int ret, cnt, index, maxlevel;
 565
 566        path = nilfs_btree_alloc_path();
 567        if (path == NULL)
 568                return -ENOMEM;
 569        nilfs_btree_init_path(path);
 570        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
 571        if (ret < 0)
 572                goto out;
 573
 574        if (NILFS_BMAP_USE_VBN(bmap)) {
 575                dat = nilfs_bmap_get_dat(bmap);
 576                ret = nilfs_dat_translate(dat, ptr, &blocknr);
 577                if (ret < 0)
 578                        goto out;
 579                ptr = blocknr;
 580        }
 581        cnt = 1;
 582        if (cnt == maxblocks)
 583                goto end;
 584
 585        maxlevel = nilfs_btree_height(btree) - 1;
 586        node = nilfs_btree_get_node(btree, path, level);
 587        index = path[level].bp_index + 1;
 588        for (;;) {
 589                while (index < nilfs_btree_node_get_nchildren(node)) {
 590                        if (nilfs_btree_node_get_key(node, index) !=
 591                            key + cnt)
 592                                goto end;
 593                        ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
 594                        if (dat) {
 595                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 596                                if (ret < 0)
 597                                        goto out;
 598                                ptr2 = blocknr;
 599                        }
 600                        if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 601                                goto end;
 602                        index++;
 603                        continue;
 604                }
 605                if (level == maxlevel)
 606                        break;
 607
 608                /* look-up right sibling node */
 609                node = nilfs_btree_get_node(btree, path, level + 1);
 610                index = path[level + 1].bp_index + 1;
 611                if (index >= nilfs_btree_node_get_nchildren(node) ||
 612                    nilfs_btree_node_get_key(node, index) != key + cnt)
 613                        break;
 614                ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
 615                path[level + 1].bp_index = index;
 616
 617                brelse(path[level].bp_bh);
 618                path[level].bp_bh = NULL;
 619                ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
 620                if (ret < 0)
 621                        goto out;
 622                node = nilfs_btree_get_nonroot_node(path, level);
 623                index = 0;
 624                path[level].bp_index = index;
 625        }
 626 end:
 627        *ptrp = ptr;
 628        ret = cnt;
 629 out:
 630        nilfs_btree_release_path(path);
 631        nilfs_btree_free_path(path);
 632        return ret;
 633}
 634
 635static void nilfs_btree_promote_key(struct nilfs_btree *btree,
 636                                    struct nilfs_btree_path *path,
 637                                    int level, __u64 key)
 638{
 639        if (level < nilfs_btree_height(btree) - 1) {
 640                do {
 641                        lock_buffer(path[level].bp_bh);
 642                        nilfs_btree_node_set_key(
 643                                nilfs_btree_get_nonroot_node(path, level),
 644                                path[level].bp_index, key);
 645                        if (!buffer_dirty(path[level].bp_bh))
 646                                nilfs_btnode_mark_dirty(path[level].bp_bh);
 647                        unlock_buffer(path[level].bp_bh);
 648                } while ((path[level].bp_index == 0) &&
 649                         (++level < nilfs_btree_height(btree) - 1));
 650        }
 651
 652        /* root */
 653        if (level == nilfs_btree_height(btree) - 1) {
 654                nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 655                                         path[level].bp_index, key);
 656        }
 657}
 658
 659static void nilfs_btree_do_insert(struct nilfs_btree *btree,
 660                                  struct nilfs_btree_path *path,
 661                                  int level, __u64 *keyp, __u64 *ptrp)
 662{
 663        struct nilfs_btree_node *node;
 664
 665        if (level < nilfs_btree_height(btree) - 1) {
 666                lock_buffer(path[level].bp_bh);
 667                node = nilfs_btree_get_nonroot_node(path, level);
 668                nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
 669                                        path[level].bp_index);
 670                if (!buffer_dirty(path[level].bp_bh))
 671                        nilfs_btnode_mark_dirty(path[level].bp_bh);
 672                unlock_buffer(path[level].bp_bh);
 673
 674                if (path[level].bp_index == 0)
 675                        nilfs_btree_promote_key(btree, path, level + 1,
 676                                                nilfs_btree_node_get_key(node,
 677                                                                         0));
 678        } else {
 679                node = nilfs_btree_get_root(btree);
 680                nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
 681                                        path[level].bp_index);
 682        }
 683}
 684
 685static void nilfs_btree_carry_left(struct nilfs_btree *btree,
 686                                   struct nilfs_btree_path *path,
 687                                   int level, __u64 *keyp, __u64 *ptrp)
 688{
 689        struct nilfs_btree_node *node, *left;
 690        int nchildren, lnchildren, n, move;
 691
 692        lock_buffer(path[level].bp_bh);
 693        lock_buffer(path[level].bp_sib_bh);
 694
 695        node = nilfs_btree_get_nonroot_node(path, level);
 696        left = nilfs_btree_get_sib_node(path, level);
 697        nchildren = nilfs_btree_node_get_nchildren(node);
 698        lnchildren = nilfs_btree_node_get_nchildren(left);
 699        move = 0;
 700
 701        n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 702        if (n > path[level].bp_index) {
 703                /* move insert point */
 704                n--;
 705                move = 1;
 706        }
 707
 708        nilfs_btree_node_move_left(btree, left, node, n);
 709
 710        if (!buffer_dirty(path[level].bp_bh))
 711                nilfs_btnode_mark_dirty(path[level].bp_bh);
 712        if (!buffer_dirty(path[level].bp_sib_bh))
 713                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
 714
 715        unlock_buffer(path[level].bp_bh);
 716        unlock_buffer(path[level].bp_sib_bh);
 717
 718        nilfs_btree_promote_key(btree, path, level + 1,
 719                                nilfs_btree_node_get_key(node, 0));
 720
 721        if (move) {
 722                brelse(path[level].bp_bh);
 723                path[level].bp_bh = path[level].bp_sib_bh;
 724                path[level].bp_sib_bh = NULL;
 725                path[level].bp_index += lnchildren;
 726                path[level + 1].bp_index--;
 727        } else {
 728                brelse(path[level].bp_sib_bh);
 729                path[level].bp_sib_bh = NULL;
 730                path[level].bp_index -= n;
 731        }
 732
 733        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 734}
 735
 736static void nilfs_btree_carry_right(struct nilfs_btree *btree,
 737                                    struct nilfs_btree_path *path,
 738                                    int level, __u64 *keyp, __u64 *ptrp)
 739{
 740        struct nilfs_btree_node *node, *right;
 741        int nchildren, rnchildren, n, move;
 742
 743        lock_buffer(path[level].bp_bh);
 744        lock_buffer(path[level].bp_sib_bh);
 745
 746        node = nilfs_btree_get_nonroot_node(path, level);
 747        right = nilfs_btree_get_sib_node(path, level);
 748        nchildren = nilfs_btree_node_get_nchildren(node);
 749        rnchildren = nilfs_btree_node_get_nchildren(right);
 750        move = 0;
 751
 752        n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 753        if (n > nchildren - path[level].bp_index) {
 754                /* move insert point */
 755                n--;
 756                move = 1;
 757        }
 758
 759        nilfs_btree_node_move_right(btree, node, right, n);
 760
 761        if (!buffer_dirty(path[level].bp_bh))
 762                nilfs_btnode_mark_dirty(path[level].bp_bh);
 763        if (!buffer_dirty(path[level].bp_sib_bh))
 764                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
 765
 766        unlock_buffer(path[level].bp_bh);
 767        unlock_buffer(path[level].bp_sib_bh);
 768
 769        path[level + 1].bp_index++;
 770        nilfs_btree_promote_key(btree, path, level + 1,
 771                                nilfs_btree_node_get_key(right, 0));
 772        path[level + 1].bp_index--;
 773
 774        if (move) {
 775                brelse(path[level].bp_bh);
 776                path[level].bp_bh = path[level].bp_sib_bh;
 777                path[level].bp_sib_bh = NULL;
 778                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 779                path[level + 1].bp_index++;
 780        } else {
 781                brelse(path[level].bp_sib_bh);
 782                path[level].bp_sib_bh = NULL;
 783        }
 784
 785        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 786}
 787
 788static void nilfs_btree_split(struct nilfs_btree *btree,
 789                              struct nilfs_btree_path *path,
 790                              int level, __u64 *keyp, __u64 *ptrp)
 791{
 792        struct nilfs_btree_node *node, *right;
 793        __u64 newkey;
 794        __u64 newptr;
 795        int nchildren, n, move;
 796
 797        lock_buffer(path[level].bp_bh);
 798        lock_buffer(path[level].bp_sib_bh);
 799
 800        node = nilfs_btree_get_nonroot_node(path, level);
 801        right = nilfs_btree_get_sib_node(path, level);
 802        nchildren = nilfs_btree_node_get_nchildren(node);
 803        move = 0;
 804
 805        n = (nchildren + 1) / 2;
 806        if (n > nchildren - path[level].bp_index) {
 807                n--;
 808                move = 1;
 809        }
 810
 811        nilfs_btree_node_move_right(btree, node, right, n);
 812
 813        if (!buffer_dirty(path[level].bp_bh))
 814                nilfs_btnode_mark_dirty(path[level].bp_bh);
 815        if (!buffer_dirty(path[level].bp_sib_bh))
 816                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
 817
 818        unlock_buffer(path[level].bp_bh);
 819        unlock_buffer(path[level].bp_sib_bh);
 820
 821        newkey = nilfs_btree_node_get_key(right, 0);
 822        newptr = path[level].bp_newreq.bpr_ptr;
 823
 824        if (move) {
 825                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 826                nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
 827                                        path[level].bp_index);
 828
 829                *keyp = nilfs_btree_node_get_key(right, 0);
 830                *ptrp = path[level].bp_newreq.bpr_ptr;
 831
 832                brelse(path[level].bp_bh);
 833                path[level].bp_bh = path[level].bp_sib_bh;
 834                path[level].bp_sib_bh = NULL;
 835        } else {
 836                nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 837
 838                *keyp = nilfs_btree_node_get_key(right, 0);
 839                *ptrp = path[level].bp_newreq.bpr_ptr;
 840
 841                brelse(path[level].bp_sib_bh);
 842                path[level].bp_sib_bh = NULL;
 843        }
 844
 845        path[level + 1].bp_index++;
 846}
 847
 848static void nilfs_btree_grow(struct nilfs_btree *btree,
 849                             struct nilfs_btree_path *path,
 850                             int level, __u64 *keyp, __u64 *ptrp)
 851{
 852        struct nilfs_btree_node *root, *child;
 853        int n;
 854
 855        lock_buffer(path[level].bp_sib_bh);
 856
 857        root = nilfs_btree_get_root(btree);
 858        child = nilfs_btree_get_sib_node(path, level);
 859
 860        n = nilfs_btree_node_get_nchildren(root);
 861
 862        nilfs_btree_node_move_right(btree, root, child, n);
 863        nilfs_btree_node_set_level(root, level + 1);
 864
 865        if (!buffer_dirty(path[level].bp_sib_bh))
 866                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
 867
 868        unlock_buffer(path[level].bp_sib_bh);
 869
 870        path[level].bp_bh = path[level].bp_sib_bh;
 871        path[level].bp_sib_bh = NULL;
 872
 873        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 874
 875        *keyp = nilfs_btree_node_get_key(child, 0);
 876        *ptrp = path[level].bp_newreq.bpr_ptr;
 877}
 878
 879static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
 880                                   const struct nilfs_btree_path *path)
 881{
 882        struct nilfs_btree_node *node;
 883        int level;
 884
 885        if (path == NULL)
 886                return NILFS_BMAP_INVALID_PTR;
 887
 888        /* left sibling */
 889        level = NILFS_BTREE_LEVEL_NODE_MIN;
 890        if (path[level].bp_index > 0) {
 891                node = nilfs_btree_get_node(btree, path, level);
 892                return nilfs_btree_node_get_ptr(btree, node,
 893                                                path[level].bp_index - 1);
 894        }
 895
 896        /* parent */
 897        level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
 898        if (level <= nilfs_btree_height(btree) - 1) {
 899                node = nilfs_btree_get_node(btree, path, level);
 900                return nilfs_btree_node_get_ptr(btree, node,
 901                                                path[level].bp_index);
 902        }
 903
 904        return NILFS_BMAP_INVALID_PTR;
 905}
 906
 907static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
 908                                       const struct nilfs_btree_path *path,
 909                                       __u64 key)
 910{
 911        __u64 ptr;
 912
 913        ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
 914        if (ptr != NILFS_BMAP_INVALID_PTR)
 915                /* sequential access */
 916                return ptr;
 917        else {
 918                ptr = nilfs_btree_find_near(btree, path);
 919                if (ptr != NILFS_BMAP_INVALID_PTR)
 920                        /* near */
 921                        return ptr;
 922        }
 923        /* block group */
 924        return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
 925}
 926
 927static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
 928                                     __u64 ptr)
 929{
 930        btree->bt_bmap.b_last_allocated_key = key;
 931        btree->bt_bmap.b_last_allocated_ptr = ptr;
 932}
 933
 934static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
 935                                      struct nilfs_btree_path *path,
 936                                      int *levelp, __u64 key, __u64 ptr,
 937                                      struct nilfs_bmap_stats *stats)
 938{
 939        struct buffer_head *bh;
 940        struct nilfs_btree_node *node, *parent, *sib;
 941        __u64 sibptr;
 942        int pindex, level, ret;
 943        struct inode *dat = NULL;
 944
 945        stats->bs_nblocks = 0;
 946        level = NILFS_BTREE_LEVEL_DATA;
 947
 948        /* allocate a new ptr for data block */
 949        if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
 950                path[level].bp_newreq.bpr_ptr =
 951                        nilfs_btree_find_target_v(btree, path, key);
 952                dat = nilfs_bmap_get_dat(&btree->bt_bmap);
 953        }
 954
 955        ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
 956                                           &path[level].bp_newreq, dat);
 957        if (ret < 0)
 958                goto err_out_data;
 959
 960        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
 961             level < nilfs_btree_height(btree) - 1;
 962             level++) {
 963                node = nilfs_btree_get_nonroot_node(path, level);
 964                if (nilfs_btree_node_get_nchildren(node) <
 965                    nilfs_btree_node_nchildren_max(node, btree)) {
 966                        path[level].bp_op = nilfs_btree_do_insert;
 967                        stats->bs_nblocks++;
 968                        goto out;
 969                }
 970
 971                parent = nilfs_btree_get_node(btree, path, level + 1);
 972                pindex = path[level + 1].bp_index;
 973
 974                /* left sibling */
 975                if (pindex > 0) {
 976                        sibptr = nilfs_btree_node_get_ptr(btree, parent,
 977                                                          pindex - 1);
 978                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
 979                        if (ret < 0)
 980                                goto err_out_child_node;
 981                        sib = (struct nilfs_btree_node *)bh->b_data;
 982                        if (nilfs_btree_node_get_nchildren(sib) <
 983                            nilfs_btree_node_nchildren_max(sib, btree)) {
 984                                path[level].bp_sib_bh = bh;
 985                                path[level].bp_op = nilfs_btree_carry_left;
 986                                stats->bs_nblocks++;
 987                                goto out;
 988                        } else
 989                                brelse(bh);
 990                }
 991
 992                /* right sibling */
 993                if (pindex <
 994                    nilfs_btree_node_get_nchildren(parent) - 1) {
 995                        sibptr = nilfs_btree_node_get_ptr(btree, parent,
 996                                                          pindex + 1);
 997                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
 998                        if (ret < 0)
 999                                goto err_out_child_node;
1000                        sib = (struct nilfs_btree_node *)bh->b_data;
1001                        if (nilfs_btree_node_get_nchildren(sib) <
1002                            nilfs_btree_node_nchildren_max(sib, btree)) {
1003                                path[level].bp_sib_bh = bh;
1004                                path[level].bp_op = nilfs_btree_carry_right;
1005                                stats->bs_nblocks++;
1006                                goto out;
1007                        } else
1008                                brelse(bh);
1009                }
1010
1011                /* split */
1012                path[level].bp_newreq.bpr_ptr =
1013                        path[level - 1].bp_newreq.bpr_ptr + 1;
1014                ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1015                                                   &path[level].bp_newreq, dat);
1016                if (ret < 0)
1017                        goto err_out_child_node;
1018                ret = nilfs_btree_get_new_block(btree,
1019                                                path[level].bp_newreq.bpr_ptr,
1020                                                &bh);
1021                if (ret < 0)
1022                        goto err_out_curr_node;
1023
1024                stats->bs_nblocks++;
1025
1026                lock_buffer(bh);
1027                nilfs_btree_node_init(btree,
1028                                      (struct nilfs_btree_node *)bh->b_data,
1029                                      0, level, 0, NULL, NULL);
1030                unlock_buffer(bh);
1031                path[level].bp_sib_bh = bh;
1032                path[level].bp_op = nilfs_btree_split;
1033        }
1034
1035        /* root */
1036        node = nilfs_btree_get_root(btree);
1037        if (nilfs_btree_node_get_nchildren(node) <
1038            nilfs_btree_node_nchildren_max(node, btree)) {
1039                path[level].bp_op = nilfs_btree_do_insert;
1040                stats->bs_nblocks++;
1041                goto out;
1042        }
1043
1044        /* grow */
1045        path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046        ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047                                           &path[level].bp_newreq, dat);
1048        if (ret < 0)
1049                goto err_out_child_node;
1050        ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051                                        &bh);
1052        if (ret < 0)
1053                goto err_out_curr_node;
1054
1055        lock_buffer(bh);
1056        nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1057                              0, level, 0, NULL, NULL);
1058        unlock_buffer(bh);
1059        path[level].bp_sib_bh = bh;
1060        path[level].bp_op = nilfs_btree_grow;
1061
1062        level++;
1063        path[level].bp_op = nilfs_btree_do_insert;
1064
1065        /* a newly-created node block and a data block are added */
1066        stats->bs_nblocks += 2;
1067
1068        /* success */
1069 out:
1070        *levelp = level;
1071        return ret;
1072
1073        /* error */
1074 err_out_curr_node:
1075        nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1076                                   dat);
1077 err_out_child_node:
1078        for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1079                nilfs_btnode_delete(path[level].bp_sib_bh);
1080                nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1081                                           &path[level].bp_newreq, dat);
1082
1083        }
1084
1085        nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1086                                   dat);
1087 err_out_data:
1088        *levelp = level;
1089        stats->bs_nblocks = 0;
1090        return ret;
1091}
1092
1093static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1094                                      struct nilfs_btree_path *path,
1095                                      int maxlevel, __u64 key, __u64 ptr)
1096{
1097        struct inode *dat = NULL;
1098        int level;
1099
1100        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1101        ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1102        if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1103                nilfs_btree_set_target_v(btree, key, ptr);
1104                dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1105        }
1106
1107        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1108                nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1109                                            &path[level - 1].bp_newreq, dat);
1110                path[level].bp_op(btree, path, level, &key, &ptr);
1111        }
1112
1113        if (!nilfs_bmap_dirty(&btree->bt_bmap))
1114                nilfs_bmap_set_dirty(&btree->bt_bmap);
1115}
1116
1117static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1118{
1119        struct nilfs_btree *btree;
1120        struct nilfs_btree_path *path;
1121        struct nilfs_bmap_stats stats;
1122        int level, ret;
1123
1124        btree = (struct nilfs_btree *)bmap;
1125        path = nilfs_btree_alloc_path();
1126        if (path == NULL)
1127                return -ENOMEM;
1128        nilfs_btree_init_path(path);
1129
1130        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1131                                    NILFS_BTREE_LEVEL_NODE_MIN);
1132        if (ret != -ENOENT) {
1133                if (ret == 0)
1134                        ret = -EEXIST;
1135                goto out;
1136        }
1137
1138        ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1139        if (ret < 0)
1140                goto out;
1141        nilfs_btree_commit_insert(btree, path, level, key, ptr);
1142        nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1143
1144 out:
1145        nilfs_btree_release_path(path);
1146        nilfs_btree_free_path(path);
1147        return ret;
1148}
1149
1150static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1151                                  struct nilfs_btree_path *path,
1152                                  int level, __u64 *keyp, __u64 *ptrp)
1153{
1154        struct nilfs_btree_node *node;
1155
1156        if (level < nilfs_btree_height(btree) - 1) {
1157                lock_buffer(path[level].bp_bh);
1158                node = nilfs_btree_get_nonroot_node(path, level);
1159                nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160                                        path[level].bp_index);
1161                if (!buffer_dirty(path[level].bp_bh))
1162                        nilfs_btnode_mark_dirty(path[level].bp_bh);
1163                unlock_buffer(path[level].bp_bh);
1164                if (path[level].bp_index == 0)
1165                        nilfs_btree_promote_key(btree, path, level + 1,
1166                                nilfs_btree_node_get_key(node, 0));
1167        } else {
1168                node = nilfs_btree_get_root(btree);
1169                nilfs_btree_node_delete(btree, node, keyp, ptrp,
1170                                        path[level].bp_index);
1171        }
1172}
1173
1174static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1175                                    struct nilfs_btree_path *path,
1176                                    int level, __u64 *keyp, __u64 *ptrp)
1177{
1178        struct nilfs_btree_node *node, *left;
1179        int nchildren, lnchildren, n;
1180
1181        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1182
1183        lock_buffer(path[level].bp_bh);
1184        lock_buffer(path[level].bp_sib_bh);
1185
1186        node = nilfs_btree_get_nonroot_node(path, level);
1187        left = nilfs_btree_get_sib_node(path, level);
1188        nchildren = nilfs_btree_node_get_nchildren(node);
1189        lnchildren = nilfs_btree_node_get_nchildren(left);
1190
1191        n = (nchildren + lnchildren) / 2 - nchildren;
1192
1193        nilfs_btree_node_move_right(btree, left, node, n);
1194
1195        if (!buffer_dirty(path[level].bp_bh))
1196                nilfs_btnode_mark_dirty(path[level].bp_bh);
1197        if (!buffer_dirty(path[level].bp_sib_bh))
1198                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1199
1200        unlock_buffer(path[level].bp_bh);
1201        unlock_buffer(path[level].bp_sib_bh);
1202
1203        nilfs_btree_promote_key(btree, path, level + 1,
1204                                nilfs_btree_node_get_key(node, 0));
1205
1206        brelse(path[level].bp_sib_bh);
1207        path[level].bp_sib_bh = NULL;
1208        path[level].bp_index += n;
1209}
1210
1211static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1212                                     struct nilfs_btree_path *path,
1213                                     int level, __u64 *keyp, __u64 *ptrp)
1214{
1215        struct nilfs_btree_node *node, *right;
1216        int nchildren, rnchildren, n;
1217
1218        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219
1220        lock_buffer(path[level].bp_bh);
1221        lock_buffer(path[level].bp_sib_bh);
1222
1223        node = nilfs_btree_get_nonroot_node(path, level);
1224        right = nilfs_btree_get_sib_node(path, level);
1225        nchildren = nilfs_btree_node_get_nchildren(node);
1226        rnchildren = nilfs_btree_node_get_nchildren(right);
1227
1228        n = (nchildren + rnchildren) / 2 - nchildren;
1229
1230        nilfs_btree_node_move_left(btree, node, right, n);
1231
1232        if (!buffer_dirty(path[level].bp_bh))
1233                nilfs_btnode_mark_dirty(path[level].bp_bh);
1234        if (!buffer_dirty(path[level].bp_sib_bh))
1235                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1236
1237        unlock_buffer(path[level].bp_bh);
1238        unlock_buffer(path[level].bp_sib_bh);
1239
1240        path[level + 1].bp_index++;
1241        nilfs_btree_promote_key(btree, path, level + 1,
1242                                nilfs_btree_node_get_key(right, 0));
1243        path[level + 1].bp_index--;
1244
1245        brelse(path[level].bp_sib_bh);
1246        path[level].bp_sib_bh = NULL;
1247}
1248
1249static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1250                                    struct nilfs_btree_path *path,
1251                                    int level, __u64 *keyp, __u64 *ptrp)
1252{
1253        struct nilfs_btree_node *node, *left;
1254        int n;
1255
1256        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1257
1258        lock_buffer(path[level].bp_bh);
1259        lock_buffer(path[level].bp_sib_bh);
1260
1261        node = nilfs_btree_get_nonroot_node(path, level);
1262        left = nilfs_btree_get_sib_node(path, level);
1263
1264        n = nilfs_btree_node_get_nchildren(node);
1265
1266        nilfs_btree_node_move_left(btree, left, node, n);
1267
1268        if (!buffer_dirty(path[level].bp_sib_bh))
1269                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1270
1271        unlock_buffer(path[level].bp_bh);
1272        unlock_buffer(path[level].bp_sib_bh);
1273
1274        nilfs_btnode_delete(path[level].bp_bh);
1275        path[level].bp_bh = path[level].bp_sib_bh;
1276        path[level].bp_sib_bh = NULL;
1277        path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1278}
1279
1280static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1281                                     struct nilfs_btree_path *path,
1282                                     int level, __u64 *keyp, __u64 *ptrp)
1283{
1284        struct nilfs_btree_node *node, *right;
1285        int n;
1286
1287        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1288
1289        lock_buffer(path[level].bp_bh);
1290        lock_buffer(path[level].bp_sib_bh);
1291
1292        node = nilfs_btree_get_nonroot_node(path, level);
1293        right = nilfs_btree_get_sib_node(path, level);
1294
1295        n = nilfs_btree_node_get_nchildren(right);
1296
1297        nilfs_btree_node_move_left(btree, node, right, n);
1298
1299        if (!buffer_dirty(path[level].bp_bh))
1300                nilfs_btnode_mark_dirty(path[level].bp_bh);
1301
1302        unlock_buffer(path[level].bp_bh);
1303        unlock_buffer(path[level].bp_sib_bh);
1304
1305        nilfs_btnode_delete(path[level].bp_sib_bh);
1306        path[level].bp_sib_bh = NULL;
1307        path[level + 1].bp_index++;
1308}
1309
1310static void nilfs_btree_shrink(struct nilfs_btree *btree,
1311                               struct nilfs_btree_path *path,
1312                               int level, __u64 *keyp, __u64 *ptrp)
1313{
1314        struct nilfs_btree_node *root, *child;
1315        int n;
1316
1317        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1318
1319        lock_buffer(path[level].bp_bh);
1320        root = nilfs_btree_get_root(btree);
1321        child = nilfs_btree_get_nonroot_node(path, level);
1322
1323        nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1324        nilfs_btree_node_set_level(root, level);
1325        n = nilfs_btree_node_get_nchildren(child);
1326        nilfs_btree_node_move_left(btree, root, child, n);
1327        unlock_buffer(path[level].bp_bh);
1328
1329        nilfs_btnode_delete(path[level].bp_bh);
1330        path[level].bp_bh = NULL;
1331}
1332
1333
1334static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1335                                      struct nilfs_btree_path *path,
1336                                      int *levelp,
1337                                      struct nilfs_bmap_stats *stats,
1338                                      struct inode *dat)
1339{
1340        struct buffer_head *bh;
1341        struct nilfs_btree_node *node, *parent, *sib;
1342        __u64 sibptr;
1343        int pindex, level, ret;
1344
1345        ret = 0;
1346        stats->bs_nblocks = 0;
1347        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1348             level < nilfs_btree_height(btree) - 1;
1349             level++) {
1350                node = nilfs_btree_get_nonroot_node(path, level);
1351                path[level].bp_oldreq.bpr_ptr =
1352                        nilfs_btree_node_get_ptr(btree, node,
1353                                                 path[level].bp_index);
1354                ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1355                                                 &path[level].bp_oldreq, dat);
1356                if (ret < 0)
1357                        goto err_out_child_node;
1358
1359                if (nilfs_btree_node_get_nchildren(node) >
1360                    nilfs_btree_node_nchildren_min(node, btree)) {
1361                        path[level].bp_op = nilfs_btree_do_delete;
1362                        stats->bs_nblocks++;
1363                        goto out;
1364                }
1365
1366                parent = nilfs_btree_get_node(btree, path, level + 1);
1367                pindex = path[level + 1].bp_index;
1368
1369                if (pindex > 0) {
1370                        /* left sibling */
1371                        sibptr = nilfs_btree_node_get_ptr(btree, parent,
1372                                                          pindex - 1);
1373                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1374                        if (ret < 0)
1375                                goto err_out_curr_node;
1376                        sib = (struct nilfs_btree_node *)bh->b_data;
1377                        if (nilfs_btree_node_get_nchildren(sib) >
1378                            nilfs_btree_node_nchildren_min(sib, btree)) {
1379                                path[level].bp_sib_bh = bh;
1380                                path[level].bp_op = nilfs_btree_borrow_left;
1381                                stats->bs_nblocks++;
1382                                goto out;
1383                        } else {
1384                                path[level].bp_sib_bh = bh;
1385                                path[level].bp_op = nilfs_btree_concat_left;
1386                                stats->bs_nblocks++;
1387                                /* continue; */
1388                        }
1389                } else if (pindex <
1390                           nilfs_btree_node_get_nchildren(parent) - 1) {
1391                        /* right sibling */
1392                        sibptr = nilfs_btree_node_get_ptr(btree, parent,
1393                                                          pindex + 1);
1394                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1395                        if (ret < 0)
1396                                goto err_out_curr_node;
1397                        sib = (struct nilfs_btree_node *)bh->b_data;
1398                        if (nilfs_btree_node_get_nchildren(sib) >
1399                            nilfs_btree_node_nchildren_min(sib, btree)) {
1400                                path[level].bp_sib_bh = bh;
1401                                path[level].bp_op = nilfs_btree_borrow_right;
1402                                stats->bs_nblocks++;
1403                                goto out;
1404                        } else {
1405                                path[level].bp_sib_bh = bh;
1406                                path[level].bp_op = nilfs_btree_concat_right;
1407                                stats->bs_nblocks++;
1408                                /* continue; */
1409                        }
1410                } else {
1411                        /* no siblings */
1412                        /* the only child of the root node */
1413                        WARN_ON(level != nilfs_btree_height(btree) - 2);
1414                        if (nilfs_btree_node_get_nchildren(node) - 1 <=
1415                            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1416                                path[level].bp_op = nilfs_btree_shrink;
1417                                stats->bs_nblocks += 2;
1418                        } else {
1419                                path[level].bp_op = nilfs_btree_do_delete;
1420                                stats->bs_nblocks++;
1421                        }
1422
1423                        goto out;
1424
1425                }
1426        }
1427
1428        node = nilfs_btree_get_root(btree);
1429        path[level].bp_oldreq.bpr_ptr =
1430                nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1431
1432        ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1433                                         &path[level].bp_oldreq, dat);
1434        if (ret < 0)
1435                goto err_out_child_node;
1436
1437        /* child of the root node is deleted */
1438        path[level].bp_op = nilfs_btree_do_delete;
1439        stats->bs_nblocks++;
1440
1441        /* success */
1442 out:
1443        *levelp = level;
1444        return ret;
1445
1446        /* error */
1447 err_out_curr_node:
1448        nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1449 err_out_child_node:
1450        for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1451                brelse(path[level].bp_sib_bh);
1452                nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1453                                         &path[level].bp_oldreq, dat);
1454        }
1455        *levelp = level;
1456        stats->bs_nblocks = 0;
1457        return ret;
1458}
1459
1460static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1461                                      struct nilfs_btree_path *path,
1462                                      int maxlevel, struct inode *dat)
1463{
1464        int level;
1465
1466        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1467                nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1468                                          &path[level].bp_oldreq, dat);
1469                path[level].bp_op(btree, path, level, NULL, NULL);
1470        }
1471
1472        if (!nilfs_bmap_dirty(&btree->bt_bmap))
1473                nilfs_bmap_set_dirty(&btree->bt_bmap);
1474}
1475
1476static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1477
1478{
1479        struct nilfs_btree *btree;
1480        struct nilfs_btree_path *path;
1481        struct nilfs_bmap_stats stats;
1482        struct inode *dat;
1483        int level, ret;
1484
1485        btree = (struct nilfs_btree *)bmap;
1486        path = nilfs_btree_alloc_path();
1487        if (path == NULL)
1488                return -ENOMEM;
1489        nilfs_btree_init_path(path);
1490        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1491                                    NILFS_BTREE_LEVEL_NODE_MIN);
1492        if (ret < 0)
1493                goto out;
1494
1495
1496        dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1497                nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1498
1499        ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1500        if (ret < 0)
1501                goto out;
1502        nilfs_btree_commit_delete(btree, path, level, dat);
1503        nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1504
1505out:
1506        nilfs_btree_release_path(path);
1507        nilfs_btree_free_path(path);
1508        return ret;
1509}
1510
1511static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1512{
1513        struct nilfs_btree *btree;
1514        struct nilfs_btree_path *path;
1515        int ret;
1516
1517        btree = (struct nilfs_btree *)bmap;
1518        path = nilfs_btree_alloc_path();
1519        if (path == NULL)
1520                return -ENOMEM;
1521        nilfs_btree_init_path(path);
1522
1523        ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1524
1525        nilfs_btree_release_path(path);
1526        nilfs_btree_free_path(path);
1527
1528        return ret;
1529}
1530
1531static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1532{
1533        struct buffer_head *bh;
1534        struct nilfs_btree *btree;
1535        struct nilfs_btree_node *root, *node;
1536        __u64 maxkey, nextmaxkey;
1537        __u64 ptr;
1538        int nchildren, ret;
1539
1540        btree = (struct nilfs_btree *)bmap;
1541        root = nilfs_btree_get_root(btree);
1542        switch (nilfs_btree_height(btree)) {
1543        case 2:
1544                bh = NULL;
1545                node = root;
1546                break;
1547        case 3:
1548                nchildren = nilfs_btree_node_get_nchildren(root);
1549                if (nchildren > 1)
1550                        return 0;
1551                ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1552                ret = nilfs_btree_get_block(btree, ptr, &bh);
1553                if (ret < 0)
1554                        return ret;
1555                node = (struct nilfs_btree_node *)bh->b_data;
1556                break;
1557        default:
1558                return 0;
1559        }
1560
1561        nchildren = nilfs_btree_node_get_nchildren(node);
1562        maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1563        nextmaxkey = (nchildren > 1) ?
1564                nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1565        if (bh != NULL)
1566                brelse(bh);
1567
1568        return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1569}
1570
1571static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1572                                   __u64 *keys, __u64 *ptrs, int nitems)
1573{
1574        struct buffer_head *bh;
1575        struct nilfs_btree *btree;
1576        struct nilfs_btree_node *node, *root;
1577        __le64 *dkeys;
1578        __le64 *dptrs;
1579        __u64 ptr;
1580        int nchildren, i, ret;
1581
1582        btree = (struct nilfs_btree *)bmap;
1583        root = nilfs_btree_get_root(btree);
1584        switch (nilfs_btree_height(btree)) {
1585        case 2:
1586                bh = NULL;
1587                node = root;
1588                break;
1589        case 3:
1590                nchildren = nilfs_btree_node_get_nchildren(root);
1591                WARN_ON(nchildren > 1);
1592                ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1593                ret = nilfs_btree_get_block(btree, ptr, &bh);
1594                if (ret < 0)
1595                        return ret;
1596                node = (struct nilfs_btree_node *)bh->b_data;
1597                break;
1598        default:
1599                node = NULL;
1600                return -EINVAL;
1601        }
1602
1603        nchildren = nilfs_btree_node_get_nchildren(node);
1604        if (nchildren < nitems)
1605                nitems = nchildren;
1606        dkeys = nilfs_btree_node_dkeys(node);
1607        dptrs = nilfs_btree_node_dptrs(node, btree);
1608        for (i = 0; i < nitems; i++) {
1609                keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1610                ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1611        }
1612
1613        if (bh != NULL)
1614                brelse(bh);
1615
1616        return nitems;
1617}
1618
1619static int
1620nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1621                                       union nilfs_bmap_ptr_req *dreq,
1622                                       union nilfs_bmap_ptr_req *nreq,
1623                                       struct buffer_head **bhp,
1624                                       struct nilfs_bmap_stats *stats)
1625{
1626        struct buffer_head *bh;
1627        struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1628        struct inode *dat = NULL;
1629        int ret;
1630
1631        stats->bs_nblocks = 0;
1632
1633        /* for data */
1634        /* cannot find near ptr */
1635        if (NILFS_BMAP_USE_VBN(bmap)) {
1636                dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1637                dat = nilfs_bmap_get_dat(bmap);
1638        }
1639
1640        ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1641        if (ret < 0)
1642                return ret;
1643
1644        *bhp = NULL;
1645        stats->bs_nblocks++;
1646        if (nreq != NULL) {
1647                nreq->bpr_ptr = dreq->bpr_ptr + 1;
1648                ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1649                if (ret < 0)
1650                        goto err_out_dreq;
1651
1652                ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1653                if (ret < 0)
1654                        goto err_out_nreq;
1655
1656                *bhp = bh;
1657                stats->bs_nblocks++;
1658        }
1659
1660        /* success */
1661        return 0;
1662
1663        /* error */
1664 err_out_nreq:
1665        nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1666 err_out_dreq:
1667        nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1668        stats->bs_nblocks = 0;
1669        return ret;
1670
1671}
1672
1673static void
1674nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1675                                      __u64 key, __u64 ptr,
1676                                      const __u64 *keys, const __u64 *ptrs,
1677                                      int n,
1678                                      union nilfs_bmap_ptr_req *dreq,
1679                                      union nilfs_bmap_ptr_req *nreq,
1680                                      struct buffer_head *bh)
1681{
1682        struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1683        struct nilfs_btree_node *node;
1684        struct inode *dat;
1685        __u64 tmpptr;
1686
1687        /* free resources */
1688        if (bmap->b_ops->bop_clear != NULL)
1689                bmap->b_ops->bop_clear(bmap);
1690
1691        /* ptr must be a pointer to a buffer head. */
1692        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1693
1694        /* convert and insert */
1695        dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1696        nilfs_btree_init(bmap);
1697        if (nreq != NULL) {
1698                nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1699                nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1700
1701                /* create child node at level 1 */
1702                lock_buffer(bh);
1703                node = (struct nilfs_btree_node *)bh->b_data;
1704                nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1705                nilfs_btree_node_insert(btree, node,
1706                                        key, dreq->bpr_ptr, n);
1707                if (!buffer_dirty(bh))
1708                        nilfs_btnode_mark_dirty(bh);
1709                if (!nilfs_bmap_dirty(bmap))
1710                        nilfs_bmap_set_dirty(bmap);
1711
1712                unlock_buffer(bh);
1713                brelse(bh);
1714
1715                /* create root node at level 2 */
1716                node = nilfs_btree_get_root(btree);
1717                tmpptr = nreq->bpr_ptr;
1718                nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1719                                      2, 1, &keys[0], &tmpptr);
1720        } else {
1721                nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1722
1723                /* create root node at level 1 */
1724                node = nilfs_btree_get_root(btree);
1725                nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1726                                      1, n, keys, ptrs);
1727                nilfs_btree_node_insert(btree, node,
1728                                        key, dreq->bpr_ptr, n);
1729                if (!nilfs_bmap_dirty(bmap))
1730                        nilfs_bmap_set_dirty(bmap);
1731        }
1732
1733        if (NILFS_BMAP_USE_VBN(bmap))
1734                nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1735}
1736
1737/**
1738 * nilfs_btree_convert_and_insert -
1739 * @bmap:
1740 * @key:
1741 * @ptr:
1742 * @keys:
1743 * @ptrs:
1744 * @n:
1745 */
1746int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1747                                   __u64 key, __u64 ptr,
1748                                   const __u64 *keys, const __u64 *ptrs, int n)
1749{
1750        struct buffer_head *bh;
1751        union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1752        struct nilfs_bmap_stats stats;
1753        int ret;
1754
1755        if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1756                di = &dreq;
1757                ni = NULL;
1758        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1759                           1 << bmap->b_inode->i_blkbits)) {
1760                di = &dreq;
1761                ni = &nreq;
1762        } else {
1763                di = NULL;
1764                ni = NULL;
1765                BUG();
1766        }
1767
1768        ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1769                                                     &stats);
1770        if (ret < 0)
1771                return ret;
1772        nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1773                                              di, ni, bh);
1774        nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1775        return 0;
1776}
1777
1778static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1779                                   struct nilfs_btree_path *path,
1780                                   int level,
1781                                   struct buffer_head *bh)
1782{
1783        while ((++level < nilfs_btree_height(btree) - 1) &&
1784               !buffer_dirty(path[level].bp_bh))
1785                nilfs_btnode_mark_dirty(path[level].bp_bh);
1786
1787        return 0;
1788}
1789
1790static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1791                                        struct nilfs_btree_path *path,
1792                                        int level, struct inode *dat)
1793{
1794        struct nilfs_btree_node *parent;
1795        int ret;
1796
1797        parent = nilfs_btree_get_node(btree, path, level + 1);
1798        path[level].bp_oldreq.bpr_ptr =
1799                nilfs_btree_node_get_ptr(btree, parent,
1800                                         path[level + 1].bp_index);
1801        path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1802        ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1803                                       &path[level].bp_newreq.bpr_req);
1804        if (ret < 0)
1805                return ret;
1806
1807        if (buffer_nilfs_node(path[level].bp_bh)) {
1808                path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1809                path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1810                path[level].bp_ctxt.bh = path[level].bp_bh;
1811                ret = nilfs_btnode_prepare_change_key(
1812                        &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1813                        &path[level].bp_ctxt);
1814                if (ret < 0) {
1815                        nilfs_dat_abort_update(dat,
1816                                               &path[level].bp_oldreq.bpr_req,
1817                                               &path[level].bp_newreq.bpr_req);
1818                        return ret;
1819                }
1820        }
1821
1822        return 0;
1823}
1824
1825static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1826                                        struct nilfs_btree_path *path,
1827                                        int level, struct inode *dat)
1828{
1829        struct nilfs_btree_node *parent;
1830
1831        nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1832                                &path[level].bp_newreq.bpr_req,
1833                                btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1834
1835        if (buffer_nilfs_node(path[level].bp_bh)) {
1836                nilfs_btnode_commit_change_key(
1837                        &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1838                        &path[level].bp_ctxt);
1839                path[level].bp_bh = path[level].bp_ctxt.bh;
1840        }
1841        set_buffer_nilfs_volatile(path[level].bp_bh);
1842
1843        parent = nilfs_btree_get_node(btree, path, level + 1);
1844        nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1845                                 path[level].bp_newreq.bpr_ptr);
1846}
1847
1848static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1849                                       struct nilfs_btree_path *path,
1850                                       int level, struct inode *dat)
1851{
1852        nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1853                               &path[level].bp_newreq.bpr_req);
1854        if (buffer_nilfs_node(path[level].bp_bh))
1855                nilfs_btnode_abort_change_key(
1856                        &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1857                        &path[level].bp_ctxt);
1858}
1859
1860static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1861                                           struct nilfs_btree_path *path,
1862                                           int minlevel, int *maxlevelp,
1863                                           struct inode *dat)
1864{
1865        int level, ret;
1866
1867        level = minlevel;
1868        if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1869                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1870                if (ret < 0)
1871                        return ret;
1872        }
1873        while ((++level < nilfs_btree_height(btree) - 1) &&
1874               !buffer_dirty(path[level].bp_bh)) {
1875
1876                WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1877                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1878                if (ret < 0)
1879                        goto out;
1880        }
1881
1882        /* success */
1883        *maxlevelp = level - 1;
1884        return 0;
1885
1886        /* error */
1887 out:
1888        while (--level > minlevel)
1889                nilfs_btree_abort_update_v(btree, path, level, dat);
1890        if (!buffer_nilfs_volatile(path[level].bp_bh))
1891                nilfs_btree_abort_update_v(btree, path, level, dat);
1892        return ret;
1893}
1894
1895static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1896                                           struct nilfs_btree_path *path,
1897                                           int minlevel, int maxlevel,
1898                                           struct buffer_head *bh,
1899                                           struct inode *dat)
1900{
1901        int level;
1902
1903        if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1904                nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1905
1906        for (level = minlevel + 1; level <= maxlevel; level++)
1907                nilfs_btree_commit_update_v(btree, path, level, dat);
1908}
1909
1910static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1911                                   struct nilfs_btree_path *path,
1912                                   int level, struct buffer_head *bh)
1913{
1914        int maxlevel, ret;
1915        struct nilfs_btree_node *parent;
1916        struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1917        __u64 ptr;
1918
1919        get_bh(bh);
1920        path[level].bp_bh = bh;
1921        ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1922                                              dat);
1923        if (ret < 0)
1924                goto out;
1925
1926        if (buffer_nilfs_volatile(path[level].bp_bh)) {
1927                parent = nilfs_btree_get_node(btree, path, level + 1);
1928                ptr = nilfs_btree_node_get_ptr(btree, parent,
1929                                               path[level + 1].bp_index);
1930                ret = nilfs_dat_mark_dirty(dat, ptr);
1931                if (ret < 0)
1932                        goto out;
1933        }
1934
1935        nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1936
1937 out:
1938        brelse(path[level].bp_bh);
1939        path[level].bp_bh = NULL;
1940        return ret;
1941}
1942
1943static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1944                                 struct buffer_head *bh)
1945{
1946        struct nilfs_btree *btree;
1947        struct nilfs_btree_path *path;
1948        struct nilfs_btree_node *node;
1949        __u64 key;
1950        int level, ret;
1951
1952        WARN_ON(!buffer_dirty(bh));
1953
1954        btree = (struct nilfs_btree *)bmap;
1955        path = nilfs_btree_alloc_path();
1956        if (path == NULL)
1957                return -ENOMEM;
1958        nilfs_btree_init_path(path);
1959
1960        if (buffer_nilfs_node(bh)) {
1961                node = (struct nilfs_btree_node *)bh->b_data;
1962                key = nilfs_btree_node_get_key(node, 0);
1963                level = nilfs_btree_node_get_level(node);
1964        } else {
1965                key = nilfs_bmap_data_get_key(bmap, bh);
1966                level = NILFS_BTREE_LEVEL_DATA;
1967        }
1968
1969        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1970        if (ret < 0) {
1971                if (unlikely(ret == -ENOENT))
1972                        printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1973                               __func__, (unsigned long long)key, level);
1974                goto out;
1975        }
1976
1977        ret = NILFS_BMAP_USE_VBN(bmap) ?
1978                nilfs_btree_propagate_v(btree, path, level, bh) :
1979                nilfs_btree_propagate_p(btree, path, level, bh);
1980
1981 out:
1982        nilfs_btree_release_path(path);
1983        nilfs_btree_free_path(path);
1984
1985        return ret;
1986}
1987
1988static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1989                                    struct buffer_head *bh)
1990{
1991        return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1992}
1993
1994static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1995                                         struct list_head *lists,
1996                                         struct buffer_head *bh)
1997{
1998        struct list_head *head;
1999        struct buffer_head *cbh;
2000        struct nilfs_btree_node *node, *cnode;
2001        __u64 key, ckey;
2002        int level;
2003
2004        get_bh(bh);
2005        node = (struct nilfs_btree_node *)bh->b_data;
2006        key = nilfs_btree_node_get_key(node, 0);
2007        level = nilfs_btree_node_get_level(node);
2008        list_for_each(head, &lists[level]) {
2009                cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2010                cnode = (struct nilfs_btree_node *)cbh->b_data;
2011                ckey = nilfs_btree_node_get_key(cnode, 0);
2012                if (key < ckey)
2013                        break;
2014        }
2015        list_add_tail(&bh->b_assoc_buffers, head);
2016}
2017
2018static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2019                                             struct list_head *listp)
2020{
2021        struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2022        struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2023        struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2024        struct pagevec pvec;
2025        struct buffer_head *bh, *head;
2026        pgoff_t index = 0;
2027        int level, i;
2028
2029        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2030             level < NILFS_BTREE_LEVEL_MAX;
2031             level++)
2032                INIT_LIST_HEAD(&lists[level]);
2033
2034        pagevec_init(&pvec, 0);
2035
2036        while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2037                                  PAGEVEC_SIZE)) {
2038                for (i = 0; i < pagevec_count(&pvec); i++) {
2039                        bh = head = page_buffers(pvec.pages[i]);
2040                        do {
2041                                if (buffer_dirty(bh))
2042                                        nilfs_btree_add_dirty_buffer(btree,
2043                                                                     lists, bh);
2044                        } while ((bh = bh->b_this_page) != head);
2045                }
2046                pagevec_release(&pvec);
2047                cond_resched();
2048        }
2049
2050        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2051             level < NILFS_BTREE_LEVEL_MAX;
2052             level++)
2053                list_splice(&lists[level], listp->prev);
2054}
2055
2056static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2057                                struct nilfs_btree_path *path,
2058                                int level,
2059                                struct buffer_head **bh,
2060                                sector_t blocknr,
2061                                union nilfs_binfo *binfo)
2062{
2063        struct nilfs_btree_node *parent;
2064        __u64 key;
2065        __u64 ptr;
2066        int ret;
2067
2068        parent = nilfs_btree_get_node(btree, path, level + 1);
2069        ptr = nilfs_btree_node_get_ptr(btree, parent,
2070                                       path[level + 1].bp_index);
2071        if (buffer_nilfs_node(*bh)) {
2072                path[level].bp_ctxt.oldkey = ptr;
2073                path[level].bp_ctxt.newkey = blocknr;
2074                path[level].bp_ctxt.bh = *bh;
2075                ret = nilfs_btnode_prepare_change_key(
2076                        &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2077                        &path[level].bp_ctxt);
2078                if (ret < 0)
2079                        return ret;
2080                nilfs_btnode_commit_change_key(
2081                        &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2082                        &path[level].bp_ctxt);
2083                *bh = path[level].bp_ctxt.bh;
2084        }
2085
2086        nilfs_btree_node_set_ptr(btree, parent,
2087                                 path[level + 1].bp_index, blocknr);
2088
2089        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2090        /* on-disk format */
2091        binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2092        binfo->bi_dat.bi_level = level;
2093
2094        return 0;
2095}
2096
2097static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2098                                struct nilfs_btree_path *path,
2099                                int level,
2100                                struct buffer_head **bh,
2101                                sector_t blocknr,
2102                                union nilfs_binfo *binfo)
2103{
2104        struct nilfs_btree_node *parent;
2105        struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2106        __u64 key;
2107        __u64 ptr;
2108        union nilfs_bmap_ptr_req req;
2109        int ret;
2110
2111        parent = nilfs_btree_get_node(btree, path, level + 1);
2112        ptr = nilfs_btree_node_get_ptr(btree, parent,
2113                                       path[level + 1].bp_index);
2114        req.bpr_ptr = ptr;
2115        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2116        if (ret < 0)
2117                return ret;
2118        nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2119
2120        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2121        /* on-disk format */
2122        binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2123        binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2124
2125        return 0;
2126}
2127
2128static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2129                              struct buffer_head **bh,
2130                              sector_t blocknr,
2131                              union nilfs_binfo *binfo)
2132{
2133        struct nilfs_btree *btree;
2134        struct nilfs_btree_path *path;
2135        struct nilfs_btree_node *node;
2136        __u64 key;
2137        int level, ret;
2138
2139        btree = (struct nilfs_btree *)bmap;
2140        path = nilfs_btree_alloc_path();
2141        if (path == NULL)
2142                return -ENOMEM;
2143        nilfs_btree_init_path(path);
2144
2145        if (buffer_nilfs_node(*bh)) {
2146                node = (struct nilfs_btree_node *)(*bh)->b_data;
2147                key = nilfs_btree_node_get_key(node, 0);
2148                level = nilfs_btree_node_get_level(node);
2149        } else {
2150                key = nilfs_bmap_data_get_key(bmap, *bh);
2151                level = NILFS_BTREE_LEVEL_DATA;
2152        }
2153
2154        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2155        if (ret < 0) {
2156                WARN_ON(ret == -ENOENT);
2157                goto out;
2158        }
2159
2160        ret = NILFS_BMAP_USE_VBN(bmap) ?
2161                nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2162                nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2163
2164 out:
2165        nilfs_btree_release_path(path);
2166        nilfs_btree_free_path(path);
2167
2168        return ret;
2169}
2170
2171static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2172                                 struct buffer_head **bh,
2173                                 sector_t blocknr,
2174                                 union nilfs_binfo *binfo)
2175{
2176        struct nilfs_btree_node *node;
2177        __u64 key;
2178        int ret;
2179
2180        ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2181                             blocknr);
2182        if (ret < 0)
2183                return ret;
2184
2185        if (buffer_nilfs_node(*bh)) {
2186                node = (struct nilfs_btree_node *)(*bh)->b_data;
2187                key = nilfs_btree_node_get_key(node, 0);
2188        } else
2189                key = nilfs_bmap_data_get_key(bmap, *bh);
2190
2191        /* on-disk format */
2192        binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2193        binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2194
2195        return 0;
2196}
2197
2198static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2199{
2200        struct buffer_head *bh;
2201        struct nilfs_btree *btree;
2202        struct nilfs_btree_path *path;
2203        __u64 ptr;
2204        int ret;
2205
2206        btree = (struct nilfs_btree *)bmap;
2207        path = nilfs_btree_alloc_path();
2208        if (path == NULL)
2209                return -ENOMEM;
2210        nilfs_btree_init_path(path);
2211
2212        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2213        if (ret < 0) {
2214                WARN_ON(ret == -ENOENT);
2215                goto out;
2216        }
2217        ret = nilfs_btree_get_block(btree, ptr, &bh);
2218        if (ret < 0) {
2219                WARN_ON(ret == -ENOENT);
2220                goto out;
2221        }
2222
2223        if (!buffer_dirty(bh))
2224                nilfs_btnode_mark_dirty(bh);
2225        brelse(bh);
2226        if (!nilfs_bmap_dirty(&btree->bt_bmap))
2227                nilfs_bmap_set_dirty(&btree->bt_bmap);
2228
2229 out:
2230        nilfs_btree_release_path(path);
2231        nilfs_btree_free_path(path);
2232        return ret;
2233}
2234
2235static const struct nilfs_bmap_operations nilfs_btree_ops = {
2236        .bop_lookup             =       nilfs_btree_lookup,
2237        .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2238        .bop_insert             =       nilfs_btree_insert,
2239        .bop_delete             =       nilfs_btree_delete,
2240        .bop_clear              =       NULL,
2241
2242        .bop_propagate          =       nilfs_btree_propagate,
2243
2244        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2245
2246        .bop_assign             =       nilfs_btree_assign,
2247        .bop_mark               =       nilfs_btree_mark,
2248
2249        .bop_last_key           =       nilfs_btree_last_key,
2250        .bop_check_insert       =       NULL,
2251        .bop_check_delete       =       nilfs_btree_check_delete,
2252        .bop_gather_data        =       nilfs_btree_gather_data,
2253};
2254
2255static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2256        .bop_lookup             =       NULL,
2257        .bop_lookup_contig      =       NULL,
2258        .bop_insert             =       NULL,
2259        .bop_delete             =       NULL,
2260        .bop_clear              =       NULL,
2261
2262        .bop_propagate          =       nilfs_btree_propagate_gc,
2263
2264        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2265
2266        .bop_assign             =       nilfs_btree_assign_gc,
2267        .bop_mark               =       NULL,
2268
2269        .bop_last_key           =       NULL,
2270        .bop_check_insert       =       NULL,
2271        .bop_check_delete       =       NULL,
2272        .bop_gather_data        =       NULL,
2273};
2274
2275int nilfs_btree_init(struct nilfs_bmap *bmap)
2276{
2277        bmap->b_ops = &nilfs_btree_ops;
2278        return 0;
2279}
2280
2281void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2282{
2283        bmap->b_ops = &nilfs_btree_ops_gc;
2284}
2285