linux/fs/nilfs2/btree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * btree.c - NILFS B-tree.
   4 *
   5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   6 *
   7 * Written by Koji Sato.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/pagevec.h>
  14#include "nilfs.h"
  15#include "page.h"
  16#include "btnode.h"
  17#include "btree.h"
  18#include "alloc.h"
  19#include "dat.h"
  20
  21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  22
  23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  24{
  25        struct nilfs_btree_path *path;
  26        int level = NILFS_BTREE_LEVEL_DATA;
  27
  28        path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  29        if (path == NULL)
  30                goto out;
  31
  32        for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  33                path[level].bp_bh = NULL;
  34                path[level].bp_sib_bh = NULL;
  35                path[level].bp_index = 0;
  36                path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  37                path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  38                path[level].bp_op = NULL;
  39        }
  40
  41out:
  42        return path;
  43}
  44
  45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  46{
  47        int level = NILFS_BTREE_LEVEL_DATA;
  48
  49        for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  50                brelse(path[level].bp_bh);
  51
  52        kmem_cache_free(nilfs_btree_path_cache, path);
  53}
  54
  55/*
  56 * B-tree node operations
  57 */
  58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  59                                     __u64 ptr, struct buffer_head **bhp)
  60{
  61        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  62        struct buffer_head *bh;
  63
  64        bh = nilfs_btnode_create_block(btnc, ptr);
  65        if (!bh)
  66                return -ENOMEM;
  67
  68        set_buffer_nilfs_volatile(bh);
  69        *bhp = bh;
  70        return 0;
  71}
  72
  73static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  74{
  75        return node->bn_flags;
  76}
  77
  78static void
  79nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  80{
  81        node->bn_flags = flags;
  82}
  83
  84static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  85{
  86        return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  87}
  88
  89static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  90{
  91        return node->bn_level;
  92}
  93
  94static void
  95nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
  96{
  97        node->bn_level = level;
  98}
  99
 100static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 101{
 102        return le16_to_cpu(node->bn_nchildren);
 103}
 104
 105static void
 106nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 107{
 108        node->bn_nchildren = cpu_to_le16(nchildren);
 109}
 110
 111static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 112{
 113        return i_blocksize(btree->b_inode);
 114}
 115
 116static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 117{
 118        return btree->b_nchildren_per_block;
 119}
 120
 121static __le64 *
 122nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 123{
 124        return (__le64 *)((char *)(node + 1) +
 125                          (nilfs_btree_node_root(node) ?
 126                           0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 127}
 128
 129static __le64 *
 130nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 131{
 132        return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 133}
 134
 135static __u64
 136nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 137{
 138        return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 139}
 140
 141static void
 142nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 143{
 144        *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 145}
 146
 147static __u64
 148nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 149                         int ncmax)
 150{
 151        return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 152}
 153
 154static void
 155nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 156                         int ncmax)
 157{
 158        *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 159}
 160
 161static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 162                                  int level, int nchildren, int ncmax,
 163                                  const __u64 *keys, const __u64 *ptrs)
 164{
 165        __le64 *dkeys;
 166        __le64 *dptrs;
 167        int i;
 168
 169        nilfs_btree_node_set_flags(node, flags);
 170        nilfs_btree_node_set_level(node, level);
 171        nilfs_btree_node_set_nchildren(node, nchildren);
 172
 173        dkeys = nilfs_btree_node_dkeys(node);
 174        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 175        for (i = 0; i < nchildren; i++) {
 176                dkeys[i] = cpu_to_le64(keys[i]);
 177                dptrs[i] = cpu_to_le64(ptrs[i]);
 178        }
 179}
 180
 181/* Assume the buffer heads corresponding to left and right are locked. */
 182static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 183                                       struct nilfs_btree_node *right,
 184                                       int n, int lncmax, int rncmax)
 185{
 186        __le64 *ldkeys, *rdkeys;
 187        __le64 *ldptrs, *rdptrs;
 188        int lnchildren, rnchildren;
 189
 190        ldkeys = nilfs_btree_node_dkeys(left);
 191        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 192        lnchildren = nilfs_btree_node_get_nchildren(left);
 193
 194        rdkeys = nilfs_btree_node_dkeys(right);
 195        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 196        rnchildren = nilfs_btree_node_get_nchildren(right);
 197
 198        memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 199        memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 200        memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 201        memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 202
 203        lnchildren += n;
 204        rnchildren -= n;
 205        nilfs_btree_node_set_nchildren(left, lnchildren);
 206        nilfs_btree_node_set_nchildren(right, rnchildren);
 207}
 208
 209/* Assume that the buffer heads corresponding to left and right are locked. */
 210static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 211                                        struct nilfs_btree_node *right,
 212                                        int n, int lncmax, int rncmax)
 213{
 214        __le64 *ldkeys, *rdkeys;
 215        __le64 *ldptrs, *rdptrs;
 216        int lnchildren, rnchildren;
 217
 218        ldkeys = nilfs_btree_node_dkeys(left);
 219        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 220        lnchildren = nilfs_btree_node_get_nchildren(left);
 221
 222        rdkeys = nilfs_btree_node_dkeys(right);
 223        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 224        rnchildren = nilfs_btree_node_get_nchildren(right);
 225
 226        memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 227        memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 228        memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 229        memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 230
 231        lnchildren -= n;
 232        rnchildren += n;
 233        nilfs_btree_node_set_nchildren(left, lnchildren);
 234        nilfs_btree_node_set_nchildren(right, rnchildren);
 235}
 236
 237/* Assume that the buffer head corresponding to node is locked. */
 238static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 239                                    __u64 key, __u64 ptr, int ncmax)
 240{
 241        __le64 *dkeys;
 242        __le64 *dptrs;
 243        int nchildren;
 244
 245        dkeys = nilfs_btree_node_dkeys(node);
 246        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 247        nchildren = nilfs_btree_node_get_nchildren(node);
 248        if (index < nchildren) {
 249                memmove(dkeys + index + 1, dkeys + index,
 250                        (nchildren - index) * sizeof(*dkeys));
 251                memmove(dptrs + index + 1, dptrs + index,
 252                        (nchildren - index) * sizeof(*dptrs));
 253        }
 254        dkeys[index] = cpu_to_le64(key);
 255        dptrs[index] = cpu_to_le64(ptr);
 256        nchildren++;
 257        nilfs_btree_node_set_nchildren(node, nchildren);
 258}
 259
 260/* Assume that the buffer head corresponding to node is locked. */
 261static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 262                                    __u64 *keyp, __u64 *ptrp, int ncmax)
 263{
 264        __u64 key;
 265        __u64 ptr;
 266        __le64 *dkeys;
 267        __le64 *dptrs;
 268        int nchildren;
 269
 270        dkeys = nilfs_btree_node_dkeys(node);
 271        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 272        key = le64_to_cpu(dkeys[index]);
 273        ptr = le64_to_cpu(dptrs[index]);
 274        nchildren = nilfs_btree_node_get_nchildren(node);
 275        if (keyp != NULL)
 276                *keyp = key;
 277        if (ptrp != NULL)
 278                *ptrp = ptr;
 279
 280        if (index < nchildren - 1) {
 281                memmove(dkeys + index, dkeys + index + 1,
 282                        (nchildren - index - 1) * sizeof(*dkeys));
 283                memmove(dptrs + index, dptrs + index + 1,
 284                        (nchildren - index - 1) * sizeof(*dptrs));
 285        }
 286        nchildren--;
 287        nilfs_btree_node_set_nchildren(node, nchildren);
 288}
 289
 290static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 291                                   __u64 key, int *indexp)
 292{
 293        __u64 nkey;
 294        int index, low, high, s;
 295
 296        /* binary search */
 297        low = 0;
 298        high = nilfs_btree_node_get_nchildren(node) - 1;
 299        index = 0;
 300        s = 0;
 301        while (low <= high) {
 302                index = (low + high) / 2;
 303                nkey = nilfs_btree_node_get_key(node, index);
 304                if (nkey == key) {
 305                        s = 0;
 306                        goto out;
 307                } else if (nkey < key) {
 308                        low = index + 1;
 309                        s = -1;
 310                } else {
 311                        high = index - 1;
 312                        s = 1;
 313                }
 314        }
 315
 316        /* adjust index */
 317        if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 318                if (s > 0 && index > 0)
 319                        index--;
 320        } else if (s < 0)
 321                index++;
 322
 323 out:
 324        *indexp = index;
 325
 326        return s == 0;
 327}
 328
 329/**
 330 * nilfs_btree_node_broken - verify consistency of btree node
 331 * @node: btree node block to be examined
 332 * @size: node size (in bytes)
 333 * @inode: host inode of btree
 334 * @blocknr: block number
 335 *
 336 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 337 */
 338static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 339                                   size_t size, struct inode *inode,
 340                                   sector_t blocknr)
 341{
 342        int level, flags, nchildren;
 343        int ret = 0;
 344
 345        level = nilfs_btree_node_get_level(node);
 346        flags = nilfs_btree_node_get_flags(node);
 347        nchildren = nilfs_btree_node_get_nchildren(node);
 348
 349        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 350                     level >= NILFS_BTREE_LEVEL_MAX ||
 351                     (flags & NILFS_BTREE_NODE_ROOT) ||
 352                     nchildren < 0 ||
 353                     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 354                nilfs_msg(inode->i_sb, KERN_CRIT,
 355                          "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 356                          inode->i_ino, (unsigned long long)blocknr, level,
 357                          flags, nchildren);
 358                ret = 1;
 359        }
 360        return ret;
 361}
 362
 363/**
 364 * nilfs_btree_root_broken - verify consistency of btree root node
 365 * @node: btree root node to be examined
 366 * @inode: host inode of btree
 367 *
 368 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 369 */
 370static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 371                                   struct inode *inode)
 372{
 373        int level, flags, nchildren;
 374        int ret = 0;
 375
 376        level = nilfs_btree_node_get_level(node);
 377        flags = nilfs_btree_node_get_flags(node);
 378        nchildren = nilfs_btree_node_get_nchildren(node);
 379
 380        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 381                     level >= NILFS_BTREE_LEVEL_MAX ||
 382                     nchildren < 0 ||
 383                     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 384                nilfs_msg(inode->i_sb, KERN_CRIT,
 385                          "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 386                          inode->i_ino, level, flags, nchildren);
 387                ret = 1;
 388        }
 389        return ret;
 390}
 391
 392int nilfs_btree_broken_node_block(struct buffer_head *bh)
 393{
 394        struct inode *inode;
 395        int ret;
 396
 397        if (buffer_nilfs_checked(bh))
 398                return 0;
 399
 400        inode = bh->b_page->mapping->host;
 401        ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 402                                      bh->b_size, inode, bh->b_blocknr);
 403        if (likely(!ret))
 404                set_buffer_nilfs_checked(bh);
 405        return ret;
 406}
 407
 408static struct nilfs_btree_node *
 409nilfs_btree_get_root(const struct nilfs_bmap *btree)
 410{
 411        return (struct nilfs_btree_node *)btree->b_u.u_data;
 412}
 413
 414static struct nilfs_btree_node *
 415nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 416{
 417        return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 418}
 419
 420static struct nilfs_btree_node *
 421nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 422{
 423        return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 424}
 425
 426static int nilfs_btree_height(const struct nilfs_bmap *btree)
 427{
 428        return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 429}
 430
 431static struct nilfs_btree_node *
 432nilfs_btree_get_node(const struct nilfs_bmap *btree,
 433                     const struct nilfs_btree_path *path,
 434                     int level, int *ncmaxp)
 435{
 436        struct nilfs_btree_node *node;
 437
 438        if (level == nilfs_btree_height(btree) - 1) {
 439                node = nilfs_btree_get_root(btree);
 440                *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 441        } else {
 442                node = nilfs_btree_get_nonroot_node(path, level);
 443                *ncmaxp = nilfs_btree_nchildren_per_block(btree);
 444        }
 445        return node;
 446}
 447
 448static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 449                                struct nilfs_btree_node *node, int level)
 450{
 451        if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 452                dump_stack();
 453                nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
 454                          "btree level mismatch (ino=%lu): %d != %d",
 455                          btree->b_inode->i_ino,
 456                          nilfs_btree_node_get_level(node), level);
 457                return 1;
 458        }
 459        return 0;
 460}
 461
 462struct nilfs_btree_readahead_info {
 463        struct nilfs_btree_node *node;  /* parent node */
 464        int max_ra_blocks;              /* max nof blocks to read ahead */
 465        int index;                      /* current index on the parent node */
 466        int ncmax;                      /* nof children in the parent node */
 467};
 468
 469static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 470                                   struct buffer_head **bhp,
 471                                   const struct nilfs_btree_readahead_info *ra)
 472{
 473        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 474        struct buffer_head *bh, *ra_bh;
 475        sector_t submit_ptr = 0;
 476        int ret;
 477
 478        ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
 479                                        &submit_ptr);
 480        if (ret) {
 481                if (ret != -EEXIST)
 482                        return ret;
 483                goto out_check;
 484        }
 485
 486        if (ra) {
 487                int i, n;
 488                __u64 ptr2;
 489
 490                /* read ahead sibling nodes */
 491                for (n = ra->max_ra_blocks, i = ra->index + 1;
 492                     n > 0 && i < ra->ncmax; n--, i++) {
 493                        ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 494
 495                        ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 496                                                        REQ_OP_READ, REQ_RAHEAD,
 497                                                        &ra_bh, &submit_ptr);
 498                        if (likely(!ret || ret == -EEXIST))
 499                                brelse(ra_bh);
 500                        else if (ret != -EBUSY)
 501                                break;
 502                        if (!buffer_locked(bh))
 503                                goto out_no_wait;
 504                }
 505        }
 506
 507        wait_on_buffer(bh);
 508
 509 out_no_wait:
 510        if (!buffer_uptodate(bh)) {
 511                nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
 512                          "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 513                          btree->b_inode->i_ino, (unsigned long long)ptr);
 514                brelse(bh);
 515                return -EIO;
 516        }
 517
 518 out_check:
 519        if (nilfs_btree_broken_node_block(bh)) {
 520                clear_buffer_uptodate(bh);
 521                brelse(bh);
 522                return -EINVAL;
 523        }
 524
 525        *bhp = bh;
 526        return 0;
 527}
 528
 529static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 530                                   struct buffer_head **bhp)
 531{
 532        return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 533}
 534
 535static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 536                                 struct nilfs_btree_path *path,
 537                                 __u64 key, __u64 *ptrp, int minlevel,
 538                                 int readahead)
 539{
 540        struct nilfs_btree_node *node;
 541        struct nilfs_btree_readahead_info p, *ra;
 542        __u64 ptr;
 543        int level, index, found, ncmax, ret;
 544
 545        node = nilfs_btree_get_root(btree);
 546        level = nilfs_btree_node_get_level(node);
 547        if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 548                return -ENOENT;
 549
 550        found = nilfs_btree_node_lookup(node, key, &index);
 551        ptr = nilfs_btree_node_get_ptr(node, index,
 552                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 553        path[level].bp_bh = NULL;
 554        path[level].bp_index = index;
 555
 556        ncmax = nilfs_btree_nchildren_per_block(btree);
 557
 558        while (--level >= minlevel) {
 559                ra = NULL;
 560                if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 561                        p.node = nilfs_btree_get_node(btree, path, level + 1,
 562                                                      &p.ncmax);
 563                        p.index = index;
 564                        p.max_ra_blocks = 7;
 565                        ra = &p;
 566                }
 567                ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 568                                              ra);
 569                if (ret < 0)
 570                        return ret;
 571
 572                node = nilfs_btree_get_nonroot_node(path, level);
 573                if (nilfs_btree_bad_node(btree, node, level))
 574                        return -EINVAL;
 575                if (!found)
 576                        found = nilfs_btree_node_lookup(node, key, &index);
 577                else
 578                        index = 0;
 579                if (index < ncmax) {
 580                        ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 581                } else {
 582                        WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 583                        /* insert */
 584                        ptr = NILFS_BMAP_INVALID_PTR;
 585                }
 586                path[level].bp_index = index;
 587        }
 588        if (!found)
 589                return -ENOENT;
 590
 591        if (ptrp != NULL)
 592                *ptrp = ptr;
 593
 594        return 0;
 595}
 596
 597static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 598                                      struct nilfs_btree_path *path,
 599                                      __u64 *keyp, __u64 *ptrp)
 600{
 601        struct nilfs_btree_node *node;
 602        __u64 ptr;
 603        int index, level, ncmax, ret;
 604
 605        node = nilfs_btree_get_root(btree);
 606        index = nilfs_btree_node_get_nchildren(node) - 1;
 607        if (index < 0)
 608                return -ENOENT;
 609        level = nilfs_btree_node_get_level(node);
 610        ptr = nilfs_btree_node_get_ptr(node, index,
 611                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 612        path[level].bp_bh = NULL;
 613        path[level].bp_index = index;
 614        ncmax = nilfs_btree_nchildren_per_block(btree);
 615
 616        for (level--; level > 0; level--) {
 617                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 618                if (ret < 0)
 619                        return ret;
 620                node = nilfs_btree_get_nonroot_node(path, level);
 621                if (nilfs_btree_bad_node(btree, node, level))
 622                        return -EINVAL;
 623                index = nilfs_btree_node_get_nchildren(node) - 1;
 624                ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 625                path[level].bp_index = index;
 626        }
 627
 628        if (keyp != NULL)
 629                *keyp = nilfs_btree_node_get_key(node, index);
 630        if (ptrp != NULL)
 631                *ptrp = ptr;
 632
 633        return 0;
 634}
 635
 636/**
 637 * nilfs_btree_get_next_key - get next valid key from btree path array
 638 * @btree: bmap struct of btree
 639 * @path: array of nilfs_btree_path struct
 640 * @minlevel: start level
 641 * @nextkey: place to store the next valid key
 642 *
 643 * Return Value: If a next key was found, 0 is returned. Otherwise,
 644 * -ENOENT is returned.
 645 */
 646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 647                                    const struct nilfs_btree_path *path,
 648                                    int minlevel, __u64 *nextkey)
 649{
 650        struct nilfs_btree_node *node;
 651        int maxlevel = nilfs_btree_height(btree) - 1;
 652        int index, next_adj, level;
 653
 654        /* Next index is already set to bp_index for leaf nodes. */
 655        next_adj = 0;
 656        for (level = minlevel; level <= maxlevel; level++) {
 657                if (level == maxlevel)
 658                        node = nilfs_btree_get_root(btree);
 659                else
 660                        node = nilfs_btree_get_nonroot_node(path, level);
 661
 662                index = path[level].bp_index + next_adj;
 663                if (index < nilfs_btree_node_get_nchildren(node)) {
 664                        /* Next key is in this node */
 665                        *nextkey = nilfs_btree_node_get_key(node, index);
 666                        return 0;
 667                }
 668                /* For non-leaf nodes, next index is stored at bp_index + 1. */
 669                next_adj = 1;
 670        }
 671        return -ENOENT;
 672}
 673
 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 675                              __u64 key, int level, __u64 *ptrp)
 676{
 677        struct nilfs_btree_path *path;
 678        int ret;
 679
 680        path = nilfs_btree_alloc_path();
 681        if (path == NULL)
 682                return -ENOMEM;
 683
 684        ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 685
 686        nilfs_btree_free_path(path);
 687
 688        return ret;
 689}
 690
 691static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 692                                     __u64 key, __u64 *ptrp,
 693                                     unsigned int maxblocks)
 694{
 695        struct nilfs_btree_path *path;
 696        struct nilfs_btree_node *node;
 697        struct inode *dat = NULL;
 698        __u64 ptr, ptr2;
 699        sector_t blocknr;
 700        int level = NILFS_BTREE_LEVEL_NODE_MIN;
 701        int ret, cnt, index, maxlevel, ncmax;
 702        struct nilfs_btree_readahead_info p;
 703
 704        path = nilfs_btree_alloc_path();
 705        if (path == NULL)
 706                return -ENOMEM;
 707
 708        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 709        if (ret < 0)
 710                goto out;
 711
 712        if (NILFS_BMAP_USE_VBN(btree)) {
 713                dat = nilfs_bmap_get_dat(btree);
 714                ret = nilfs_dat_translate(dat, ptr, &blocknr);
 715                if (ret < 0)
 716                        goto out;
 717                ptr = blocknr;
 718        }
 719        cnt = 1;
 720        if (cnt == maxblocks)
 721                goto end;
 722
 723        maxlevel = nilfs_btree_height(btree) - 1;
 724        node = nilfs_btree_get_node(btree, path, level, &ncmax);
 725        index = path[level].bp_index + 1;
 726        for (;;) {
 727                while (index < nilfs_btree_node_get_nchildren(node)) {
 728                        if (nilfs_btree_node_get_key(node, index) !=
 729                            key + cnt)
 730                                goto end;
 731                        ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 732                        if (dat) {
 733                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 734                                if (ret < 0)
 735                                        goto out;
 736                                ptr2 = blocknr;
 737                        }
 738                        if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 739                                goto end;
 740                        index++;
 741                        continue;
 742                }
 743                if (level == maxlevel)
 744                        break;
 745
 746                /* look-up right sibling node */
 747                p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 748                p.index = path[level + 1].bp_index + 1;
 749                p.max_ra_blocks = 7;
 750                if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 751                    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 752                        break;
 753                ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 754                path[level + 1].bp_index = p.index;
 755
 756                brelse(path[level].bp_bh);
 757                path[level].bp_bh = NULL;
 758
 759                ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 760                                              &p);
 761                if (ret < 0)
 762                        goto out;
 763                node = nilfs_btree_get_nonroot_node(path, level);
 764                ncmax = nilfs_btree_nchildren_per_block(btree);
 765                index = 0;
 766                path[level].bp_index = index;
 767        }
 768 end:
 769        *ptrp = ptr;
 770        ret = cnt;
 771 out:
 772        nilfs_btree_free_path(path);
 773        return ret;
 774}
 775
 776static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 777                                    struct nilfs_btree_path *path,
 778                                    int level, __u64 key)
 779{
 780        if (level < nilfs_btree_height(btree) - 1) {
 781                do {
 782                        nilfs_btree_node_set_key(
 783                                nilfs_btree_get_nonroot_node(path, level),
 784                                path[level].bp_index, key);
 785                        if (!buffer_dirty(path[level].bp_bh))
 786                                mark_buffer_dirty(path[level].bp_bh);
 787                } while ((path[level].bp_index == 0) &&
 788                         (++level < nilfs_btree_height(btree) - 1));
 789        }
 790
 791        /* root */
 792        if (level == nilfs_btree_height(btree) - 1) {
 793                nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 794                                         path[level].bp_index, key);
 795        }
 796}
 797
 798static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 799                                  struct nilfs_btree_path *path,
 800                                  int level, __u64 *keyp, __u64 *ptrp)
 801{
 802        struct nilfs_btree_node *node;
 803        int ncblk;
 804
 805        if (level < nilfs_btree_height(btree) - 1) {
 806                node = nilfs_btree_get_nonroot_node(path, level);
 807                ncblk = nilfs_btree_nchildren_per_block(btree);
 808                nilfs_btree_node_insert(node, path[level].bp_index,
 809                                        *keyp, *ptrp, ncblk);
 810                if (!buffer_dirty(path[level].bp_bh))
 811                        mark_buffer_dirty(path[level].bp_bh);
 812
 813                if (path[level].bp_index == 0)
 814                        nilfs_btree_promote_key(btree, path, level + 1,
 815                                                nilfs_btree_node_get_key(node,
 816                                                                         0));
 817        } else {
 818                node = nilfs_btree_get_root(btree);
 819                nilfs_btree_node_insert(node, path[level].bp_index,
 820                                        *keyp, *ptrp,
 821                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
 822        }
 823}
 824
 825static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 826                                   struct nilfs_btree_path *path,
 827                                   int level, __u64 *keyp, __u64 *ptrp)
 828{
 829        struct nilfs_btree_node *node, *left;
 830        int nchildren, lnchildren, n, move, ncblk;
 831
 832        node = nilfs_btree_get_nonroot_node(path, level);
 833        left = nilfs_btree_get_sib_node(path, level);
 834        nchildren = nilfs_btree_node_get_nchildren(node);
 835        lnchildren = nilfs_btree_node_get_nchildren(left);
 836        ncblk = nilfs_btree_nchildren_per_block(btree);
 837        move = 0;
 838
 839        n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 840        if (n > path[level].bp_index) {
 841                /* move insert point */
 842                n--;
 843                move = 1;
 844        }
 845
 846        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 847
 848        if (!buffer_dirty(path[level].bp_bh))
 849                mark_buffer_dirty(path[level].bp_bh);
 850        if (!buffer_dirty(path[level].bp_sib_bh))
 851                mark_buffer_dirty(path[level].bp_sib_bh);
 852
 853        nilfs_btree_promote_key(btree, path, level + 1,
 854                                nilfs_btree_node_get_key(node, 0));
 855
 856        if (move) {
 857                brelse(path[level].bp_bh);
 858                path[level].bp_bh = path[level].bp_sib_bh;
 859                path[level].bp_sib_bh = NULL;
 860                path[level].bp_index += lnchildren;
 861                path[level + 1].bp_index--;
 862        } else {
 863                brelse(path[level].bp_sib_bh);
 864                path[level].bp_sib_bh = NULL;
 865                path[level].bp_index -= n;
 866        }
 867
 868        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 869}
 870
 871static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 872                                    struct nilfs_btree_path *path,
 873                                    int level, __u64 *keyp, __u64 *ptrp)
 874{
 875        struct nilfs_btree_node *node, *right;
 876        int nchildren, rnchildren, n, move, ncblk;
 877
 878        node = nilfs_btree_get_nonroot_node(path, level);
 879        right = nilfs_btree_get_sib_node(path, level);
 880        nchildren = nilfs_btree_node_get_nchildren(node);
 881        rnchildren = nilfs_btree_node_get_nchildren(right);
 882        ncblk = nilfs_btree_nchildren_per_block(btree);
 883        move = 0;
 884
 885        n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 886        if (n > nchildren - path[level].bp_index) {
 887                /* move insert point */
 888                n--;
 889                move = 1;
 890        }
 891
 892        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 893
 894        if (!buffer_dirty(path[level].bp_bh))
 895                mark_buffer_dirty(path[level].bp_bh);
 896        if (!buffer_dirty(path[level].bp_sib_bh))
 897                mark_buffer_dirty(path[level].bp_sib_bh);
 898
 899        path[level + 1].bp_index++;
 900        nilfs_btree_promote_key(btree, path, level + 1,
 901                                nilfs_btree_node_get_key(right, 0));
 902        path[level + 1].bp_index--;
 903
 904        if (move) {
 905                brelse(path[level].bp_bh);
 906                path[level].bp_bh = path[level].bp_sib_bh;
 907                path[level].bp_sib_bh = NULL;
 908                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 909                path[level + 1].bp_index++;
 910        } else {
 911                brelse(path[level].bp_sib_bh);
 912                path[level].bp_sib_bh = NULL;
 913        }
 914
 915        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 916}
 917
 918static void nilfs_btree_split(struct nilfs_bmap *btree,
 919                              struct nilfs_btree_path *path,
 920                              int level, __u64 *keyp, __u64 *ptrp)
 921{
 922        struct nilfs_btree_node *node, *right;
 923        int nchildren, n, move, ncblk;
 924
 925        node = nilfs_btree_get_nonroot_node(path, level);
 926        right = nilfs_btree_get_sib_node(path, level);
 927        nchildren = nilfs_btree_node_get_nchildren(node);
 928        ncblk = nilfs_btree_nchildren_per_block(btree);
 929        move = 0;
 930
 931        n = (nchildren + 1) / 2;
 932        if (n > nchildren - path[level].bp_index) {
 933                n--;
 934                move = 1;
 935        }
 936
 937        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 938
 939        if (!buffer_dirty(path[level].bp_bh))
 940                mark_buffer_dirty(path[level].bp_bh);
 941        if (!buffer_dirty(path[level].bp_sib_bh))
 942                mark_buffer_dirty(path[level].bp_sib_bh);
 943
 944        if (move) {
 945                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 946                nilfs_btree_node_insert(right, path[level].bp_index,
 947                                        *keyp, *ptrp, ncblk);
 948
 949                *keyp = nilfs_btree_node_get_key(right, 0);
 950                *ptrp = path[level].bp_newreq.bpr_ptr;
 951
 952                brelse(path[level].bp_bh);
 953                path[level].bp_bh = path[level].bp_sib_bh;
 954                path[level].bp_sib_bh = NULL;
 955        } else {
 956                nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 957
 958                *keyp = nilfs_btree_node_get_key(right, 0);
 959                *ptrp = path[level].bp_newreq.bpr_ptr;
 960
 961                brelse(path[level].bp_sib_bh);
 962                path[level].bp_sib_bh = NULL;
 963        }
 964
 965        path[level + 1].bp_index++;
 966}
 967
 968static void nilfs_btree_grow(struct nilfs_bmap *btree,
 969                             struct nilfs_btree_path *path,
 970                             int level, __u64 *keyp, __u64 *ptrp)
 971{
 972        struct nilfs_btree_node *root, *child;
 973        int n, ncblk;
 974
 975        root = nilfs_btree_get_root(btree);
 976        child = nilfs_btree_get_sib_node(path, level);
 977        ncblk = nilfs_btree_nchildren_per_block(btree);
 978
 979        n = nilfs_btree_node_get_nchildren(root);
 980
 981        nilfs_btree_node_move_right(root, child, n,
 982                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 983        nilfs_btree_node_set_level(root, level + 1);
 984
 985        if (!buffer_dirty(path[level].bp_sib_bh))
 986                mark_buffer_dirty(path[level].bp_sib_bh);
 987
 988        path[level].bp_bh = path[level].bp_sib_bh;
 989        path[level].bp_sib_bh = NULL;
 990
 991        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 992
 993        *keyp = nilfs_btree_node_get_key(child, 0);
 994        *ptrp = path[level].bp_newreq.bpr_ptr;
 995}
 996
 997static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
 998                                   const struct nilfs_btree_path *path)
 999{
1000        struct nilfs_btree_node *node;
1001        int level, ncmax;
1002
1003        if (path == NULL)
1004                return NILFS_BMAP_INVALID_PTR;
1005
1006        /* left sibling */
1007        level = NILFS_BTREE_LEVEL_NODE_MIN;
1008        if (path[level].bp_index > 0) {
1009                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1010                return nilfs_btree_node_get_ptr(node,
1011                                                path[level].bp_index - 1,
1012                                                ncmax);
1013        }
1014
1015        /* parent */
1016        level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1017        if (level <= nilfs_btree_height(btree) - 1) {
1018                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1019                return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1020                                                ncmax);
1021        }
1022
1023        return NILFS_BMAP_INVALID_PTR;
1024}
1025
1026static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1027                                       const struct nilfs_btree_path *path,
1028                                       __u64 key)
1029{
1030        __u64 ptr;
1031
1032        ptr = nilfs_bmap_find_target_seq(btree, key);
1033        if (ptr != NILFS_BMAP_INVALID_PTR)
1034                /* sequential access */
1035                return ptr;
1036
1037        ptr = nilfs_btree_find_near(btree, path);
1038        if (ptr != NILFS_BMAP_INVALID_PTR)
1039                /* near */
1040                return ptr;
1041
1042        /* block group */
1043        return nilfs_bmap_find_target_in_group(btree);
1044}
1045
1046static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1047                                      struct nilfs_btree_path *path,
1048                                      int *levelp, __u64 key, __u64 ptr,
1049                                      struct nilfs_bmap_stats *stats)
1050{
1051        struct buffer_head *bh;
1052        struct nilfs_btree_node *node, *parent, *sib;
1053        __u64 sibptr;
1054        int pindex, level, ncmax, ncblk, ret;
1055        struct inode *dat = NULL;
1056
1057        stats->bs_nblocks = 0;
1058        level = NILFS_BTREE_LEVEL_DATA;
1059
1060        /* allocate a new ptr for data block */
1061        if (NILFS_BMAP_USE_VBN(btree)) {
1062                path[level].bp_newreq.bpr_ptr =
1063                        nilfs_btree_find_target_v(btree, path, key);
1064                dat = nilfs_bmap_get_dat(btree);
1065        }
1066
1067        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1068        if (ret < 0)
1069                goto err_out_data;
1070
1071        ncblk = nilfs_btree_nchildren_per_block(btree);
1072
1073        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1074             level < nilfs_btree_height(btree) - 1;
1075             level++) {
1076                node = nilfs_btree_get_nonroot_node(path, level);
1077                if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1078                        path[level].bp_op = nilfs_btree_do_insert;
1079                        stats->bs_nblocks++;
1080                        goto out;
1081                }
1082
1083                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1084                pindex = path[level + 1].bp_index;
1085
1086                /* left sibling */
1087                if (pindex > 0) {
1088                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1089                                                          ncmax);
1090                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1091                        if (ret < 0)
1092                                goto err_out_child_node;
1093                        sib = (struct nilfs_btree_node *)bh->b_data;
1094                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1095                                path[level].bp_sib_bh = bh;
1096                                path[level].bp_op = nilfs_btree_carry_left;
1097                                stats->bs_nblocks++;
1098                                goto out;
1099                        } else {
1100                                brelse(bh);
1101                        }
1102                }
1103
1104                /* right sibling */
1105                if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1106                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1107                                                          ncmax);
1108                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1109                        if (ret < 0)
1110                                goto err_out_child_node;
1111                        sib = (struct nilfs_btree_node *)bh->b_data;
1112                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1113                                path[level].bp_sib_bh = bh;
1114                                path[level].bp_op = nilfs_btree_carry_right;
1115                                stats->bs_nblocks++;
1116                                goto out;
1117                        } else {
1118                                brelse(bh);
1119                        }
1120                }
1121
1122                /* split */
1123                path[level].bp_newreq.bpr_ptr =
1124                        path[level - 1].bp_newreq.bpr_ptr + 1;
1125                ret = nilfs_bmap_prepare_alloc_ptr(btree,
1126                                                   &path[level].bp_newreq, dat);
1127                if (ret < 0)
1128                        goto err_out_child_node;
1129                ret = nilfs_btree_get_new_block(btree,
1130                                                path[level].bp_newreq.bpr_ptr,
1131                                                &bh);
1132                if (ret < 0)
1133                        goto err_out_curr_node;
1134
1135                stats->bs_nblocks++;
1136
1137                sib = (struct nilfs_btree_node *)bh->b_data;
1138                nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1139                path[level].bp_sib_bh = bh;
1140                path[level].bp_op = nilfs_btree_split;
1141        }
1142
1143        /* root */
1144        node = nilfs_btree_get_root(btree);
1145        if (nilfs_btree_node_get_nchildren(node) <
1146            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1147                path[level].bp_op = nilfs_btree_do_insert;
1148                stats->bs_nblocks++;
1149                goto out;
1150        }
1151
1152        /* grow */
1153        path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1154        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1155        if (ret < 0)
1156                goto err_out_child_node;
1157        ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1158                                        &bh);
1159        if (ret < 0)
1160                goto err_out_curr_node;
1161
1162        nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1163                              0, level, 0, ncblk, NULL, NULL);
1164        path[level].bp_sib_bh = bh;
1165        path[level].bp_op = nilfs_btree_grow;
1166
1167        level++;
1168        path[level].bp_op = nilfs_btree_do_insert;
1169
1170        /* a newly-created node block and a data block are added */
1171        stats->bs_nblocks += 2;
1172
1173        /* success */
1174 out:
1175        *levelp = level;
1176        return ret;
1177
1178        /* error */
1179 err_out_curr_node:
1180        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1181 err_out_child_node:
1182        for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1183                nilfs_btnode_delete(path[level].bp_sib_bh);
1184                nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1185
1186        }
1187
1188        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1189 err_out_data:
1190        *levelp = level;
1191        stats->bs_nblocks = 0;
1192        return ret;
1193}
1194
1195static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1196                                      struct nilfs_btree_path *path,
1197                                      int maxlevel, __u64 key, __u64 ptr)
1198{
1199        struct inode *dat = NULL;
1200        int level;
1201
1202        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1203        ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1204        if (NILFS_BMAP_USE_VBN(btree)) {
1205                nilfs_bmap_set_target_v(btree, key, ptr);
1206                dat = nilfs_bmap_get_dat(btree);
1207        }
1208
1209        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1210                nilfs_bmap_commit_alloc_ptr(btree,
1211                                            &path[level - 1].bp_newreq, dat);
1212                path[level].bp_op(btree, path, level, &key, &ptr);
1213        }
1214
1215        if (!nilfs_bmap_dirty(btree))
1216                nilfs_bmap_set_dirty(btree);
1217}
1218
1219static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1220{
1221        struct nilfs_btree_path *path;
1222        struct nilfs_bmap_stats stats;
1223        int level, ret;
1224
1225        path = nilfs_btree_alloc_path();
1226        if (path == NULL)
1227                return -ENOMEM;
1228
1229        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1230                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1231        if (ret != -ENOENT) {
1232                if (ret == 0)
1233                        ret = -EEXIST;
1234                goto out;
1235        }
1236
1237        ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1238        if (ret < 0)
1239                goto out;
1240        nilfs_btree_commit_insert(btree, path, level, key, ptr);
1241        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1242
1243 out:
1244        nilfs_btree_free_path(path);
1245        return ret;
1246}
1247
1248static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1249                                  struct nilfs_btree_path *path,
1250                                  int level, __u64 *keyp, __u64 *ptrp)
1251{
1252        struct nilfs_btree_node *node;
1253        int ncblk;
1254
1255        if (level < nilfs_btree_height(btree) - 1) {
1256                node = nilfs_btree_get_nonroot_node(path, level);
1257                ncblk = nilfs_btree_nchildren_per_block(btree);
1258                nilfs_btree_node_delete(node, path[level].bp_index,
1259                                        keyp, ptrp, ncblk);
1260                if (!buffer_dirty(path[level].bp_bh))
1261                        mark_buffer_dirty(path[level].bp_bh);
1262                if (path[level].bp_index == 0)
1263                        nilfs_btree_promote_key(btree, path, level + 1,
1264                                nilfs_btree_node_get_key(node, 0));
1265        } else {
1266                node = nilfs_btree_get_root(btree);
1267                nilfs_btree_node_delete(node, path[level].bp_index,
1268                                        keyp, ptrp,
1269                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1270        }
1271}
1272
1273static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1274                                    struct nilfs_btree_path *path,
1275                                    int level, __u64 *keyp, __u64 *ptrp)
1276{
1277        struct nilfs_btree_node *node, *left;
1278        int nchildren, lnchildren, n, ncblk;
1279
1280        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1281
1282        node = nilfs_btree_get_nonroot_node(path, level);
1283        left = nilfs_btree_get_sib_node(path, level);
1284        nchildren = nilfs_btree_node_get_nchildren(node);
1285        lnchildren = nilfs_btree_node_get_nchildren(left);
1286        ncblk = nilfs_btree_nchildren_per_block(btree);
1287
1288        n = (nchildren + lnchildren) / 2 - nchildren;
1289
1290        nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1291
1292        if (!buffer_dirty(path[level].bp_bh))
1293                mark_buffer_dirty(path[level].bp_bh);
1294        if (!buffer_dirty(path[level].bp_sib_bh))
1295                mark_buffer_dirty(path[level].bp_sib_bh);
1296
1297        nilfs_btree_promote_key(btree, path, level + 1,
1298                                nilfs_btree_node_get_key(node, 0));
1299
1300        brelse(path[level].bp_sib_bh);
1301        path[level].bp_sib_bh = NULL;
1302        path[level].bp_index += n;
1303}
1304
1305static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1306                                     struct nilfs_btree_path *path,
1307                                     int level, __u64 *keyp, __u64 *ptrp)
1308{
1309        struct nilfs_btree_node *node, *right;
1310        int nchildren, rnchildren, n, ncblk;
1311
1312        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1313
1314        node = nilfs_btree_get_nonroot_node(path, level);
1315        right = nilfs_btree_get_sib_node(path, level);
1316        nchildren = nilfs_btree_node_get_nchildren(node);
1317        rnchildren = nilfs_btree_node_get_nchildren(right);
1318        ncblk = nilfs_btree_nchildren_per_block(btree);
1319
1320        n = (nchildren + rnchildren) / 2 - nchildren;
1321
1322        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1323
1324        if (!buffer_dirty(path[level].bp_bh))
1325                mark_buffer_dirty(path[level].bp_bh);
1326        if (!buffer_dirty(path[level].bp_sib_bh))
1327                mark_buffer_dirty(path[level].bp_sib_bh);
1328
1329        path[level + 1].bp_index++;
1330        nilfs_btree_promote_key(btree, path, level + 1,
1331                                nilfs_btree_node_get_key(right, 0));
1332        path[level + 1].bp_index--;
1333
1334        brelse(path[level].bp_sib_bh);
1335        path[level].bp_sib_bh = NULL;
1336}
1337
1338static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1339                                    struct nilfs_btree_path *path,
1340                                    int level, __u64 *keyp, __u64 *ptrp)
1341{
1342        struct nilfs_btree_node *node, *left;
1343        int n, ncblk;
1344
1345        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1346
1347        node = nilfs_btree_get_nonroot_node(path, level);
1348        left = nilfs_btree_get_sib_node(path, level);
1349        ncblk = nilfs_btree_nchildren_per_block(btree);
1350
1351        n = nilfs_btree_node_get_nchildren(node);
1352
1353        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1354
1355        if (!buffer_dirty(path[level].bp_sib_bh))
1356                mark_buffer_dirty(path[level].bp_sib_bh);
1357
1358        nilfs_btnode_delete(path[level].bp_bh);
1359        path[level].bp_bh = path[level].bp_sib_bh;
1360        path[level].bp_sib_bh = NULL;
1361        path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1362}
1363
1364static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1365                                     struct nilfs_btree_path *path,
1366                                     int level, __u64 *keyp, __u64 *ptrp)
1367{
1368        struct nilfs_btree_node *node, *right;
1369        int n, ncblk;
1370
1371        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1372
1373        node = nilfs_btree_get_nonroot_node(path, level);
1374        right = nilfs_btree_get_sib_node(path, level);
1375        ncblk = nilfs_btree_nchildren_per_block(btree);
1376
1377        n = nilfs_btree_node_get_nchildren(right);
1378
1379        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1380
1381        if (!buffer_dirty(path[level].bp_bh))
1382                mark_buffer_dirty(path[level].bp_bh);
1383
1384        nilfs_btnode_delete(path[level].bp_sib_bh);
1385        path[level].bp_sib_bh = NULL;
1386        path[level + 1].bp_index++;
1387}
1388
1389static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1390                               struct nilfs_btree_path *path,
1391                               int level, __u64 *keyp, __u64 *ptrp)
1392{
1393        struct nilfs_btree_node *root, *child;
1394        int n, ncblk;
1395
1396        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1397
1398        root = nilfs_btree_get_root(btree);
1399        child = nilfs_btree_get_nonroot_node(path, level);
1400        ncblk = nilfs_btree_nchildren_per_block(btree);
1401
1402        nilfs_btree_node_delete(root, 0, NULL, NULL,
1403                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1404        nilfs_btree_node_set_level(root, level);
1405        n = nilfs_btree_node_get_nchildren(child);
1406        nilfs_btree_node_move_left(root, child, n,
1407                                   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1408
1409        nilfs_btnode_delete(path[level].bp_bh);
1410        path[level].bp_bh = NULL;
1411}
1412
1413static void nilfs_btree_nop(struct nilfs_bmap *btree,
1414                            struct nilfs_btree_path *path,
1415                            int level, __u64 *keyp, __u64 *ptrp)
1416{
1417}
1418
1419static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1420                                      struct nilfs_btree_path *path,
1421                                      int *levelp,
1422                                      struct nilfs_bmap_stats *stats,
1423                                      struct inode *dat)
1424{
1425        struct buffer_head *bh;
1426        struct nilfs_btree_node *node, *parent, *sib;
1427        __u64 sibptr;
1428        int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1429
1430        ret = 0;
1431        stats->bs_nblocks = 0;
1432        ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1433        ncblk = nilfs_btree_nchildren_per_block(btree);
1434
1435        for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1436             level < nilfs_btree_height(btree) - 1;
1437             level++) {
1438                node = nilfs_btree_get_nonroot_node(path, level);
1439                path[level].bp_oldreq.bpr_ptr =
1440                        nilfs_btree_node_get_ptr(node, dindex, ncblk);
1441                ret = nilfs_bmap_prepare_end_ptr(btree,
1442                                                 &path[level].bp_oldreq, dat);
1443                if (ret < 0)
1444                        goto err_out_child_node;
1445
1446                if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1447                        path[level].bp_op = nilfs_btree_do_delete;
1448                        stats->bs_nblocks++;
1449                        goto out;
1450                }
1451
1452                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1453                pindex = path[level + 1].bp_index;
1454                dindex = pindex;
1455
1456                if (pindex > 0) {
1457                        /* left sibling */
1458                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1459                                                          ncmax);
1460                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1461                        if (ret < 0)
1462                                goto err_out_curr_node;
1463                        sib = (struct nilfs_btree_node *)bh->b_data;
1464                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1465                                path[level].bp_sib_bh = bh;
1466                                path[level].bp_op = nilfs_btree_borrow_left;
1467                                stats->bs_nblocks++;
1468                                goto out;
1469                        } else {
1470                                path[level].bp_sib_bh = bh;
1471                                path[level].bp_op = nilfs_btree_concat_left;
1472                                stats->bs_nblocks++;
1473                                /* continue; */
1474                        }
1475                } else if (pindex <
1476                           nilfs_btree_node_get_nchildren(parent) - 1) {
1477                        /* right sibling */
1478                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1479                                                          ncmax);
1480                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1481                        if (ret < 0)
1482                                goto err_out_curr_node;
1483                        sib = (struct nilfs_btree_node *)bh->b_data;
1484                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1485                                path[level].bp_sib_bh = bh;
1486                                path[level].bp_op = nilfs_btree_borrow_right;
1487                                stats->bs_nblocks++;
1488                                goto out;
1489                        } else {
1490                                path[level].bp_sib_bh = bh;
1491                                path[level].bp_op = nilfs_btree_concat_right;
1492                                stats->bs_nblocks++;
1493                                /*
1494                                 * When merging right sibling node
1495                                 * into the current node, pointer to
1496                                 * the right sibling node must be
1497                                 * terminated instead.  The adjustment
1498                                 * below is required for that.
1499                                 */
1500                                dindex = pindex + 1;
1501                                /* continue; */
1502                        }
1503                } else {
1504                        /* no siblings */
1505                        /* the only child of the root node */
1506                        WARN_ON(level != nilfs_btree_height(btree) - 2);
1507                        if (nilfs_btree_node_get_nchildren(node) - 1 <=
1508                            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1509                                path[level].bp_op = nilfs_btree_shrink;
1510                                stats->bs_nblocks += 2;
1511                                level++;
1512                                path[level].bp_op = nilfs_btree_nop;
1513                                goto shrink_root_child;
1514                        } else {
1515                                path[level].bp_op = nilfs_btree_do_delete;
1516                                stats->bs_nblocks++;
1517                                goto out;
1518                        }
1519                }
1520        }
1521
1522        /* child of the root node is deleted */
1523        path[level].bp_op = nilfs_btree_do_delete;
1524        stats->bs_nblocks++;
1525
1526shrink_root_child:
1527        node = nilfs_btree_get_root(btree);
1528        path[level].bp_oldreq.bpr_ptr =
1529                nilfs_btree_node_get_ptr(node, dindex,
1530                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1531
1532        ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1533        if (ret < 0)
1534                goto err_out_child_node;
1535
1536        /* success */
1537 out:
1538        *levelp = level;
1539        return ret;
1540
1541        /* error */
1542 err_out_curr_node:
1543        nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1544 err_out_child_node:
1545        for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1546                brelse(path[level].bp_sib_bh);
1547                nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1548        }
1549        *levelp = level;
1550        stats->bs_nblocks = 0;
1551        return ret;
1552}
1553
1554static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1555                                      struct nilfs_btree_path *path,
1556                                      int maxlevel, struct inode *dat)
1557{
1558        int level;
1559
1560        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1561                nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1562                path[level].bp_op(btree, path, level, NULL, NULL);
1563        }
1564
1565        if (!nilfs_bmap_dirty(btree))
1566                nilfs_bmap_set_dirty(btree);
1567}
1568
1569static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1570
1571{
1572        struct nilfs_btree_path *path;
1573        struct nilfs_bmap_stats stats;
1574        struct inode *dat;
1575        int level, ret;
1576
1577        path = nilfs_btree_alloc_path();
1578        if (path == NULL)
1579                return -ENOMEM;
1580
1581        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1582                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1583        if (ret < 0)
1584                goto out;
1585
1586
1587        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1588
1589        ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1590        if (ret < 0)
1591                goto out;
1592        nilfs_btree_commit_delete(btree, path, level, dat);
1593        nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1594
1595out:
1596        nilfs_btree_free_path(path);
1597        return ret;
1598}
1599
1600static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1601                                __u64 *keyp)
1602{
1603        struct nilfs_btree_path *path;
1604        const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1605        int ret;
1606
1607        path = nilfs_btree_alloc_path();
1608        if (!path)
1609                return -ENOMEM;
1610
1611        ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1612        if (!ret)
1613                *keyp = start;
1614        else if (ret == -ENOENT)
1615                ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1616
1617        nilfs_btree_free_path(path);
1618        return ret;
1619}
1620
1621static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1622{
1623        struct nilfs_btree_path *path;
1624        int ret;
1625
1626        path = nilfs_btree_alloc_path();
1627        if (path == NULL)
1628                return -ENOMEM;
1629
1630        ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1631
1632        nilfs_btree_free_path(path);
1633
1634        return ret;
1635}
1636
1637static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1638{
1639        struct buffer_head *bh;
1640        struct nilfs_btree_node *root, *node;
1641        __u64 maxkey, nextmaxkey;
1642        __u64 ptr;
1643        int nchildren, ret;
1644
1645        root = nilfs_btree_get_root(btree);
1646        switch (nilfs_btree_height(btree)) {
1647        case 2:
1648                bh = NULL;
1649                node = root;
1650                break;
1651        case 3:
1652                nchildren = nilfs_btree_node_get_nchildren(root);
1653                if (nchildren > 1)
1654                        return 0;
1655                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1656                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1657                ret = nilfs_btree_get_block(btree, ptr, &bh);
1658                if (ret < 0)
1659                        return ret;
1660                node = (struct nilfs_btree_node *)bh->b_data;
1661                break;
1662        default:
1663                return 0;
1664        }
1665
1666        nchildren = nilfs_btree_node_get_nchildren(node);
1667        maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1668        nextmaxkey = (nchildren > 1) ?
1669                nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1670        if (bh != NULL)
1671                brelse(bh);
1672
1673        return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1674}
1675
1676static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1677                                   __u64 *keys, __u64 *ptrs, int nitems)
1678{
1679        struct buffer_head *bh;
1680        struct nilfs_btree_node *node, *root;
1681        __le64 *dkeys;
1682        __le64 *dptrs;
1683        __u64 ptr;
1684        int nchildren, ncmax, i, ret;
1685
1686        root = nilfs_btree_get_root(btree);
1687        switch (nilfs_btree_height(btree)) {
1688        case 2:
1689                bh = NULL;
1690                node = root;
1691                ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1692                break;
1693        case 3:
1694                nchildren = nilfs_btree_node_get_nchildren(root);
1695                WARN_ON(nchildren > 1);
1696                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1697                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1698                ret = nilfs_btree_get_block(btree, ptr, &bh);
1699                if (ret < 0)
1700                        return ret;
1701                node = (struct nilfs_btree_node *)bh->b_data;
1702                ncmax = nilfs_btree_nchildren_per_block(btree);
1703                break;
1704        default:
1705                node = NULL;
1706                return -EINVAL;
1707        }
1708
1709        nchildren = nilfs_btree_node_get_nchildren(node);
1710        if (nchildren < nitems)
1711                nitems = nchildren;
1712        dkeys = nilfs_btree_node_dkeys(node);
1713        dptrs = nilfs_btree_node_dptrs(node, ncmax);
1714        for (i = 0; i < nitems; i++) {
1715                keys[i] = le64_to_cpu(dkeys[i]);
1716                ptrs[i] = le64_to_cpu(dptrs[i]);
1717        }
1718
1719        if (bh != NULL)
1720                brelse(bh);
1721
1722        return nitems;
1723}
1724
1725static int
1726nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1727                                       union nilfs_bmap_ptr_req *dreq,
1728                                       union nilfs_bmap_ptr_req *nreq,
1729                                       struct buffer_head **bhp,
1730                                       struct nilfs_bmap_stats *stats)
1731{
1732        struct buffer_head *bh;
1733        struct inode *dat = NULL;
1734        int ret;
1735
1736        stats->bs_nblocks = 0;
1737
1738        /* for data */
1739        /* cannot find near ptr */
1740        if (NILFS_BMAP_USE_VBN(btree)) {
1741                dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1742                dat = nilfs_bmap_get_dat(btree);
1743        }
1744
1745        ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1746        if (ret < 0)
1747                return ret;
1748
1749        *bhp = NULL;
1750        stats->bs_nblocks++;
1751        if (nreq != NULL) {
1752                nreq->bpr_ptr = dreq->bpr_ptr + 1;
1753                ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1754                if (ret < 0)
1755                        goto err_out_dreq;
1756
1757                ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1758                if (ret < 0)
1759                        goto err_out_nreq;
1760
1761                *bhp = bh;
1762                stats->bs_nblocks++;
1763        }
1764
1765        /* success */
1766        return 0;
1767
1768        /* error */
1769 err_out_nreq:
1770        nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1771 err_out_dreq:
1772        nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1773        stats->bs_nblocks = 0;
1774        return ret;
1775
1776}
1777
1778static void
1779nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1780                                      __u64 key, __u64 ptr,
1781                                      const __u64 *keys, const __u64 *ptrs,
1782                                      int n,
1783                                      union nilfs_bmap_ptr_req *dreq,
1784                                      union nilfs_bmap_ptr_req *nreq,
1785                                      struct buffer_head *bh)
1786{
1787        struct nilfs_btree_node *node;
1788        struct inode *dat;
1789        __u64 tmpptr;
1790        int ncblk;
1791
1792        /* free resources */
1793        if (btree->b_ops->bop_clear != NULL)
1794                btree->b_ops->bop_clear(btree);
1795
1796        /* ptr must be a pointer to a buffer head. */
1797        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1798
1799        /* convert and insert */
1800        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1801        __nilfs_btree_init(btree);
1802        if (nreq != NULL) {
1803                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1804                nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1805
1806                /* create child node at level 1 */
1807                node = (struct nilfs_btree_node *)bh->b_data;
1808                ncblk = nilfs_btree_nchildren_per_block(btree);
1809                nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1810                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1811                if (!buffer_dirty(bh))
1812                        mark_buffer_dirty(bh);
1813                if (!nilfs_bmap_dirty(btree))
1814                        nilfs_bmap_set_dirty(btree);
1815
1816                brelse(bh);
1817
1818                /* create root node at level 2 */
1819                node = nilfs_btree_get_root(btree);
1820                tmpptr = nreq->bpr_ptr;
1821                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1822                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1823                                      &keys[0], &tmpptr);
1824        } else {
1825                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1826
1827                /* create root node at level 1 */
1828                node = nilfs_btree_get_root(btree);
1829                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1830                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1831                                      keys, ptrs);
1832                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1833                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1834                if (!nilfs_bmap_dirty(btree))
1835                        nilfs_bmap_set_dirty(btree);
1836        }
1837
1838        if (NILFS_BMAP_USE_VBN(btree))
1839                nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1840}
1841
1842/**
1843 * nilfs_btree_convert_and_insert -
1844 * @bmap:
1845 * @key:
1846 * @ptr:
1847 * @keys:
1848 * @ptrs:
1849 * @n:
1850 */
1851int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1852                                   __u64 key, __u64 ptr,
1853                                   const __u64 *keys, const __u64 *ptrs, int n)
1854{
1855        struct buffer_head *bh = NULL;
1856        union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1857        struct nilfs_bmap_stats stats;
1858        int ret;
1859
1860        if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1861                di = &dreq;
1862                ni = NULL;
1863        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1864                           nilfs_btree_node_size(btree))) {
1865                di = &dreq;
1866                ni = &nreq;
1867        } else {
1868                di = NULL;
1869                ni = NULL;
1870                BUG();
1871        }
1872
1873        ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1874                                                     &stats);
1875        if (ret < 0)
1876                return ret;
1877        nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1878                                              di, ni, bh);
1879        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1880        return 0;
1881}
1882
1883static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1884                                   struct nilfs_btree_path *path,
1885                                   int level,
1886                                   struct buffer_head *bh)
1887{
1888        while ((++level < nilfs_btree_height(btree) - 1) &&
1889               !buffer_dirty(path[level].bp_bh))
1890                mark_buffer_dirty(path[level].bp_bh);
1891
1892        return 0;
1893}
1894
1895static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1896                                        struct nilfs_btree_path *path,
1897                                        int level, struct inode *dat)
1898{
1899        struct nilfs_btree_node *parent;
1900        int ncmax, ret;
1901
1902        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1903        path[level].bp_oldreq.bpr_ptr =
1904                nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1905                                         ncmax);
1906        path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1907        ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1908                                       &path[level].bp_newreq.bpr_req);
1909        if (ret < 0)
1910                return ret;
1911
1912        if (buffer_nilfs_node(path[level].bp_bh)) {
1913                path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1914                path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1915                path[level].bp_ctxt.bh = path[level].bp_bh;
1916                ret = nilfs_btnode_prepare_change_key(
1917                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1918                        &path[level].bp_ctxt);
1919                if (ret < 0) {
1920                        nilfs_dat_abort_update(dat,
1921                                               &path[level].bp_oldreq.bpr_req,
1922                                               &path[level].bp_newreq.bpr_req);
1923                        return ret;
1924                }
1925        }
1926
1927        return 0;
1928}
1929
1930static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1931                                        struct nilfs_btree_path *path,
1932                                        int level, struct inode *dat)
1933{
1934        struct nilfs_btree_node *parent;
1935        int ncmax;
1936
1937        nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1938                                &path[level].bp_newreq.bpr_req,
1939                                btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1940
1941        if (buffer_nilfs_node(path[level].bp_bh)) {
1942                nilfs_btnode_commit_change_key(
1943                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1944                        &path[level].bp_ctxt);
1945                path[level].bp_bh = path[level].bp_ctxt.bh;
1946        }
1947        set_buffer_nilfs_volatile(path[level].bp_bh);
1948
1949        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1950        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1951                                 path[level].bp_newreq.bpr_ptr, ncmax);
1952}
1953
1954static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1955                                       struct nilfs_btree_path *path,
1956                                       int level, struct inode *dat)
1957{
1958        nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1959                               &path[level].bp_newreq.bpr_req);
1960        if (buffer_nilfs_node(path[level].bp_bh))
1961                nilfs_btnode_abort_change_key(
1962                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1963                        &path[level].bp_ctxt);
1964}
1965
1966static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1967                                           struct nilfs_btree_path *path,
1968                                           int minlevel, int *maxlevelp,
1969                                           struct inode *dat)
1970{
1971        int level, ret;
1972
1973        level = minlevel;
1974        if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1975                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1976                if (ret < 0)
1977                        return ret;
1978        }
1979        while ((++level < nilfs_btree_height(btree) - 1) &&
1980               !buffer_dirty(path[level].bp_bh)) {
1981
1982                WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1983                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1984                if (ret < 0)
1985                        goto out;
1986        }
1987
1988        /* success */
1989        *maxlevelp = level - 1;
1990        return 0;
1991
1992        /* error */
1993 out:
1994        while (--level > minlevel)
1995                nilfs_btree_abort_update_v(btree, path, level, dat);
1996        if (!buffer_nilfs_volatile(path[level].bp_bh))
1997                nilfs_btree_abort_update_v(btree, path, level, dat);
1998        return ret;
1999}
2000
2001static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2002                                           struct nilfs_btree_path *path,
2003                                           int minlevel, int maxlevel,
2004                                           struct buffer_head *bh,
2005                                           struct inode *dat)
2006{
2007        int level;
2008
2009        if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2010                nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2011
2012        for (level = minlevel + 1; level <= maxlevel; level++)
2013                nilfs_btree_commit_update_v(btree, path, level, dat);
2014}
2015
2016static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2017                                   struct nilfs_btree_path *path,
2018                                   int level, struct buffer_head *bh)
2019{
2020        int maxlevel = 0, ret;
2021        struct nilfs_btree_node *parent;
2022        struct inode *dat = nilfs_bmap_get_dat(btree);
2023        __u64 ptr;
2024        int ncmax;
2025
2026        get_bh(bh);
2027        path[level].bp_bh = bh;
2028        ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2029                                              dat);
2030        if (ret < 0)
2031                goto out;
2032
2033        if (buffer_nilfs_volatile(path[level].bp_bh)) {
2034                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2035                ptr = nilfs_btree_node_get_ptr(parent,
2036                                               path[level + 1].bp_index,
2037                                               ncmax);
2038                ret = nilfs_dat_mark_dirty(dat, ptr);
2039                if (ret < 0)
2040                        goto out;
2041        }
2042
2043        nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2044
2045 out:
2046        brelse(path[level].bp_bh);
2047        path[level].bp_bh = NULL;
2048        return ret;
2049}
2050
2051static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2052                                 struct buffer_head *bh)
2053{
2054        struct nilfs_btree_path *path;
2055        struct nilfs_btree_node *node;
2056        __u64 key;
2057        int level, ret;
2058
2059        WARN_ON(!buffer_dirty(bh));
2060
2061        path = nilfs_btree_alloc_path();
2062        if (path == NULL)
2063                return -ENOMEM;
2064
2065        if (buffer_nilfs_node(bh)) {
2066                node = (struct nilfs_btree_node *)bh->b_data;
2067                key = nilfs_btree_node_get_key(node, 0);
2068                level = nilfs_btree_node_get_level(node);
2069        } else {
2070                key = nilfs_bmap_data_get_key(btree, bh);
2071                level = NILFS_BTREE_LEVEL_DATA;
2072        }
2073
2074        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2075        if (ret < 0) {
2076                if (unlikely(ret == -ENOENT))
2077                        nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2078                                  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2079                                  btree->b_inode->i_ino,
2080                                  (unsigned long long)key, level);
2081                goto out;
2082        }
2083
2084        ret = NILFS_BMAP_USE_VBN(btree) ?
2085                nilfs_btree_propagate_v(btree, path, level, bh) :
2086                nilfs_btree_propagate_p(btree, path, level, bh);
2087
2088 out:
2089        nilfs_btree_free_path(path);
2090
2091        return ret;
2092}
2093
2094static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2095                                    struct buffer_head *bh)
2096{
2097        return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2098}
2099
2100static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2101                                         struct list_head *lists,
2102                                         struct buffer_head *bh)
2103{
2104        struct list_head *head;
2105        struct buffer_head *cbh;
2106        struct nilfs_btree_node *node, *cnode;
2107        __u64 key, ckey;
2108        int level;
2109
2110        get_bh(bh);
2111        node = (struct nilfs_btree_node *)bh->b_data;
2112        key = nilfs_btree_node_get_key(node, 0);
2113        level = nilfs_btree_node_get_level(node);
2114        if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2115            level >= NILFS_BTREE_LEVEL_MAX) {
2116                dump_stack();
2117                nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2118                          "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2119                          level, (unsigned long long)key,
2120                          btree->b_inode->i_ino,
2121                          (unsigned long long)bh->b_blocknr);
2122                return;
2123        }
2124
2125        list_for_each(head, &lists[level]) {
2126                cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2127                cnode = (struct nilfs_btree_node *)cbh->b_data;
2128                ckey = nilfs_btree_node_get_key(cnode, 0);
2129                if (key < ckey)
2130                        break;
2131        }
2132        list_add_tail(&bh->b_assoc_buffers, head);
2133}
2134
2135static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2136                                             struct list_head *listp)
2137{
2138        struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2139        struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2140        struct pagevec pvec;
2141        struct buffer_head *bh, *head;
2142        pgoff_t index = 0;
2143        int level, i;
2144
2145        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2146             level < NILFS_BTREE_LEVEL_MAX;
2147             level++)
2148                INIT_LIST_HEAD(&lists[level]);
2149
2150        pagevec_init(&pvec);
2151
2152        while (pagevec_lookup_tag(&pvec, btcache, &index,
2153                                        PAGECACHE_TAG_DIRTY)) {
2154                for (i = 0; i < pagevec_count(&pvec); i++) {
2155                        bh = head = page_buffers(pvec.pages[i]);
2156                        do {
2157                                if (buffer_dirty(bh))
2158                                        nilfs_btree_add_dirty_buffer(btree,
2159                                                                     lists, bh);
2160                        } while ((bh = bh->b_this_page) != head);
2161                }
2162                pagevec_release(&pvec);
2163                cond_resched();
2164        }
2165
2166        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2167             level < NILFS_BTREE_LEVEL_MAX;
2168             level++)
2169                list_splice_tail(&lists[level], listp);
2170}
2171
2172static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2173                                struct nilfs_btree_path *path,
2174                                int level,
2175                                struct buffer_head **bh,
2176                                sector_t blocknr,
2177                                union nilfs_binfo *binfo)
2178{
2179        struct nilfs_btree_node *parent;
2180        __u64 key;
2181        __u64 ptr;
2182        int ncmax, ret;
2183
2184        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2185        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2186                                       ncmax);
2187        if (buffer_nilfs_node(*bh)) {
2188                path[level].bp_ctxt.oldkey = ptr;
2189                path[level].bp_ctxt.newkey = blocknr;
2190                path[level].bp_ctxt.bh = *bh;
2191                ret = nilfs_btnode_prepare_change_key(
2192                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2193                        &path[level].bp_ctxt);
2194                if (ret < 0)
2195                        return ret;
2196                nilfs_btnode_commit_change_key(
2197                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2198                        &path[level].bp_ctxt);
2199                *bh = path[level].bp_ctxt.bh;
2200        }
2201
2202        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2203                                 ncmax);
2204
2205        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2206        /* on-disk format */
2207        binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2208        binfo->bi_dat.bi_level = level;
2209
2210        return 0;
2211}
2212
2213static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2214                                struct nilfs_btree_path *path,
2215                                int level,
2216                                struct buffer_head **bh,
2217                                sector_t blocknr,
2218                                union nilfs_binfo *binfo)
2219{
2220        struct nilfs_btree_node *parent;
2221        struct inode *dat = nilfs_bmap_get_dat(btree);
2222        __u64 key;
2223        __u64 ptr;
2224        union nilfs_bmap_ptr_req req;
2225        int ncmax, ret;
2226
2227        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2228        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2229                                       ncmax);
2230        req.bpr_ptr = ptr;
2231        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2232        if (ret < 0)
2233                return ret;
2234        nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2235
2236        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2237        /* on-disk format */
2238        binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2239        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2240
2241        return 0;
2242}
2243
2244static int nilfs_btree_assign(struct nilfs_bmap *btree,
2245                              struct buffer_head **bh,
2246                              sector_t blocknr,
2247                              union nilfs_binfo *binfo)
2248{
2249        struct nilfs_btree_path *path;
2250        struct nilfs_btree_node *node;
2251        __u64 key;
2252        int level, ret;
2253
2254        path = nilfs_btree_alloc_path();
2255        if (path == NULL)
2256                return -ENOMEM;
2257
2258        if (buffer_nilfs_node(*bh)) {
2259                node = (struct nilfs_btree_node *)(*bh)->b_data;
2260                key = nilfs_btree_node_get_key(node, 0);
2261                level = nilfs_btree_node_get_level(node);
2262        } else {
2263                key = nilfs_bmap_data_get_key(btree, *bh);
2264                level = NILFS_BTREE_LEVEL_DATA;
2265        }
2266
2267        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2268        if (ret < 0) {
2269                WARN_ON(ret == -ENOENT);
2270                goto out;
2271        }
2272
2273        ret = NILFS_BMAP_USE_VBN(btree) ?
2274                nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2275                nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2276
2277 out:
2278        nilfs_btree_free_path(path);
2279
2280        return ret;
2281}
2282
2283static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2284                                 struct buffer_head **bh,
2285                                 sector_t blocknr,
2286                                 union nilfs_binfo *binfo)
2287{
2288        struct nilfs_btree_node *node;
2289        __u64 key;
2290        int ret;
2291
2292        ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2293                             blocknr);
2294        if (ret < 0)
2295                return ret;
2296
2297        if (buffer_nilfs_node(*bh)) {
2298                node = (struct nilfs_btree_node *)(*bh)->b_data;
2299                key = nilfs_btree_node_get_key(node, 0);
2300        } else
2301                key = nilfs_bmap_data_get_key(btree, *bh);
2302
2303        /* on-disk format */
2304        binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2305        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2306
2307        return 0;
2308}
2309
2310static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2311{
2312        struct buffer_head *bh;
2313        struct nilfs_btree_path *path;
2314        __u64 ptr;
2315        int ret;
2316
2317        path = nilfs_btree_alloc_path();
2318        if (path == NULL)
2319                return -ENOMEM;
2320
2321        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2322        if (ret < 0) {
2323                WARN_ON(ret == -ENOENT);
2324                goto out;
2325        }
2326        ret = nilfs_btree_get_block(btree, ptr, &bh);
2327        if (ret < 0) {
2328                WARN_ON(ret == -ENOENT);
2329                goto out;
2330        }
2331
2332        if (!buffer_dirty(bh))
2333                mark_buffer_dirty(bh);
2334        brelse(bh);
2335        if (!nilfs_bmap_dirty(btree))
2336                nilfs_bmap_set_dirty(btree);
2337
2338 out:
2339        nilfs_btree_free_path(path);
2340        return ret;
2341}
2342
2343static const struct nilfs_bmap_operations nilfs_btree_ops = {
2344        .bop_lookup             =       nilfs_btree_lookup,
2345        .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2346        .bop_insert             =       nilfs_btree_insert,
2347        .bop_delete             =       nilfs_btree_delete,
2348        .bop_clear              =       NULL,
2349
2350        .bop_propagate          =       nilfs_btree_propagate,
2351
2352        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2353
2354        .bop_assign             =       nilfs_btree_assign,
2355        .bop_mark               =       nilfs_btree_mark,
2356
2357        .bop_seek_key           =       nilfs_btree_seek_key,
2358        .bop_last_key           =       nilfs_btree_last_key,
2359
2360        .bop_check_insert       =       NULL,
2361        .bop_check_delete       =       nilfs_btree_check_delete,
2362        .bop_gather_data        =       nilfs_btree_gather_data,
2363};
2364
2365static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2366        .bop_lookup             =       NULL,
2367        .bop_lookup_contig      =       NULL,
2368        .bop_insert             =       NULL,
2369        .bop_delete             =       NULL,
2370        .bop_clear              =       NULL,
2371
2372        .bop_propagate          =       nilfs_btree_propagate_gc,
2373
2374        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2375
2376        .bop_assign             =       nilfs_btree_assign_gc,
2377        .bop_mark               =       NULL,
2378
2379        .bop_seek_key           =       NULL,
2380        .bop_last_key           =       NULL,
2381
2382        .bop_check_insert       =       NULL,
2383        .bop_check_delete       =       NULL,
2384        .bop_gather_data        =       NULL,
2385};
2386
2387static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2388{
2389        bmap->b_ops = &nilfs_btree_ops;
2390        bmap->b_nchildren_per_block =
2391                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2392}
2393
2394int nilfs_btree_init(struct nilfs_bmap *bmap)
2395{
2396        int ret = 0;
2397
2398        __nilfs_btree_init(bmap);
2399
2400        if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2401                ret = -EIO;
2402        return ret;
2403}
2404
2405void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2406{
2407        bmap->b_ops = &nilfs_btree_ops_gc;
2408        bmap->b_nchildren_per_block =
2409                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2410}
2411