linux/fs/nilfs2/btree.c
<<
>>
Prefs
   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * Written by Koji Sato.
  17 */
  18
  19#include <linux/slab.h>
  20#include <linux/string.h>
  21#include <linux/errno.h>
  22#include <linux/pagevec.h>
  23#include "nilfs.h"
  24#include "page.h"
  25#include "btnode.h"
  26#include "btree.h"
  27#include "alloc.h"
  28#include "dat.h"
  29
  30static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  31
  32static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  33{
  34        struct nilfs_btree_path *path;
  35        int level = NILFS_BTREE_LEVEL_DATA;
  36
  37        path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  38        if (path == NULL)
  39                goto out;
  40
  41        for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  42                path[level].bp_bh = NULL;
  43                path[level].bp_sib_bh = NULL;
  44                path[level].bp_index = 0;
  45                path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  46                path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  47                path[level].bp_op = NULL;
  48        }
  49
  50out:
  51        return path;
  52}
  53
  54static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  55{
  56        int level = NILFS_BTREE_LEVEL_DATA;
  57
  58        for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  59                brelse(path[level].bp_bh);
  60
  61        kmem_cache_free(nilfs_btree_path_cache, path);
  62}
  63
  64/*
  65 * B-tree node operations
  66 */
  67static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  68                                     __u64 ptr, struct buffer_head **bhp)
  69{
  70        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  71        struct buffer_head *bh;
  72
  73        bh = nilfs_btnode_create_block(btnc, ptr);
  74        if (!bh)
  75                return -ENOMEM;
  76
  77        set_buffer_nilfs_volatile(bh);
  78        *bhp = bh;
  79        return 0;
  80}
  81
  82static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  83{
  84        return node->bn_flags;
  85}
  86
  87static void
  88nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  89{
  90        node->bn_flags = flags;
  91}
  92
  93static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  94{
  95        return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
  96}
  97
  98static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
  99{
 100        return node->bn_level;
 101}
 102
 103static void
 104nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 105{
 106        node->bn_level = level;
 107}
 108
 109static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 110{
 111        return le16_to_cpu(node->bn_nchildren);
 112}
 113
 114static void
 115nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 116{
 117        node->bn_nchildren = cpu_to_le16(nchildren);
 118}
 119
 120static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 121{
 122        return i_blocksize(btree->b_inode);
 123}
 124
 125static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 126{
 127        return btree->b_nchildren_per_block;
 128}
 129
 130static __le64 *
 131nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 132{
 133        return (__le64 *)((char *)(node + 1) +
 134                          (nilfs_btree_node_root(node) ?
 135                           0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 136}
 137
 138static __le64 *
 139nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 140{
 141        return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 142}
 143
 144static __u64
 145nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 146{
 147        return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 148}
 149
 150static void
 151nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 152{
 153        *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 154}
 155
 156static __u64
 157nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 158                         int ncmax)
 159{
 160        return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 161}
 162
 163static void
 164nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 165                         int ncmax)
 166{
 167        *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 168}
 169
 170static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 171                                  int level, int nchildren, int ncmax,
 172                                  const __u64 *keys, const __u64 *ptrs)
 173{
 174        __le64 *dkeys;
 175        __le64 *dptrs;
 176        int i;
 177
 178        nilfs_btree_node_set_flags(node, flags);
 179        nilfs_btree_node_set_level(node, level);
 180        nilfs_btree_node_set_nchildren(node, nchildren);
 181
 182        dkeys = nilfs_btree_node_dkeys(node);
 183        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 184        for (i = 0; i < nchildren; i++) {
 185                dkeys[i] = cpu_to_le64(keys[i]);
 186                dptrs[i] = cpu_to_le64(ptrs[i]);
 187        }
 188}
 189
 190/* Assume the buffer heads corresponding to left and right are locked. */
 191static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 192                                       struct nilfs_btree_node *right,
 193                                       int n, int lncmax, int rncmax)
 194{
 195        __le64 *ldkeys, *rdkeys;
 196        __le64 *ldptrs, *rdptrs;
 197        int lnchildren, rnchildren;
 198
 199        ldkeys = nilfs_btree_node_dkeys(left);
 200        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 201        lnchildren = nilfs_btree_node_get_nchildren(left);
 202
 203        rdkeys = nilfs_btree_node_dkeys(right);
 204        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 205        rnchildren = nilfs_btree_node_get_nchildren(right);
 206
 207        memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 208        memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 209        memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 210        memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 211
 212        lnchildren += n;
 213        rnchildren -= n;
 214        nilfs_btree_node_set_nchildren(left, lnchildren);
 215        nilfs_btree_node_set_nchildren(right, rnchildren);
 216}
 217
 218/* Assume that the buffer heads corresponding to left and right are locked. */
 219static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 220                                        struct nilfs_btree_node *right,
 221                                        int n, int lncmax, int rncmax)
 222{
 223        __le64 *ldkeys, *rdkeys;
 224        __le64 *ldptrs, *rdptrs;
 225        int lnchildren, rnchildren;
 226
 227        ldkeys = nilfs_btree_node_dkeys(left);
 228        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 229        lnchildren = nilfs_btree_node_get_nchildren(left);
 230
 231        rdkeys = nilfs_btree_node_dkeys(right);
 232        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 233        rnchildren = nilfs_btree_node_get_nchildren(right);
 234
 235        memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 236        memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 237        memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 238        memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 239
 240        lnchildren -= n;
 241        rnchildren += n;
 242        nilfs_btree_node_set_nchildren(left, lnchildren);
 243        nilfs_btree_node_set_nchildren(right, rnchildren);
 244}
 245
 246/* Assume that the buffer head corresponding to node is locked. */
 247static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 248                                    __u64 key, __u64 ptr, int ncmax)
 249{
 250        __le64 *dkeys;
 251        __le64 *dptrs;
 252        int nchildren;
 253
 254        dkeys = nilfs_btree_node_dkeys(node);
 255        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 256        nchildren = nilfs_btree_node_get_nchildren(node);
 257        if (index < nchildren) {
 258                memmove(dkeys + index + 1, dkeys + index,
 259                        (nchildren - index) * sizeof(*dkeys));
 260                memmove(dptrs + index + 1, dptrs + index,
 261                        (nchildren - index) * sizeof(*dptrs));
 262        }
 263        dkeys[index] = cpu_to_le64(key);
 264        dptrs[index] = cpu_to_le64(ptr);
 265        nchildren++;
 266        nilfs_btree_node_set_nchildren(node, nchildren);
 267}
 268
 269/* Assume that the buffer head corresponding to node is locked. */
 270static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 271                                    __u64 *keyp, __u64 *ptrp, int ncmax)
 272{
 273        __u64 key;
 274        __u64 ptr;
 275        __le64 *dkeys;
 276        __le64 *dptrs;
 277        int nchildren;
 278
 279        dkeys = nilfs_btree_node_dkeys(node);
 280        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 281        key = le64_to_cpu(dkeys[index]);
 282        ptr = le64_to_cpu(dptrs[index]);
 283        nchildren = nilfs_btree_node_get_nchildren(node);
 284        if (keyp != NULL)
 285                *keyp = key;
 286        if (ptrp != NULL)
 287                *ptrp = ptr;
 288
 289        if (index < nchildren - 1) {
 290                memmove(dkeys + index, dkeys + index + 1,
 291                        (nchildren - index - 1) * sizeof(*dkeys));
 292                memmove(dptrs + index, dptrs + index + 1,
 293                        (nchildren - index - 1) * sizeof(*dptrs));
 294        }
 295        nchildren--;
 296        nilfs_btree_node_set_nchildren(node, nchildren);
 297}
 298
 299static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 300                                   __u64 key, int *indexp)
 301{
 302        __u64 nkey;
 303        int index, low, high, s;
 304
 305        /* binary search */
 306        low = 0;
 307        high = nilfs_btree_node_get_nchildren(node) - 1;
 308        index = 0;
 309        s = 0;
 310        while (low <= high) {
 311                index = (low + high) / 2;
 312                nkey = nilfs_btree_node_get_key(node, index);
 313                if (nkey == key) {
 314                        s = 0;
 315                        goto out;
 316                } else if (nkey < key) {
 317                        low = index + 1;
 318                        s = -1;
 319                } else {
 320                        high = index - 1;
 321                        s = 1;
 322                }
 323        }
 324
 325        /* adjust index */
 326        if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 327                if (s > 0 && index > 0)
 328                        index--;
 329        } else if (s < 0)
 330                index++;
 331
 332 out:
 333        *indexp = index;
 334
 335        return s == 0;
 336}
 337
 338/**
 339 * nilfs_btree_node_broken - verify consistency of btree node
 340 * @node: btree node block to be examined
 341 * @size: node size (in bytes)
 342 * @inode: host inode of btree
 343 * @blocknr: block number
 344 *
 345 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 346 */
 347static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 348                                   size_t size, struct inode *inode,
 349                                   sector_t blocknr)
 350{
 351        int level, flags, nchildren;
 352        int ret = 0;
 353
 354        level = nilfs_btree_node_get_level(node);
 355        flags = nilfs_btree_node_get_flags(node);
 356        nchildren = nilfs_btree_node_get_nchildren(node);
 357
 358        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 359                     level >= NILFS_BTREE_LEVEL_MAX ||
 360                     (flags & NILFS_BTREE_NODE_ROOT) ||
 361                     nchildren < 0 ||
 362                     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 363                nilfs_msg(inode->i_sb, KERN_CRIT,
 364                          "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 365                          inode->i_ino, (unsigned long long)blocknr, level,
 366                          flags, nchildren);
 367                ret = 1;
 368        }
 369        return ret;
 370}
 371
 372/**
 373 * nilfs_btree_root_broken - verify consistency of btree root node
 374 * @node: btree root node to be examined
 375 * @inode: host inode of btree
 376 *
 377 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 378 */
 379static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 380                                   struct inode *inode)
 381{
 382        int level, flags, nchildren;
 383        int ret = 0;
 384
 385        level = nilfs_btree_node_get_level(node);
 386        flags = nilfs_btree_node_get_flags(node);
 387        nchildren = nilfs_btree_node_get_nchildren(node);
 388
 389        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 390                     level >= NILFS_BTREE_LEVEL_MAX ||
 391                     nchildren < 0 ||
 392                     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 393                nilfs_msg(inode->i_sb, KERN_CRIT,
 394                          "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 395                          inode->i_ino, level, flags, nchildren);
 396                ret = 1;
 397        }
 398        return ret;
 399}
 400
 401int nilfs_btree_broken_node_block(struct buffer_head *bh)
 402{
 403        struct inode *inode;
 404        int ret;
 405
 406        if (buffer_nilfs_checked(bh))
 407                return 0;
 408
 409        inode = bh->b_page->mapping->host;
 410        ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 411                                      bh->b_size, inode, bh->b_blocknr);
 412        if (likely(!ret))
 413                set_buffer_nilfs_checked(bh);
 414        return ret;
 415}
 416
 417static struct nilfs_btree_node *
 418nilfs_btree_get_root(const struct nilfs_bmap *btree)
 419{
 420        return (struct nilfs_btree_node *)btree->b_u.u_data;
 421}
 422
 423static struct nilfs_btree_node *
 424nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 425{
 426        return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 427}
 428
 429static struct nilfs_btree_node *
 430nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 431{
 432        return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 433}
 434
 435static int nilfs_btree_height(const struct nilfs_bmap *btree)
 436{
 437        return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 438}
 439
 440static struct nilfs_btree_node *
 441nilfs_btree_get_node(const struct nilfs_bmap *btree,
 442                     const struct nilfs_btree_path *path,
 443                     int level, int *ncmaxp)
 444{
 445        struct nilfs_btree_node *node;
 446
 447        if (level == nilfs_btree_height(btree) - 1) {
 448                node = nilfs_btree_get_root(btree);
 449                *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 450        } else {
 451                node = nilfs_btree_get_nonroot_node(path, level);
 452                *ncmaxp = nilfs_btree_nchildren_per_block(btree);
 453        }
 454        return node;
 455}
 456
 457static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 458                                struct nilfs_btree_node *node, int level)
 459{
 460        if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 461                dump_stack();
 462                nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
 463                          "btree level mismatch (ino=%lu): %d != %d",
 464                          btree->b_inode->i_ino,
 465                          nilfs_btree_node_get_level(node), level);
 466                return 1;
 467        }
 468        return 0;
 469}
 470
 471struct nilfs_btree_readahead_info {
 472        struct nilfs_btree_node *node;  /* parent node */
 473        int max_ra_blocks;              /* max nof blocks to read ahead */
 474        int index;                      /* current index on the parent node */
 475        int ncmax;                      /* nof children in the parent node */
 476};
 477
 478static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 479                                   struct buffer_head **bhp,
 480                                   const struct nilfs_btree_readahead_info *ra)
 481{
 482        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 483        struct buffer_head *bh, *ra_bh;
 484        sector_t submit_ptr = 0;
 485        int ret;
 486
 487        ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
 488                                        &submit_ptr);
 489        if (ret) {
 490                if (ret != -EEXIST)
 491                        return ret;
 492                goto out_check;
 493        }
 494
 495        if (ra) {
 496                int i, n;
 497                __u64 ptr2;
 498
 499                /* read ahead sibling nodes */
 500                for (n = ra->max_ra_blocks, i = ra->index + 1;
 501                     n > 0 && i < ra->ncmax; n--, i++) {
 502                        ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 503
 504                        ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
 505                                                        REQ_OP_READ, REQ_RAHEAD,
 506                                                        &ra_bh, &submit_ptr);
 507                        if (likely(!ret || ret == -EEXIST))
 508                                brelse(ra_bh);
 509                        else if (ret != -EBUSY)
 510                                break;
 511                        if (!buffer_locked(bh))
 512                                goto out_no_wait;
 513                }
 514        }
 515
 516        wait_on_buffer(bh);
 517
 518 out_no_wait:
 519        if (!buffer_uptodate(bh)) {
 520                nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
 521                          "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 522                          btree->b_inode->i_ino, (unsigned long long)ptr);
 523                brelse(bh);
 524                return -EIO;
 525        }
 526
 527 out_check:
 528        if (nilfs_btree_broken_node_block(bh)) {
 529                clear_buffer_uptodate(bh);
 530                brelse(bh);
 531                return -EINVAL;
 532        }
 533
 534        *bhp = bh;
 535        return 0;
 536}
 537
 538static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 539                                   struct buffer_head **bhp)
 540{
 541        return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 542}
 543
 544static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 545                                 struct nilfs_btree_path *path,
 546                                 __u64 key, __u64 *ptrp, int minlevel,
 547                                 int readahead)
 548{
 549        struct nilfs_btree_node *node;
 550        struct nilfs_btree_readahead_info p, *ra;
 551        __u64 ptr;
 552        int level, index, found, ncmax, ret;
 553
 554        node = nilfs_btree_get_root(btree);
 555        level = nilfs_btree_node_get_level(node);
 556        if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 557                return -ENOENT;
 558
 559        found = nilfs_btree_node_lookup(node, key, &index);
 560        ptr = nilfs_btree_node_get_ptr(node, index,
 561                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 562        path[level].bp_bh = NULL;
 563        path[level].bp_index = index;
 564
 565        ncmax = nilfs_btree_nchildren_per_block(btree);
 566
 567        while (--level >= minlevel) {
 568                ra = NULL;
 569                if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 570                        p.node = nilfs_btree_get_node(btree, path, level + 1,
 571                                                      &p.ncmax);
 572                        p.index = index;
 573                        p.max_ra_blocks = 7;
 574                        ra = &p;
 575                }
 576                ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 577                                              ra);
 578                if (ret < 0)
 579                        return ret;
 580
 581                node = nilfs_btree_get_nonroot_node(path, level);
 582                if (nilfs_btree_bad_node(btree, node, level))
 583                        return -EINVAL;
 584                if (!found)
 585                        found = nilfs_btree_node_lookup(node, key, &index);
 586                else
 587                        index = 0;
 588                if (index < ncmax) {
 589                        ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 590                } else {
 591                        WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 592                        /* insert */
 593                        ptr = NILFS_BMAP_INVALID_PTR;
 594                }
 595                path[level].bp_index = index;
 596        }
 597        if (!found)
 598                return -ENOENT;
 599
 600        if (ptrp != NULL)
 601                *ptrp = ptr;
 602
 603        return 0;
 604}
 605
 606static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 607                                      struct nilfs_btree_path *path,
 608                                      __u64 *keyp, __u64 *ptrp)
 609{
 610        struct nilfs_btree_node *node;
 611        __u64 ptr;
 612        int index, level, ncmax, ret;
 613
 614        node = nilfs_btree_get_root(btree);
 615        index = nilfs_btree_node_get_nchildren(node) - 1;
 616        if (index < 0)
 617                return -ENOENT;
 618        level = nilfs_btree_node_get_level(node);
 619        ptr = nilfs_btree_node_get_ptr(node, index,
 620                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 621        path[level].bp_bh = NULL;
 622        path[level].bp_index = index;
 623        ncmax = nilfs_btree_nchildren_per_block(btree);
 624
 625        for (level--; level > 0; level--) {
 626                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 627                if (ret < 0)
 628                        return ret;
 629                node = nilfs_btree_get_nonroot_node(path, level);
 630                if (nilfs_btree_bad_node(btree, node, level))
 631                        return -EINVAL;
 632                index = nilfs_btree_node_get_nchildren(node) - 1;
 633                ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 634                path[level].bp_index = index;
 635        }
 636
 637        if (keyp != NULL)
 638                *keyp = nilfs_btree_node_get_key(node, index);
 639        if (ptrp != NULL)
 640                *ptrp = ptr;
 641
 642        return 0;
 643}
 644
 645/**
 646 * nilfs_btree_get_next_key - get next valid key from btree path array
 647 * @btree: bmap struct of btree
 648 * @path: array of nilfs_btree_path struct
 649 * @minlevel: start level
 650 * @nextkey: place to store the next valid key
 651 *
 652 * Return Value: If a next key was found, 0 is returned. Otherwise,
 653 * -ENOENT is returned.
 654 */
 655static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 656                                    const struct nilfs_btree_path *path,
 657                                    int minlevel, __u64 *nextkey)
 658{
 659        struct nilfs_btree_node *node;
 660        int maxlevel = nilfs_btree_height(btree) - 1;
 661        int index, next_adj, level;
 662
 663        /* Next index is already set to bp_index for leaf nodes. */
 664        next_adj = 0;
 665        for (level = minlevel; level <= maxlevel; level++) {
 666                if (level == maxlevel)
 667                        node = nilfs_btree_get_root(btree);
 668                else
 669                        node = nilfs_btree_get_nonroot_node(path, level);
 670
 671                index = path[level].bp_index + next_adj;
 672                if (index < nilfs_btree_node_get_nchildren(node)) {
 673                        /* Next key is in this node */
 674                        *nextkey = nilfs_btree_node_get_key(node, index);
 675                        return 0;
 676                }
 677                /* For non-leaf nodes, next index is stored at bp_index + 1. */
 678                next_adj = 1;
 679        }
 680        return -ENOENT;
 681}
 682
 683static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 684                              __u64 key, int level, __u64 *ptrp)
 685{
 686        struct nilfs_btree_path *path;
 687        int ret;
 688
 689        path = nilfs_btree_alloc_path();
 690        if (path == NULL)
 691                return -ENOMEM;
 692
 693        ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 694
 695        nilfs_btree_free_path(path);
 696
 697        return ret;
 698}
 699
 700static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 701                                     __u64 key, __u64 *ptrp,
 702                                     unsigned int maxblocks)
 703{
 704        struct nilfs_btree_path *path;
 705        struct nilfs_btree_node *node;
 706        struct inode *dat = NULL;
 707        __u64 ptr, ptr2;
 708        sector_t blocknr;
 709        int level = NILFS_BTREE_LEVEL_NODE_MIN;
 710        int ret, cnt, index, maxlevel, ncmax;
 711        struct nilfs_btree_readahead_info p;
 712
 713        path = nilfs_btree_alloc_path();
 714        if (path == NULL)
 715                return -ENOMEM;
 716
 717        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 718        if (ret < 0)
 719                goto out;
 720
 721        if (NILFS_BMAP_USE_VBN(btree)) {
 722                dat = nilfs_bmap_get_dat(btree);
 723                ret = nilfs_dat_translate(dat, ptr, &blocknr);
 724                if (ret < 0)
 725                        goto out;
 726                ptr = blocknr;
 727        }
 728        cnt = 1;
 729        if (cnt == maxblocks)
 730                goto end;
 731
 732        maxlevel = nilfs_btree_height(btree) - 1;
 733        node = nilfs_btree_get_node(btree, path, level, &ncmax);
 734        index = path[level].bp_index + 1;
 735        for (;;) {
 736                while (index < nilfs_btree_node_get_nchildren(node)) {
 737                        if (nilfs_btree_node_get_key(node, index) !=
 738                            key + cnt)
 739                                goto end;
 740                        ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 741                        if (dat) {
 742                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 743                                if (ret < 0)
 744                                        goto out;
 745                                ptr2 = blocknr;
 746                        }
 747                        if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 748                                goto end;
 749                        index++;
 750                        continue;
 751                }
 752                if (level == maxlevel)
 753                        break;
 754
 755                /* look-up right sibling node */
 756                p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 757                p.index = path[level + 1].bp_index + 1;
 758                p.max_ra_blocks = 7;
 759                if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 760                    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 761                        break;
 762                ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 763                path[level + 1].bp_index = p.index;
 764
 765                brelse(path[level].bp_bh);
 766                path[level].bp_bh = NULL;
 767
 768                ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 769                                              &p);
 770                if (ret < 0)
 771                        goto out;
 772                node = nilfs_btree_get_nonroot_node(path, level);
 773                ncmax = nilfs_btree_nchildren_per_block(btree);
 774                index = 0;
 775                path[level].bp_index = index;
 776        }
 777 end:
 778        *ptrp = ptr;
 779        ret = cnt;
 780 out:
 781        nilfs_btree_free_path(path);
 782        return ret;
 783}
 784
 785static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 786                                    struct nilfs_btree_path *path,
 787                                    int level, __u64 key)
 788{
 789        if (level < nilfs_btree_height(btree) - 1) {
 790                do {
 791                        nilfs_btree_node_set_key(
 792                                nilfs_btree_get_nonroot_node(path, level),
 793                                path[level].bp_index, key);
 794                        if (!buffer_dirty(path[level].bp_bh))
 795                                mark_buffer_dirty(path[level].bp_bh);
 796                } while ((path[level].bp_index == 0) &&
 797                         (++level < nilfs_btree_height(btree) - 1));
 798        }
 799
 800        /* root */
 801        if (level == nilfs_btree_height(btree) - 1) {
 802                nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 803                                         path[level].bp_index, key);
 804        }
 805}
 806
 807static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 808                                  struct nilfs_btree_path *path,
 809                                  int level, __u64 *keyp, __u64 *ptrp)
 810{
 811        struct nilfs_btree_node *node;
 812        int ncblk;
 813
 814        if (level < nilfs_btree_height(btree) - 1) {
 815                node = nilfs_btree_get_nonroot_node(path, level);
 816                ncblk = nilfs_btree_nchildren_per_block(btree);
 817                nilfs_btree_node_insert(node, path[level].bp_index,
 818                                        *keyp, *ptrp, ncblk);
 819                if (!buffer_dirty(path[level].bp_bh))
 820                        mark_buffer_dirty(path[level].bp_bh);
 821
 822                if (path[level].bp_index == 0)
 823                        nilfs_btree_promote_key(btree, path, level + 1,
 824                                                nilfs_btree_node_get_key(node,
 825                                                                         0));
 826        } else {
 827                node = nilfs_btree_get_root(btree);
 828                nilfs_btree_node_insert(node, path[level].bp_index,
 829                                        *keyp, *ptrp,
 830                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
 831        }
 832}
 833
 834static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 835                                   struct nilfs_btree_path *path,
 836                                   int level, __u64 *keyp, __u64 *ptrp)
 837{
 838        struct nilfs_btree_node *node, *left;
 839        int nchildren, lnchildren, n, move, ncblk;
 840
 841        node = nilfs_btree_get_nonroot_node(path, level);
 842        left = nilfs_btree_get_sib_node(path, level);
 843        nchildren = nilfs_btree_node_get_nchildren(node);
 844        lnchildren = nilfs_btree_node_get_nchildren(left);
 845        ncblk = nilfs_btree_nchildren_per_block(btree);
 846        move = 0;
 847
 848        n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 849        if (n > path[level].bp_index) {
 850                /* move insert point */
 851                n--;
 852                move = 1;
 853        }
 854
 855        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 856
 857        if (!buffer_dirty(path[level].bp_bh))
 858                mark_buffer_dirty(path[level].bp_bh);
 859        if (!buffer_dirty(path[level].bp_sib_bh))
 860                mark_buffer_dirty(path[level].bp_sib_bh);
 861
 862        nilfs_btree_promote_key(btree, path, level + 1,
 863                                nilfs_btree_node_get_key(node, 0));
 864
 865        if (move) {
 866                brelse(path[level].bp_bh);
 867                path[level].bp_bh = path[level].bp_sib_bh;
 868                path[level].bp_sib_bh = NULL;
 869                path[level].bp_index += lnchildren;
 870                path[level + 1].bp_index--;
 871        } else {
 872                brelse(path[level].bp_sib_bh);
 873                path[level].bp_sib_bh = NULL;
 874                path[level].bp_index -= n;
 875        }
 876
 877        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 878}
 879
 880static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 881                                    struct nilfs_btree_path *path,
 882                                    int level, __u64 *keyp, __u64 *ptrp)
 883{
 884        struct nilfs_btree_node *node, *right;
 885        int nchildren, rnchildren, n, move, ncblk;
 886
 887        node = nilfs_btree_get_nonroot_node(path, level);
 888        right = nilfs_btree_get_sib_node(path, level);
 889        nchildren = nilfs_btree_node_get_nchildren(node);
 890        rnchildren = nilfs_btree_node_get_nchildren(right);
 891        ncblk = nilfs_btree_nchildren_per_block(btree);
 892        move = 0;
 893
 894        n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 895        if (n > nchildren - path[level].bp_index) {
 896                /* move insert point */
 897                n--;
 898                move = 1;
 899        }
 900
 901        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 902
 903        if (!buffer_dirty(path[level].bp_bh))
 904                mark_buffer_dirty(path[level].bp_bh);
 905        if (!buffer_dirty(path[level].bp_sib_bh))
 906                mark_buffer_dirty(path[level].bp_sib_bh);
 907
 908        path[level + 1].bp_index++;
 909        nilfs_btree_promote_key(btree, path, level + 1,
 910                                nilfs_btree_node_get_key(right, 0));
 911        path[level + 1].bp_index--;
 912
 913        if (move) {
 914                brelse(path[level].bp_bh);
 915                path[level].bp_bh = path[level].bp_sib_bh;
 916                path[level].bp_sib_bh = NULL;
 917                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 918                path[level + 1].bp_index++;
 919        } else {
 920                brelse(path[level].bp_sib_bh);
 921                path[level].bp_sib_bh = NULL;
 922        }
 923
 924        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 925}
 926
 927static void nilfs_btree_split(struct nilfs_bmap *btree,
 928                              struct nilfs_btree_path *path,
 929                              int level, __u64 *keyp, __u64 *ptrp)
 930{
 931        struct nilfs_btree_node *node, *right;
 932        int nchildren, n, move, ncblk;
 933
 934        node = nilfs_btree_get_nonroot_node(path, level);
 935        right = nilfs_btree_get_sib_node(path, level);
 936        nchildren = nilfs_btree_node_get_nchildren(node);
 937        ncblk = nilfs_btree_nchildren_per_block(btree);
 938        move = 0;
 939
 940        n = (nchildren + 1) / 2;
 941        if (n > nchildren - path[level].bp_index) {
 942                n--;
 943                move = 1;
 944        }
 945
 946        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 947
 948        if (!buffer_dirty(path[level].bp_bh))
 949                mark_buffer_dirty(path[level].bp_bh);
 950        if (!buffer_dirty(path[level].bp_sib_bh))
 951                mark_buffer_dirty(path[level].bp_sib_bh);
 952
 953        if (move) {
 954                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 955                nilfs_btree_node_insert(right, path[level].bp_index,
 956                                        *keyp, *ptrp, ncblk);
 957
 958                *keyp = nilfs_btree_node_get_key(right, 0);
 959                *ptrp = path[level].bp_newreq.bpr_ptr;
 960
 961                brelse(path[level].bp_bh);
 962                path[level].bp_bh = path[level].bp_sib_bh;
 963                path[level].bp_sib_bh = NULL;
 964        } else {
 965                nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 966
 967                *keyp = nilfs_btree_node_get_key(right, 0);
 968                *ptrp = path[level].bp_newreq.bpr_ptr;
 969
 970                brelse(path[level].bp_sib_bh);
 971                path[level].bp_sib_bh = NULL;
 972        }
 973
 974        path[level + 1].bp_index++;
 975}
 976
 977static void nilfs_btree_grow(struct nilfs_bmap *btree,
 978                             struct nilfs_btree_path *path,
 979                             int level, __u64 *keyp, __u64 *ptrp)
 980{
 981        struct nilfs_btree_node *root, *child;
 982        int n, ncblk;
 983
 984        root = nilfs_btree_get_root(btree);
 985        child = nilfs_btree_get_sib_node(path, level);
 986        ncblk = nilfs_btree_nchildren_per_block(btree);
 987
 988        n = nilfs_btree_node_get_nchildren(root);
 989
 990        nilfs_btree_node_move_right(root, child, n,
 991                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 992        nilfs_btree_node_set_level(root, level + 1);
 993
 994        if (!buffer_dirty(path[level].bp_sib_bh))
 995                mark_buffer_dirty(path[level].bp_sib_bh);
 996
 997        path[level].bp_bh = path[level].bp_sib_bh;
 998        path[level].bp_sib_bh = NULL;
 999
1000        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1001
1002        *keyp = nilfs_btree_node_get_key(child, 0);
1003        *ptrp = path[level].bp_newreq.bpr_ptr;
1004}
1005
1006static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1007                                   const struct nilfs_btree_path *path)
1008{
1009        struct nilfs_btree_node *node;
1010        int level, ncmax;
1011
1012        if (path == NULL)
1013                return NILFS_BMAP_INVALID_PTR;
1014
1015        /* left sibling */
1016        level = NILFS_BTREE_LEVEL_NODE_MIN;
1017        if (path[level].bp_index > 0) {
1018                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1019                return nilfs_btree_node_get_ptr(node,
1020                                                path[level].bp_index - 1,
1021                                                ncmax);
1022        }
1023
1024        /* parent */
1025        level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1026        if (level <= nilfs_btree_height(btree) - 1) {
1027                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1028                return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1029                                                ncmax);
1030        }
1031
1032        return NILFS_BMAP_INVALID_PTR;
1033}
1034
1035static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1036                                       const struct nilfs_btree_path *path,
1037                                       __u64 key)
1038{
1039        __u64 ptr;
1040
1041        ptr = nilfs_bmap_find_target_seq(btree, key);
1042        if (ptr != NILFS_BMAP_INVALID_PTR)
1043                /* sequential access */
1044                return ptr;
1045
1046        ptr = nilfs_btree_find_near(btree, path);
1047        if (ptr != NILFS_BMAP_INVALID_PTR)
1048                /* near */
1049                return ptr;
1050
1051        /* block group */
1052        return nilfs_bmap_find_target_in_group(btree);
1053}
1054
1055static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1056                                      struct nilfs_btree_path *path,
1057                                      int *levelp, __u64 key, __u64 ptr,
1058                                      struct nilfs_bmap_stats *stats)
1059{
1060        struct buffer_head *bh;
1061        struct nilfs_btree_node *node, *parent, *sib;
1062        __u64 sibptr;
1063        int pindex, level, ncmax, ncblk, ret;
1064        struct inode *dat = NULL;
1065
1066        stats->bs_nblocks = 0;
1067        level = NILFS_BTREE_LEVEL_DATA;
1068
1069        /* allocate a new ptr for data block */
1070        if (NILFS_BMAP_USE_VBN(btree)) {
1071                path[level].bp_newreq.bpr_ptr =
1072                        nilfs_btree_find_target_v(btree, path, key);
1073                dat = nilfs_bmap_get_dat(btree);
1074        }
1075
1076        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1077        if (ret < 0)
1078                goto err_out_data;
1079
1080        ncblk = nilfs_btree_nchildren_per_block(btree);
1081
1082        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1083             level < nilfs_btree_height(btree) - 1;
1084             level++) {
1085                node = nilfs_btree_get_nonroot_node(path, level);
1086                if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1087                        path[level].bp_op = nilfs_btree_do_insert;
1088                        stats->bs_nblocks++;
1089                        goto out;
1090                }
1091
1092                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1093                pindex = path[level + 1].bp_index;
1094
1095                /* left sibling */
1096                if (pindex > 0) {
1097                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1098                                                          ncmax);
1099                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1100                        if (ret < 0)
1101                                goto err_out_child_node;
1102                        sib = (struct nilfs_btree_node *)bh->b_data;
1103                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1104                                path[level].bp_sib_bh = bh;
1105                                path[level].bp_op = nilfs_btree_carry_left;
1106                                stats->bs_nblocks++;
1107                                goto out;
1108                        } else {
1109                                brelse(bh);
1110                        }
1111                }
1112
1113                /* right sibling */
1114                if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1115                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1116                                                          ncmax);
1117                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1118                        if (ret < 0)
1119                                goto err_out_child_node;
1120                        sib = (struct nilfs_btree_node *)bh->b_data;
1121                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1122                                path[level].bp_sib_bh = bh;
1123                                path[level].bp_op = nilfs_btree_carry_right;
1124                                stats->bs_nblocks++;
1125                                goto out;
1126                        } else {
1127                                brelse(bh);
1128                        }
1129                }
1130
1131                /* split */
1132                path[level].bp_newreq.bpr_ptr =
1133                        path[level - 1].bp_newreq.bpr_ptr + 1;
1134                ret = nilfs_bmap_prepare_alloc_ptr(btree,
1135                                                   &path[level].bp_newreq, dat);
1136                if (ret < 0)
1137                        goto err_out_child_node;
1138                ret = nilfs_btree_get_new_block(btree,
1139                                                path[level].bp_newreq.bpr_ptr,
1140                                                &bh);
1141                if (ret < 0)
1142                        goto err_out_curr_node;
1143
1144                stats->bs_nblocks++;
1145
1146                sib = (struct nilfs_btree_node *)bh->b_data;
1147                nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1148                path[level].bp_sib_bh = bh;
1149                path[level].bp_op = nilfs_btree_split;
1150        }
1151
1152        /* root */
1153        node = nilfs_btree_get_root(btree);
1154        if (nilfs_btree_node_get_nchildren(node) <
1155            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1156                path[level].bp_op = nilfs_btree_do_insert;
1157                stats->bs_nblocks++;
1158                goto out;
1159        }
1160
1161        /* grow */
1162        path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1163        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1164        if (ret < 0)
1165                goto err_out_child_node;
1166        ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1167                                        &bh);
1168        if (ret < 0)
1169                goto err_out_curr_node;
1170
1171        nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1172                              0, level, 0, ncblk, NULL, NULL);
1173        path[level].bp_sib_bh = bh;
1174        path[level].bp_op = nilfs_btree_grow;
1175
1176        level++;
1177        path[level].bp_op = nilfs_btree_do_insert;
1178
1179        /* a newly-created node block and a data block are added */
1180        stats->bs_nblocks += 2;
1181
1182        /* success */
1183 out:
1184        *levelp = level;
1185        return ret;
1186
1187        /* error */
1188 err_out_curr_node:
1189        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1190 err_out_child_node:
1191        for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1192                nilfs_btnode_delete(path[level].bp_sib_bh);
1193                nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1194
1195        }
1196
1197        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1198 err_out_data:
1199        *levelp = level;
1200        stats->bs_nblocks = 0;
1201        return ret;
1202}
1203
1204static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1205                                      struct nilfs_btree_path *path,
1206                                      int maxlevel, __u64 key, __u64 ptr)
1207{
1208        struct inode *dat = NULL;
1209        int level;
1210
1211        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1212        ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1213        if (NILFS_BMAP_USE_VBN(btree)) {
1214                nilfs_bmap_set_target_v(btree, key, ptr);
1215                dat = nilfs_bmap_get_dat(btree);
1216        }
1217
1218        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1219                nilfs_bmap_commit_alloc_ptr(btree,
1220                                            &path[level - 1].bp_newreq, dat);
1221                path[level].bp_op(btree, path, level, &key, &ptr);
1222        }
1223
1224        if (!nilfs_bmap_dirty(btree))
1225                nilfs_bmap_set_dirty(btree);
1226}
1227
1228static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1229{
1230        struct nilfs_btree_path *path;
1231        struct nilfs_bmap_stats stats;
1232        int level, ret;
1233
1234        path = nilfs_btree_alloc_path();
1235        if (path == NULL)
1236                return -ENOMEM;
1237
1238        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1239                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1240        if (ret != -ENOENT) {
1241                if (ret == 0)
1242                        ret = -EEXIST;
1243                goto out;
1244        }
1245
1246        ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1247        if (ret < 0)
1248                goto out;
1249        nilfs_btree_commit_insert(btree, path, level, key, ptr);
1250        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1251
1252 out:
1253        nilfs_btree_free_path(path);
1254        return ret;
1255}
1256
1257static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1258                                  struct nilfs_btree_path *path,
1259                                  int level, __u64 *keyp, __u64 *ptrp)
1260{
1261        struct nilfs_btree_node *node;
1262        int ncblk;
1263
1264        if (level < nilfs_btree_height(btree) - 1) {
1265                node = nilfs_btree_get_nonroot_node(path, level);
1266                ncblk = nilfs_btree_nchildren_per_block(btree);
1267                nilfs_btree_node_delete(node, path[level].bp_index,
1268                                        keyp, ptrp, ncblk);
1269                if (!buffer_dirty(path[level].bp_bh))
1270                        mark_buffer_dirty(path[level].bp_bh);
1271                if (path[level].bp_index == 0)
1272                        nilfs_btree_promote_key(btree, path, level + 1,
1273                                nilfs_btree_node_get_key(node, 0));
1274        } else {
1275                node = nilfs_btree_get_root(btree);
1276                nilfs_btree_node_delete(node, path[level].bp_index,
1277                                        keyp, ptrp,
1278                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1279        }
1280}
1281
1282static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1283                                    struct nilfs_btree_path *path,
1284                                    int level, __u64 *keyp, __u64 *ptrp)
1285{
1286        struct nilfs_btree_node *node, *left;
1287        int nchildren, lnchildren, n, ncblk;
1288
1289        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1290
1291        node = nilfs_btree_get_nonroot_node(path, level);
1292        left = nilfs_btree_get_sib_node(path, level);
1293        nchildren = nilfs_btree_node_get_nchildren(node);
1294        lnchildren = nilfs_btree_node_get_nchildren(left);
1295        ncblk = nilfs_btree_nchildren_per_block(btree);
1296
1297        n = (nchildren + lnchildren) / 2 - nchildren;
1298
1299        nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1300
1301        if (!buffer_dirty(path[level].bp_bh))
1302                mark_buffer_dirty(path[level].bp_bh);
1303        if (!buffer_dirty(path[level].bp_sib_bh))
1304                mark_buffer_dirty(path[level].bp_sib_bh);
1305
1306        nilfs_btree_promote_key(btree, path, level + 1,
1307                                nilfs_btree_node_get_key(node, 0));
1308
1309        brelse(path[level].bp_sib_bh);
1310        path[level].bp_sib_bh = NULL;
1311        path[level].bp_index += n;
1312}
1313
1314static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1315                                     struct nilfs_btree_path *path,
1316                                     int level, __u64 *keyp, __u64 *ptrp)
1317{
1318        struct nilfs_btree_node *node, *right;
1319        int nchildren, rnchildren, n, ncblk;
1320
1321        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1322
1323        node = nilfs_btree_get_nonroot_node(path, level);
1324        right = nilfs_btree_get_sib_node(path, level);
1325        nchildren = nilfs_btree_node_get_nchildren(node);
1326        rnchildren = nilfs_btree_node_get_nchildren(right);
1327        ncblk = nilfs_btree_nchildren_per_block(btree);
1328
1329        n = (nchildren + rnchildren) / 2 - nchildren;
1330
1331        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1332
1333        if (!buffer_dirty(path[level].bp_bh))
1334                mark_buffer_dirty(path[level].bp_bh);
1335        if (!buffer_dirty(path[level].bp_sib_bh))
1336                mark_buffer_dirty(path[level].bp_sib_bh);
1337
1338        path[level + 1].bp_index++;
1339        nilfs_btree_promote_key(btree, path, level + 1,
1340                                nilfs_btree_node_get_key(right, 0));
1341        path[level + 1].bp_index--;
1342
1343        brelse(path[level].bp_sib_bh);
1344        path[level].bp_sib_bh = NULL;
1345}
1346
1347static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1348                                    struct nilfs_btree_path *path,
1349                                    int level, __u64 *keyp, __u64 *ptrp)
1350{
1351        struct nilfs_btree_node *node, *left;
1352        int n, ncblk;
1353
1354        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1355
1356        node = nilfs_btree_get_nonroot_node(path, level);
1357        left = nilfs_btree_get_sib_node(path, level);
1358        ncblk = nilfs_btree_nchildren_per_block(btree);
1359
1360        n = nilfs_btree_node_get_nchildren(node);
1361
1362        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1363
1364        if (!buffer_dirty(path[level].bp_sib_bh))
1365                mark_buffer_dirty(path[level].bp_sib_bh);
1366
1367        nilfs_btnode_delete(path[level].bp_bh);
1368        path[level].bp_bh = path[level].bp_sib_bh;
1369        path[level].bp_sib_bh = NULL;
1370        path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1371}
1372
1373static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1374                                     struct nilfs_btree_path *path,
1375                                     int level, __u64 *keyp, __u64 *ptrp)
1376{
1377        struct nilfs_btree_node *node, *right;
1378        int n, ncblk;
1379
1380        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1381
1382        node = nilfs_btree_get_nonroot_node(path, level);
1383        right = nilfs_btree_get_sib_node(path, level);
1384        ncblk = nilfs_btree_nchildren_per_block(btree);
1385
1386        n = nilfs_btree_node_get_nchildren(right);
1387
1388        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1389
1390        if (!buffer_dirty(path[level].bp_bh))
1391                mark_buffer_dirty(path[level].bp_bh);
1392
1393        nilfs_btnode_delete(path[level].bp_sib_bh);
1394        path[level].bp_sib_bh = NULL;
1395        path[level + 1].bp_index++;
1396}
1397
1398static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1399                               struct nilfs_btree_path *path,
1400                               int level, __u64 *keyp, __u64 *ptrp)
1401{
1402        struct nilfs_btree_node *root, *child;
1403        int n, ncblk;
1404
1405        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1406
1407        root = nilfs_btree_get_root(btree);
1408        child = nilfs_btree_get_nonroot_node(path, level);
1409        ncblk = nilfs_btree_nchildren_per_block(btree);
1410
1411        nilfs_btree_node_delete(root, 0, NULL, NULL,
1412                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1413        nilfs_btree_node_set_level(root, level);
1414        n = nilfs_btree_node_get_nchildren(child);
1415        nilfs_btree_node_move_left(root, child, n,
1416                                   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1417
1418        nilfs_btnode_delete(path[level].bp_bh);
1419        path[level].bp_bh = NULL;
1420}
1421
1422static void nilfs_btree_nop(struct nilfs_bmap *btree,
1423                            struct nilfs_btree_path *path,
1424                            int level, __u64 *keyp, __u64 *ptrp)
1425{
1426}
1427
1428static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1429                                      struct nilfs_btree_path *path,
1430                                      int *levelp,
1431                                      struct nilfs_bmap_stats *stats,
1432                                      struct inode *dat)
1433{
1434        struct buffer_head *bh;
1435        struct nilfs_btree_node *node, *parent, *sib;
1436        __u64 sibptr;
1437        int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1438
1439        ret = 0;
1440        stats->bs_nblocks = 0;
1441        ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1442        ncblk = nilfs_btree_nchildren_per_block(btree);
1443
1444        for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1445             level < nilfs_btree_height(btree) - 1;
1446             level++) {
1447                node = nilfs_btree_get_nonroot_node(path, level);
1448                path[level].bp_oldreq.bpr_ptr =
1449                        nilfs_btree_node_get_ptr(node, dindex, ncblk);
1450                ret = nilfs_bmap_prepare_end_ptr(btree,
1451                                                 &path[level].bp_oldreq, dat);
1452                if (ret < 0)
1453                        goto err_out_child_node;
1454
1455                if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1456                        path[level].bp_op = nilfs_btree_do_delete;
1457                        stats->bs_nblocks++;
1458                        goto out;
1459                }
1460
1461                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1462                pindex = path[level + 1].bp_index;
1463                dindex = pindex;
1464
1465                if (pindex > 0) {
1466                        /* left sibling */
1467                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1468                                                          ncmax);
1469                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1470                        if (ret < 0)
1471                                goto err_out_curr_node;
1472                        sib = (struct nilfs_btree_node *)bh->b_data;
1473                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1474                                path[level].bp_sib_bh = bh;
1475                                path[level].bp_op = nilfs_btree_borrow_left;
1476                                stats->bs_nblocks++;
1477                                goto out;
1478                        } else {
1479                                path[level].bp_sib_bh = bh;
1480                                path[level].bp_op = nilfs_btree_concat_left;
1481                                stats->bs_nblocks++;
1482                                /* continue; */
1483                        }
1484                } else if (pindex <
1485                           nilfs_btree_node_get_nchildren(parent) - 1) {
1486                        /* right sibling */
1487                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1488                                                          ncmax);
1489                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1490                        if (ret < 0)
1491                                goto err_out_curr_node;
1492                        sib = (struct nilfs_btree_node *)bh->b_data;
1493                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1494                                path[level].bp_sib_bh = bh;
1495                                path[level].bp_op = nilfs_btree_borrow_right;
1496                                stats->bs_nblocks++;
1497                                goto out;
1498                        } else {
1499                                path[level].bp_sib_bh = bh;
1500                                path[level].bp_op = nilfs_btree_concat_right;
1501                                stats->bs_nblocks++;
1502                                /*
1503                                 * When merging right sibling node
1504                                 * into the current node, pointer to
1505                                 * the right sibling node must be
1506                                 * terminated instead.  The adjustment
1507                                 * below is required for that.
1508                                 */
1509                                dindex = pindex + 1;
1510                                /* continue; */
1511                        }
1512                } else {
1513                        /* no siblings */
1514                        /* the only child of the root node */
1515                        WARN_ON(level != nilfs_btree_height(btree) - 2);
1516                        if (nilfs_btree_node_get_nchildren(node) - 1 <=
1517                            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1518                                path[level].bp_op = nilfs_btree_shrink;
1519                                stats->bs_nblocks += 2;
1520                                level++;
1521                                path[level].bp_op = nilfs_btree_nop;
1522                                goto shrink_root_child;
1523                        } else {
1524                                path[level].bp_op = nilfs_btree_do_delete;
1525                                stats->bs_nblocks++;
1526                                goto out;
1527                        }
1528                }
1529        }
1530
1531        /* child of the root node is deleted */
1532        path[level].bp_op = nilfs_btree_do_delete;
1533        stats->bs_nblocks++;
1534
1535shrink_root_child:
1536        node = nilfs_btree_get_root(btree);
1537        path[level].bp_oldreq.bpr_ptr =
1538                nilfs_btree_node_get_ptr(node, dindex,
1539                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1540
1541        ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1542        if (ret < 0)
1543                goto err_out_child_node;
1544
1545        /* success */
1546 out:
1547        *levelp = level;
1548        return ret;
1549
1550        /* error */
1551 err_out_curr_node:
1552        nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1553 err_out_child_node:
1554        for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1555                brelse(path[level].bp_sib_bh);
1556                nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1557        }
1558        *levelp = level;
1559        stats->bs_nblocks = 0;
1560        return ret;
1561}
1562
1563static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1564                                      struct nilfs_btree_path *path,
1565                                      int maxlevel, struct inode *dat)
1566{
1567        int level;
1568
1569        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1570                nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1571                path[level].bp_op(btree, path, level, NULL, NULL);
1572        }
1573
1574        if (!nilfs_bmap_dirty(btree))
1575                nilfs_bmap_set_dirty(btree);
1576}
1577
1578static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1579
1580{
1581        struct nilfs_btree_path *path;
1582        struct nilfs_bmap_stats stats;
1583        struct inode *dat;
1584        int level, ret;
1585
1586        path = nilfs_btree_alloc_path();
1587        if (path == NULL)
1588                return -ENOMEM;
1589
1590        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1591                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1592        if (ret < 0)
1593                goto out;
1594
1595
1596        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1597
1598        ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1599        if (ret < 0)
1600                goto out;
1601        nilfs_btree_commit_delete(btree, path, level, dat);
1602        nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1603
1604out:
1605        nilfs_btree_free_path(path);
1606        return ret;
1607}
1608
1609static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1610                                __u64 *keyp)
1611{
1612        struct nilfs_btree_path *path;
1613        const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1614        int ret;
1615
1616        path = nilfs_btree_alloc_path();
1617        if (!path)
1618                return -ENOMEM;
1619
1620        ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1621        if (!ret)
1622                *keyp = start;
1623        else if (ret == -ENOENT)
1624                ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1625
1626        nilfs_btree_free_path(path);
1627        return ret;
1628}
1629
1630static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1631{
1632        struct nilfs_btree_path *path;
1633        int ret;
1634
1635        path = nilfs_btree_alloc_path();
1636        if (path == NULL)
1637                return -ENOMEM;
1638
1639        ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1640
1641        nilfs_btree_free_path(path);
1642
1643        return ret;
1644}
1645
1646static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1647{
1648        struct buffer_head *bh;
1649        struct nilfs_btree_node *root, *node;
1650        __u64 maxkey, nextmaxkey;
1651        __u64 ptr;
1652        int nchildren, ret;
1653
1654        root = nilfs_btree_get_root(btree);
1655        switch (nilfs_btree_height(btree)) {
1656        case 2:
1657                bh = NULL;
1658                node = root;
1659                break;
1660        case 3:
1661                nchildren = nilfs_btree_node_get_nchildren(root);
1662                if (nchildren > 1)
1663                        return 0;
1664                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1665                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1666                ret = nilfs_btree_get_block(btree, ptr, &bh);
1667                if (ret < 0)
1668                        return ret;
1669                node = (struct nilfs_btree_node *)bh->b_data;
1670                break;
1671        default:
1672                return 0;
1673        }
1674
1675        nchildren = nilfs_btree_node_get_nchildren(node);
1676        maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1677        nextmaxkey = (nchildren > 1) ?
1678                nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1679        if (bh != NULL)
1680                brelse(bh);
1681
1682        return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1683}
1684
1685static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1686                                   __u64 *keys, __u64 *ptrs, int nitems)
1687{
1688        struct buffer_head *bh;
1689        struct nilfs_btree_node *node, *root;
1690        __le64 *dkeys;
1691        __le64 *dptrs;
1692        __u64 ptr;
1693        int nchildren, ncmax, i, ret;
1694
1695        root = nilfs_btree_get_root(btree);
1696        switch (nilfs_btree_height(btree)) {
1697        case 2:
1698                bh = NULL;
1699                node = root;
1700                ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1701                break;
1702        case 3:
1703                nchildren = nilfs_btree_node_get_nchildren(root);
1704                WARN_ON(nchildren > 1);
1705                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1706                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1707                ret = nilfs_btree_get_block(btree, ptr, &bh);
1708                if (ret < 0)
1709                        return ret;
1710                node = (struct nilfs_btree_node *)bh->b_data;
1711                ncmax = nilfs_btree_nchildren_per_block(btree);
1712                break;
1713        default:
1714                node = NULL;
1715                return -EINVAL;
1716        }
1717
1718        nchildren = nilfs_btree_node_get_nchildren(node);
1719        if (nchildren < nitems)
1720                nitems = nchildren;
1721        dkeys = nilfs_btree_node_dkeys(node);
1722        dptrs = nilfs_btree_node_dptrs(node, ncmax);
1723        for (i = 0; i < nitems; i++) {
1724                keys[i] = le64_to_cpu(dkeys[i]);
1725                ptrs[i] = le64_to_cpu(dptrs[i]);
1726        }
1727
1728        if (bh != NULL)
1729                brelse(bh);
1730
1731        return nitems;
1732}
1733
1734static int
1735nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1736                                       union nilfs_bmap_ptr_req *dreq,
1737                                       union nilfs_bmap_ptr_req *nreq,
1738                                       struct buffer_head **bhp,
1739                                       struct nilfs_bmap_stats *stats)
1740{
1741        struct buffer_head *bh;
1742        struct inode *dat = NULL;
1743        int ret;
1744
1745        stats->bs_nblocks = 0;
1746
1747        /* for data */
1748        /* cannot find near ptr */
1749        if (NILFS_BMAP_USE_VBN(btree)) {
1750                dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1751                dat = nilfs_bmap_get_dat(btree);
1752        }
1753
1754        ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1755        if (ret < 0)
1756                return ret;
1757
1758        *bhp = NULL;
1759        stats->bs_nblocks++;
1760        if (nreq != NULL) {
1761                nreq->bpr_ptr = dreq->bpr_ptr + 1;
1762                ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1763                if (ret < 0)
1764                        goto err_out_dreq;
1765
1766                ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1767                if (ret < 0)
1768                        goto err_out_nreq;
1769
1770                *bhp = bh;
1771                stats->bs_nblocks++;
1772        }
1773
1774        /* success */
1775        return 0;
1776
1777        /* error */
1778 err_out_nreq:
1779        nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1780 err_out_dreq:
1781        nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1782        stats->bs_nblocks = 0;
1783        return ret;
1784
1785}
1786
1787static void
1788nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1789                                      __u64 key, __u64 ptr,
1790                                      const __u64 *keys, const __u64 *ptrs,
1791                                      int n,
1792                                      union nilfs_bmap_ptr_req *dreq,
1793                                      union nilfs_bmap_ptr_req *nreq,
1794                                      struct buffer_head *bh)
1795{
1796        struct nilfs_btree_node *node;
1797        struct inode *dat;
1798        __u64 tmpptr;
1799        int ncblk;
1800
1801        /* free resources */
1802        if (btree->b_ops->bop_clear != NULL)
1803                btree->b_ops->bop_clear(btree);
1804
1805        /* ptr must be a pointer to a buffer head. */
1806        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1807
1808        /* convert and insert */
1809        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1810        __nilfs_btree_init(btree);
1811        if (nreq != NULL) {
1812                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1813                nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1814
1815                /* create child node at level 1 */
1816                node = (struct nilfs_btree_node *)bh->b_data;
1817                ncblk = nilfs_btree_nchildren_per_block(btree);
1818                nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1819                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1820                if (!buffer_dirty(bh))
1821                        mark_buffer_dirty(bh);
1822                if (!nilfs_bmap_dirty(btree))
1823                        nilfs_bmap_set_dirty(btree);
1824
1825                brelse(bh);
1826
1827                /* create root node at level 2 */
1828                node = nilfs_btree_get_root(btree);
1829                tmpptr = nreq->bpr_ptr;
1830                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1831                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1832                                      &keys[0], &tmpptr);
1833        } else {
1834                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1835
1836                /* create root node at level 1 */
1837                node = nilfs_btree_get_root(btree);
1838                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1839                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1840                                      keys, ptrs);
1841                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1842                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1843                if (!nilfs_bmap_dirty(btree))
1844                        nilfs_bmap_set_dirty(btree);
1845        }
1846
1847        if (NILFS_BMAP_USE_VBN(btree))
1848                nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1849}
1850
1851/**
1852 * nilfs_btree_convert_and_insert -
1853 * @bmap:
1854 * @key:
1855 * @ptr:
1856 * @keys:
1857 * @ptrs:
1858 * @n:
1859 */
1860int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1861                                   __u64 key, __u64 ptr,
1862                                   const __u64 *keys, const __u64 *ptrs, int n)
1863{
1864        struct buffer_head *bh = NULL;
1865        union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1866        struct nilfs_bmap_stats stats;
1867        int ret;
1868
1869        if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1870                di = &dreq;
1871                ni = NULL;
1872        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1873                           nilfs_btree_node_size(btree))) {
1874                di = &dreq;
1875                ni = &nreq;
1876        } else {
1877                di = NULL;
1878                ni = NULL;
1879                BUG();
1880        }
1881
1882        ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1883                                                     &stats);
1884        if (ret < 0)
1885                return ret;
1886        nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1887                                              di, ni, bh);
1888        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1889        return 0;
1890}
1891
1892static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1893                                   struct nilfs_btree_path *path,
1894                                   int level,
1895                                   struct buffer_head *bh)
1896{
1897        while ((++level < nilfs_btree_height(btree) - 1) &&
1898               !buffer_dirty(path[level].bp_bh))
1899                mark_buffer_dirty(path[level].bp_bh);
1900
1901        return 0;
1902}
1903
1904static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1905                                        struct nilfs_btree_path *path,
1906                                        int level, struct inode *dat)
1907{
1908        struct nilfs_btree_node *parent;
1909        int ncmax, ret;
1910
1911        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1912        path[level].bp_oldreq.bpr_ptr =
1913                nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1914                                         ncmax);
1915        path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1916        ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1917                                       &path[level].bp_newreq.bpr_req);
1918        if (ret < 0)
1919                return ret;
1920
1921        if (buffer_nilfs_node(path[level].bp_bh)) {
1922                path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1923                path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1924                path[level].bp_ctxt.bh = path[level].bp_bh;
1925                ret = nilfs_btnode_prepare_change_key(
1926                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1927                        &path[level].bp_ctxt);
1928                if (ret < 0) {
1929                        nilfs_dat_abort_update(dat,
1930                                               &path[level].bp_oldreq.bpr_req,
1931                                               &path[level].bp_newreq.bpr_req);
1932                        return ret;
1933                }
1934        }
1935
1936        return 0;
1937}
1938
1939static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1940                                        struct nilfs_btree_path *path,
1941                                        int level, struct inode *dat)
1942{
1943        struct nilfs_btree_node *parent;
1944        int ncmax;
1945
1946        nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1947                                &path[level].bp_newreq.bpr_req,
1948                                btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1949
1950        if (buffer_nilfs_node(path[level].bp_bh)) {
1951                nilfs_btnode_commit_change_key(
1952                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1953                        &path[level].bp_ctxt);
1954                path[level].bp_bh = path[level].bp_ctxt.bh;
1955        }
1956        set_buffer_nilfs_volatile(path[level].bp_bh);
1957
1958        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1959        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1960                                 path[level].bp_newreq.bpr_ptr, ncmax);
1961}
1962
1963static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1964                                       struct nilfs_btree_path *path,
1965                                       int level, struct inode *dat)
1966{
1967        nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1968                               &path[level].bp_newreq.bpr_req);
1969        if (buffer_nilfs_node(path[level].bp_bh))
1970                nilfs_btnode_abort_change_key(
1971                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1972                        &path[level].bp_ctxt);
1973}
1974
1975static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1976                                           struct nilfs_btree_path *path,
1977                                           int minlevel, int *maxlevelp,
1978                                           struct inode *dat)
1979{
1980        int level, ret;
1981
1982        level = minlevel;
1983        if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1984                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1985                if (ret < 0)
1986                        return ret;
1987        }
1988        while ((++level < nilfs_btree_height(btree) - 1) &&
1989               !buffer_dirty(path[level].bp_bh)) {
1990
1991                WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1992                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1993                if (ret < 0)
1994                        goto out;
1995        }
1996
1997        /* success */
1998        *maxlevelp = level - 1;
1999        return 0;
2000
2001        /* error */
2002 out:
2003        while (--level > minlevel)
2004                nilfs_btree_abort_update_v(btree, path, level, dat);
2005        if (!buffer_nilfs_volatile(path[level].bp_bh))
2006                nilfs_btree_abort_update_v(btree, path, level, dat);
2007        return ret;
2008}
2009
2010static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2011                                           struct nilfs_btree_path *path,
2012                                           int minlevel, int maxlevel,
2013                                           struct buffer_head *bh,
2014                                           struct inode *dat)
2015{
2016        int level;
2017
2018        if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2019                nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2020
2021        for (level = minlevel + 1; level <= maxlevel; level++)
2022                nilfs_btree_commit_update_v(btree, path, level, dat);
2023}
2024
2025static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2026                                   struct nilfs_btree_path *path,
2027                                   int level, struct buffer_head *bh)
2028{
2029        int maxlevel = 0, ret;
2030        struct nilfs_btree_node *parent;
2031        struct inode *dat = nilfs_bmap_get_dat(btree);
2032        __u64 ptr;
2033        int ncmax;
2034
2035        get_bh(bh);
2036        path[level].bp_bh = bh;
2037        ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2038                                              dat);
2039        if (ret < 0)
2040                goto out;
2041
2042        if (buffer_nilfs_volatile(path[level].bp_bh)) {
2043                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2044                ptr = nilfs_btree_node_get_ptr(parent,
2045                                               path[level + 1].bp_index,
2046                                               ncmax);
2047                ret = nilfs_dat_mark_dirty(dat, ptr);
2048                if (ret < 0)
2049                        goto out;
2050        }
2051
2052        nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2053
2054 out:
2055        brelse(path[level].bp_bh);
2056        path[level].bp_bh = NULL;
2057        return ret;
2058}
2059
2060static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2061                                 struct buffer_head *bh)
2062{
2063        struct nilfs_btree_path *path;
2064        struct nilfs_btree_node *node;
2065        __u64 key;
2066        int level, ret;
2067
2068        WARN_ON(!buffer_dirty(bh));
2069
2070        path = nilfs_btree_alloc_path();
2071        if (path == NULL)
2072                return -ENOMEM;
2073
2074        if (buffer_nilfs_node(bh)) {
2075                node = (struct nilfs_btree_node *)bh->b_data;
2076                key = nilfs_btree_node_get_key(node, 0);
2077                level = nilfs_btree_node_get_level(node);
2078        } else {
2079                key = nilfs_bmap_data_get_key(btree, bh);
2080                level = NILFS_BTREE_LEVEL_DATA;
2081        }
2082
2083        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2084        if (ret < 0) {
2085                if (unlikely(ret == -ENOENT))
2086                        nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2087                                  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2088                                  btree->b_inode->i_ino,
2089                                  (unsigned long long)key, level);
2090                goto out;
2091        }
2092
2093        ret = NILFS_BMAP_USE_VBN(btree) ?
2094                nilfs_btree_propagate_v(btree, path, level, bh) :
2095                nilfs_btree_propagate_p(btree, path, level, bh);
2096
2097 out:
2098        nilfs_btree_free_path(path);
2099
2100        return ret;
2101}
2102
2103static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2104                                    struct buffer_head *bh)
2105{
2106        return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2107}
2108
2109static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2110                                         struct list_head *lists,
2111                                         struct buffer_head *bh)
2112{
2113        struct list_head *head;
2114        struct buffer_head *cbh;
2115        struct nilfs_btree_node *node, *cnode;
2116        __u64 key, ckey;
2117        int level;
2118
2119        get_bh(bh);
2120        node = (struct nilfs_btree_node *)bh->b_data;
2121        key = nilfs_btree_node_get_key(node, 0);
2122        level = nilfs_btree_node_get_level(node);
2123        if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2124            level >= NILFS_BTREE_LEVEL_MAX) {
2125                dump_stack();
2126                nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2127                          "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2128                          level, (unsigned long long)key,
2129                          btree->b_inode->i_ino,
2130                          (unsigned long long)bh->b_blocknr);
2131                return;
2132        }
2133
2134        list_for_each(head, &lists[level]) {
2135                cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2136                cnode = (struct nilfs_btree_node *)cbh->b_data;
2137                ckey = nilfs_btree_node_get_key(cnode, 0);
2138                if (key < ckey)
2139                        break;
2140        }
2141        list_add_tail(&bh->b_assoc_buffers, head);
2142}
2143
2144static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2145                                             struct list_head *listp)
2146{
2147        struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2148        struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2149        struct pagevec pvec;
2150        struct buffer_head *bh, *head;
2151        pgoff_t index = 0;
2152        int level, i;
2153
2154        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2155             level < NILFS_BTREE_LEVEL_MAX;
2156             level++)
2157                INIT_LIST_HEAD(&lists[level]);
2158
2159        pagevec_init(&pvec, 0);
2160
2161        while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2162                                  PAGEVEC_SIZE)) {
2163                for (i = 0; i < pagevec_count(&pvec); i++) {
2164                        bh = head = page_buffers(pvec.pages[i]);
2165                        do {
2166                                if (buffer_dirty(bh))
2167                                        nilfs_btree_add_dirty_buffer(btree,
2168                                                                     lists, bh);
2169                        } while ((bh = bh->b_this_page) != head);
2170                }
2171                pagevec_release(&pvec);
2172                cond_resched();
2173        }
2174
2175        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2176             level < NILFS_BTREE_LEVEL_MAX;
2177             level++)
2178                list_splice_tail(&lists[level], listp);
2179}
2180
2181static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2182                                struct nilfs_btree_path *path,
2183                                int level,
2184                                struct buffer_head **bh,
2185                                sector_t blocknr,
2186                                union nilfs_binfo *binfo)
2187{
2188        struct nilfs_btree_node *parent;
2189        __u64 key;
2190        __u64 ptr;
2191        int ncmax, ret;
2192
2193        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2194        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2195                                       ncmax);
2196        if (buffer_nilfs_node(*bh)) {
2197                path[level].bp_ctxt.oldkey = ptr;
2198                path[level].bp_ctxt.newkey = blocknr;
2199                path[level].bp_ctxt.bh = *bh;
2200                ret = nilfs_btnode_prepare_change_key(
2201                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2202                        &path[level].bp_ctxt);
2203                if (ret < 0)
2204                        return ret;
2205                nilfs_btnode_commit_change_key(
2206                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2207                        &path[level].bp_ctxt);
2208                *bh = path[level].bp_ctxt.bh;
2209        }
2210
2211        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2212                                 ncmax);
2213
2214        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2215        /* on-disk format */
2216        binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2217        binfo->bi_dat.bi_level = level;
2218
2219        return 0;
2220}
2221
2222static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2223                                struct nilfs_btree_path *path,
2224                                int level,
2225                                struct buffer_head **bh,
2226                                sector_t blocknr,
2227                                union nilfs_binfo *binfo)
2228{
2229        struct nilfs_btree_node *parent;
2230        struct inode *dat = nilfs_bmap_get_dat(btree);
2231        __u64 key;
2232        __u64 ptr;
2233        union nilfs_bmap_ptr_req req;
2234        int ncmax, ret;
2235
2236        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2237        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2238                                       ncmax);
2239        req.bpr_ptr = ptr;
2240        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2241        if (ret < 0)
2242                return ret;
2243        nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2244
2245        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2246        /* on-disk format */
2247        binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2248        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2249
2250        return 0;
2251}
2252
2253static int nilfs_btree_assign(struct nilfs_bmap *btree,
2254                              struct buffer_head **bh,
2255                              sector_t blocknr,
2256                              union nilfs_binfo *binfo)
2257{
2258        struct nilfs_btree_path *path;
2259        struct nilfs_btree_node *node;
2260        __u64 key;
2261        int level, ret;
2262
2263        path = nilfs_btree_alloc_path();
2264        if (path == NULL)
2265                return -ENOMEM;
2266
2267        if (buffer_nilfs_node(*bh)) {
2268                node = (struct nilfs_btree_node *)(*bh)->b_data;
2269                key = nilfs_btree_node_get_key(node, 0);
2270                level = nilfs_btree_node_get_level(node);
2271        } else {
2272                key = nilfs_bmap_data_get_key(btree, *bh);
2273                level = NILFS_BTREE_LEVEL_DATA;
2274        }
2275
2276        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2277        if (ret < 0) {
2278                WARN_ON(ret == -ENOENT);
2279                goto out;
2280        }
2281
2282        ret = NILFS_BMAP_USE_VBN(btree) ?
2283                nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2284                nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2285
2286 out:
2287        nilfs_btree_free_path(path);
2288
2289        return ret;
2290}
2291
2292static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2293                                 struct buffer_head **bh,
2294                                 sector_t blocknr,
2295                                 union nilfs_binfo *binfo)
2296{
2297        struct nilfs_btree_node *node;
2298        __u64 key;
2299        int ret;
2300
2301        ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2302                             blocknr);
2303        if (ret < 0)
2304                return ret;
2305
2306        if (buffer_nilfs_node(*bh)) {
2307                node = (struct nilfs_btree_node *)(*bh)->b_data;
2308                key = nilfs_btree_node_get_key(node, 0);
2309        } else
2310                key = nilfs_bmap_data_get_key(btree, *bh);
2311
2312        /* on-disk format */
2313        binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2314        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2315
2316        return 0;
2317}
2318
2319static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2320{
2321        struct buffer_head *bh;
2322        struct nilfs_btree_path *path;
2323        __u64 ptr;
2324        int ret;
2325
2326        path = nilfs_btree_alloc_path();
2327        if (path == NULL)
2328                return -ENOMEM;
2329
2330        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2331        if (ret < 0) {
2332                WARN_ON(ret == -ENOENT);
2333                goto out;
2334        }
2335        ret = nilfs_btree_get_block(btree, ptr, &bh);
2336        if (ret < 0) {
2337                WARN_ON(ret == -ENOENT);
2338                goto out;
2339        }
2340
2341        if (!buffer_dirty(bh))
2342                mark_buffer_dirty(bh);
2343        brelse(bh);
2344        if (!nilfs_bmap_dirty(btree))
2345                nilfs_bmap_set_dirty(btree);
2346
2347 out:
2348        nilfs_btree_free_path(path);
2349        return ret;
2350}
2351
2352static const struct nilfs_bmap_operations nilfs_btree_ops = {
2353        .bop_lookup             =       nilfs_btree_lookup,
2354        .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2355        .bop_insert             =       nilfs_btree_insert,
2356        .bop_delete             =       nilfs_btree_delete,
2357        .bop_clear              =       NULL,
2358
2359        .bop_propagate          =       nilfs_btree_propagate,
2360
2361        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2362
2363        .bop_assign             =       nilfs_btree_assign,
2364        .bop_mark               =       nilfs_btree_mark,
2365
2366        .bop_seek_key           =       nilfs_btree_seek_key,
2367        .bop_last_key           =       nilfs_btree_last_key,
2368
2369        .bop_check_insert       =       NULL,
2370        .bop_check_delete       =       nilfs_btree_check_delete,
2371        .bop_gather_data        =       nilfs_btree_gather_data,
2372};
2373
2374static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2375        .bop_lookup             =       NULL,
2376        .bop_lookup_contig      =       NULL,
2377        .bop_insert             =       NULL,
2378        .bop_delete             =       NULL,
2379        .bop_clear              =       NULL,
2380
2381        .bop_propagate          =       nilfs_btree_propagate_gc,
2382
2383        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2384
2385        .bop_assign             =       nilfs_btree_assign_gc,
2386        .bop_mark               =       NULL,
2387
2388        .bop_seek_key           =       NULL,
2389        .bop_last_key           =       NULL,
2390
2391        .bop_check_insert       =       NULL,
2392        .bop_check_delete       =       NULL,
2393        .bop_gather_data        =       NULL,
2394};
2395
2396static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2397{
2398        bmap->b_ops = &nilfs_btree_ops;
2399        bmap->b_nchildren_per_block =
2400                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2401}
2402
2403int nilfs_btree_init(struct nilfs_bmap *bmap)
2404{
2405        int ret = 0;
2406
2407        __nilfs_btree_init(bmap);
2408
2409        if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2410                ret = -EIO;
2411        return ret;
2412}
2413
2414void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2415{
2416        bmap->b_ops = &nilfs_btree_ops_gc;
2417        bmap->b_nchildren_per_block =
2418                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2419}
2420