linux/fs/nilfs2/btree.c
<<
>>
Prefs
   1/*
   2 * btree.c - NILFS B-tree.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19 *
  20 * Written by Koji Sato <koji@osrg.net>.
  21 */
  22
  23#include <linux/slab.h>
  24#include <linux/string.h>
  25#include <linux/errno.h>
  26#include <linux/pagevec.h>
  27#include "nilfs.h"
  28#include "page.h"
  29#include "btnode.h"
  30#include "btree.h"
  31#include "alloc.h"
  32#include "dat.h"
  33
  34static void __nilfs_btree_init(struct nilfs_bmap *bmap);
  35
  36static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
  37{
  38        struct nilfs_btree_path *path;
  39        int level = NILFS_BTREE_LEVEL_DATA;
  40
  41        path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
  42        if (path == NULL)
  43                goto out;
  44
  45        for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
  46                path[level].bp_bh = NULL;
  47                path[level].bp_sib_bh = NULL;
  48                path[level].bp_index = 0;
  49                path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  50                path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
  51                path[level].bp_op = NULL;
  52        }
  53
  54out:
  55        return path;
  56}
  57
  58static void nilfs_btree_free_path(struct nilfs_btree_path *path)
  59{
  60        int level = NILFS_BTREE_LEVEL_DATA;
  61
  62        for (; level < NILFS_BTREE_LEVEL_MAX; level++)
  63                brelse(path[level].bp_bh);
  64
  65        kmem_cache_free(nilfs_btree_path_cache, path);
  66}
  67
  68/*
  69 * B-tree node operations
  70 */
  71static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
  72                                     __u64 ptr, struct buffer_head **bhp)
  73{
  74        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
  75        struct buffer_head *bh;
  76
  77        bh = nilfs_btnode_create_block(btnc, ptr);
  78        if (!bh)
  79                return -ENOMEM;
  80
  81        set_buffer_nilfs_volatile(bh);
  82        *bhp = bh;
  83        return 0;
  84}
  85
  86static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
  87{
  88        return node->bn_flags;
  89}
  90
  91static void
  92nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
  93{
  94        node->bn_flags = flags;
  95}
  96
  97static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
  98{
  99        return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
 100}
 101
 102static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
 103{
 104        return node->bn_level;
 105}
 106
 107static void
 108nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
 109{
 110        node->bn_level = level;
 111}
 112
 113static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
 114{
 115        return le16_to_cpu(node->bn_nchildren);
 116}
 117
 118static void
 119nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
 120{
 121        node->bn_nchildren = cpu_to_le16(nchildren);
 122}
 123
 124static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
 125{
 126        return 1 << btree->b_inode->i_blkbits;
 127}
 128
 129static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
 130{
 131        return btree->b_nchildren_per_block;
 132}
 133
 134static __le64 *
 135nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
 136{
 137        return (__le64 *)((char *)(node + 1) +
 138                          (nilfs_btree_node_root(node) ?
 139                           0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
 140}
 141
 142static __le64 *
 143nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
 144{
 145        return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
 146}
 147
 148static __u64
 149nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
 150{
 151        return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
 152}
 153
 154static void
 155nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
 156{
 157        *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
 158}
 159
 160static __u64
 161nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
 162                         int ncmax)
 163{
 164        return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
 165}
 166
 167static void
 168nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
 169                         int ncmax)
 170{
 171        *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
 172}
 173
 174static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
 175                                  int level, int nchildren, int ncmax,
 176                                  const __u64 *keys, const __u64 *ptrs)
 177{
 178        __le64 *dkeys;
 179        __le64 *dptrs;
 180        int i;
 181
 182        nilfs_btree_node_set_flags(node, flags);
 183        nilfs_btree_node_set_level(node, level);
 184        nilfs_btree_node_set_nchildren(node, nchildren);
 185
 186        dkeys = nilfs_btree_node_dkeys(node);
 187        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 188        for (i = 0; i < nchildren; i++) {
 189                dkeys[i] = cpu_to_le64(keys[i]);
 190                dptrs[i] = cpu_to_le64(ptrs[i]);
 191        }
 192}
 193
 194/* Assume the buffer heads corresponding to left and right are locked. */
 195static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
 196                                       struct nilfs_btree_node *right,
 197                                       int n, int lncmax, int rncmax)
 198{
 199        __le64 *ldkeys, *rdkeys;
 200        __le64 *ldptrs, *rdptrs;
 201        int lnchildren, rnchildren;
 202
 203        ldkeys = nilfs_btree_node_dkeys(left);
 204        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 205        lnchildren = nilfs_btree_node_get_nchildren(left);
 206
 207        rdkeys = nilfs_btree_node_dkeys(right);
 208        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 209        rnchildren = nilfs_btree_node_get_nchildren(right);
 210
 211        memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
 212        memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
 213        memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
 214        memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
 215
 216        lnchildren += n;
 217        rnchildren -= n;
 218        nilfs_btree_node_set_nchildren(left, lnchildren);
 219        nilfs_btree_node_set_nchildren(right, rnchildren);
 220}
 221
 222/* Assume that the buffer heads corresponding to left and right are locked. */
 223static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
 224                                        struct nilfs_btree_node *right,
 225                                        int n, int lncmax, int rncmax)
 226{
 227        __le64 *ldkeys, *rdkeys;
 228        __le64 *ldptrs, *rdptrs;
 229        int lnchildren, rnchildren;
 230
 231        ldkeys = nilfs_btree_node_dkeys(left);
 232        ldptrs = nilfs_btree_node_dptrs(left, lncmax);
 233        lnchildren = nilfs_btree_node_get_nchildren(left);
 234
 235        rdkeys = nilfs_btree_node_dkeys(right);
 236        rdptrs = nilfs_btree_node_dptrs(right, rncmax);
 237        rnchildren = nilfs_btree_node_get_nchildren(right);
 238
 239        memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
 240        memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
 241        memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
 242        memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
 243
 244        lnchildren -= n;
 245        rnchildren += n;
 246        nilfs_btree_node_set_nchildren(left, lnchildren);
 247        nilfs_btree_node_set_nchildren(right, rnchildren);
 248}
 249
 250/* Assume that the buffer head corresponding to node is locked. */
 251static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
 252                                    __u64 key, __u64 ptr, int ncmax)
 253{
 254        __le64 *dkeys;
 255        __le64 *dptrs;
 256        int nchildren;
 257
 258        dkeys = nilfs_btree_node_dkeys(node);
 259        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 260        nchildren = nilfs_btree_node_get_nchildren(node);
 261        if (index < nchildren) {
 262                memmove(dkeys + index + 1, dkeys + index,
 263                        (nchildren - index) * sizeof(*dkeys));
 264                memmove(dptrs + index + 1, dptrs + index,
 265                        (nchildren - index) * sizeof(*dptrs));
 266        }
 267        dkeys[index] = cpu_to_le64(key);
 268        dptrs[index] = cpu_to_le64(ptr);
 269        nchildren++;
 270        nilfs_btree_node_set_nchildren(node, nchildren);
 271}
 272
 273/* Assume that the buffer head corresponding to node is locked. */
 274static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
 275                                    __u64 *keyp, __u64 *ptrp, int ncmax)
 276{
 277        __u64 key;
 278        __u64 ptr;
 279        __le64 *dkeys;
 280        __le64 *dptrs;
 281        int nchildren;
 282
 283        dkeys = nilfs_btree_node_dkeys(node);
 284        dptrs = nilfs_btree_node_dptrs(node, ncmax);
 285        key = le64_to_cpu(dkeys[index]);
 286        ptr = le64_to_cpu(dptrs[index]);
 287        nchildren = nilfs_btree_node_get_nchildren(node);
 288        if (keyp != NULL)
 289                *keyp = key;
 290        if (ptrp != NULL)
 291                *ptrp = ptr;
 292
 293        if (index < nchildren - 1) {
 294                memmove(dkeys + index, dkeys + index + 1,
 295                        (nchildren - index - 1) * sizeof(*dkeys));
 296                memmove(dptrs + index, dptrs + index + 1,
 297                        (nchildren - index - 1) * sizeof(*dptrs));
 298        }
 299        nchildren--;
 300        nilfs_btree_node_set_nchildren(node, nchildren);
 301}
 302
 303static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
 304                                   __u64 key, int *indexp)
 305{
 306        __u64 nkey;
 307        int index, low, high, s;
 308
 309        /* binary search */
 310        low = 0;
 311        high = nilfs_btree_node_get_nchildren(node) - 1;
 312        index = 0;
 313        s = 0;
 314        while (low <= high) {
 315                index = (low + high) / 2;
 316                nkey = nilfs_btree_node_get_key(node, index);
 317                if (nkey == key) {
 318                        s = 0;
 319                        goto out;
 320                } else if (nkey < key) {
 321                        low = index + 1;
 322                        s = -1;
 323                } else {
 324                        high = index - 1;
 325                        s = 1;
 326                }
 327        }
 328
 329        /* adjust index */
 330        if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
 331                if (s > 0 && index > 0)
 332                        index--;
 333        } else if (s < 0)
 334                index++;
 335
 336 out:
 337        *indexp = index;
 338
 339        return s == 0;
 340}
 341
 342/**
 343 * nilfs_btree_node_broken - verify consistency of btree node
 344 * @node: btree node block to be examined
 345 * @size: node size (in bytes)
 346 * @blocknr: block number
 347 *
 348 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 349 */
 350static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 351                                   size_t size, sector_t blocknr)
 352{
 353        int level, flags, nchildren;
 354        int ret = 0;
 355
 356        level = nilfs_btree_node_get_level(node);
 357        flags = nilfs_btree_node_get_flags(node);
 358        nchildren = nilfs_btree_node_get_nchildren(node);
 359
 360        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 361                     level >= NILFS_BTREE_LEVEL_MAX ||
 362                     (flags & NILFS_BTREE_NODE_ROOT) ||
 363                     nchildren < 0 ||
 364                     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
 365                printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
 366                       "level = %d, flags = 0x%x, nchildren = %d\n",
 367                       (unsigned long long)blocknr, level, flags, nchildren);
 368                ret = 1;
 369        }
 370        return ret;
 371}
 372
 373/**
 374 * nilfs_btree_root_broken - verify consistency of btree root node
 375 * @node: btree root node to be examined
 376 * @ino: inode number
 377 *
 378 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 379 */
 380static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 381                                   unsigned long ino)
 382{
 383        int level, flags, nchildren;
 384        int ret = 0;
 385
 386        level = nilfs_btree_node_get_level(node);
 387        flags = nilfs_btree_node_get_flags(node);
 388        nchildren = nilfs_btree_node_get_nchildren(node);
 389
 390        if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
 391                     level >= NILFS_BTREE_LEVEL_MAX ||
 392                     nchildren < 0 ||
 393                     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 394                pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
 395                        ino, level, flags, nchildren);
 396                ret = 1;
 397        }
 398        return ret;
 399}
 400
 401int nilfs_btree_broken_node_block(struct buffer_head *bh)
 402{
 403        int ret;
 404
 405        if (buffer_nilfs_checked(bh))
 406                return 0;
 407
 408        ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
 409                                       bh->b_size, bh->b_blocknr);
 410        if (likely(!ret))
 411                set_buffer_nilfs_checked(bh);
 412        return ret;
 413}
 414
 415static struct nilfs_btree_node *
 416nilfs_btree_get_root(const struct nilfs_bmap *btree)
 417{
 418        return (struct nilfs_btree_node *)btree->b_u.u_data;
 419}
 420
 421static struct nilfs_btree_node *
 422nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
 423{
 424        return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
 425}
 426
 427static struct nilfs_btree_node *
 428nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
 429{
 430        return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
 431}
 432
 433static int nilfs_btree_height(const struct nilfs_bmap *btree)
 434{
 435        return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
 436}
 437
 438static struct nilfs_btree_node *
 439nilfs_btree_get_node(const struct nilfs_bmap *btree,
 440                     const struct nilfs_btree_path *path,
 441                     int level, int *ncmaxp)
 442{
 443        struct nilfs_btree_node *node;
 444
 445        if (level == nilfs_btree_height(btree) - 1) {
 446                node = nilfs_btree_get_root(btree);
 447                *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
 448        } else {
 449                node = nilfs_btree_get_nonroot_node(path, level);
 450                *ncmaxp = nilfs_btree_nchildren_per_block(btree);
 451        }
 452        return node;
 453}
 454
 455static int
 456nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
 457{
 458        if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 459                dump_stack();
 460                printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
 461                       nilfs_btree_node_get_level(node), level);
 462                return 1;
 463        }
 464        return 0;
 465}
 466
 467struct nilfs_btree_readahead_info {
 468        struct nilfs_btree_node *node;  /* parent node */
 469        int max_ra_blocks;              /* max nof blocks to read ahead */
 470        int index;                      /* current index on the parent node */
 471        int ncmax;                      /* nof children in the parent node */
 472};
 473
 474static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 475                                   struct buffer_head **bhp,
 476                                   const struct nilfs_btree_readahead_info *ra)
 477{
 478        struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
 479        struct buffer_head *bh, *ra_bh;
 480        sector_t submit_ptr = 0;
 481        int ret;
 482
 483        ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
 484        if (ret) {
 485                if (ret != -EEXIST)
 486                        return ret;
 487                goto out_check;
 488        }
 489
 490        if (ra) {
 491                int i, n;
 492                __u64 ptr2;
 493
 494                /* read ahead sibling nodes */
 495                for (n = ra->max_ra_blocks, i = ra->index + 1;
 496                     n > 0 && i < ra->ncmax; n--, i++) {
 497                        ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
 498
 499                        ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
 500                                                        &ra_bh, &submit_ptr);
 501                        if (likely(!ret || ret == -EEXIST))
 502                                brelse(ra_bh);
 503                        else if (ret != -EBUSY)
 504                                break;
 505                        if (!buffer_locked(bh))
 506                                goto out_no_wait;
 507                }
 508        }
 509
 510        wait_on_buffer(bh);
 511
 512 out_no_wait:
 513        if (!buffer_uptodate(bh)) {
 514                brelse(bh);
 515                return -EIO;
 516        }
 517
 518 out_check:
 519        if (nilfs_btree_broken_node_block(bh)) {
 520                clear_buffer_uptodate(bh);
 521                brelse(bh);
 522                return -EINVAL;
 523        }
 524
 525        *bhp = bh;
 526        return 0;
 527}
 528
 529static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
 530                                   struct buffer_head **bhp)
 531{
 532        return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
 533}
 534
 535static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
 536                                 struct nilfs_btree_path *path,
 537                                 __u64 key, __u64 *ptrp, int minlevel,
 538                                 int readahead)
 539{
 540        struct nilfs_btree_node *node;
 541        struct nilfs_btree_readahead_info p, *ra;
 542        __u64 ptr;
 543        int level, index, found, ncmax, ret;
 544
 545        node = nilfs_btree_get_root(btree);
 546        level = nilfs_btree_node_get_level(node);
 547        if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
 548                return -ENOENT;
 549
 550        found = nilfs_btree_node_lookup(node, key, &index);
 551        ptr = nilfs_btree_node_get_ptr(node, index,
 552                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 553        path[level].bp_bh = NULL;
 554        path[level].bp_index = index;
 555
 556        ncmax = nilfs_btree_nchildren_per_block(btree);
 557
 558        while (--level >= minlevel) {
 559                ra = NULL;
 560                if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
 561                        p.node = nilfs_btree_get_node(btree, path, level + 1,
 562                                                      &p.ncmax);
 563                        p.index = index;
 564                        p.max_ra_blocks = 7;
 565                        ra = &p;
 566                }
 567                ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
 568                                              ra);
 569                if (ret < 0)
 570                        return ret;
 571
 572                node = nilfs_btree_get_nonroot_node(path, level);
 573                if (nilfs_btree_bad_node(node, level))
 574                        return -EINVAL;
 575                if (!found)
 576                        found = nilfs_btree_node_lookup(node, key, &index);
 577                else
 578                        index = 0;
 579                if (index < ncmax) {
 580                        ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 581                } else {
 582                        WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
 583                        /* insert */
 584                        ptr = NILFS_BMAP_INVALID_PTR;
 585                }
 586                path[level].bp_index = index;
 587        }
 588        if (!found)
 589                return -ENOENT;
 590
 591        if (ptrp != NULL)
 592                *ptrp = ptr;
 593
 594        return 0;
 595}
 596
 597static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
 598                                      struct nilfs_btree_path *path,
 599                                      __u64 *keyp, __u64 *ptrp)
 600{
 601        struct nilfs_btree_node *node;
 602        __u64 ptr;
 603        int index, level, ncmax, ret;
 604
 605        node = nilfs_btree_get_root(btree);
 606        index = nilfs_btree_node_get_nchildren(node) - 1;
 607        if (index < 0)
 608                return -ENOENT;
 609        level = nilfs_btree_node_get_level(node);
 610        ptr = nilfs_btree_node_get_ptr(node, index,
 611                                       NILFS_BTREE_ROOT_NCHILDREN_MAX);
 612        path[level].bp_bh = NULL;
 613        path[level].bp_index = index;
 614        ncmax = nilfs_btree_nchildren_per_block(btree);
 615
 616        for (level--; level > 0; level--) {
 617                ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
 618                if (ret < 0)
 619                        return ret;
 620                node = nilfs_btree_get_nonroot_node(path, level);
 621                if (nilfs_btree_bad_node(node, level))
 622                        return -EINVAL;
 623                index = nilfs_btree_node_get_nchildren(node) - 1;
 624                ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
 625                path[level].bp_index = index;
 626        }
 627
 628        if (keyp != NULL)
 629                *keyp = nilfs_btree_node_get_key(node, index);
 630        if (ptrp != NULL)
 631                *ptrp = ptr;
 632
 633        return 0;
 634}
 635
 636/**
 637 * nilfs_btree_get_next_key - get next valid key from btree path array
 638 * @btree: bmap struct of btree
 639 * @path: array of nilfs_btree_path struct
 640 * @minlevel: start level
 641 * @nextkey: place to store the next valid key
 642 *
 643 * Return Value: If a next key was found, 0 is returned. Otherwise,
 644 * -ENOENT is returned.
 645 */
 646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
 647                                    const struct nilfs_btree_path *path,
 648                                    int minlevel, __u64 *nextkey)
 649{
 650        struct nilfs_btree_node *node;
 651        int maxlevel = nilfs_btree_height(btree) - 1;
 652        int index, next_adj, level;
 653
 654        /* Next index is already set to bp_index for leaf nodes. */
 655        next_adj = 0;
 656        for (level = minlevel; level <= maxlevel; level++) {
 657                if (level == maxlevel)
 658                        node = nilfs_btree_get_root(btree);
 659                else
 660                        node = nilfs_btree_get_nonroot_node(path, level);
 661
 662                index = path[level].bp_index + next_adj;
 663                if (index < nilfs_btree_node_get_nchildren(node)) {
 664                        /* Next key is in this node */
 665                        *nextkey = nilfs_btree_node_get_key(node, index);
 666                        return 0;
 667                }
 668                /* For non-leaf nodes, next index is stored at bp_index + 1. */
 669                next_adj = 1;
 670        }
 671        return -ENOENT;
 672}
 673
 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
 675                              __u64 key, int level, __u64 *ptrp)
 676{
 677        struct nilfs_btree_path *path;
 678        int ret;
 679
 680        path = nilfs_btree_alloc_path();
 681        if (path == NULL)
 682                return -ENOMEM;
 683
 684        ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
 685
 686        nilfs_btree_free_path(path);
 687
 688        return ret;
 689}
 690
 691static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
 692                                     __u64 key, __u64 *ptrp, unsigned maxblocks)
 693{
 694        struct nilfs_btree_path *path;
 695        struct nilfs_btree_node *node;
 696        struct inode *dat = NULL;
 697        __u64 ptr, ptr2;
 698        sector_t blocknr;
 699        int level = NILFS_BTREE_LEVEL_NODE_MIN;
 700        int ret, cnt, index, maxlevel, ncmax;
 701        struct nilfs_btree_readahead_info p;
 702
 703        path = nilfs_btree_alloc_path();
 704        if (path == NULL)
 705                return -ENOMEM;
 706
 707        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
 708        if (ret < 0)
 709                goto out;
 710
 711        if (NILFS_BMAP_USE_VBN(btree)) {
 712                dat = nilfs_bmap_get_dat(btree);
 713                ret = nilfs_dat_translate(dat, ptr, &blocknr);
 714                if (ret < 0)
 715                        goto out;
 716                ptr = blocknr;
 717        }
 718        cnt = 1;
 719        if (cnt == maxblocks)
 720                goto end;
 721
 722        maxlevel = nilfs_btree_height(btree) - 1;
 723        node = nilfs_btree_get_node(btree, path, level, &ncmax);
 724        index = path[level].bp_index + 1;
 725        for (;;) {
 726                while (index < nilfs_btree_node_get_nchildren(node)) {
 727                        if (nilfs_btree_node_get_key(node, index) !=
 728                            key + cnt)
 729                                goto end;
 730                        ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
 731                        if (dat) {
 732                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
 733                                if (ret < 0)
 734                                        goto out;
 735                                ptr2 = blocknr;
 736                        }
 737                        if (ptr2 != ptr + cnt || ++cnt == maxblocks)
 738                                goto end;
 739                        index++;
 740                        continue;
 741                }
 742                if (level == maxlevel)
 743                        break;
 744
 745                /* look-up right sibling node */
 746                p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
 747                p.index = path[level + 1].bp_index + 1;
 748                p.max_ra_blocks = 7;
 749                if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
 750                    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
 751                        break;
 752                ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
 753                path[level + 1].bp_index = p.index;
 754
 755                brelse(path[level].bp_bh);
 756                path[level].bp_bh = NULL;
 757
 758                ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
 759                                              &p);
 760                if (ret < 0)
 761                        goto out;
 762                node = nilfs_btree_get_nonroot_node(path, level);
 763                ncmax = nilfs_btree_nchildren_per_block(btree);
 764                index = 0;
 765                path[level].bp_index = index;
 766        }
 767 end:
 768        *ptrp = ptr;
 769        ret = cnt;
 770 out:
 771        nilfs_btree_free_path(path);
 772        return ret;
 773}
 774
 775static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
 776                                    struct nilfs_btree_path *path,
 777                                    int level, __u64 key)
 778{
 779        if (level < nilfs_btree_height(btree) - 1) {
 780                do {
 781                        nilfs_btree_node_set_key(
 782                                nilfs_btree_get_nonroot_node(path, level),
 783                                path[level].bp_index, key);
 784                        if (!buffer_dirty(path[level].bp_bh))
 785                                mark_buffer_dirty(path[level].bp_bh);
 786                } while ((path[level].bp_index == 0) &&
 787                         (++level < nilfs_btree_height(btree) - 1));
 788        }
 789
 790        /* root */
 791        if (level == nilfs_btree_height(btree) - 1) {
 792                nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
 793                                         path[level].bp_index, key);
 794        }
 795}
 796
 797static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
 798                                  struct nilfs_btree_path *path,
 799                                  int level, __u64 *keyp, __u64 *ptrp)
 800{
 801        struct nilfs_btree_node *node;
 802        int ncblk;
 803
 804        if (level < nilfs_btree_height(btree) - 1) {
 805                node = nilfs_btree_get_nonroot_node(path, level);
 806                ncblk = nilfs_btree_nchildren_per_block(btree);
 807                nilfs_btree_node_insert(node, path[level].bp_index,
 808                                        *keyp, *ptrp, ncblk);
 809                if (!buffer_dirty(path[level].bp_bh))
 810                        mark_buffer_dirty(path[level].bp_bh);
 811
 812                if (path[level].bp_index == 0)
 813                        nilfs_btree_promote_key(btree, path, level + 1,
 814                                                nilfs_btree_node_get_key(node,
 815                                                                         0));
 816        } else {
 817                node = nilfs_btree_get_root(btree);
 818                nilfs_btree_node_insert(node, path[level].bp_index,
 819                                        *keyp, *ptrp,
 820                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
 821        }
 822}
 823
 824static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
 825                                   struct nilfs_btree_path *path,
 826                                   int level, __u64 *keyp, __u64 *ptrp)
 827{
 828        struct nilfs_btree_node *node, *left;
 829        int nchildren, lnchildren, n, move, ncblk;
 830
 831        node = nilfs_btree_get_nonroot_node(path, level);
 832        left = nilfs_btree_get_sib_node(path, level);
 833        nchildren = nilfs_btree_node_get_nchildren(node);
 834        lnchildren = nilfs_btree_node_get_nchildren(left);
 835        ncblk = nilfs_btree_nchildren_per_block(btree);
 836        move = 0;
 837
 838        n = (nchildren + lnchildren + 1) / 2 - lnchildren;
 839        if (n > path[level].bp_index) {
 840                /* move insert point */
 841                n--;
 842                move = 1;
 843        }
 844
 845        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
 846
 847        if (!buffer_dirty(path[level].bp_bh))
 848                mark_buffer_dirty(path[level].bp_bh);
 849        if (!buffer_dirty(path[level].bp_sib_bh))
 850                mark_buffer_dirty(path[level].bp_sib_bh);
 851
 852        nilfs_btree_promote_key(btree, path, level + 1,
 853                                nilfs_btree_node_get_key(node, 0));
 854
 855        if (move) {
 856                brelse(path[level].bp_bh);
 857                path[level].bp_bh = path[level].bp_sib_bh;
 858                path[level].bp_sib_bh = NULL;
 859                path[level].bp_index += lnchildren;
 860                path[level + 1].bp_index--;
 861        } else {
 862                brelse(path[level].bp_sib_bh);
 863                path[level].bp_sib_bh = NULL;
 864                path[level].bp_index -= n;
 865        }
 866
 867        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 868}
 869
 870static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
 871                                    struct nilfs_btree_path *path,
 872                                    int level, __u64 *keyp, __u64 *ptrp)
 873{
 874        struct nilfs_btree_node *node, *right;
 875        int nchildren, rnchildren, n, move, ncblk;
 876
 877        node = nilfs_btree_get_nonroot_node(path, level);
 878        right = nilfs_btree_get_sib_node(path, level);
 879        nchildren = nilfs_btree_node_get_nchildren(node);
 880        rnchildren = nilfs_btree_node_get_nchildren(right);
 881        ncblk = nilfs_btree_nchildren_per_block(btree);
 882        move = 0;
 883
 884        n = (nchildren + rnchildren + 1) / 2 - rnchildren;
 885        if (n > nchildren - path[level].bp_index) {
 886                /* move insert point */
 887                n--;
 888                move = 1;
 889        }
 890
 891        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 892
 893        if (!buffer_dirty(path[level].bp_bh))
 894                mark_buffer_dirty(path[level].bp_bh);
 895        if (!buffer_dirty(path[level].bp_sib_bh))
 896                mark_buffer_dirty(path[level].bp_sib_bh);
 897
 898        path[level + 1].bp_index++;
 899        nilfs_btree_promote_key(btree, path, level + 1,
 900                                nilfs_btree_node_get_key(right, 0));
 901        path[level + 1].bp_index--;
 902
 903        if (move) {
 904                brelse(path[level].bp_bh);
 905                path[level].bp_bh = path[level].bp_sib_bh;
 906                path[level].bp_sib_bh = NULL;
 907                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 908                path[level + 1].bp_index++;
 909        } else {
 910                brelse(path[level].bp_sib_bh);
 911                path[level].bp_sib_bh = NULL;
 912        }
 913
 914        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 915}
 916
 917static void nilfs_btree_split(struct nilfs_bmap *btree,
 918                              struct nilfs_btree_path *path,
 919                              int level, __u64 *keyp, __u64 *ptrp)
 920{
 921        struct nilfs_btree_node *node, *right;
 922        int nchildren, n, move, ncblk;
 923
 924        node = nilfs_btree_get_nonroot_node(path, level);
 925        right = nilfs_btree_get_sib_node(path, level);
 926        nchildren = nilfs_btree_node_get_nchildren(node);
 927        ncblk = nilfs_btree_nchildren_per_block(btree);
 928        move = 0;
 929
 930        n = (nchildren + 1) / 2;
 931        if (n > nchildren - path[level].bp_index) {
 932                n--;
 933                move = 1;
 934        }
 935
 936        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
 937
 938        if (!buffer_dirty(path[level].bp_bh))
 939                mark_buffer_dirty(path[level].bp_bh);
 940        if (!buffer_dirty(path[level].bp_sib_bh))
 941                mark_buffer_dirty(path[level].bp_sib_bh);
 942
 943        if (move) {
 944                path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
 945                nilfs_btree_node_insert(right, path[level].bp_index,
 946                                        *keyp, *ptrp, ncblk);
 947
 948                *keyp = nilfs_btree_node_get_key(right, 0);
 949                *ptrp = path[level].bp_newreq.bpr_ptr;
 950
 951                brelse(path[level].bp_bh);
 952                path[level].bp_bh = path[level].bp_sib_bh;
 953                path[level].bp_sib_bh = NULL;
 954        } else {
 955                nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 956
 957                *keyp = nilfs_btree_node_get_key(right, 0);
 958                *ptrp = path[level].bp_newreq.bpr_ptr;
 959
 960                brelse(path[level].bp_sib_bh);
 961                path[level].bp_sib_bh = NULL;
 962        }
 963
 964        path[level + 1].bp_index++;
 965}
 966
 967static void nilfs_btree_grow(struct nilfs_bmap *btree,
 968                             struct nilfs_btree_path *path,
 969                             int level, __u64 *keyp, __u64 *ptrp)
 970{
 971        struct nilfs_btree_node *root, *child;
 972        int n, ncblk;
 973
 974        root = nilfs_btree_get_root(btree);
 975        child = nilfs_btree_get_sib_node(path, level);
 976        ncblk = nilfs_btree_nchildren_per_block(btree);
 977
 978        n = nilfs_btree_node_get_nchildren(root);
 979
 980        nilfs_btree_node_move_right(root, child, n,
 981                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
 982        nilfs_btree_node_set_level(root, level + 1);
 983
 984        if (!buffer_dirty(path[level].bp_sib_bh))
 985                mark_buffer_dirty(path[level].bp_sib_bh);
 986
 987        path[level].bp_bh = path[level].bp_sib_bh;
 988        path[level].bp_sib_bh = NULL;
 989
 990        nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
 991
 992        *keyp = nilfs_btree_node_get_key(child, 0);
 993        *ptrp = path[level].bp_newreq.bpr_ptr;
 994}
 995
 996static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
 997                                   const struct nilfs_btree_path *path)
 998{
 999        struct nilfs_btree_node *node;
1000        int level, ncmax;
1001
1002        if (path == NULL)
1003                return NILFS_BMAP_INVALID_PTR;
1004
1005        /* left sibling */
1006        level = NILFS_BTREE_LEVEL_NODE_MIN;
1007        if (path[level].bp_index > 0) {
1008                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1009                return nilfs_btree_node_get_ptr(node,
1010                                                path[level].bp_index - 1,
1011                                                ncmax);
1012        }
1013
1014        /* parent */
1015        level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1016        if (level <= nilfs_btree_height(btree) - 1) {
1017                node = nilfs_btree_get_node(btree, path, level, &ncmax);
1018                return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1019                                                ncmax);
1020        }
1021
1022        return NILFS_BMAP_INVALID_PTR;
1023}
1024
1025static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1026                                       const struct nilfs_btree_path *path,
1027                                       __u64 key)
1028{
1029        __u64 ptr;
1030
1031        ptr = nilfs_bmap_find_target_seq(btree, key);
1032        if (ptr != NILFS_BMAP_INVALID_PTR)
1033                /* sequential access */
1034                return ptr;
1035        else {
1036                ptr = nilfs_btree_find_near(btree, path);
1037                if (ptr != NILFS_BMAP_INVALID_PTR)
1038                        /* near */
1039                        return ptr;
1040        }
1041        /* block group */
1042        return nilfs_bmap_find_target_in_group(btree);
1043}
1044
1045static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1046                                      struct nilfs_btree_path *path,
1047                                      int *levelp, __u64 key, __u64 ptr,
1048                                      struct nilfs_bmap_stats *stats)
1049{
1050        struct buffer_head *bh;
1051        struct nilfs_btree_node *node, *parent, *sib;
1052        __u64 sibptr;
1053        int pindex, level, ncmax, ncblk, ret;
1054        struct inode *dat = NULL;
1055
1056        stats->bs_nblocks = 0;
1057        level = NILFS_BTREE_LEVEL_DATA;
1058
1059        /* allocate a new ptr for data block */
1060        if (NILFS_BMAP_USE_VBN(btree)) {
1061                path[level].bp_newreq.bpr_ptr =
1062                        nilfs_btree_find_target_v(btree, path, key);
1063                dat = nilfs_bmap_get_dat(btree);
1064        }
1065
1066        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1067        if (ret < 0)
1068                goto err_out_data;
1069
1070        ncblk = nilfs_btree_nchildren_per_block(btree);
1071
1072        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1073             level < nilfs_btree_height(btree) - 1;
1074             level++) {
1075                node = nilfs_btree_get_nonroot_node(path, level);
1076                if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1077                        path[level].bp_op = nilfs_btree_do_insert;
1078                        stats->bs_nblocks++;
1079                        goto out;
1080                }
1081
1082                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1083                pindex = path[level + 1].bp_index;
1084
1085                /* left sibling */
1086                if (pindex > 0) {
1087                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1088                                                          ncmax);
1089                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1090                        if (ret < 0)
1091                                goto err_out_child_node;
1092                        sib = (struct nilfs_btree_node *)bh->b_data;
1093                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1094                                path[level].bp_sib_bh = bh;
1095                                path[level].bp_op = nilfs_btree_carry_left;
1096                                stats->bs_nblocks++;
1097                                goto out;
1098                        } else {
1099                                brelse(bh);
1100                        }
1101                }
1102
1103                /* right sibling */
1104                if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1105                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1106                                                          ncmax);
1107                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1108                        if (ret < 0)
1109                                goto err_out_child_node;
1110                        sib = (struct nilfs_btree_node *)bh->b_data;
1111                        if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1112                                path[level].bp_sib_bh = bh;
1113                                path[level].bp_op = nilfs_btree_carry_right;
1114                                stats->bs_nblocks++;
1115                                goto out;
1116                        } else {
1117                                brelse(bh);
1118                        }
1119                }
1120
1121                /* split */
1122                path[level].bp_newreq.bpr_ptr =
1123                        path[level - 1].bp_newreq.bpr_ptr + 1;
1124                ret = nilfs_bmap_prepare_alloc_ptr(btree,
1125                                                   &path[level].bp_newreq, dat);
1126                if (ret < 0)
1127                        goto err_out_child_node;
1128                ret = nilfs_btree_get_new_block(btree,
1129                                                path[level].bp_newreq.bpr_ptr,
1130                                                &bh);
1131                if (ret < 0)
1132                        goto err_out_curr_node;
1133
1134                stats->bs_nblocks++;
1135
1136                sib = (struct nilfs_btree_node *)bh->b_data;
1137                nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1138                path[level].bp_sib_bh = bh;
1139                path[level].bp_op = nilfs_btree_split;
1140        }
1141
1142        /* root */
1143        node = nilfs_btree_get_root(btree);
1144        if (nilfs_btree_node_get_nchildren(node) <
1145            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1146                path[level].bp_op = nilfs_btree_do_insert;
1147                stats->bs_nblocks++;
1148                goto out;
1149        }
1150
1151        /* grow */
1152        path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1153        ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1154        if (ret < 0)
1155                goto err_out_child_node;
1156        ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1157                                        &bh);
1158        if (ret < 0)
1159                goto err_out_curr_node;
1160
1161        nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1162                              0, level, 0, ncblk, NULL, NULL);
1163        path[level].bp_sib_bh = bh;
1164        path[level].bp_op = nilfs_btree_grow;
1165
1166        level++;
1167        path[level].bp_op = nilfs_btree_do_insert;
1168
1169        /* a newly-created node block and a data block are added */
1170        stats->bs_nblocks += 2;
1171
1172        /* success */
1173 out:
1174        *levelp = level;
1175        return ret;
1176
1177        /* error */
1178 err_out_curr_node:
1179        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1180 err_out_child_node:
1181        for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1182                nilfs_btnode_delete(path[level].bp_sib_bh);
1183                nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1184
1185        }
1186
1187        nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1188 err_out_data:
1189        *levelp = level;
1190        stats->bs_nblocks = 0;
1191        return ret;
1192}
1193
1194static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1195                                      struct nilfs_btree_path *path,
1196                                      int maxlevel, __u64 key, __u64 ptr)
1197{
1198        struct inode *dat = NULL;
1199        int level;
1200
1201        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1202        ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1203        if (NILFS_BMAP_USE_VBN(btree)) {
1204                nilfs_bmap_set_target_v(btree, key, ptr);
1205                dat = nilfs_bmap_get_dat(btree);
1206        }
1207
1208        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1209                nilfs_bmap_commit_alloc_ptr(btree,
1210                                            &path[level - 1].bp_newreq, dat);
1211                path[level].bp_op(btree, path, level, &key, &ptr);
1212        }
1213
1214        if (!nilfs_bmap_dirty(btree))
1215                nilfs_bmap_set_dirty(btree);
1216}
1217
1218static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1219{
1220        struct nilfs_btree_path *path;
1221        struct nilfs_bmap_stats stats;
1222        int level, ret;
1223
1224        path = nilfs_btree_alloc_path();
1225        if (path == NULL)
1226                return -ENOMEM;
1227
1228        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1229                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1230        if (ret != -ENOENT) {
1231                if (ret == 0)
1232                        ret = -EEXIST;
1233                goto out;
1234        }
1235
1236        ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1237        if (ret < 0)
1238                goto out;
1239        nilfs_btree_commit_insert(btree, path, level, key, ptr);
1240        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1241
1242 out:
1243        nilfs_btree_free_path(path);
1244        return ret;
1245}
1246
1247static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1248                                  struct nilfs_btree_path *path,
1249                                  int level, __u64 *keyp, __u64 *ptrp)
1250{
1251        struct nilfs_btree_node *node;
1252        int ncblk;
1253
1254        if (level < nilfs_btree_height(btree) - 1) {
1255                node = nilfs_btree_get_nonroot_node(path, level);
1256                ncblk = nilfs_btree_nchildren_per_block(btree);
1257                nilfs_btree_node_delete(node, path[level].bp_index,
1258                                        keyp, ptrp, ncblk);
1259                if (!buffer_dirty(path[level].bp_bh))
1260                        mark_buffer_dirty(path[level].bp_bh);
1261                if (path[level].bp_index == 0)
1262                        nilfs_btree_promote_key(btree, path, level + 1,
1263                                nilfs_btree_node_get_key(node, 0));
1264        } else {
1265                node = nilfs_btree_get_root(btree);
1266                nilfs_btree_node_delete(node, path[level].bp_index,
1267                                        keyp, ptrp,
1268                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1269        }
1270}
1271
1272static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1273                                    struct nilfs_btree_path *path,
1274                                    int level, __u64 *keyp, __u64 *ptrp)
1275{
1276        struct nilfs_btree_node *node, *left;
1277        int nchildren, lnchildren, n, ncblk;
1278
1279        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1280
1281        node = nilfs_btree_get_nonroot_node(path, level);
1282        left = nilfs_btree_get_sib_node(path, level);
1283        nchildren = nilfs_btree_node_get_nchildren(node);
1284        lnchildren = nilfs_btree_node_get_nchildren(left);
1285        ncblk = nilfs_btree_nchildren_per_block(btree);
1286
1287        n = (nchildren + lnchildren) / 2 - nchildren;
1288
1289        nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1290
1291        if (!buffer_dirty(path[level].bp_bh))
1292                mark_buffer_dirty(path[level].bp_bh);
1293        if (!buffer_dirty(path[level].bp_sib_bh))
1294                mark_buffer_dirty(path[level].bp_sib_bh);
1295
1296        nilfs_btree_promote_key(btree, path, level + 1,
1297                                nilfs_btree_node_get_key(node, 0));
1298
1299        brelse(path[level].bp_sib_bh);
1300        path[level].bp_sib_bh = NULL;
1301        path[level].bp_index += n;
1302}
1303
1304static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1305                                     struct nilfs_btree_path *path,
1306                                     int level, __u64 *keyp, __u64 *ptrp)
1307{
1308        struct nilfs_btree_node *node, *right;
1309        int nchildren, rnchildren, n, ncblk;
1310
1311        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1312
1313        node = nilfs_btree_get_nonroot_node(path, level);
1314        right = nilfs_btree_get_sib_node(path, level);
1315        nchildren = nilfs_btree_node_get_nchildren(node);
1316        rnchildren = nilfs_btree_node_get_nchildren(right);
1317        ncblk = nilfs_btree_nchildren_per_block(btree);
1318
1319        n = (nchildren + rnchildren) / 2 - nchildren;
1320
1321        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1322
1323        if (!buffer_dirty(path[level].bp_bh))
1324                mark_buffer_dirty(path[level].bp_bh);
1325        if (!buffer_dirty(path[level].bp_sib_bh))
1326                mark_buffer_dirty(path[level].bp_sib_bh);
1327
1328        path[level + 1].bp_index++;
1329        nilfs_btree_promote_key(btree, path, level + 1,
1330                                nilfs_btree_node_get_key(right, 0));
1331        path[level + 1].bp_index--;
1332
1333        brelse(path[level].bp_sib_bh);
1334        path[level].bp_sib_bh = NULL;
1335}
1336
1337static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1338                                    struct nilfs_btree_path *path,
1339                                    int level, __u64 *keyp, __u64 *ptrp)
1340{
1341        struct nilfs_btree_node *node, *left;
1342        int n, ncblk;
1343
1344        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1345
1346        node = nilfs_btree_get_nonroot_node(path, level);
1347        left = nilfs_btree_get_sib_node(path, level);
1348        ncblk = nilfs_btree_nchildren_per_block(btree);
1349
1350        n = nilfs_btree_node_get_nchildren(node);
1351
1352        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1353
1354        if (!buffer_dirty(path[level].bp_sib_bh))
1355                mark_buffer_dirty(path[level].bp_sib_bh);
1356
1357        nilfs_btnode_delete(path[level].bp_bh);
1358        path[level].bp_bh = path[level].bp_sib_bh;
1359        path[level].bp_sib_bh = NULL;
1360        path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1361}
1362
1363static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1364                                     struct nilfs_btree_path *path,
1365                                     int level, __u64 *keyp, __u64 *ptrp)
1366{
1367        struct nilfs_btree_node *node, *right;
1368        int n, ncblk;
1369
1370        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1371
1372        node = nilfs_btree_get_nonroot_node(path, level);
1373        right = nilfs_btree_get_sib_node(path, level);
1374        ncblk = nilfs_btree_nchildren_per_block(btree);
1375
1376        n = nilfs_btree_node_get_nchildren(right);
1377
1378        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1379
1380        if (!buffer_dirty(path[level].bp_bh))
1381                mark_buffer_dirty(path[level].bp_bh);
1382
1383        nilfs_btnode_delete(path[level].bp_sib_bh);
1384        path[level].bp_sib_bh = NULL;
1385        path[level + 1].bp_index++;
1386}
1387
1388static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1389                               struct nilfs_btree_path *path,
1390                               int level, __u64 *keyp, __u64 *ptrp)
1391{
1392        struct nilfs_btree_node *root, *child;
1393        int n, ncblk;
1394
1395        nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1396
1397        root = nilfs_btree_get_root(btree);
1398        child = nilfs_btree_get_nonroot_node(path, level);
1399        ncblk = nilfs_btree_nchildren_per_block(btree);
1400
1401        nilfs_btree_node_delete(root, 0, NULL, NULL,
1402                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1403        nilfs_btree_node_set_level(root, level);
1404        n = nilfs_btree_node_get_nchildren(child);
1405        nilfs_btree_node_move_left(root, child, n,
1406                                   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1407
1408        nilfs_btnode_delete(path[level].bp_bh);
1409        path[level].bp_bh = NULL;
1410}
1411
1412static void nilfs_btree_nop(struct nilfs_bmap *btree,
1413                            struct nilfs_btree_path *path,
1414                            int level, __u64 *keyp, __u64 *ptrp)
1415{
1416}
1417
1418static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1419                                      struct nilfs_btree_path *path,
1420                                      int *levelp,
1421                                      struct nilfs_bmap_stats *stats,
1422                                      struct inode *dat)
1423{
1424        struct buffer_head *bh;
1425        struct nilfs_btree_node *node, *parent, *sib;
1426        __u64 sibptr;
1427        int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1428
1429        ret = 0;
1430        stats->bs_nblocks = 0;
1431        ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1432        ncblk = nilfs_btree_nchildren_per_block(btree);
1433
1434        for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1435             level < nilfs_btree_height(btree) - 1;
1436             level++) {
1437                node = nilfs_btree_get_nonroot_node(path, level);
1438                path[level].bp_oldreq.bpr_ptr =
1439                        nilfs_btree_node_get_ptr(node, dindex, ncblk);
1440                ret = nilfs_bmap_prepare_end_ptr(btree,
1441                                                 &path[level].bp_oldreq, dat);
1442                if (ret < 0)
1443                        goto err_out_child_node;
1444
1445                if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1446                        path[level].bp_op = nilfs_btree_do_delete;
1447                        stats->bs_nblocks++;
1448                        goto out;
1449                }
1450
1451                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1452                pindex = path[level + 1].bp_index;
1453                dindex = pindex;
1454
1455                if (pindex > 0) {
1456                        /* left sibling */
1457                        sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1458                                                          ncmax);
1459                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1460                        if (ret < 0)
1461                                goto err_out_curr_node;
1462                        sib = (struct nilfs_btree_node *)bh->b_data;
1463                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1464                                path[level].bp_sib_bh = bh;
1465                                path[level].bp_op = nilfs_btree_borrow_left;
1466                                stats->bs_nblocks++;
1467                                goto out;
1468                        } else {
1469                                path[level].bp_sib_bh = bh;
1470                                path[level].bp_op = nilfs_btree_concat_left;
1471                                stats->bs_nblocks++;
1472                                /* continue; */
1473                        }
1474                } else if (pindex <
1475                           nilfs_btree_node_get_nchildren(parent) - 1) {
1476                        /* right sibling */
1477                        sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1478                                                          ncmax);
1479                        ret = nilfs_btree_get_block(btree, sibptr, &bh);
1480                        if (ret < 0)
1481                                goto err_out_curr_node;
1482                        sib = (struct nilfs_btree_node *)bh->b_data;
1483                        if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1484                                path[level].bp_sib_bh = bh;
1485                                path[level].bp_op = nilfs_btree_borrow_right;
1486                                stats->bs_nblocks++;
1487                                goto out;
1488                        } else {
1489                                path[level].bp_sib_bh = bh;
1490                                path[level].bp_op = nilfs_btree_concat_right;
1491                                stats->bs_nblocks++;
1492                                /*
1493                                 * When merging right sibling node
1494                                 * into the current node, pointer to
1495                                 * the right sibling node must be
1496                                 * terminated instead.  The adjustment
1497                                 * below is required for that.
1498                                 */
1499                                dindex = pindex + 1;
1500                                /* continue; */
1501                        }
1502                } else {
1503                        /* no siblings */
1504                        /* the only child of the root node */
1505                        WARN_ON(level != nilfs_btree_height(btree) - 2);
1506                        if (nilfs_btree_node_get_nchildren(node) - 1 <=
1507                            NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1508                                path[level].bp_op = nilfs_btree_shrink;
1509                                stats->bs_nblocks += 2;
1510                                level++;
1511                                path[level].bp_op = nilfs_btree_nop;
1512                                goto shrink_root_child;
1513                        } else {
1514                                path[level].bp_op = nilfs_btree_do_delete;
1515                                stats->bs_nblocks++;
1516                                goto out;
1517                        }
1518                }
1519        }
1520
1521        /* child of the root node is deleted */
1522        path[level].bp_op = nilfs_btree_do_delete;
1523        stats->bs_nblocks++;
1524
1525shrink_root_child:
1526        node = nilfs_btree_get_root(btree);
1527        path[level].bp_oldreq.bpr_ptr =
1528                nilfs_btree_node_get_ptr(node, dindex,
1529                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1530
1531        ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1532        if (ret < 0)
1533                goto err_out_child_node;
1534
1535        /* success */
1536 out:
1537        *levelp = level;
1538        return ret;
1539
1540        /* error */
1541 err_out_curr_node:
1542        nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1543 err_out_child_node:
1544        for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1545                brelse(path[level].bp_sib_bh);
1546                nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1547        }
1548        *levelp = level;
1549        stats->bs_nblocks = 0;
1550        return ret;
1551}
1552
1553static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1554                                      struct nilfs_btree_path *path,
1555                                      int maxlevel, struct inode *dat)
1556{
1557        int level;
1558
1559        for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1560                nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1561                path[level].bp_op(btree, path, level, NULL, NULL);
1562        }
1563
1564        if (!nilfs_bmap_dirty(btree))
1565                nilfs_bmap_set_dirty(btree);
1566}
1567
1568static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1569
1570{
1571        struct nilfs_btree_path *path;
1572        struct nilfs_bmap_stats stats;
1573        struct inode *dat;
1574        int level, ret;
1575
1576        path = nilfs_btree_alloc_path();
1577        if (path == NULL)
1578                return -ENOMEM;
1579
1580        ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1581                                    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1582        if (ret < 0)
1583                goto out;
1584
1585
1586        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1587
1588        ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1589        if (ret < 0)
1590                goto out;
1591        nilfs_btree_commit_delete(btree, path, level, dat);
1592        nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1593
1594out:
1595        nilfs_btree_free_path(path);
1596        return ret;
1597}
1598
1599static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1600                                __u64 *keyp)
1601{
1602        struct nilfs_btree_path *path;
1603        const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1604        int ret;
1605
1606        path = nilfs_btree_alloc_path();
1607        if (!path)
1608                return -ENOMEM;
1609
1610        ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1611        if (!ret)
1612                *keyp = start;
1613        else if (ret == -ENOENT)
1614                ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1615
1616        nilfs_btree_free_path(path);
1617        return ret;
1618}
1619
1620static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1621{
1622        struct nilfs_btree_path *path;
1623        int ret;
1624
1625        path = nilfs_btree_alloc_path();
1626        if (path == NULL)
1627                return -ENOMEM;
1628
1629        ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1630
1631        nilfs_btree_free_path(path);
1632
1633        return ret;
1634}
1635
1636static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1637{
1638        struct buffer_head *bh;
1639        struct nilfs_btree_node *root, *node;
1640        __u64 maxkey, nextmaxkey;
1641        __u64 ptr;
1642        int nchildren, ret;
1643
1644        root = nilfs_btree_get_root(btree);
1645        switch (nilfs_btree_height(btree)) {
1646        case 2:
1647                bh = NULL;
1648                node = root;
1649                break;
1650        case 3:
1651                nchildren = nilfs_btree_node_get_nchildren(root);
1652                if (nchildren > 1)
1653                        return 0;
1654                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1655                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1656                ret = nilfs_btree_get_block(btree, ptr, &bh);
1657                if (ret < 0)
1658                        return ret;
1659                node = (struct nilfs_btree_node *)bh->b_data;
1660                break;
1661        default:
1662                return 0;
1663        }
1664
1665        nchildren = nilfs_btree_node_get_nchildren(node);
1666        maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1667        nextmaxkey = (nchildren > 1) ?
1668                nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1669        if (bh != NULL)
1670                brelse(bh);
1671
1672        return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1673}
1674
1675static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1676                                   __u64 *keys, __u64 *ptrs, int nitems)
1677{
1678        struct buffer_head *bh;
1679        struct nilfs_btree_node *node, *root;
1680        __le64 *dkeys;
1681        __le64 *dptrs;
1682        __u64 ptr;
1683        int nchildren, ncmax, i, ret;
1684
1685        root = nilfs_btree_get_root(btree);
1686        switch (nilfs_btree_height(btree)) {
1687        case 2:
1688                bh = NULL;
1689                node = root;
1690                ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1691                break;
1692        case 3:
1693                nchildren = nilfs_btree_node_get_nchildren(root);
1694                WARN_ON(nchildren > 1);
1695                ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1696                                               NILFS_BTREE_ROOT_NCHILDREN_MAX);
1697                ret = nilfs_btree_get_block(btree, ptr, &bh);
1698                if (ret < 0)
1699                        return ret;
1700                node = (struct nilfs_btree_node *)bh->b_data;
1701                ncmax = nilfs_btree_nchildren_per_block(btree);
1702                break;
1703        default:
1704                node = NULL;
1705                return -EINVAL;
1706        }
1707
1708        nchildren = nilfs_btree_node_get_nchildren(node);
1709        if (nchildren < nitems)
1710                nitems = nchildren;
1711        dkeys = nilfs_btree_node_dkeys(node);
1712        dptrs = nilfs_btree_node_dptrs(node, ncmax);
1713        for (i = 0; i < nitems; i++) {
1714                keys[i] = le64_to_cpu(dkeys[i]);
1715                ptrs[i] = le64_to_cpu(dptrs[i]);
1716        }
1717
1718        if (bh != NULL)
1719                brelse(bh);
1720
1721        return nitems;
1722}
1723
1724static int
1725nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1726                                       union nilfs_bmap_ptr_req *dreq,
1727                                       union nilfs_bmap_ptr_req *nreq,
1728                                       struct buffer_head **bhp,
1729                                       struct nilfs_bmap_stats *stats)
1730{
1731        struct buffer_head *bh;
1732        struct inode *dat = NULL;
1733        int ret;
1734
1735        stats->bs_nblocks = 0;
1736
1737        /* for data */
1738        /* cannot find near ptr */
1739        if (NILFS_BMAP_USE_VBN(btree)) {
1740                dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1741                dat = nilfs_bmap_get_dat(btree);
1742        }
1743
1744        ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1745        if (ret < 0)
1746                return ret;
1747
1748        *bhp = NULL;
1749        stats->bs_nblocks++;
1750        if (nreq != NULL) {
1751                nreq->bpr_ptr = dreq->bpr_ptr + 1;
1752                ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1753                if (ret < 0)
1754                        goto err_out_dreq;
1755
1756                ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1757                if (ret < 0)
1758                        goto err_out_nreq;
1759
1760                *bhp = bh;
1761                stats->bs_nblocks++;
1762        }
1763
1764        /* success */
1765        return 0;
1766
1767        /* error */
1768 err_out_nreq:
1769        nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1770 err_out_dreq:
1771        nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1772        stats->bs_nblocks = 0;
1773        return ret;
1774
1775}
1776
1777static void
1778nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1779                                      __u64 key, __u64 ptr,
1780                                      const __u64 *keys, const __u64 *ptrs,
1781                                      int n,
1782                                      union nilfs_bmap_ptr_req *dreq,
1783                                      union nilfs_bmap_ptr_req *nreq,
1784                                      struct buffer_head *bh)
1785{
1786        struct nilfs_btree_node *node;
1787        struct inode *dat;
1788        __u64 tmpptr;
1789        int ncblk;
1790
1791        /* free resources */
1792        if (btree->b_ops->bop_clear != NULL)
1793                btree->b_ops->bop_clear(btree);
1794
1795        /* ptr must be a pointer to a buffer head. */
1796        set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1797
1798        /* convert and insert */
1799        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1800        __nilfs_btree_init(btree);
1801        if (nreq != NULL) {
1802                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1803                nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1804
1805                /* create child node at level 1 */
1806                node = (struct nilfs_btree_node *)bh->b_data;
1807                ncblk = nilfs_btree_nchildren_per_block(btree);
1808                nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1809                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1810                if (!buffer_dirty(bh))
1811                        mark_buffer_dirty(bh);
1812                if (!nilfs_bmap_dirty(btree))
1813                        nilfs_bmap_set_dirty(btree);
1814
1815                brelse(bh);
1816
1817                /* create root node at level 2 */
1818                node = nilfs_btree_get_root(btree);
1819                tmpptr = nreq->bpr_ptr;
1820                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1821                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1822                                      &keys[0], &tmpptr);
1823        } else {
1824                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1825
1826                /* create root node at level 1 */
1827                node = nilfs_btree_get_root(btree);
1828                nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1829                                      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1830                                      keys, ptrs);
1831                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1832                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
1833                if (!nilfs_bmap_dirty(btree))
1834                        nilfs_bmap_set_dirty(btree);
1835        }
1836
1837        if (NILFS_BMAP_USE_VBN(btree))
1838                nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1839}
1840
1841/**
1842 * nilfs_btree_convert_and_insert -
1843 * @bmap:
1844 * @key:
1845 * @ptr:
1846 * @keys:
1847 * @ptrs:
1848 * @n:
1849 */
1850int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1851                                   __u64 key, __u64 ptr,
1852                                   const __u64 *keys, const __u64 *ptrs, int n)
1853{
1854        struct buffer_head *bh = NULL;
1855        union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1856        struct nilfs_bmap_stats stats;
1857        int ret;
1858
1859        if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1860                di = &dreq;
1861                ni = NULL;
1862        } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1863                           1 << btree->b_inode->i_blkbits)) {
1864                di = &dreq;
1865                ni = &nreq;
1866        } else {
1867                di = NULL;
1868                ni = NULL;
1869                BUG();
1870        }
1871
1872        ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1873                                                     &stats);
1874        if (ret < 0)
1875                return ret;
1876        nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1877                                              di, ni, bh);
1878        nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1879        return 0;
1880}
1881
1882static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1883                                   struct nilfs_btree_path *path,
1884                                   int level,
1885                                   struct buffer_head *bh)
1886{
1887        while ((++level < nilfs_btree_height(btree) - 1) &&
1888               !buffer_dirty(path[level].bp_bh))
1889                mark_buffer_dirty(path[level].bp_bh);
1890
1891        return 0;
1892}
1893
1894static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1895                                        struct nilfs_btree_path *path,
1896                                        int level, struct inode *dat)
1897{
1898        struct nilfs_btree_node *parent;
1899        int ncmax, ret;
1900
1901        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1902        path[level].bp_oldreq.bpr_ptr =
1903                nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1904                                         ncmax);
1905        path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1906        ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1907                                       &path[level].bp_newreq.bpr_req);
1908        if (ret < 0)
1909                return ret;
1910
1911        if (buffer_nilfs_node(path[level].bp_bh)) {
1912                path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1913                path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1914                path[level].bp_ctxt.bh = path[level].bp_bh;
1915                ret = nilfs_btnode_prepare_change_key(
1916                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1917                        &path[level].bp_ctxt);
1918                if (ret < 0) {
1919                        nilfs_dat_abort_update(dat,
1920                                               &path[level].bp_oldreq.bpr_req,
1921                                               &path[level].bp_newreq.bpr_req);
1922                        return ret;
1923                }
1924        }
1925
1926        return 0;
1927}
1928
1929static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1930                                        struct nilfs_btree_path *path,
1931                                        int level, struct inode *dat)
1932{
1933        struct nilfs_btree_node *parent;
1934        int ncmax;
1935
1936        nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1937                                &path[level].bp_newreq.bpr_req,
1938                                btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1939
1940        if (buffer_nilfs_node(path[level].bp_bh)) {
1941                nilfs_btnode_commit_change_key(
1942                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1943                        &path[level].bp_ctxt);
1944                path[level].bp_bh = path[level].bp_ctxt.bh;
1945        }
1946        set_buffer_nilfs_volatile(path[level].bp_bh);
1947
1948        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1949        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1950                                 path[level].bp_newreq.bpr_ptr, ncmax);
1951}
1952
1953static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1954                                       struct nilfs_btree_path *path,
1955                                       int level, struct inode *dat)
1956{
1957        nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1958                               &path[level].bp_newreq.bpr_req);
1959        if (buffer_nilfs_node(path[level].bp_bh))
1960                nilfs_btnode_abort_change_key(
1961                        &NILFS_BMAP_I(btree)->i_btnode_cache,
1962                        &path[level].bp_ctxt);
1963}
1964
1965static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1966                                           struct nilfs_btree_path *path,
1967                                           int minlevel, int *maxlevelp,
1968                                           struct inode *dat)
1969{
1970        int level, ret;
1971
1972        level = minlevel;
1973        if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1974                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1975                if (ret < 0)
1976                        return ret;
1977        }
1978        while ((++level < nilfs_btree_height(btree) - 1) &&
1979               !buffer_dirty(path[level].bp_bh)) {
1980
1981                WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1982                ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1983                if (ret < 0)
1984                        goto out;
1985        }
1986
1987        /* success */
1988        *maxlevelp = level - 1;
1989        return 0;
1990
1991        /* error */
1992 out:
1993        while (--level > minlevel)
1994                nilfs_btree_abort_update_v(btree, path, level, dat);
1995        if (!buffer_nilfs_volatile(path[level].bp_bh))
1996                nilfs_btree_abort_update_v(btree, path, level, dat);
1997        return ret;
1998}
1999
2000static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2001                                           struct nilfs_btree_path *path,
2002                                           int minlevel, int maxlevel,
2003                                           struct buffer_head *bh,
2004                                           struct inode *dat)
2005{
2006        int level;
2007
2008        if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2009                nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2010
2011        for (level = minlevel + 1; level <= maxlevel; level++)
2012                nilfs_btree_commit_update_v(btree, path, level, dat);
2013}
2014
2015static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2016                                   struct nilfs_btree_path *path,
2017                                   int level, struct buffer_head *bh)
2018{
2019        int maxlevel = 0, ret;
2020        struct nilfs_btree_node *parent;
2021        struct inode *dat = nilfs_bmap_get_dat(btree);
2022        __u64 ptr;
2023        int ncmax;
2024
2025        get_bh(bh);
2026        path[level].bp_bh = bh;
2027        ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2028                                              dat);
2029        if (ret < 0)
2030                goto out;
2031
2032        if (buffer_nilfs_volatile(path[level].bp_bh)) {
2033                parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2034                ptr = nilfs_btree_node_get_ptr(parent,
2035                                               path[level + 1].bp_index,
2036                                               ncmax);
2037                ret = nilfs_dat_mark_dirty(dat, ptr);
2038                if (ret < 0)
2039                        goto out;
2040        }
2041
2042        nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2043
2044 out:
2045        brelse(path[level].bp_bh);
2046        path[level].bp_bh = NULL;
2047        return ret;
2048}
2049
2050static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2051                                 struct buffer_head *bh)
2052{
2053        struct nilfs_btree_path *path;
2054        struct nilfs_btree_node *node;
2055        __u64 key;
2056        int level, ret;
2057
2058        WARN_ON(!buffer_dirty(bh));
2059
2060        path = nilfs_btree_alloc_path();
2061        if (path == NULL)
2062                return -ENOMEM;
2063
2064        if (buffer_nilfs_node(bh)) {
2065                node = (struct nilfs_btree_node *)bh->b_data;
2066                key = nilfs_btree_node_get_key(node, 0);
2067                level = nilfs_btree_node_get_level(node);
2068        } else {
2069                key = nilfs_bmap_data_get_key(btree, bh);
2070                level = NILFS_BTREE_LEVEL_DATA;
2071        }
2072
2073        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2074        if (ret < 0) {
2075                if (unlikely(ret == -ENOENT))
2076                        printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2077                               __func__, (unsigned long long)key, level);
2078                goto out;
2079        }
2080
2081        ret = NILFS_BMAP_USE_VBN(btree) ?
2082                nilfs_btree_propagate_v(btree, path, level, bh) :
2083                nilfs_btree_propagate_p(btree, path, level, bh);
2084
2085 out:
2086        nilfs_btree_free_path(path);
2087
2088        return ret;
2089}
2090
2091static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2092                                    struct buffer_head *bh)
2093{
2094        return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2095}
2096
2097static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2098                                         struct list_head *lists,
2099                                         struct buffer_head *bh)
2100{
2101        struct list_head *head;
2102        struct buffer_head *cbh;
2103        struct nilfs_btree_node *node, *cnode;
2104        __u64 key, ckey;
2105        int level;
2106
2107        get_bh(bh);
2108        node = (struct nilfs_btree_node *)bh->b_data;
2109        key = nilfs_btree_node_get_key(node, 0);
2110        level = nilfs_btree_node_get_level(node);
2111        if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2112            level >= NILFS_BTREE_LEVEL_MAX) {
2113                dump_stack();
2114                printk(KERN_WARNING
2115                       "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2116                       "blocknr=%llu)\n",
2117                       __func__, level, (unsigned long long)key,
2118                       NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2119                       (unsigned long long)bh->b_blocknr);
2120                return;
2121        }
2122
2123        list_for_each(head, &lists[level]) {
2124                cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2125                cnode = (struct nilfs_btree_node *)cbh->b_data;
2126                ckey = nilfs_btree_node_get_key(cnode, 0);
2127                if (key < ckey)
2128                        break;
2129        }
2130        list_add_tail(&bh->b_assoc_buffers, head);
2131}
2132
2133static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2134                                             struct list_head *listp)
2135{
2136        struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2137        struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2138        struct pagevec pvec;
2139        struct buffer_head *bh, *head;
2140        pgoff_t index = 0;
2141        int level, i;
2142
2143        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2144             level < NILFS_BTREE_LEVEL_MAX;
2145             level++)
2146                INIT_LIST_HEAD(&lists[level]);
2147
2148        pagevec_init(&pvec, 0);
2149
2150        while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2151                                  PAGEVEC_SIZE)) {
2152                for (i = 0; i < pagevec_count(&pvec); i++) {
2153                        bh = head = page_buffers(pvec.pages[i]);
2154                        do {
2155                                if (buffer_dirty(bh))
2156                                        nilfs_btree_add_dirty_buffer(btree,
2157                                                                     lists, bh);
2158                        } while ((bh = bh->b_this_page) != head);
2159                }
2160                pagevec_release(&pvec);
2161                cond_resched();
2162        }
2163
2164        for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2165             level < NILFS_BTREE_LEVEL_MAX;
2166             level++)
2167                list_splice_tail(&lists[level], listp);
2168}
2169
2170static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2171                                struct nilfs_btree_path *path,
2172                                int level,
2173                                struct buffer_head **bh,
2174                                sector_t blocknr,
2175                                union nilfs_binfo *binfo)
2176{
2177        struct nilfs_btree_node *parent;
2178        __u64 key;
2179        __u64 ptr;
2180        int ncmax, ret;
2181
2182        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2183        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2184                                       ncmax);
2185        if (buffer_nilfs_node(*bh)) {
2186                path[level].bp_ctxt.oldkey = ptr;
2187                path[level].bp_ctxt.newkey = blocknr;
2188                path[level].bp_ctxt.bh = *bh;
2189                ret = nilfs_btnode_prepare_change_key(
2190                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2191                        &path[level].bp_ctxt);
2192                if (ret < 0)
2193                        return ret;
2194                nilfs_btnode_commit_change_key(
2195                        &NILFS_BMAP_I(btree)->i_btnode_cache,
2196                        &path[level].bp_ctxt);
2197                *bh = path[level].bp_ctxt.bh;
2198        }
2199
2200        nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2201                                 ncmax);
2202
2203        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2204        /* on-disk format */
2205        binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2206        binfo->bi_dat.bi_level = level;
2207
2208        return 0;
2209}
2210
2211static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2212                                struct nilfs_btree_path *path,
2213                                int level,
2214                                struct buffer_head **bh,
2215                                sector_t blocknr,
2216                                union nilfs_binfo *binfo)
2217{
2218        struct nilfs_btree_node *parent;
2219        struct inode *dat = nilfs_bmap_get_dat(btree);
2220        __u64 key;
2221        __u64 ptr;
2222        union nilfs_bmap_ptr_req req;
2223        int ncmax, ret;
2224
2225        parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2226        ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2227                                       ncmax);
2228        req.bpr_ptr = ptr;
2229        ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2230        if (ret < 0)
2231                return ret;
2232        nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2233
2234        key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2235        /* on-disk format */
2236        binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2237        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2238
2239        return 0;
2240}
2241
2242static int nilfs_btree_assign(struct nilfs_bmap *btree,
2243                              struct buffer_head **bh,
2244                              sector_t blocknr,
2245                              union nilfs_binfo *binfo)
2246{
2247        struct nilfs_btree_path *path;
2248        struct nilfs_btree_node *node;
2249        __u64 key;
2250        int level, ret;
2251
2252        path = nilfs_btree_alloc_path();
2253        if (path == NULL)
2254                return -ENOMEM;
2255
2256        if (buffer_nilfs_node(*bh)) {
2257                node = (struct nilfs_btree_node *)(*bh)->b_data;
2258                key = nilfs_btree_node_get_key(node, 0);
2259                level = nilfs_btree_node_get_level(node);
2260        } else {
2261                key = nilfs_bmap_data_get_key(btree, *bh);
2262                level = NILFS_BTREE_LEVEL_DATA;
2263        }
2264
2265        ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2266        if (ret < 0) {
2267                WARN_ON(ret == -ENOENT);
2268                goto out;
2269        }
2270
2271        ret = NILFS_BMAP_USE_VBN(btree) ?
2272                nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2273                nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2274
2275 out:
2276        nilfs_btree_free_path(path);
2277
2278        return ret;
2279}
2280
2281static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2282                                 struct buffer_head **bh,
2283                                 sector_t blocknr,
2284                                 union nilfs_binfo *binfo)
2285{
2286        struct nilfs_btree_node *node;
2287        __u64 key;
2288        int ret;
2289
2290        ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2291                             blocknr);
2292        if (ret < 0)
2293                return ret;
2294
2295        if (buffer_nilfs_node(*bh)) {
2296                node = (struct nilfs_btree_node *)(*bh)->b_data;
2297                key = nilfs_btree_node_get_key(node, 0);
2298        } else
2299                key = nilfs_bmap_data_get_key(btree, *bh);
2300
2301        /* on-disk format */
2302        binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2303        binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2304
2305        return 0;
2306}
2307
2308static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2309{
2310        struct buffer_head *bh;
2311        struct nilfs_btree_path *path;
2312        __u64 ptr;
2313        int ret;
2314
2315        path = nilfs_btree_alloc_path();
2316        if (path == NULL)
2317                return -ENOMEM;
2318
2319        ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2320        if (ret < 0) {
2321                WARN_ON(ret == -ENOENT);
2322                goto out;
2323        }
2324        ret = nilfs_btree_get_block(btree, ptr, &bh);
2325        if (ret < 0) {
2326                WARN_ON(ret == -ENOENT);
2327                goto out;
2328        }
2329
2330        if (!buffer_dirty(bh))
2331                mark_buffer_dirty(bh);
2332        brelse(bh);
2333        if (!nilfs_bmap_dirty(btree))
2334                nilfs_bmap_set_dirty(btree);
2335
2336 out:
2337        nilfs_btree_free_path(path);
2338        return ret;
2339}
2340
2341static const struct nilfs_bmap_operations nilfs_btree_ops = {
2342        .bop_lookup             =       nilfs_btree_lookup,
2343        .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2344        .bop_insert             =       nilfs_btree_insert,
2345        .bop_delete             =       nilfs_btree_delete,
2346        .bop_clear              =       NULL,
2347
2348        .bop_propagate          =       nilfs_btree_propagate,
2349
2350        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2351
2352        .bop_assign             =       nilfs_btree_assign,
2353        .bop_mark               =       nilfs_btree_mark,
2354
2355        .bop_seek_key           =       nilfs_btree_seek_key,
2356        .bop_last_key           =       nilfs_btree_last_key,
2357
2358        .bop_check_insert       =       NULL,
2359        .bop_check_delete       =       nilfs_btree_check_delete,
2360        .bop_gather_data        =       nilfs_btree_gather_data,
2361};
2362
2363static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2364        .bop_lookup             =       NULL,
2365        .bop_lookup_contig      =       NULL,
2366        .bop_insert             =       NULL,
2367        .bop_delete             =       NULL,
2368        .bop_clear              =       NULL,
2369
2370        .bop_propagate          =       nilfs_btree_propagate_gc,
2371
2372        .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2373
2374        .bop_assign             =       nilfs_btree_assign_gc,
2375        .bop_mark               =       NULL,
2376
2377        .bop_seek_key           =       NULL,
2378        .bop_last_key           =       NULL,
2379
2380        .bop_check_insert       =       NULL,
2381        .bop_check_delete       =       NULL,
2382        .bop_gather_data        =       NULL,
2383};
2384
2385static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2386{
2387        bmap->b_ops = &nilfs_btree_ops;
2388        bmap->b_nchildren_per_block =
2389                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2390}
2391
2392int nilfs_btree_init(struct nilfs_bmap *bmap)
2393{
2394        int ret = 0;
2395
2396        __nilfs_btree_init(bmap);
2397
2398        if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2399                                    bmap->b_inode->i_ino))
2400                ret = -EIO;
2401        return ret;
2402}
2403
2404void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2405{
2406        bmap->b_ops = &nilfs_btree_ops_gc;
2407        bmap->b_nchildren_per_block =
2408                NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2409}
2410