linux/fs/reiserfs/fix_node.c
   1/*
   2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4
   5/**
   6 ** old_item_num
   7 ** old_entry_num
   8 ** set_entry_sizes
   9 ** create_virtual_node
  10 ** check_left
  11 ** check_right
  12 ** directory_part_size
  13 ** get_num_ver
  14 ** set_parameters
  15 ** is_leaf_removable
  16 ** are_leaves_removable
  17 ** get_empty_nodes
  18 ** get_lfree
  19 ** get_rfree
  20 ** is_left_neighbor_in_cache
  21 ** decrement_key
  22 ** get_far_parent
  23 ** get_parents
  24 ** can_node_be_removed
  25 ** ip_check_balance
  26 ** dc_check_balance_internal
  27 ** dc_check_balance_leaf
  28 ** dc_check_balance
  29 ** check_balance
  30 ** get_direct_parent
  31 ** get_neighbors
  32 ** fix_nodes
  33 **
  34 **
  35 **/
  36
  37#include <linux/time.h>
  38#include <linux/string.h>
  39#include <linux/reiserfs_fs.h>
  40#include <linux/buffer_head.h>
  41
  42/* To make any changes in the tree we find a node, that contains item
  43   to be changed/deleted or position in the node we insert a new item
  44   to. We call this node S. To do balancing we need to decide what we
  45   will shift to left/right neighbor, or to a new node, where new item
  46   will be etc. To make this analysis simpler we build virtual
  47   node. Virtual node is an array of items, that will replace items of
  48   node S. (For instance if we are going to delete an item, virtual
  49   node does not contain it). Virtual node keeps information about
  50   item sizes and types, mergeability of first and last items, sizes
  51   of all entries in directory item. We use this array of items when
  52   calculating what we can shift to neighbors and how many nodes we
   need if we do no shifting, if we shift to the left/right
  54   neighbor or to both. */
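/* For example: if S[0] holds items {A, B, C} and B is being deleted
   (M_DELETE), the virtual node describes {A, C}; if a new item X is being
   inserted at position 1 (M_INSERT), it describes {A, X, B, C}.
   create_virtual_node() below fills in the item sizes, types and merge
   flags for exactly this "after the change" view of the node. */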
  55
/* given an item number in the virtual node, return the number that item has in the source buffer */
  57static inline int old_item_num(int new_num, int affected_item_num, int mode)
  58{
  59        if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
  60                return new_num;
  61
  62        if (mode == M_INSERT) {
  63
  64                RFALSE(new_num == 0,
  65                       "vs-8005: for INSERT mode and item number of inserted item");
  66
  67                return new_num - 1;
  68        }
  69
  70        RFALSE(mode != M_DELETE,
               "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\')",
  72               mode);
  73        /* delete mode */
  74        return new_num + 1;
  75}
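/* For example, with vn_affected_item_num == 2: in M_INSERT mode virtual item
   3 maps back to source item 2 (items at or after the insertion point are
   shifted right by one in the virtual node), in M_DELETE mode virtual item 2
   maps to source item 3 (items after the deleted one are shifted left by
   one), and in M_PASTE/M_CUT mode the numbering is unchanged. */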
  76
  77static void create_virtual_node(struct tree_balance *tb, int h)
  78{
  79        struct item_head *ih;
  80        struct virtual_node *vn = tb->tb_vn;
  81        int new_num;
  82        struct buffer_head *Sh; /* this comes from tb->S[h] */
  83
  84        Sh = PATH_H_PBUFFER(tb->tb_path, h);
  85
  86        /* size of changed node */
  87        vn->vn_size =
  88            MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
  89
        /* for internal nodes the array of virtual items is not created */
  91        if (h) {
  92                vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
  93                return;
  94        }
  95
  96        /* number of items in virtual node  */
  97        vn->vn_nr_item =
  98            B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
  99            ((vn->vn_mode == M_DELETE) ? 1 : 0);
 100
 101        /* first virtual item */
 102        vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
 103        memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
 104        vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
 105
 106        /* first item in the node */
 107        ih = B_N_PITEM_HEAD(Sh, 0);
 108
 109        /* define the mergeability for 0-th item (if it is not being deleted) */
 110        if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
 111            && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
 112                vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
 113
        /* go through all items that remain in the virtual node (except for the new (inserted) one) */
 115        for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
 116                int j;
 117                struct virtual_item *vi = vn->vn_vi + new_num;
 118                int is_affected =
 119                    ((new_num != vn->vn_affected_item_num) ? 0 : 1);
 120
 121                if (is_affected && vn->vn_mode == M_INSERT)
 122                        continue;
 123
 124                /* get item number in source node */
 125                j = old_item_num(new_num, vn->vn_affected_item_num,
 126                                 vn->vn_mode);
 127
 128                vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
 129                vi->vi_ih = ih + j;
 130                vi->vi_item = B_I_PITEM(Sh, ih + j);
 131                vi->vi_uarea = vn->vn_free_ptr;
 132
 133                // FIXME: there is no check, that item operation did not
 134                // consume too much memory
 135                vn->vn_free_ptr +=
 136                    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
 137                if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
 138                        reiserfs_panic(tb->tb_sb, "vs-8030",
 139                                       "virtual node space consumed");
 140
 141                if (!is_affected)
 142                        /* this is not being changed */
 143                        continue;
 144
 145                if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
 146                        vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
 147                        vi->vi_new_data = vn->vn_data;  // pointer to data which is going to be pasted
 148                }
 149        }
 150
        /* the virtual item for the item being inserted has not been set up yet; do that now */
 152        if (vn->vn_mode == M_INSERT) {
 153                struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
 154
 155                RFALSE(vn->vn_ins_ih == NULL,
 156                       "vs-8040: item header of inserted item is not specified");
 157                vi->vi_item_len = tb->insert_size[0];
 158                vi->vi_ih = vn->vn_ins_ih;
 159                vi->vi_item = vn->vn_data;
 160                vi->vi_uarea = vn->vn_free_ptr;
 161
 162                op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
 163                             tb->insert_size[0]);
 164        }
 165
        /* to set the right merge flag, take the right delimiting key and check whether it is the key of a mergeable item */
 167        if (tb->CFR[0]) {
 168                struct reiserfs_key *key;
 169
 170                key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
 171                if (op_is_left_mergeable(key, Sh->b_size)
 172                    && (vn->vn_mode != M_DELETE
 173                        || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
 174                        vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
 175                            VI_TYPE_RIGHT_MERGEABLE;
 176
 177#ifdef CONFIG_REISERFS_CHECK
 178                if (op_is_left_mergeable(key, Sh->b_size) &&
 179                    !(vn->vn_mode != M_DELETE
 180                      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
 181                        /* we delete last item and it could be merged with right neighbor's first item */
 182                        if (!
 183                            (B_NR_ITEMS(Sh) == 1
 184                             && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
 185                             && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
 186                                /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
 187                                print_block(Sh, 0, -1, -1);
 188                                reiserfs_panic(tb->tb_sb, "vs-8045",
 189                                               "rdkey %k, affected item==%d "
 190                                               "(mode==%c) Must be %c",
 191                                               key, vn->vn_affected_item_num,
 192                                               vn->vn_mode, M_DELETE);
 193                        }
 194                }
 195#endif
 196
 197        }
 198}
 199
/* using the virtual node, check how many items can be shifted to the left
   neighbor */
 202static void check_left(struct tree_balance *tb, int h, int cur_free)
 203{
 204        int i;
 205        struct virtual_node *vn = tb->tb_vn;
 206        struct virtual_item *vi;
 207        int d_size, ih_size;
 208
 209        RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
 210
 211        /* internal level */
 212        if (h > 0) {
 213                tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
 214                return;
 215        }
 216
 217        /* leaf level */
 218
 219        if (!cur_free || !vn->vn_nr_item) {
 220                /* no free space or nothing to move */
 221                tb->lnum[h] = 0;
 222                tb->lbytes = -1;
 223                return;
 224        }
 225
 226        RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
 227               "vs-8055: parent does not exist or invalid");
 228
 229        vi = vn->vn_vi;
 230        if ((unsigned int)cur_free >=
 231            (vn->vn_size -
 232             ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
                /* all contents of S[0] fit into L[0] */
 234
 235                RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
 236                       "vs-8055: invalid mode or balance condition failed");
 237
 238                tb->lnum[0] = vn->vn_nr_item;
 239                tb->lbytes = -1;
 240                return;
 241        }
 242
 243        d_size = 0, ih_size = IH_SIZE;
 244
        /* first item may be merged with the last item in the left neighbor */
 246        if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
 247                d_size = -((int)IH_SIZE), ih_size = 0;
 248
 249        tb->lnum[0] = 0;
 250        for (i = 0; i < vn->vn_nr_item;
 251             i++, ih_size = IH_SIZE, d_size = 0, vi++) {
 252                d_size += vi->vi_item_len;
 253                if (cur_free >= d_size) {
 254                        /* the item can be shifted entirely */
 255                        cur_free -= d_size;
 256                        tb->lnum[0]++;
 257                        continue;
 258                }
 259
 260                /* the item cannot be shifted entirely, try to split it */
 261                /* check whether L[0] can hold ih and at least one byte of the item body */
 262                if (cur_free <= ih_size) {
 263                        /* cannot shift even a part of the current item */
 264                        tb->lbytes = -1;
 265                        return;
 266                }
 267                cur_free -= ih_size;
 268
 269                tb->lbytes = op_check_left(vi, cur_free, 0, 0);
 270                if (tb->lbytes != -1)
 271                        /* count partially shifted item */
 272                        tb->lnum[0]++;
 273
 274                break;
 275        }
 276
 277        return;
 278}
 279
/* using the virtual node, check how many items can be shifted to the right
   neighbor */
 282static void check_right(struct tree_balance *tb, int h, int cur_free)
 283{
 284        int i;
 285        struct virtual_node *vn = tb->tb_vn;
 286        struct virtual_item *vi;
 287        int d_size, ih_size;
 288
 289        RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
 290
 291        /* internal level */
 292        if (h > 0) {
 293                tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
 294                return;
 295        }
 296
 297        /* leaf level */
 298
 299        if (!cur_free || !vn->vn_nr_item) {
                /* no free space or nothing to move */
 301                tb->rnum[h] = 0;
 302                tb->rbytes = -1;
 303                return;
 304        }
 305
 306        RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
 307               "vs-8075: parent does not exist or invalid");
 308
 309        vi = vn->vn_vi + vn->vn_nr_item - 1;
 310        if ((unsigned int)cur_free >=
 311            (vn->vn_size -
 312             ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
                /* all contents of S[0] fit into R[0] */
 314
 315                RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
 316                       "vs-8080: invalid mode or balance condition failed");
 317
 318                tb->rnum[h] = vn->vn_nr_item;
 319                tb->rbytes = -1;
 320                return;
 321        }
 322
 323        d_size = 0, ih_size = IH_SIZE;
 324
        /* last item may be merged with the first item in the right neighbor */
 326        if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
 327                d_size = -(int)IH_SIZE, ih_size = 0;
 328
 329        tb->rnum[0] = 0;
 330        for (i = vn->vn_nr_item - 1; i >= 0;
 331             i--, d_size = 0, ih_size = IH_SIZE, vi--) {
 332                d_size += vi->vi_item_len;
 333                if (cur_free >= d_size) {
 334                        /* the item can be shifted entirely */
 335                        cur_free -= d_size;
 336                        tb->rnum[0]++;
 337                        continue;
 338                }
 339
 340                /* check whether R[0] can hold ih and at least one byte of the item body */
 341                if (cur_free <= ih_size) {      /* cannot shift even a part of the current item */
 342                        tb->rbytes = -1;
 343                        return;
 344                }
 345
 346                /* R[0] can hold the header of the item and at least one byte of its body */
 347                cur_free -= ih_size;    /* cur_free is still > 0 */
 348
 349                tb->rbytes = op_check_right(vi, cur_free);
 350                if (tb->rbytes != -1)
 351                        /* count partially shifted item */
 352                        tb->rnum[0]++;
 353
 354                break;
 355        }
 356
 357        return;
 358}
 359
/*
 * from - number of items that are shifted to the left neighbor entirely
 * to - number of items that are shifted to the right neighbor entirely
 * from_bytes - number of bytes of the boundary item (or directory entries) which are shifted to the left neighbor
 * to_bytes - number of bytes of the boundary item (or directory entries) which are shifted to the right neighbor */
 365static int get_num_ver(int mode, struct tree_balance *tb, int h,
 366                       int from, int from_bytes,
 367                       int to, int to_bytes, short *snum012, int flow)
 368{
 369        int i;
 370        int cur_free;
 371        //    int bytes;
 372        int units;
 373        struct virtual_node *vn = tb->tb_vn;
 374        //    struct virtual_item * vi;
 375
 376        int total_node_size, max_node_size, current_item_size;
 377        int needed_nodes;
 378        int start_item,         /* position of item we start filling node from */
 379         end_item,              /* position of item we finish filling node by */
 380         start_bytes,           /* number of first bytes (entries for directory) of start_item-th item
 381                                   we do not include into node that is being filled */
         end_bytes;             /* number of last bytes (entries for directory) of the end_item-th item
                                   we do not include into the node that is being filled */
        int split_item_positions[2];    /* these are the positions in the virtual node of the
                                           items that are split between S[0] and S1new, and
                                           between S1new and S2new */
 387
 388        split_item_positions[0] = -1;
 389        split_item_positions[1] = -1;
 390
 391        /* We only create additional nodes if we are in insert or paste mode
 392           or we are in replace mode at the internal level. If h is 0 and
 393           the mode is M_REPLACE then in fix_nodes we change the mode to
 394           paste or insert before we get here in the code.  */
 395        RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
 396               "vs-8100: insert_size < 0 in overflow");
 397
 398        max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
 399
        /* snum012 [0-2] - number of items that go
           to S[0], the first new node and the second new node */
 402        snum012[3] = -1;        /* s1bytes */
 403        snum012[4] = -1;        /* s2bytes */
 404
 405        /* internal level */
 406        if (h > 0) {
 407                i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
 408                if (i == max_node_size)
 409                        return 1;
 410                return (i / max_node_size + 1);
 411        }
 412
 413        /* leaf level */
 414        needed_nodes = 1;
 415        total_node_size = 0;
 416        cur_free = max_node_size;
 417
 418        // start from 'from'-th item
 419        start_item = from;
 420        // skip its first 'start_bytes' units
 421        start_bytes = ((from_bytes != -1) ? from_bytes : 0);
 422
 423        // last included item is the 'end_item'-th one
 424        end_item = vn->vn_nr_item - to - 1;
 425        // do not count last 'end_bytes' units of 'end_item'-th item
 426        end_bytes = (to_bytes != -1) ? to_bytes : 0;
 427
        /* go through all items beginning with the start_item-th item and ending with
           the end_item-th item. Do not count the first 'start_bytes' units of the
           'start_item'-th item and the last 'end_bytes' of the 'end_item'-th item */
 431
 432        for (i = start_item; i <= end_item; i++) {
 433                struct virtual_item *vi = vn->vn_vi + i;
 434                int skip_from_end = ((i == end_item) ? end_bytes : 0);
 435
 436                RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
 437
 438                /* get size of current item */
 439                current_item_size = vi->vi_item_len;
 440
                /* do not take into account the head part (from_bytes) of the from-th item */
 442                current_item_size -=
 443                    op_part_size(vi, 0 /*from start */ , start_bytes);
 444
                /* do not take into account the tail part of the last item */
 446                current_item_size -=
 447                    op_part_size(vi, 1 /*from end */ , skip_from_end);
 448
                /* if the item fits into the current node entirely */
 450                if (total_node_size + current_item_size <= max_node_size) {
 451                        snum012[needed_nodes - 1]++;
 452                        total_node_size += current_item_size;
 453                        start_bytes = 0;
 454                        continue;
 455                }
 456
 457                if (current_item_size > max_node_size) {
                        /* virtual item length is longer than the max item size in
                           a node. That is impossible for a direct item */
 460                        RFALSE(is_direct_le_ih(vi->vi_ih),
 461                               "vs-8110: "
 462                               "direct item length is %d. It can not be longer than %d",
 463                               current_item_size, max_node_size);
 464                        /* we will try to split it */
 465                        flow = 1;
 466                }
 467
 468                if (!flow) {
 469                        /* as we do not split items, take new node and continue */
 470                        needed_nodes++;
 471                        i--;
 472                        total_node_size = 0;
 473                        continue;
 474                }
 475                // calculate number of item units which fit into node being
 476                // filled
 477                {
 478                        int free_space;
 479
 480                        free_space = max_node_size - total_node_size - IH_SIZE;
 481                        units =
 482                            op_check_left(vi, free_space, start_bytes,
 483                                          skip_from_end);
 484                        if (units == -1) {
 485                                /* nothing fits into current node, take new node and continue */
 486                                needed_nodes++, i--, total_node_size = 0;
 487                                continue;
 488                        }
 489                }
 490
 491                /* something fits into the current node */
 492                //if (snum012[3] != -1 || needed_nodes != 1)
 493                //  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
 494                //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
 495                start_bytes += units;
 496                snum012[needed_nodes - 1 + 3] = units;
 497
 498                if (needed_nodes > 2)
 499                        reiserfs_warning(tb->tb_sb, "vs-8111",
 500                                         "split_item_position is out of range");
 501                snum012[needed_nodes - 1]++;
 502                split_item_positions[needed_nodes - 1] = i;
 503                needed_nodes++;
 504                /* continue from the same item with start_bytes != -1 */
 505                start_item = i;
 506                i--;
 507                total_node_size = 0;
 508        }
 509
        // snum012[4] (if it is not -1) contains the number of units that
        // are to be in S1new, snum012[3] - those to be in S0. They are supposed
        // to be S1bytes and S2bytes correspondingly, so recalculate
 513        if (snum012[4] > 0) {
 514                int split_item_num;
 515                int bytes_to_r, bytes_to_l;
 516                int bytes_to_S1new;
 517
 518                split_item_num = split_item_positions[1];
 519                bytes_to_l =
 520                    ((from == split_item_num
 521                      && from_bytes != -1) ? from_bytes : 0);
 522                bytes_to_r =
 523                    ((end_item == split_item_num
 524                      && end_bytes != -1) ? end_bytes : 0);
 525                bytes_to_S1new =
 526                    ((split_item_positions[0] ==
 527                      split_item_positions[1]) ? snum012[3] : 0);
 528
 529                // s2bytes
 530                snum012[4] =
 531                    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
 532                    bytes_to_r - bytes_to_l - bytes_to_S1new;
 533
 534                if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
 535                    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
 536                        reiserfs_warning(tb->tb_sb, "vs-8115",
 537                                         "not directory or indirect item");
 538        }
 539
 540        /* now we know S2bytes, calculate S1bytes */
 541        if (snum012[3] > 0) {
 542                int split_item_num;
 543                int bytes_to_r, bytes_to_l;
 544                int bytes_to_S2new;
 545
 546                split_item_num = split_item_positions[0];
 547                bytes_to_l =
 548                    ((from == split_item_num
 549                      && from_bytes != -1) ? from_bytes : 0);
 550                bytes_to_r =
 551                    ((end_item == split_item_num
 552                      && end_bytes != -1) ? end_bytes : 0);
 553                bytes_to_S2new =
 554                    ((split_item_positions[0] == split_item_positions[1]
 555                      && snum012[4] != -1) ? snum012[4] : 0);
 556
 557                // s1bytes
 558                snum012[3] =
 559                    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
 560                    bytes_to_r - bytes_to_l - bytes_to_S2new;
 561        }
 562
 563        return needed_nodes;
 564}
 565
 566#ifdef CONFIG_REISERFS_CHECK
 567extern struct tree_balance *cur_tb;
 568#endif
 569
 570/* Set parameters for balancing.
 571 * Performs write of results of analysis of balancing into structure tb,
 572 * where it will later be used by the functions that actually do the balancing.
 573 * Parameters:
 574 *      tb      tree_balance structure;
 575 *      h       current level of the node;
 576 *      lnum    number of items from S[h] that must be shifted to L[h];
 577 *      rnum    number of items from S[h] that must be shifted to R[h];
 *      blk_num number of blocks that S[h] will be split into;
 *      s012    number of items that fall into the split nodes.
 *      lbytes  number of bytes which flow to the left neighbor from the item that is
 *              not shifted entirely
 *      rbytes  number of bytes which flow to the right neighbor from the item that is
 *              not shifted entirely
 584 *      s1bytes number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
 585 */
 586
 587static void set_parameters(struct tree_balance *tb, int h, int lnum,
 588                           int rnum, int blk_num, short *s012, int lb, int rb)
 589{
 590
 591        tb->lnum[h] = lnum;
 592        tb->rnum[h] = rnum;
 593        tb->blknum[h] = blk_num;
 594
 595        if (h == 0) {           /* only for leaf level */
 596                if (s012 != NULL) {
 597                        tb->s0num = *s012++,
 598                            tb->s1num = *s012++, tb->s2num = *s012++;
 599                        tb->s1bytes = *s012++;
 600                        tb->s2bytes = *s012;
 601                }
 602                tb->lbytes = lb;
 603                tb->rbytes = rb;
 604        }
 605        PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
 606        PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
 607
 608        PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
 609        PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
 610}
 611
/* check whether the node disappears if we shift tb->lnum[0] items to the left
   neighbor and tb->rnum[0] to the right one. */
 614static int is_leaf_removable(struct tree_balance *tb)
 615{
 616        struct virtual_node *vn = tb->tb_vn;
 617        int to_left, to_right;
 618        int size;
 619        int remain_items;
 620
 621        /* number of items, that will be shifted to left (right) neighbor
 622           entirely */
 623        to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
 624        to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
 625        remain_items = vn->vn_nr_item;
 626
 627        /* how many items remain in S[0] after shiftings to neighbors */
 628        remain_items -= (to_left + to_right);
 629
 630        if (remain_items < 1) {
 631                /* all content of node can be shifted to neighbors */
 632                set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
 633                               NULL, -1, -1);
 634                return 1;
 635        }
 636
 637        if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
 638                /* S[0] is not removable */
 639                return 0;
 640
 641        /* check, whether we can divide 1 remaining item between neighbors */
 642
 643        /* get size of remaining item (in item units) */
 644        size = op_unit_num(&(vn->vn_vi[to_left]));
 645
 646        if (tb->lbytes + tb->rbytes >= size) {
 647                set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
 648                               tb->lbytes, -1);
 649                return 1;
 650        }
 651
 652        return 0;
 653}
 654
 655/* check whether L, S, R can be joined in one node */
 656static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
 657{
 658        struct virtual_node *vn = tb->tb_vn;
 659        int ih_size;
 660        struct buffer_head *S0;
 661
 662        S0 = PATH_H_PBUFFER(tb->tb_path, 0);
 663
 664        ih_size = 0;
 665        if (vn->vn_nr_item) {
 666                if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
 667                        ih_size += IH_SIZE;
 668
 669                if (vn->vn_vi[vn->vn_nr_item - 1].
 670                    vi_type & VI_TYPE_RIGHT_MERGEABLE)
 671                        ih_size += IH_SIZE;
 672        } else {
 673                /* there was only one item and it will be deleted */
 674                struct item_head *ih;
 675
 676                RFALSE(B_NR_ITEMS(S0) != 1,
 677                       "vs-8125: item number must be 1: it is %d",
 678                       B_NR_ITEMS(S0));
 679
 680                ih = B_N_PITEM_HEAD(S0, 0);
 681                if (tb->CFR[0]
 682                    && !comp_short_le_keys(&(ih->ih_key),
 683                                           B_N_PDELIM_KEY(tb->CFR[0],
 684                                                          tb->rkey[0])))
 685                        if (is_direntry_le_ih(ih)) {
                                /* The directory must be in a correct state here: that is,
                                   somewhere to the left the first directory item must
                                   exist. The item being deleted cannot be that first
                                   one, because its right neighbor is an item of the same
                                   directory (and the first item is always deleted last).
                                   So the neighbors of the deleted item can be merged, and
                                   we can save ih_size */
 693                                ih_size = IH_SIZE;
 694
 695                                /* we might check that left neighbor exists and is of the
 696                                   same directory */
 697                                RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
                                       "vs-8130: first directory item can not be removed until the directory is empty");
 699                        }
 700
 701        }
 702
 703        if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
 704                set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
 705                PROC_INFO_INC(tb->tb_sb, leaves_removable);
 706                return 1;
 707        }
 708        return 0;
 709
 710}
 711
 712/* when we do not split item, lnum and rnum are numbers of entire items */
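/* For internal levels (h != 0) both macros below balance by item count:
   lpar/rpar is how many more items fit into the neighbor's free space, so
   MAX_NR_KEY(Sh) + 1 - lpar (or - rpar) is roughly the number of children the
   neighbor already holds, and to_l/to_r is chosen so that the neighbor and
   S[h] end up with (almost) equal numbers of children - the same idea as the
   explicit calculation for internal nodes in ip_check_balance(). */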
 713#define SET_PAR_SHIFT_LEFT \
 714if (h)\
 715{\
 716   int to_l;\
 717   \
 718   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
 719              (MAX_NR_KEY(Sh) + 1 - lpar);\
 720              \
 721              set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
 722}\
 723else \
 724{\
 725   if (lset==LEFT_SHIFT_FLOW)\
 726     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
 727                     tb->lbytes, -1);\
 728   else\
 729     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
 730                     -1, -1);\
 731}
 732
 733#define SET_PAR_SHIFT_RIGHT \
 734if (h)\
 735{\
 736   int to_r;\
 737   \
 738   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
 739   \
 740   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
 741}\
 742else \
 743{\
 744   if (rset==RIGHT_SHIFT_FLOW)\
 745     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
 746                  -1, tb->rbytes);\
 747   else\
 748     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
 749                  -1, -1);\
 750}
 751
 752static void free_buffers_in_tb(struct tree_balance *tb)
 753{
 754        int i;
 755
 756        pathrelse(tb->tb_path);
 757
 758        for (i = 0; i < MAX_HEIGHT; i++) {
 759                brelse(tb->L[i]);
 760                brelse(tb->R[i]);
 761                brelse(tb->FL[i]);
 762                brelse(tb->FR[i]);
 763                brelse(tb->CFL[i]);
 764                brelse(tb->CFR[i]);
 765
 766                tb->L[i] = NULL;
 767                tb->R[i] = NULL;
 768                tb->FL[i] = NULL;
 769                tb->FR[i] = NULL;
 770                tb->CFL[i] = NULL;
 771                tb->CFR[i] = NULL;
 772        }
 773}
 774
 775/* Get new buffers for storing new nodes that are created while balancing.
 776 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
 777 *              CARRY_ON - schedule didn't occur while the function worked;
 778 *              NO_DISK_SPACE - no disk space.
 779 */
 780/* The function is NOT SCHEDULE-SAFE! */
 781static int get_empty_nodes(struct tree_balance *tb, int h)
 782{
 783        struct buffer_head *new_bh,
 784            *Sh = PATH_H_PBUFFER(tb->tb_path, h);
 785        b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
 786        int counter, number_of_freeblk, amount_needed,  /* number of needed empty blocks */
 787         retval = CARRY_ON;
 788        struct super_block *sb = tb->tb_sb;
 789
        /* number_of_freeblk is the number of empty blocks which have been
           acquired for use by the balancing algorithm minus the number of
           empty blocks used in the previous levels of the analysis.
           number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
           occurred after empty blocks were acquired and the balancing
           analysis was then restarted. amount_needed is the number needed
           by this level (h) of the balancing analysis.
 797
 798           Note that for systems with many processes writing, it would be
 799           more layout optimal to calculate the total number needed by all
 800           levels and then to run reiserfs_new_blocks to get all of them at once.  */
 801
 802        /* Initiate number_of_freeblk to the amount acquired prior to the restart of
 803           the analysis or 0 if not restarted, then subtract the amount needed
 804           by all of the levels of the tree below h. */
 805        /* blknum includes S[h], so we subtract 1 in this calculation */
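        /* For example: if one empty block was already acquired before a
           restart (tb->cur_blknum == 1) and the leaf level planned two nodes
           (tb->blknum[0] == 2), then for h == 1 the loop below leaves
           number_of_freeblk == 1 - (2 - 1) == 0, so (assuming S[h] exists)
           all tb->blknum[1] - 1 missing blocks are requested from
           reiserfs_new_form_blocknrs() further down. */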
 806        for (counter = 0, number_of_freeblk = tb->cur_blknum;
 807             counter < h; counter++)
 808                number_of_freeblk -=
 809                    (tb->blknum[counter]) ? (tb->blknum[counter] -
 810                                                   1) : 0;
 811
 812        /* Allocate missing empty blocks. */
 813        /* if Sh == 0  then we are getting a new root */
 814        amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
 815        /*  Amount_needed = the amount that we need more than the amount that we have. */
 816        if (amount_needed > number_of_freeblk)
 817                amount_needed -= number_of_freeblk;
 818        else                    /* If we have enough already then there is nothing to do. */
 819                return CARRY_ON;
 820
        /* No need to check quota - it is not allocated for blocks used for formatted nodes */
 822        if (reiserfs_new_form_blocknrs(tb, blocknrs,
 823                                       amount_needed) == NO_DISK_SPACE)
 824                return NO_DISK_SPACE;
 825
 826        /* for each blocknumber we just got, get a buffer and stick it on FEB */
 827        for (blocknr = blocknrs, counter = 0;
 828             counter < amount_needed; blocknr++, counter++) {
 829
 830                RFALSE(!*blocknr,
 831                       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
 832
 833                new_bh = sb_getblk(sb, *blocknr);
 834                RFALSE(buffer_dirty(new_bh) ||
 835                       buffer_journaled(new_bh) ||
 836                       buffer_journal_dirty(new_bh),
                       "PAP-8140: journaled or dirty buffer %b for the new block",
 838                       new_bh);
 839
 840                /* Put empty buffers into the array. */
 841                RFALSE(tb->FEB[tb->cur_blknum],
 842                       "PAP-8141: busy slot for new buffer");
 843
 844                set_buffer_journal_new(new_bh);
 845                tb->FEB[tb->cur_blknum++] = new_bh;
 846        }
 847
 848        if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
 849                retval = REPEAT_SEARCH;
 850
 851        return retval;
 852}
 853
 854/* Get free space of the left neighbor, which is stored in the parent
 855 * node of the left neighbor.  */
 856static int get_lfree(struct tree_balance *tb, int h)
 857{
 858        struct buffer_head *l, *f;
 859        int order;
 860
 861        if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
 862            (l = tb->FL[h]) == NULL)
 863                return 0;
 864
 865        if (f == l)
 866                order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
 867        else {
 868                order = B_NR_ITEMS(l);
 869                f = l;
 870        }
 871
 872        return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
 873}
 874
 875/* Get free space of the right neighbor,
 876 * which is stored in the parent node of the right neighbor.
 877 */
 878static int get_rfree(struct tree_balance *tb, int h)
 879{
 880        struct buffer_head *r, *f;
 881        int order;
 882
 883        if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
 884            (r = tb->FR[h]) == NULL)
 885                return 0;
 886
 887        if (f == r)
 888                order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
 889        else {
 890                order = 0;
 891                f = r;
 892        }
 893
 894        return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
 895
 896}
 897
 898/* Check whether left neighbor is in memory. */
 899static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
 900{
 901        struct buffer_head *father, *left;
 902        struct super_block *sb = tb->tb_sb;
 903        b_blocknr_t left_neighbor_blocknr;
 904        int left_neighbor_position;
 905
 906        /* Father of the left neighbor does not exist. */
 907        if (!tb->FL[h])
 908                return 0;
 909
 910        /* Calculate father of the node to be balanced. */
 911        father = PATH_H_PBUFFER(tb->tb_path, h + 1);
 912
 913        RFALSE(!father ||
 914               !B_IS_IN_TREE(father) ||
 915               !B_IS_IN_TREE(tb->FL[h]) ||
 916               !buffer_uptodate(father) ||
 917               !buffer_uptodate(tb->FL[h]),
 918               "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
 919               father, tb->FL[h]);
 920
 921        /* Get position of the pointer to the left neighbor into the left father. */
 922        left_neighbor_position = (father == tb->FL[h]) ?
 923            tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
 924        /* Get left neighbor block number. */
 925        left_neighbor_blocknr =
 926            B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
 927        /* Look for the left neighbor in the cache. */
 928        if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
 929
 930                RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
 931                       "vs-8170: left neighbor (%b %z) is not in the tree",
 932                       left, left);
 933                put_bh(left);
 934                return 1;
 935        }
 936
 937        return 0;
 938}
 939
 940#define LEFT_PARENTS  'l'
 941#define RIGHT_PARENTS 'r'
 942
 943static void decrement_key(struct cpu_key *key)
 944{
 945        // call item specific function for this key
 946        item_ops[cpu_key_k_type(key)]->decrement_key(key);
 947}
 948
 949/* Calculate far left/right parent of the left/right neighbor of the current node, that
 950 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 951 * Calculate left/right common parent of the current node and L[h]/R[h].
 952 * Calculate left/right delimiting key position.
 953 * Returns:     PATH_INCORRECT   - path in the tree is not correct;
 *              SCHEDULE_OCCURRED - schedule occurred while the function worked;
 955 *              CARRY_ON         - schedule didn't occur while the function worked;
 956 */
 957static int get_far_parent(struct tree_balance *tb,
 958                          int h,
 959                          struct buffer_head **pfather,
 960                          struct buffer_head **pcom_father, char c_lr_par)
 961{
 962        struct buffer_head *parent;
 963        INITIALIZE_PATH(s_path_to_neighbor_father);
 964        struct treepath *path = tb->tb_path;
 965        struct cpu_key s_lr_father_key;
 966        int counter,
 967            position = INT_MAX,
 968            first_last_position = 0,
 969            path_offset = PATH_H_PATH_OFFSET(path, h);
 970
 971        /* Starting from F[h] go upwards in the tree, and look for the common
 972           ancestor of F[h], and its neighbor l/r, that should be obtained. */
 973
 974        counter = path_offset;
 975
 976        RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
 977               "PAP-8180: invalid path length");
 978
 979        for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
 980                /* Check whether parent of the current buffer in the path is really parent in the tree. */
 981                if (!B_IS_IN_TREE
 982                    (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
 983                        return REPEAT_SEARCH;
 984                /* Check whether position in the parent is correct. */
 985                if ((position =
 986                     PATH_OFFSET_POSITION(path,
 987                                          counter - 1)) >
 988                    B_NR_ITEMS(parent))
 989                        return REPEAT_SEARCH;
 990                /* Check whether parent at the path really points to the child. */
 991                if (B_N_CHILD_NUM(parent, position) !=
 992                    PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
 993                        return REPEAT_SEARCH;
 994                /* Return delimiting key if position in the parent is not equal to first/last one. */
 995                if (c_lr_par == RIGHT_PARENTS)
 996                        first_last_position = B_NR_ITEMS(parent);
 997                if (position != first_last_position) {
 998                        *pcom_father = parent;
 999                        get_bh(*pcom_father);
1000                        /*(*pcom_father = parent)->b_count++; */
1001                        break;
1002                }
1003        }
1004
1005        /* if we are in the root of the tree, then there is no common father */
1006        if (counter == FIRST_PATH_ELEMENT_OFFSET) {
1007                /* Check whether first buffer in the path is the root of the tree. */
1008                if (PATH_OFFSET_PBUFFER
1009                    (tb->tb_path,
1010                     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1011                    SB_ROOT_BLOCK(tb->tb_sb)) {
1012                        *pfather = *pcom_father = NULL;
1013                        return CARRY_ON;
1014                }
1015                return REPEAT_SEARCH;
1016        }
1017
1018        RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
1019               "PAP-8185: (%b %z) level too small",
1020               *pcom_father, *pcom_father);
1021
1022        /* Check whether the common parent is locked. */
1023
1024        if (buffer_locked(*pcom_father)) {
1025                __wait_on_buffer(*pcom_father);
1026                if (FILESYSTEM_CHANGED_TB(tb)) {
1027                        brelse(*pcom_father);
1028                        return REPEAT_SEARCH;
1029                }
1030        }
1031
1032        /* So, we got common parent of the current node and its left/right neighbor.
           Now we are getting the parent of the left/right neighbor. */
1034
1035        /* Form key to get parent of the left/right neighbor. */
1036        le_key2cpu_key(&s_lr_father_key,
1037                       B_N_PDELIM_KEY(*pcom_father,
1038                                      (c_lr_par ==
1039                                       LEFT_PARENTS) ? (tb->lkey[h - 1] =
1040                                                        position -
1041                                                        1) : (tb->rkey[h -
1042                                                                           1] =
1043                                                              position)));
1044
1045        if (c_lr_par == LEFT_PARENTS)
1046                decrement_key(&s_lr_father_key);
1047
1048        if (search_by_key
1049            (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1050             h + 1) == IO_ERROR)
1051                // path is released
1052                return IO_ERROR;
1053
1054        if (FILESYSTEM_CHANGED_TB(tb)) {
1055                pathrelse(&s_path_to_neighbor_father);
1056                brelse(*pcom_father);
1057                return REPEAT_SEARCH;
1058        }
1059
1060        *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1061
1062        RFALSE(B_LEVEL(*pfather) != h + 1,
1063               "PAP-8190: (%b %z) level too small", *pfather, *pfather);
1064        RFALSE(s_path_to_neighbor_father.path_length <
1065               FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
1066
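        /* drop the last path element so that the pathrelse() below does not
           release *pfather - the caller keeps that buffer reference */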
1067        s_path_to_neighbor_father.path_length--;
1068        pathrelse(&s_path_to_neighbor_father);
1069        return CARRY_ON;
1070}
1071
1072/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
1073 * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
1074 * FR[path_offset], CFL[path_offset], CFR[path_offset].
1075 * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
1076 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
1077 *              CARRY_ON - schedule didn't occur while the function worked;
1078 */
1079static int get_parents(struct tree_balance *tb, int h)
1080{
1081        struct treepath *path = tb->tb_path;
1082        int position,
1083            ret,
1084            path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1085        struct buffer_head *curf, *curcf;
1086
1087        /* Current node is the root of the tree or will be root of the tree */
1088        if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1089                /* The root can not have parents.
1090                   Release nodes which previously were obtained as parents of the current node neighbors. */
1091                brelse(tb->FL[h]);
1092                brelse(tb->CFL[h]);
1093                brelse(tb->FR[h]);
1094                brelse(tb->CFR[h]);
1095                tb->FL[h]  = NULL;
1096                tb->CFL[h] = NULL;
1097                tb->FR[h]  = NULL;
1098                tb->CFR[h] = NULL;
1099                return CARRY_ON;
1100        }
1101
1102        /* Get parent FL[path_offset] of L[path_offset]. */
1103        position = PATH_OFFSET_POSITION(path, path_offset - 1);
1104        if (position) {
1105                /* Current node is not the first child of its parent. */
1106                curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1107                curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
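                /* curf and curcf point to the same buffer here, so take two
                   references: one is released with FL[h], one with CFL[h] */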
1108                get_bh(curf);
1109                get_bh(curf);
1110                tb->lkey[h] = position - 1;
1111        } else {
1112                /* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
1113                   Calculate current common parent of L[path_offset] and the current node. Note that
1114                   CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
1115                   Calculate lkey[path_offset]. */
1116                if ((ret = get_far_parent(tb, h + 1, &curf,
1117                                                  &curcf,
1118                                                  LEFT_PARENTS)) != CARRY_ON)
1119                        return ret;
1120        }
1121
1122        brelse(tb->FL[h]);
1123        tb->FL[h] = curf;       /* New initialization of FL[h]. */
1124        brelse(tb->CFL[h]);
1125        tb->CFL[h] = curcf;     /* New initialization of CFL[h]. */
1126
1127        RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1128               (curcf && !B_IS_IN_TREE(curcf)),
1129               "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1130
1131/* Get parent FR[h] of R[h]. */
1132
1133/* Current node is the last child of F[h]. FR[h] != F[h]. */
1134        if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1135/* Calculate current parent of R[h], which is the right neighbor of F[h].
1136   Calculate current common parent of R[h] and current node. Note that CFR[h]
1137   not equal FR[path_offset] and CFR[h] not equal F[h]. */
1138                if ((ret =
1139                     get_far_parent(tb, h + 1, &curf, &curcf,
1140                                    RIGHT_PARENTS)) != CARRY_ON)
1141                        return ret;
1142        } else {
1143/* Current node is not the last child of its parent F[h]. */
1144                curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1145                curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
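                /* same buffer again: take two references, one for FR[h] and
                   one for CFR[h] */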
1146                get_bh(curf);
1147                get_bh(curf);
1148                tb->rkey[h] = position;
1149        }
1150
1151        brelse(tb->FR[h]);
1152        /* New initialization of FR[path_offset]. */
1153        tb->FR[h] = curf;
1154
1155        brelse(tb->CFR[h]);
1156        /* New initialization of CFR[path_offset]. */
1157        tb->CFR[h] = curcf;
1158
1159        RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1160               (curcf && !B_IS_IN_TREE(curcf)),
1161               "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
1162
1163        return CARRY_ON;
1164}
1165
/* it is possible to remove the node as a result of shifting to the
   neighbors even when we insert or paste an item. */
1168static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1169                                      struct tree_balance *tb, int h)
1170{
1171        struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1172        int levbytes = tb->insert_size[h];
1173        struct item_head *ih;
1174        struct reiserfs_key *r_key = NULL;
1175
1176        ih = B_N_PITEM_HEAD(Sh, 0);
1177        if (tb->CFR[h])
1178                r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1179
1180        if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1181            /* shifting may merge items which might save space */
1182            -
1183            ((!h
1184              && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1185            -
1186            ((!h && r_key
1187              && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1188            + ((h) ? KEY_SIZE : 0)) {
1189                /* node can not be removed */
1190                if (sfree >= levbytes) {        /* new item fits into node S[h] without any shifting */
1191                        if (!h)
1192                                tb->s0num =
1193                                    B_NR_ITEMS(Sh) +
1194                                    ((mode == M_INSERT) ? 1 : 0);
1195                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1196                        return NO_BALANCING_NEEDED;
1197                }
1198        }
1199        PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1200        return !NO_BALANCING_NEEDED;
1201}
1202
1203/* Check whether current node S[h] is balanced when increasing its size by
1204 * Inserting or Pasting.
1205 * Calculate parameters for balancing for current level h.
1206 * Parameters:
1207 *      tb      tree_balance structure;
1208 *      h       current level of the node;
1209 *      inum    item number in S[h];
1210 *      mode    i - insert, p - paste;
1211 * Returns:     1 - schedule occurred;
1212 *              0 - balancing for higher levels needed;
1213 *             -1 - no balancing for higher levels needed;
1214 *             -2 - no disk space.
1215 */
1216/* ip means Inserting or Pasting */
1217static int ip_check_balance(struct tree_balance *tb, int h)
1218{
1219        struct virtual_node *vn = tb->tb_vn;
        int levbytes,           /* Number of bytes that must be inserted into (the value
                                   is negative if bytes are deleted) the buffer which
                                   contains the node being balanced.  The mnemonic is
                                   that the attempted change in node space used at this
                                   level is levbytes bytes. */
1225         ret;
1226
1227        int lfree, sfree, rfree /* free space in L, S and R */ ;
1228
        /* nver is short for number of vertices, and lnver is the number if
           we shift to the left, rnver is the number if we shift to the
           right, and lrnver is the number if we shift in both directions.
           The goal is to minimize first the number of vertices, and second,
           the number of vertices whose contents are changed by shifting,
           and third the number of uncached vertices whose contents are
           changed by shifting and must be read from disk.  */
1236        int nver, lnver, rnver, lrnver;
1237
1238        /* used at leaf level only, S0 = S[0] is the node being balanced,
1239           sInum [ I = 0,1,2 ] is the number of items that will
1240           remain in node SI after balancing.  S1 and S2 are new
1241           nodes that might be created. */
1242
        /* we perform 8 calls to get_num_ver().  For each call we calculate five parameters,
           where the 4th parameter is s1bytes and the 5th is s2bytes
         */
        short snum012[40] = { 0, };     /* s0num, s1num, s2num for 8 cases
                                           0,1 - do not shift and do not shift but bottle
                                           2 - shift only whole items to the left
                                           3 - shift to the left and bottle as much as possible
                                           4,5 - shift to the right (whole items and as much as possible)
                                           6,7 - shift in both directions (whole items and as much as possible)
                                         */
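        /* Each of the 8 cases occupies five consecutive shorts in snum012
           (s0num, s1num, s2num, s1bytes, s2bytes), which is why the array has
           8 * 5 == 40 entries; the shift-case index constants defined below
           (NOTHING_SHIFT_NO_FLOW == 0, NOTHING_SHIFT_FLOW == 5, ...) select a
           case by stepping through the array in multiples of five. */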
1253
1254        /* Sh is the node whose balance is currently being checked */
1255        struct buffer_head *Sh;
1256
1257        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1258        levbytes = tb->insert_size[h];
1259
1260        /* Calculate balance parameters for creating new root. */
1261        if (!Sh) {
1262                if (!h)
1263                        reiserfs_panic(tb->tb_sb, "vs-8210",
1264                                       "S[0] can not be 0");
1265                switch (ret = get_empty_nodes(tb, h)) {
1266                case CARRY_ON:
1267                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1268                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1269
1270                case NO_DISK_SPACE:
1271                case REPEAT_SEARCH:
1272                        return ret;
1273                default:
1274                        reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
1275                                       "return value of get_empty_nodes");
1276                }
1277        }
1278
1279        if ((ret = get_parents(tb, h)) != CARRY_ON)     /* get parents of S[h] neighbors. */
1280                return ret;
1281
1282        sfree = B_FREE_SPACE(Sh);
1283
1284        /* get free space of neighbors */
1285        rfree = get_rfree(tb, h);
1286        lfree = get_lfree(tb, h);
1287
1288        if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1289            NO_BALANCING_NEEDED)
1290                /* and new item fits into node S[h] without any shifting */
1291                return NO_BALANCING_NEEDED;
1292
1293        create_virtual_node(tb, h);
1294
        /*
           determine the maximal number of items we can shift to the left neighbor (stored in the tb structure)
           and the maximal number of bytes that can flow to the left neighbor
           from the leftmost liquid item that cannot be shifted from S[0] entirely (stored in tb->lbytes)
         */
1300        check_left(tb, h, lfree);
1301
1302        /*
1303           determine maximal number of items we can shift to the right neighbor (in tb structure)
1304           and the maximal number of bytes that can flow to the right neighbor
1305           from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1306         */
1307        check_right(tb, h, rfree);
1308
1309        /* all contents of internal node S[h] can be moved into its
1310           neighbors, S[h] will be removed after balancing */
1311        if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1312                int to_r;
1313
1314                /* Since we are working on internal nodes, and our internal
1315                   nodes have fixed size entries, we can balance by the
1316                   number of items rather than the space they consume.  In this
1317                   routine we set the left node equal to the right node,
1318                   allowing a difference of less than or equal to 1 child
1319                   pointer. */
1320                to_r =
1321                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1322                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1323                                                tb->rnum[h]);
1324                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1325                               -1, -1);
1326                return CARRY_ON;
1327        }
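        /* One way to read the to_r formula above (with illustrative numbers
         * only): treating MAX_NR_KEY(Sh) + 1 - tb->lnum[h] and
         * MAX_NR_KEY(Sh) + 1 - tb->rnum[h] as the child pointer counts of
         * L[h] and R[h], the dividend is the total number of child pointers
         * in L[h], S[h] and R[h] together, so to_r is how many of S[h]'s
         * children must go right for R[h] to end up with half of that total;
         * the remaining vn_nr_item + 1 - to_r go left.  For example, with
         * MAX_NR_KEY = 7, lnum = 5, rnum = 4 and vn_nr_item = 2:
         * to_r = (14 + 2 - 5 - 4 + 2 + 1) / 2 - (8 - 4) = 5 - 4 = 1,
         * so one child goes to R[h], two go to L[h], and both neighbors end
         * up holding 5 child pointers each. */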
1328
1329        /* this checks the balance condition, i.e. that no two neighboring nodes can fit in one node */
1330        RFALSE(h &&
1331               (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1332                tb->rnum[h] >= vn->vn_nr_item + 1),
1333               "vs-8220: tree is not balanced on internal level");
1334        RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1335                      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1336               "vs-8225: tree is not balanced on leaf level");
1337
1338        /* all contents of S[0] can be moved into its neighbors
1339           S[0] will be removed after balancing. */
1340        if (!h && is_leaf_removable(tb))
1341                return CARRY_ON;
1342
1343        /* Why do we perform this check here rather than earlier?
1344           Answer: we can win one node in some of the cases above.
1345           Moreover, it was already covered above, when we checked
1346           that S[0] is not removable in principle */
1347        if (sfree >= levbytes) {        /* new item fits into node S[h] without any shifting */
1348                if (!h)
1349                        tb->s0num = vn->vn_nr_item;
1350                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1351                return NO_BALANCING_NEEDED;
1352        }
1353
1354        {
1355                int lpar, rpar, nset, lset, rset, lrset;
1356                /*
1357                 * regular overflowing of the node
1358                 */
1359
1360                /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1361                   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1362                   nset, lset, rset, lrset - show whether flowing items give better packing
1363                 */
1364#define FLOW 1
1365#define NO_FLOW 0               /* do not do any splitting */
1366
1367                /* we choose one of the following */
1368#define NOTHING_SHIFT_NO_FLOW   0
1369#define NOTHING_SHIFT_FLOW      5
1370#define LEFT_SHIFT_NO_FLOW      10
1371#define LEFT_SHIFT_FLOW         15
1372#define RIGHT_SHIFT_NO_FLOW     20
1373#define RIGHT_SHIFT_FLOW        25
1374#define LR_SHIFT_NO_FLOW        30
1375#define LR_SHIFT_FLOW           35
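                /* Each macro above is a base offset into snum012[]: a case
                 * occupies five consecutive slots that get_num_ver() fills
                 * in, e.g. for lset = LEFT_SHIFT_FLOW:
                 *
                 *   snum012[lset + 0] - s0num, items left in S[0]
                 *   snum012[lset + 1] - s1num, items in the new node S1
                 *   snum012[lset + 2] - s2num, items in the new node S2
                 *   snum012[lset + 3] - s1bytes, bytes of the split item for S1
                 *   snum012[lset + 4] - s2bytes, bytes of the split item for S2
                 *
                 * which is why the offsets advance in steps of five. */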
1376
1377                lpar = tb->lnum[h];
1378                rpar = tb->rnum[h];
1379
1380                /* calculate number of blocks S[h] must be split into when
1381                   nothing is shifted to the neighbors,
1382                   as well as number of items in each part of the split node (s012 numbers),
1383                   and number of bytes (s1bytes) of the split item which flow to S1 if any */
1384                nset = NOTHING_SHIFT_NO_FLOW;
1385                nver = get_num_ver(vn->vn_mode, tb, h,
1386                                   0, -1, h ? vn->vn_nr_item : 0, -1,
1387                                   snum012, NO_FLOW);
1388
1389                if (!h) {
1390                        int nver1;
1391
1392                        /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1393                        nver1 = get_num_ver(vn->vn_mode, tb, h,
1394                                            0, -1, 0, -1,
1395                                            snum012 + NOTHING_SHIFT_FLOW, FLOW);
1396                        if (nver > nver1)
1397                                nset = NOTHING_SHIFT_FLOW, nver = nver1;
1398                }
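                /* The three remaining cases below follow the same pattern as
                 * the one above: first compute the number of nodes needed
                 * when no item may be split (NO_FLOW); at the leaf level,
                 * recompute allowing part of an item to flow (FLOW) and keep
                 * whichever variant needs fewer nodes, remembering in
                 * lset/rset/lrset which snum012 slice the winner filled in. */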
1399
1400                /* calculate number of blocks S[h] must be split into when
1401                   l_shift_num first items and l_shift_bytes of the right most
1402                   liquid item to be shifted are shifted to the left neighbor,
1403                   as well as number of items in each part of the split node (s012 numbers),
1404                   and number of bytes (s1bytes) of the split item which flow to S1 if any
1405                 */
1406                lset = LEFT_SHIFT_NO_FLOW;
1407                lnver = get_num_ver(vn->vn_mode, tb, h,
1408                                    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1409                                    -1, h ? vn->vn_nr_item : 0, -1,
1410                                    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1411                if (!h) {
1412                        int lnver1;
1413
1414                        lnver1 = get_num_ver(vn->vn_mode, tb, h,
1415                                             lpar -
1416                                             ((tb->lbytes != -1) ? 1 : 0),
1417                                             tb->lbytes, 0, -1,
1418                                             snum012 + LEFT_SHIFT_FLOW, FLOW);
1419                        if (lnver > lnver1)
1420                                lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1421                }
1422
1423                /* calculate number of blocks S[h] must be split into when
1424                   r_shift_num last items and r_shift_bytes of the leftmost
1425                   liquid item to be shifted are shifted to the right neighbor,
1426                   as well as number of items in each part of the split node (s012 numbers),
1427                   and number of bytes (s1bytes) of the split item which flow to S1 if any
1428                 */
1429                rset = RIGHT_SHIFT_NO_FLOW;
1430                rnver = get_num_ver(vn->vn_mode, tb, h,
1431                                    0, -1,
1432                                    h ? (vn->vn_nr_item - rpar)
1433                                      : (rpar -
1434                                         ((tb->rbytes != -1) ? 1 : 0)),
1435                                    -1,
1436                                    snum012 + RIGHT_SHIFT_NO_FLOW,
1437                                    NO_FLOW);
1438                if (!h) {
1439                        int rnver1;
1440
1441                        rnver1 = get_num_ver(vn->vn_mode, tb, h,
1442                                             0, -1,
1443                                             (rpar -
1444                                              ((tb->rbytes != -1) ? 1 : 0)),
1445                                             tb->rbytes,
1446                                             snum012 + RIGHT_SHIFT_FLOW, FLOW);
1447
1448                        if (rnver > rnver1)
1449                                rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1450                }
1451
1452                /* calculate number of blocks S[h] must be split into when
1453                   items are shifted in both directions,
1454                   as well as number of items in each part of the split node (s012 numbers),
1455                   and number of bytes (s1bytes) of the split item which flow to S1 if any
1456                 */
1457                lrset = LR_SHIFT_NO_FLOW;
1458                lrnver = get_num_ver(vn->vn_mode, tb, h,
1459                                     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1460                                     -1,
1461                                     h ? (vn->vn_nr_item - rpar)
1462                                       : (rpar -
1463                                          ((tb->rbytes != -1) ? 1 : 0)),
1464                                     -1,
1465                                     snum012 + LR_SHIFT_NO_FLOW,
1466                                     NO_FLOW);
1467                if (!h) {
1468                        int lrnver1;
1469
1470                        lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1471                                              lpar -
1472                                              ((tb->lbytes != -1) ? 1 : 0),
1473                                              tb->lbytes,
1474                                              (rpar -
1475                                               ((tb->rbytes != -1) ? 1 : 0)),
1476                                              tb->rbytes,
1477                                              snum012 + LR_SHIFT_FLOW, FLOW);
1478                        if (lrnver > lrnver1)
1479                                lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1480                }
1481
1482                /* Our general shifting strategy is:
1483                   1) to minimize the number of new nodes;
1484                   2) to minimize the number of neighbors involved in shifting;
1485                   3) to minimize the number of disk reads; */
1486
1487                /* we can win TWO or ONE nodes by shifting in both directions */
1488                if (lrnver < lnver && lrnver < rnver) {
1489                        RFALSE(h &&
1490                               (tb->lnum[h] != 1 ||
1491                                tb->rnum[h] != 1 ||
1492                                lrnver != 1 || rnver != 2 || lnver != 2
1493                                || h != 1), "vs-8230: bad h");
1494                        if (lrset == LR_SHIFT_FLOW)
1495                                set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1496                                               lrnver, snum012 + lrset,
1497                                               tb->lbytes, tb->rbytes);
1498                        else
1499                                set_parameters(tb, h,
1500                                               tb->lnum[h] -
1501                                               ((tb->lbytes == -1) ? 0 : 1),
1502                                               tb->rnum[h] -
1503                                               ((tb->rbytes == -1) ? 0 : 1),
1504                                               lrnver, snum012 + lrset, -1, -1);
1505
1506                        return CARRY_ON;
1507                }
1508
1509                /* if shifting doesn't lead to better packing then don't shift */
1510                if (nver == lrnver) {
1511                        set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1512                                       -1);
1513                        return CARRY_ON;
1514                }
1515
1516                /* now we know that for better packing shifting in only one
1517                   direction either to the left or to the right is required */
1518
1519                /*  if shifting to the left is better than shifting to the right */
1520                if (lnver < rnver) {
1521                        SET_PAR_SHIFT_LEFT;
1522                        return CARRY_ON;
1523                }
1524
1525                /* if shifting to the right is better than shifting to the left */
1526                if (lnver > rnver) {
1527                        SET_PAR_SHIFT_RIGHT;
1528                        return CARRY_ON;
1529                }
1530
1531                /* now shifting in either direction gives the same number
1532                   of nodes and we can make use of the cached neighbors */
1533                if (is_left_neighbor_in_cache(tb, h)) {
1534                        SET_PAR_SHIFT_LEFT;
1535                        return CARRY_ON;
1536                }
1537
1538                /* shift to the right regardless of whether the right neighbor is in cache or not */
1539                SET_PAR_SHIFT_RIGHT;
1540                return CARRY_ON;
1541        }
1542}
1543
1544/* Check whether current node S[h] is balanced when Decreasing its size by
1545 * Deleting or Cutting for INTERNAL node of S+tree.
1546 * Calculate parameters for balancing for current level h.
1547 * Parameters:
1548 *      tb      tree_balance structure;
1549 *      h       current level of the node;
1550 *      inum    item number in S[h];
1551 *      mode    d - delete, c - cut;
1552 * Returns:     1 - schedule occurred;
1553 *              0 - balancing for higher levels needed;
1554 *             -1 - no balancing for higher levels needed;
1555 *             -2 - no disk space.
1556 *
1557 * Note: Items of internal nodes have fixed size, so the balance condition for
1558 * the internal part of S+tree is as for the B-trees.
1559 */
1560static int dc_check_balance_internal(struct tree_balance *tb, int h)
1561{
1562        struct virtual_node *vn = tb->tb_vn;
1563
1564        /* Sh is the node whose balance is currently being checked,
1565           and Fh is its father.  */
1566        struct buffer_head *Sh, *Fh;
1567        int maxsize, ret;
1568        int lfree, rfree /* free space in L and R */ ;
1569
1570        Sh = PATH_H_PBUFFER(tb->tb_path, h);
1571        Fh = PATH_H_PPARENT(tb->tb_path, h);
1572
1573        maxsize = MAX_CHILD_SIZE(Sh);
1574
1575/*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1576/*   new_nr_item = number of items the node would have if the operation were */
1577/*      performed without balancing; */
1578        create_virtual_node(tb, h);
1579
1580        if (!Fh) {              /* S[h] is the root. */
1581                if (vn->vn_nr_item > 0) {
1582                        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1583                        return NO_BALANCING_NEEDED;     /* no balancing for higher levels needed */
1584                }
1585                /* new_nr_item == 0.
1586                 * Current root will be deleted resulting in
1587                 * decrementing the tree height. */
1588                set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1589                return CARRY_ON;
1590        }
1591
1592        if ((ret = get_parents(tb, h)) != CARRY_ON)
1593                return ret;
1594
1595        /* get free space of neighbors */
1596        rfree = get_rfree(tb, h);
1597        lfree = get_lfree(tb, h);
1598
1599        /* determine maximal number of items we can fit into neighbors */
1600        check_left(tb, h, lfree);
1601        check_right(tb, h, rfree);
1602
1603        if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1604                                                 * In this case we balance only if it leads to better packing. */
1605                if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1606                                                         * which is impossible with greater values of new_nr_item. */
1607                        if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1608                                /* All contents of S[h] can be moved to L[h]. */
1609                                int n;
1610                                int order_L;
1611
1612                                order_L =
1613                                    ((n =
1614                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1615                                                          h)) ==
1616                                     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1617                                n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1618                                    (DC_SIZE + KEY_SIZE);
1619                                set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1620                                               -1);
1621                                return CARRY_ON;
1622                        }
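                        /* In the L[h] branch above (and the symmetric R[h]
                         * branch below): order_L/order_R is the position in
                         * FL[h]/FR[h] of the disk_child that points to the
                         * neighbor - if S[h] is its own parent's leftmost
                         * (rightmost) child, the neighbor hangs off a
                         * different parent, and that parent's last (first)
                         * child slot is used instead.  n = dc_size(...) /
                         * (DC_SIZE + KEY_SIZE) then recovers the neighbor's
                         * item count from the child size recorded in that
                         * parent, without reading the neighbor node itself. */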
1623
1624                        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1625                                /* All contents of S[h] can be moved to R[h]. */
1626                                int n;
1627                                int order_R;
1628
1629                                order_R =
1630                                    ((n =
1631                                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1632                                                          h)) ==
1633                                     B_NR_ITEMS(Fh)) ? 0 : n + 1;
1634                                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1635                                    (DC_SIZE + KEY_SIZE);
1636                                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1637                                               -1);
1638                                return CARRY_ON;
1639                        }
1640                }
1641
1642                if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1643                        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1644                        int to_r;
1645
1646                        to_r =
1647                            ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1648                             tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1649                            (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1650                        set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1651                                       0, NULL, -1, -1);
1652                        return CARRY_ON;
1653                }
1654
1655                /* Balancing does not lead to better packing. */
1656                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1657                return NO_BALANCING_NEEDED;
1658        }
1659
1660        /* Current node contains an insufficient number of items. Balancing is required. */
1661        /* Check whether we can merge S[h] with left neighbor. */
1662        if (tb->lnum[h] >= vn->vn_nr_item + 1)
1663                if (is_left_neighbor_in_cache(tb, h)
1664                    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1665                        int n;
1666                        int order_L;
1667
1668                        order_L =
1669                            ((n =
1670                              PATH_H_B_ITEM_ORDER(tb->tb_path,
1671                                                  h)) ==
1672                             0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1673                        n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1674                                                                      KEY_SIZE);
1675                        set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1676                        return CARRY_ON;
1677                }
1678
1679        /* Check whether we can merge S[h] with right neighbor. */
1680        if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1681                int n;
1682                int order_R;
1683
1684                order_R =
1685                    ((n =
1686                      PATH_H_B_ITEM_ORDER(tb->tb_path,
1687                                          h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1688                n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1689                                                              KEY_SIZE);
1690                set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1691                return CARRY_ON;
1692        }
1693
1694        /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1695        if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1696                int to_r;
1697
1698                to_r =
1699                    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1700                     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1701                                                tb->rnum[h]);
1702                set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1703                               -1, -1);
1704                return CARRY_ON;
1705        }
1706
1707        /* For internal nodes try to borrow item from a neighbor */
1708        RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1709
1710        /* Borrow one or two items from the cached neighbor */
1711        if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1712                int from_l;
1713
1714                from_l =
1715                    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1716                     1) / 2 - (vn->vn_nr_item + 1);
1717                set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1718                return CARRY_ON;
1719        }
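        /* Reading MAX_NR_KEY(Sh) + 1 - tb->lnum[h] as the number of child
         * pointers currently in L[h], and vn_nr_item + 1 as those of S[h],
         * from_l works out to roughly half of their combined child count
         * minus what S[h] already holds, i.e. just enough children borrowed
         * from the left neighbor to even the two nodes out; the negative
         * value passed to set_parameters() encodes that the shift goes the
         * other way, from the neighbor into S[h].  The call below does the
         * same with the right neighbor. */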
1720
1721        set_parameters(tb, h, 0,
1722                       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1723                          1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1724        return CARRY_ON;
1725}
1726
1727/* Check whether current node S[h] is balanced when Decreasing its size by
1728 * Deleting or Truncating for LEAF node of S+tree.
1729 * Calculate parameters for balancing for current level h.
1730 * Parameters:
1731 *      tb      tree_balance structure;
1732 *      h       current level of the node;
1733 *      inum    item number in S[h];
1734 *      mode    d - delete, c - cut;
1735 * Returns:     1 - schedule occurred;
1736 *              0 - balancing for higher levels needed;
1737 *             -1 - no balancing for higher levels needed;
1738 *             -2 - no disk space.
1739 */
1740static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1741{
1742        struct virtual_node *vn = tb->tb_vn;
1743
1744        /* Number of bytes that must be deleted from the buffer which
1745           contains the node being balanced (the value is negative since
1746           bytes are being deleted).  The mnemonic is that the attempted
1747           change in node space used is levbytes bytes. */
1748        int levbytes;
1749        /* the maximal item size */
1750        int maxsize, ret;
1751        /* S0 is the node whose balance is currently being checked,
1752           and F0 is its father.  */
1753        struct buffer_head *S0, *F0;
1754        int lfree, rfree /* free space in L and R */ ;
1755
1756        S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1757        F0 = PATH_H_PPARENT(tb->tb_path, 0);
1758
1759        levbytes = tb->insert_size[h];
1760
1761        maxsize = MAX_CHILD_SIZE(S0);   /* maximal possible size of an item */
1762
1763        if (!F0) {              /* S[0] is the root now. */
1764
1765                RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1766                       "vs-8240: attempt to create empty buffer tree");
1767
1768                set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1769                return NO_BALANCING_NEEDED;
1770        }
1771
1772        if ((ret = get_parents(tb, h)) != CARRY_ON)
1773                return ret;
1774
1775        /* get free space of neighbors */
1776        rfree = get_rfree(tb, h);
1777        lfree = get_lfree(tb, h);
1778
1779        create_virtual_node(tb, h);
1780
1781        /* if 3 leaves can be merged into one, set parameters and return */
1782        if (are_leaves_removable(tb, lfree, rfree))
1783                return CARRY_ON;
1784
1785        /* determine maximal number of items we can shift to the left/right neighbor
1786           and the maximal number of bytes that can flow to the left/right neighbor
1787           from the leftmost/rightmost liquid item that cannot be shifted from S[0] entirely
1788         */
1789        check_left(tb, h, lfree);
1790        check_right(tb, h, rfree);
1791
1792        /* check whether we can merge S with left neighbor. */
1793        if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1794                if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||      /* S can not be merged with R */
1795                    !tb->FR[h]) {
1796
1797                        RFALSE(!tb->FL[h],
1798                               "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1799
1800                        /* set parameter to merge S[0] with its left neighbor */
1801                        set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1802                        return CARRY_ON;
1803                }
1804
1805        /* check whether we can merge S[0] with right neighbor. */
1806        if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1807                set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1808                return CARRY_ON;
1809        }
1810
1811        /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1812        if (is_leaf_removable(tb))
1813                return CARRY_ON;
1814
1815        /* Balancing is not required. */
1816        tb->s0num = vn->vn_nr_item;
1817        set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1818        return NO_BALANCING_NEEDED;
1819}
1820
1821/* Check whether current node S[h] is balanced when Decreasing its size by
1822 * Deleting or Cutting.
1823 * Calculate parameters for balancing for current level h.
1824 * Parameters:
1825 *      tb      tree_balance structure;
1826 *      h       current level of the node;
1827 *      inum    item number in S[h];
1828 *      mode    d - delete, c - cut.
1829 * Returns:     1 - schedule occurred;
1830 *              0 - balancing for higher levels needed;
1831 *             -1 - no balancing for higher levels needed;
1832 *             -2 - no disk space.
1833 */
1834static int dc_check_balance(struct tree_balance *tb, int h)
1835{
1836        RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1837               "vs-8250: S is not initialized");
1838
1839        if (h)
1840                return dc_check_balance_internal(tb, h);
1841        else
1842                return dc_check_balance_leaf(tb, h);
1843}
1844
1845/* Check whether current node S[h] is balanced.
1846 * Calculate parameters for balancing for current level h.
1847 * Parameters:
1848 *
1849 *      tb      tree_balance structure:
1850 *
1851 *              tb is a large structure that must be read about in the header file
1852 *              at the same time as this procedure if the reader is to successfully
1853 *              understand this procedure
1854 *
1855 *      h       current level of the node;
1856 *      inum    item number in S[h];
1857 *      mode    i - insert, p - paste, d - delete, c - cut.
1858 * Returns:     1 - schedule occurred;
1859 *              0 - balancing for higher levels needed;
1860 *             -1 - no balancing for higher levels needed;
1861 *             -2 - no disk space.
1862 */
1863static int check_balance(int mode,
1864                         struct tree_balance *tb,
1865                         int h,
1866                         int inum,
1867                         int pos_in_item,
1868                         struct item_head *ins_ih, const void *data)
1869{
1870        struct virtual_node *vn;
1871
1872        vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1873        vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1874        vn->vn_mode = mode;
1875        vn->vn_affected_item_num = inum;
1876        vn->vn_pos_in_item = pos_in_item;
1877        vn->vn_ins_ih = ins_ih;
1878        vn->vn_data = data;
1879
1880        RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1881               "vs-8255: ins_ih can not be 0 in insert mode");
1882
1883        if (tb->insert_size[h] > 0)
1884                /* Calculate balance parameters when size of node is increasing. */
1885                return ip_check_balance(tb, h);
1886
1887        /* Calculate balance parameters when  size of node is decreasing. */
1888        return dc_check_balance(tb, h);
1889}
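/* A note on the buffer prepared above (a sketch based on how the fields are
 * used in this file): tb->vn_buf, whose size comes from
 * get_virtual_node_size() below, begins with the struct virtual_node itself;
 * vn_free_ptr points just past it, and create_virtual_node() carves the
 * per-item virtual_item array (and, for a directory item, its
 * direntry_uarea) out of that remaining space. */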
1890
1891/* Check whether parent at the path is the really parent of the current node.*/
1892static int get_direct_parent(struct tree_balance *tb, int h)
1893{
1894        struct buffer_head *bh;
1895        struct treepath *path = tb->tb_path;
1896        int position,
1897            path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1898
1899        /* We are in the root or in the new root. */
1900        if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1901
1902                RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1903                       "PAP-8260: invalid offset in the path");
1904
1905                if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
1906                    b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
1907                        /* Root is not changed. */
1908                        PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
1909                        PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1910                        return CARRY_ON;
1911                }
1912                return REPEAT_SEARCH;   /* Root is changed and we must recalculate the path. */
1913        }
1914
1915        if (!B_IS_IN_TREE
1916            (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1917                return REPEAT_SEARCH;   /* Parent in the path is not in the tree. */
1918
1919        if ((position =
1920             PATH_OFFSET_POSITION(path,
1921                                  path_offset - 1)) > B_NR_ITEMS(bh))
1922                return REPEAT_SEARCH;
1923
1924        if (B_N_CHILD_NUM(bh, position) !=
1925            PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1926                /* Parent in the path is not parent of the current node in the tree. */
1927                return REPEAT_SEARCH;
1928
1929        if (buffer_locked(bh)) {
1930                __wait_on_buffer(bh);
1931                if (FILESYSTEM_CHANGED_TB(tb))
1932                        return REPEAT_SEARCH;
1933        }
1934
1935        return CARRY_ON;        /* Parent in the path is unlocked and really parent of the current node.  */
1936}
1937
1938/* Using lnum[h] and rnum[h] we should determine what neighbors
1939 * of S[h] we need in order to balance S[h], and get them
1940 * if necessary.
1941 * Returns:     SCHEDULE_OCCURRED - schedule occurred while the function worked;
1942 *              CARRY_ON - schedule didn't occur while the function worked;
1943 */
1944static int get_neighbors(struct tree_balance *tb, int h)
1945{
1946        int child_position,
1947            path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
1948        unsigned long son_number;
1949        struct super_block *sb = tb->tb_sb;
1950        struct buffer_head *bh;
1951
1952        PROC_INFO_INC(sb, get_neighbors[h]);
1953
1954        if (tb->lnum[h]) {
1955                /* We need left neighbor to balance S[h]. */
1956                PROC_INFO_INC(sb, need_l_neighbor[h]);
1957                bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1958
1959                RFALSE(bh == tb->FL[h] &&
1960                       !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
1961                       "PAP-8270: invalid position in the parent");
1962
1963                child_position =
1964                    (bh == tb->FL[h]) ?
1965                    tb->lkey[h] :
1966                    B_NR_ITEMS(tb->FL[h]);
1967                son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1968                bh = sb_bread(sb, son_number);
1969                if (!bh)
1970                        return IO_ERROR;
1971                if (FILESYSTEM_CHANGED_TB(tb)) {
1972                        brelse(bh);
1973                        PROC_INFO_INC(sb, get_neighbors_restart[h]);
1974                        return REPEAT_SEARCH;
1975                }
1976
1977                RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
1978                       child_position > B_NR_ITEMS(tb->FL[h]) ||
1979                       B_N_CHILD_NUM(tb->FL[h], child_position) !=
1980                       bh->b_blocknr, "PAP-8275: invalid parent");
1981                RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
1982                RFALSE(!h &&
1983                       B_FREE_SPACE(bh) !=
1984                       MAX_CHILD_SIZE(bh) -
1985                       dc_size(B_N_CHILD(tb->FL[0], child_position)),
1986                       "PAP-8290: invalid child size of left neighbor");
1987
1988                brelse(tb->L[h]);
1989                tb->L[h] = bh;
1990        }
1991
1992        /* We need right neighbor to balance S[path_offset]. */
1993        if (tb->rnum[h]) {
1994                PROC_INFO_INC(sb, need_r_neighbor[h]);
1995                bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1996
1997                RFALSE(bh == tb->FR[h] &&
1998                       PATH_OFFSET_POSITION(tb->tb_path,
1999                                            path_offset) >=
2000                       B_NR_ITEMS(bh),
2001                       "PAP-8295: invalid position in the parent");
2002
2003                child_position =
2004                    (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
2005                son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2006                bh = sb_bread(sb, son_number);
2007                if (!bh)
2008                        return IO_ERROR;
2009                if (FILESYSTEM_CHANGED_TB(tb)) {
2010                        brelse(bh);
2011                        PROC_INFO_INC(sb, get_neighbors_restart[h]);
2012                        return REPEAT_SEARCH;
2013                }
2014                brelse(tb->R[h]);
2015                tb->R[h] = bh;
2016
2017                RFALSE(!h
2018                       && B_FREE_SPACE(bh) !=
2019                       MAX_CHILD_SIZE(bh) -
2020                       dc_size(B_N_CHILD(tb->FR[0], child_position)),
2021                       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2022                       B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
2023                       dc_size(B_N_CHILD(tb->FR[0], child_position)));
2024
2025        }
2026        return CARRY_ON;
2027}
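/* A note on the lookups above: child_position selects the disk_child in
 * FL[h]/FR[h] that points at the wanted neighbor.  When the buffer taken
 * from the path is itself the neighbor's father, the cached key positions
 * are used (tb->lkey[h] for the left neighbor, tb->rkey[h] + 1 for the
 * right one); otherwise the neighbor hangs off a "far parent", and the last
 * child of FL[h] (respectively the first child of FR[h]) is read instead. */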
2028
2029static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2030{
2031        int max_num_of_items;
2032        int max_num_of_entries;
2033        unsigned long blocksize = sb->s_blocksize;
2034
2035#define MIN_NAME_LEN 1
2036
2037        max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2038        max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2039            (DEH_SIZE + MIN_NAME_LEN);
2040
2041        return sizeof(struct virtual_node) +
2042            max(max_num_of_items * sizeof(struct virtual_item),
2043                sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2044                (max_num_of_entries - 1) * sizeof(__u16));
2045}
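/* Put differently, the calculation above sizes the scratch buffer for the
 * worse of two leaf shapes: a node packed with the maximum number of
 * minimum-length items, each needing one struct virtual_item, or a node
 * holding a single directory item packed with the maximum number of
 * minimum-length entries, which needs one virtual_item plus a
 * direntry_uarea carrying one __u16 entry size per entry. */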
2046
2047/* maybe we should fail the balancing we are going to perform when kmalloc
2048   fails several times.  But for now it will loop until kmalloc gets the
2049   required memory */
2050static int get_mem_for_virtual_node(struct tree_balance *tb)
2051{
2052        int check_fs = 0;
2053        int size;
2054        char *buf;
2055
2056        size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2057
2058        if (size > tb->vn_buf_size) {
2059                /* we have to allocate more memory for virtual node */
2060                if (tb->vn_buf) {
2061                        /* free memory allocated before */
2062                        kfree(tb->vn_buf);
2063                        /* this is not needed if kfree is atomic */
2064                        check_fs = 1;
2065                }
2066
2067                /* virtual node now requires more memory */
2068                tb->vn_buf_size = size;
2069
2070                /* get memory for virtual item */
2071                buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2072                if (!buf) {
2073                        /* getting memory with GFP_KERNEL priority may involve
2074                           balancing now (due to indirect_to_direct conversion on
2075                           dcache shrinking). So, release path and collected
2076                           resources here */
2077                        free_buffers_in_tb(tb);
2078                        buf = kmalloc(size, GFP_NOFS);
2079                        if (!buf) {
2080                                tb->vn_buf_size = 0;
2081                        }
2082                        tb->vn_buf = buf;
2083                        schedule();
2084                        return REPEAT_SEARCH;
2085                }
2086
2087                tb->vn_buf = buf;
2088        }
2089
2090        if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2091                return REPEAT_SEARCH;
2092
2093        return CARRY_ON;
2094}
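/* Note on the allocation strategy above: the first attempt uses GFP_ATOMIC
 * so that nothing has to be released if memory happens to be available; only
 * when that fails are the collected buffers dropped, making the sleeping
 * GFP_NOFS allocation safe, and REPEAT_SEARCH is returned since the tree may
 * have changed while nothing was held.  The buffer itself stays attached to
 * tb->vn_buf either way, so the repeated pass can reuse it. */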
2095
2096#ifdef CONFIG_REISERFS_CHECK
2097static void tb_buffer_sanity_check(struct super_block *sb,
2098                                   struct buffer_head *bh,
2099                                   const char *descr, int level)
2100{
2101        if (bh) {
2102                if (atomic_read(&(bh->b_count)) <= 0)
2103
2104                        reiserfs_panic(sb, "jmacd-1", "negative or zero "
2105                                       "reference counter for buffer %s[%d] "
2106                                       "(%b)", descr, level, bh);
2107
2108                if (!buffer_uptodate(bh))
2109                        reiserfs_panic(sb, "jmacd-2", "buffer is not up "
2110                                       "to date %s[%d] (%b)",
2111                                       descr, level, bh);
2112
2113                if (!B_IS_IN_TREE(bh))
2114                        reiserfs_panic(sb, "jmacd-3", "buffer is not "
2115                                       "in tree %s[%d] (%b)",
2116                                       descr, level, bh);
2117
2118                if (bh->b_bdev != sb->s_bdev)
2119                        reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
2120                                       "device %s[%d] (%b)",
2121                                       descr, level, bh);
2122
2123                if (bh->b_size != sb->s_blocksize)
2124                        reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
2125                                       "blocksize %s[%d] (%b)",
2126                                       descr, level, bh);
2127
2128                if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
2129                        reiserfs_panic(sb, "jmacd-6", "buffer block "
2130                                       "number too high %s[%d] (%b)",
2131                                       descr, level, bh);
2132        }
2133}
2134#else
2135static void tb_buffer_sanity_check(struct super_block *sb,
2136                                   struct buffer_head *bh,
2137                                   const char *descr, int level)
2138{;
2139}
2140#endif
2141
2142static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2143{
2144        return reiserfs_prepare_for_journal(s, bh, 0);
2145}
2146
2147static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2148{
2149        struct buffer_head *locked;
2150#ifdef CONFIG_REISERFS_CHECK
2151        int repeat_counter = 0;
2152#endif
2153        int i;
2154
2155        do {
2156
2157                locked = NULL;
2158
2159                for (i = tb->tb_path->path_length;
2160                     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2161                        if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2162                                /* if I understand correctly, we can only be sure the last buffer
2163                                 ** in the path is in the tree --clm
2164                                 */
2165#ifdef CONFIG_REISERFS_CHECK
2166                                if (PATH_PLAST_BUFFER(tb->tb_path) ==
2167                                    PATH_OFFSET_PBUFFER(tb->tb_path, i))
2168                                        tb_buffer_sanity_check(tb->tb_sb,
2169                                                               PATH_OFFSET_PBUFFER
2170                                                               (tb->tb_path,
2171                                                                i), "S",
2172                                                               tb->tb_path->
2173                                                               path_length - i);
2174#endif
2175                                if (!clear_all_dirty_bits(tb->tb_sb,
2176                                                          PATH_OFFSET_PBUFFER
2177                                                          (tb->tb_path,
2178                                                           i))) {
2179                                        locked =
2180                                            PATH_OFFSET_PBUFFER(tb->tb_path,
2181                                                                i);
2182                                }
2183                        }
2184                }
2185
2186                for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
2187                     i++) {
2188
2189                        if (tb->lnum[i]) {
2190
2191                                if (tb->L[i]) {
2192                                        tb_buffer_sanity_check(tb->tb_sb,
2193                                                               tb->L[i],
2194                                                               "L", i);
2195                                        if (!clear_all_dirty_bits
2196                                            (tb->tb_sb, tb->L[i]))
2197                                                locked = tb->L[i];
2198                                }
2199
2200                                if (!locked && tb->FL[i]) {
2201                                        tb_buffer_sanity_check(tb->tb_sb,
2202                                                               tb->FL[i],
2203                                                               "FL", i);
2204                                        if (!clear_all_dirty_bits
2205                                            (tb->tb_sb, tb->FL[i]))
2206                                                locked = tb->FL[i];
2207                                }
2208
2209                                if (!locked && tb->CFL[i]) {
2210                                        tb_buffer_sanity_check(tb->tb_sb,
2211                                                               tb->CFL[i],
2212                                                               "CFL", i);
2213                                        if (!clear_all_dirty_bits
2214                                            (tb->tb_sb, tb->CFL[i]))
2215                                                locked = tb->CFL[i];
2216                                }
2217
2218                        }
2219
2220                        if (!locked && (tb->rnum[i])) {
2221
2222                                if (tb->R[i]) {
2223                                        tb_buffer_sanity_check(tb->tb_sb,
2224                                                               tb->R[i],
2225                                                               "R", i);
2226                                        if (!clear_all_dirty_bits
2227                                            (tb->tb_sb, tb->R[i]))
2228                                                locked = tb->R[i];
2229                                }
2230
2231                                if (!locked && tb->FR[i]) {
2232                                        tb_buffer_sanity_check(tb->tb_sb,
2233                                                               tb->FR[i],
2234                                                               "FR", i);
2235                                        if (!clear_all_dirty_bits
2236                                            (tb->tb_sb, tb->FR[i]))
2237                                                locked = tb->FR[i];
2238                                }
2239
2240                                if (!locked && tb->CFR[i]) {
2241                                        tb_buffer_sanity_check(tb->tb_sb,
2242                                                               tb->CFR[i],
2243                                                               "CFR", i);
2244                                        if (!clear_all_dirty_bits
2245                                            (tb->tb_sb, tb->CFR[i]))
2246                                                locked = tb->CFR[i];
2247                                }
2248                        }
2249                }
2250                /* as far as I can tell, this is not required.  The FEB list seems
2251                 ** to be full of newly allocated nodes, which will never be locked,
2252                 ** dirty, or anything else.
2253                 ** To be safe, I'm putting the checks and waits in.  For the moment,
2254                 ** they are needed to keep the code in journal.c from complaining
2255                 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
2256                 ** --clm
2257                 */
2258                for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2259                        if (tb->FEB[i]) {
2260                                if (!clear_all_dirty_bits
2261                                    (tb->tb_sb, tb->FEB[i]))
2262                                        locked = tb->FEB[i];
2263                        }
2264                }
2265
2266                if (locked) {
2267#ifdef CONFIG_REISERFS_CHECK
2268                        repeat_counter++;
2269                        if ((repeat_counter % 10000) == 0) {
2270                                reiserfs_warning(tb->tb_sb, "reiserfs-8200",
2271                                                 "too many iterations waiting "
2272                                                 "for buffer to unlock "
2273                                                 "(%b)", locked);
2274
2275                                /* Don't loop forever.  Try to recover from possible error. */
2276
2277                                return (FILESYSTEM_CHANGED_TB(tb)) ?
2278                                    REPEAT_SEARCH : CARRY_ON;
2279                        }
2280#endif
2281                        __wait_on_buffer(locked);
2282                        if (FILESYSTEM_CHANGED_TB(tb))
2283                                return REPEAT_SEARCH;
2284                }
2285
2286        } while (locked);
2287
2288        return CARRY_ON;
2289}
2290
2291/* Prepare for balancing, that is
2292 *      get all necessary parents, and neighbors;
2293 *      analyze what and where should be moved;
2294 *      get sufficient number of new nodes;
2295 * Balancing will start only after all resources have been collected.
2296 *
2297 * When ported to SMP kernels, only at the last moment after all needed nodes
2298 * are collected in cache, will the resources be locked using the usual
2299 * textbook ordered lock acquisition algorithms.  Note that ensuring that
2300 * this code neither write locks what it does not need to write lock nor locks out of order
2301 * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2302 *
2303 * fix is meant in the sense of render unchanging
2304 *
2305 * Latency might be improved by first gathering a list of what buffers are needed
2306 * and then getting as many of them in parallel as possible? -Hans
2307 *
2308 * Parameters:
2309 *      op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2310 *      tb      tree_balance structure;
2311 *      inum    item number in S[h];
2312 *      pos_in_item - comment this if you can
2313 *      ins_ih  item head of item being inserted
2314 *      data    inserted item or data to be pasted
2315 * Returns:     1 - schedule occurred while the function worked;
2316 *              0 - schedule didn't occur while the function worked;
2317 *             -1 - if no_disk_space
2318 */
2319
2320int fix_nodes(int op_mode, struct tree_balance *tb,
2321              struct item_head *ins_ih, const void *data)
2322{
2323        int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2324        int pos_in_item;
2325
2326        /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2327         ** during wait_tb_buffers_until_unlocked()
2328         */
2329        int wait_tb_buffers_run = 0;
2330        struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
2331
2332        ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
2333
2334        pos_in_item = tb->tb_path->pos_in_item;
2335
2336        tb->fs_gen = get_generation(tb->tb_sb);
2337
2338        /* we prepare and log the super here so it will already be in the
2339         ** transaction when do_balance needs to change it.
2340         ** This way do_balance won't have to schedule when trying to prepare
2341         ** the super for logging
2342         */
2343        reiserfs_prepare_for_journal(tb->tb_sb,
2344                                     SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2345        journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
2346                           SB_BUFFER_WITH_SB(tb->tb_sb));
2347        if (FILESYSTEM_CHANGED_TB(tb))
2348                return REPEAT_SEARCH;
2349
2350        /* this is possible during an indirect_to_direct conversion */
2351        if (buffer_locked(tbS0)) {
2352                __wait_on_buffer(tbS0);
2353                if (FILESYSTEM_CHANGED_TB(tb))
2354                        return REPEAT_SEARCH;
2355        }
2356#ifdef CONFIG_REISERFS_CHECK
2357        if (cur_tb) {
2358                print_cur_tb("fix_nodes");
2359                reiserfs_panic(tb->tb_sb, "PAP-8305",
2360                               "there is pending do_balance");
2361        }
2362
2363        if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
2364                reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
2365                               "not uptodate at the beginning of fix_nodes "
2366                               "or not in tree (mode %c)",
2367                               tbS0, tbS0, op_mode);
2368
2369        /* Check parameters. */
2370        switch (op_mode) {
2371        case M_INSERT:
2372                if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
2373                        reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
2374                                       "item number %d (in S0 - %d) in case "
2375                                       "of insert", item_num,
2376                                       B_NR_ITEMS(tbS0));
2377                break;
2378        case M_PASTE:
2379        case M_DELETE:
2380        case M_CUT:
2381                if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
2382                        print_block(tbS0, 0, -1, -1);
2383                        reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
2384                                       "item number(%d); mode = %c "
2385                                       "insert_size = %d",
2386                                       item_num, op_mode,
2387                                       tb->insert_size[0]);
2388                }
2389                break;
2390        default:
2391                reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
2392                               "of operation");
2393        }
2394#endif
2395
2396        if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2397                // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2398                return REPEAT_SEARCH;
2399
2400        /* Starting from the leaf level; for all levels h of the tree. */
2401        for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
2402                ret = get_direct_parent(tb, h);
2403                if (ret != CARRY_ON)
2404                        goto repeat;
2405
2406                ret = check_balance(op_mode, tb, h, item_num,
2407                                    pos_in_item, ins_ih, data);
2408                if (ret != CARRY_ON) {
2409                        if (ret == NO_BALANCING_NEEDED) {
2410                                /* No balancing for higher levels needed. */
2411                                ret = get_neighbors(tb, h);
2412                                if (ret != CARRY_ON)
2413                                        goto repeat;
2414                                if (h != MAX_HEIGHT - 1)
2415                                        tb->insert_size[h + 1] = 0;
2416                                /* ok, analysis and resource gathering are complete */
2417                                break;
2418                        }
2419                        goto repeat;
2420                }
2421
2422                ret = get_neighbors(tb, h);
2423                if (ret != CARRY_ON)
2424                        goto repeat;
2425
2426                /* No disk space, or schedule occurred and analysis may be
2427                 * invalid and needs to be redone. */
2428                ret = get_empty_nodes(tb, h);
2429                if (ret != CARRY_ON)
2430                        goto repeat;
2431
2432                if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2433                        /* We have a positive insert size but no nodes exist on this
2434                           level, this means that we are creating a new root. */
2435
2436                        RFALSE(tb->blknum[h] != 1,
2437                               "PAP-8350: creating new empty root");
2438
2439                        if (h < MAX_HEIGHT - 1)
2440                                tb->insert_size[h + 1] = 0;
2441                } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2442                        if (tb->blknum[h] > 1) {
2443                                /* The tree needs to be grown, so this node S[h]
2444                                   which is the root node is split into two nodes,
2445                                   and a new node (S[h+1]) will be created to
2446                                   become the root node.  */
2447
2448                                RFALSE(h == MAX_HEIGHT - 1,
2449                                       "PAP-8355: attempt to create too high of a tree");
2450
2451                                tb->insert_size[h + 1] =
2452                                    (DC_SIZE +
2453                                     KEY_SIZE) * (tb->blknum[h] - 1) +
2454                                    DC_SIZE;
2455                        } else if (h < MAX_HEIGHT - 1)
2456                                tb->insert_size[h + 1] = 0;
2457                } else
2458                        tb->insert_size[h + 1] =
2459                            (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
2460        }
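        /* A quick check of the insert_size[h + 1] values computed in the
         * loop above: splitting a node into tb->blknum[h] pieces requires
         * the parent to gain blknum[h] - 1 new delimiting keys and child
         * pointers, hence (DC_SIZE + KEY_SIZE) * (blknum[h] - 1); e.g. a
         * leaf split in two (blknum[0] == 2) asks the parent for exactly one
         * KEY_SIZE + DC_SIZE insertion.  When the root itself is split, the
         * brand new root also needs the one child pointer an existing
         * parent would already have had, which is the extra DC_SIZE in the
         * tree-growing branch. */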
2461
2462        ret = wait_tb_buffers_until_unlocked(tb);
2463        if (ret == CARRY_ON) {
2464                if (FILESYSTEM_CHANGED_TB(tb)) {
2465                        wait_tb_buffers_run = 1;
2466                        ret = REPEAT_SEARCH;
2467                        goto repeat;
2468                } else {
2469                        return CARRY_ON;
2470                }
2471        } else {
2472                wait_tb_buffers_run = 1;
2473                goto repeat;
2474        }
2475
2476      repeat:
2477        // fix_nodes was unable to perform its calculation because the
2478        // filesystem got changed under us, we ran out of free disk space,
2479        // or an i/o failure occurred.  In the first case the search will
2480        // be repeated.  For now - free all resources acquired so far
2481        // except for the newly allocated nodes
2482        {
2483                int i;
2484
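                /* Presumably wait_tb_buffers_run means some of the buffers
                 * were already prepared for the journal while waiting, so
                 * they have to be restored before being released; otherwise
                 * a plain release is enough. */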
2485                /* Release path buffers. */
2486                if (wait_tb_buffers_run) {
2487                        pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2488                } else {
2489                        pathrelse(tb->tb_path);
2490                }
2491                /* brelse all resources collected for balancing */
2492                for (i = 0; i < MAX_HEIGHT; i++) {
2493                        if (wait_tb_buffers_run) {
2494                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2495                                                                 tb->L[i]);
2496                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2497                                                                 tb->R[i]);
2498                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2499                                                                 tb->FL[i]);
2500                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2501                                                                 tb->FR[i]);
2502                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2503                                                                 tb->CFL[i]);
2504                                reiserfs_restore_prepared_buffer(tb->tb_sb,
2505                                                                 tb->CFR[i]);
2508                        }
2509
2510                        brelse(tb->L[i]);
2511                        brelse(tb->R[i]);
2512                        brelse(tb->FL[i]);
2513                        brelse(tb->FR[i]);
2514                        brelse(tb->CFL[i]);
2515                        brelse(tb->CFR[i]);
2516
2517                        tb->L[i] = NULL;
2518                        tb->R[i] = NULL;
2519                        tb->FL[i] = NULL;
2520                        tb->FR[i] = NULL;
2521                        tb->CFL[i] = NULL;
2522                        tb->CFR[i] = NULL;
2523                }
2524
2525                if (wait_tb_buffers_run) {
2526                        for (i = 0; i < MAX_FEB_SIZE; i++) {
2527                                if (tb->FEB[i])
2528                                        reiserfs_restore_prepared_buffer(
2529                                                tb->tb_sb, tb->FEB[i]);
2530                        }
2531                }
2532                return ret;
2533        }
2534
2535}
2536
2537/* Anatoly will probably forgive me renaming his tree_balance argument
2538   to tb.  I just wanted to make lines shorter */
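/* Release everything fix_nodes() pinned for balancing: the path, the
   neighbor and parent buffers, and any blocks that were preallocated but
   ended up unused (those are returned to free space). */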
2539void unfix_nodes(struct tree_balance *tb)
2540{
2541        int i;
2542
2543        /* Release path buffers. */
2544        pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2545
2546        /* brelse all resources collected for balancing */
2547        for (i = 0; i < MAX_HEIGHT; i++) {
2548                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2549                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2550                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2551                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2552                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2553                reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2554
2555                brelse(tb->L[i]);
2556                brelse(tb->R[i]);
2557                brelse(tb->FL[i]);
2558                brelse(tb->FR[i]);
2559                brelse(tb->CFL[i]);
2560                brelse(tb->CFR[i]);
2561        }
2562
2563        /* deal with list of allocated (used and unused) nodes */
2564        for (i = 0; i < MAX_FEB_SIZE; i++) {
2565                if (tb->FEB[i]) {
2566                        b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2567                        /* de-allocate the block, which was not used by
2568                           balancing, and forget about its buffer */
2569                        brelse(tb->FEB[i]);
2570                        reiserfs_free_block(tb->transaction_handle, NULL,
2571                                            blocknr, 0);
2572                }
2573                if (tb->used[i]) {
2574                        /* release blocks that were used as new nodes, including a new root */
2575                        brelse(tb->used[i]);
2576                }
2577        }
2578
2579        kfree(tb->vn_buf);
2580
2581}
2582
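/*
 * Illustrative sketch only: roughly how callers in stree.c (for example
 * reiserfs_insert_item()) drive the entry points above.  The names and the
 * error handling here are approximations, not the verbatim caller code:
 *
 *	while ((retval = fix_nodes(M_INSERT, &tb, ih, body)) == REPEAT_SEARCH) {
 *		... research the insertion point, since the tree changed
 *		    while fix_nodes() slept ...
 *	}
 *	if (retval == CARRY_ON)
 *		do_balance(&tb, ih, body, M_INSERT);	(consumes the resources)
 *	else
 *		unfix_nodes(&tb);			(gives everything back)
 */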