linux/fs/reiserfs/stree.c
<<
>>
Prefs
   1/*
   2 *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4
   5/*
   6 *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
   7 *  Programm System Institute
   8 *  Pereslavl-Zalessky Russia
   9 */
  10
  11#include <linux/time.h>
  12#include <linux/string.h>
  13#include <linux/pagemap.h>
  14#include <linux/bio.h>
  15#include "reiserfs.h"
  16#include <linux/buffer_head.h>
  17#include <linux/quotaops.h>
  18
  19/* Does the buffer contain a disk block which is in the tree. */
  20inline int B_IS_IN_TREE(const struct buffer_head *bh)
  21{
  22
  23        RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  24               "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  25
  26        return (B_LEVEL(bh) != FREE_LEVEL);
  27}
  28
  29/* to get item head in le form */
  30inline void copy_item_head(struct item_head *to,
  31                           const struct item_head *from)
  32{
  33        memcpy(to, from, IH_SIZE);
  34}
  35
  36/*
  37 * k1 is pointer to on-disk structure which is stored in little-endian
  38 * form. k2 is pointer to cpu variable. For key of items of the same
  39 * object this returns 0.
  40 * Returns: -1 if key1 < key2
  41 * 0 if key1 == key2
  42 * 1 if key1 > key2
  43 */
  44inline int comp_short_keys(const struct reiserfs_key *le_key,
  45                           const struct cpu_key *cpu_key)
  46{
  47        __u32 n;
  48        n = le32_to_cpu(le_key->k_dir_id);
  49        if (n < cpu_key->on_disk_key.k_dir_id)
  50                return -1;
  51        if (n > cpu_key->on_disk_key.k_dir_id)
  52                return 1;
  53        n = le32_to_cpu(le_key->k_objectid);
  54        if (n < cpu_key->on_disk_key.k_objectid)
  55                return -1;
  56        if (n > cpu_key->on_disk_key.k_objectid)
  57                return 1;
  58        return 0;
  59}
  60
  61/*
  62 * k1 is pointer to on-disk structure which is stored in little-endian
  63 * form. k2 is pointer to cpu variable.
  64 * Compare keys using all 4 key fields.
  65 * Returns: -1 if key1 < key2 0
  66 * if key1 = key2 1 if key1 > key2
  67 */
  68static inline int comp_keys(const struct reiserfs_key *le_key,
  69                            const struct cpu_key *cpu_key)
  70{
  71        int retval;
  72
  73        retval = comp_short_keys(le_key, cpu_key);
  74        if (retval)
  75                return retval;
  76        if (le_key_k_offset(le_key_version(le_key), le_key) <
  77            cpu_key_k_offset(cpu_key))
  78                return -1;
  79        if (le_key_k_offset(le_key_version(le_key), le_key) >
  80            cpu_key_k_offset(cpu_key))
  81                return 1;
  82
  83        if (cpu_key->key_length == 3)
  84                return 0;
  85
  86        /* this part is needed only when tail conversion is in progress */
  87        if (le_key_k_type(le_key_version(le_key), le_key) <
  88            cpu_key_k_type(cpu_key))
  89                return -1;
  90
  91        if (le_key_k_type(le_key_version(le_key), le_key) >
  92            cpu_key_k_type(cpu_key))
  93                return 1;
  94
  95        return 0;
  96}
  97
  98inline int comp_short_le_keys(const struct reiserfs_key *key1,
  99                              const struct reiserfs_key *key2)
 100{
 101        __u32 *k1_u32, *k2_u32;
 102        int key_length = REISERFS_SHORT_KEY_LEN;
 103
 104        k1_u32 = (__u32 *) key1;
 105        k2_u32 = (__u32 *) key2;
 106        for (; key_length--; ++k1_u32, ++k2_u32) {
 107                if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
 108                        return -1;
 109                if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
 110                        return 1;
 111        }
 112        return 0;
 113}
 114
 115inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
 116{
 117        int version;
 118        to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
 119        to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
 120
 121        /* find out version of the key */
 122        version = le_key_version(from);
 123        to->version = version;
 124        to->on_disk_key.k_offset = le_key_k_offset(version, from);
 125        to->on_disk_key.k_type = le_key_k_type(version, from);
 126}
 127
 128/*
 129 * this does not say which one is bigger, it only returns 1 if keys
 130 * are not equal, 0 otherwise
 131 */
 132inline int comp_le_keys(const struct reiserfs_key *k1,
 133                        const struct reiserfs_key *k2)
 134{
 135        return memcmp(k1, k2, sizeof(struct reiserfs_key));
 136}
 137
 138/**************************************************************************
 139 *  Binary search toolkit function                                        *
 140 *  Search for an item in the array by the item key                       *
 141 *  Returns:    1 if found,  0 if not found;                              *
 142 *        *pos = number of the searched element if found, else the        *
 143 *        number of the first element that is larger than key.            *
 144 **************************************************************************/
 145/*
 146 * For those not familiar with binary search: lbound is the leftmost item
 147 * that it could be, rbound the rightmost item that it could be.  We examine
 148 * the item halfway between lbound and rbound, and that tells us either
 149 * that we can increase lbound, or decrease rbound, or that we have found it,
 150 * or if lbound <= rbound that there are no possible items, and we have not
 151 * found it. With each examination we cut the number of possible items it
 152 * could be by one more than half rounded down, or we find it.
 153 */
 154static inline int bin_search(const void *key,   /* Key to search for. */
 155                             const void *base,  /* First item in the array. */
 156                             int num,   /* Number of items in the array. */
 157                             /*
 158                              * Item size in the array.  searched. Lest the
 159                              * reader be confused, note that this is crafted
 160                              * as a general function, and when it is applied
 161                              * specifically to the array of item headers in a
 162                              * node, width is actually the item header size
 163                              * not the item size.
 164                              */
 165                             int width,
 166                             int *pos /* Number of the searched for element. */
 167    )
 168{
 169        int rbound, lbound, j;
 170
 171        for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
 172             lbound <= rbound; j = (rbound + lbound) / 2)
 173                switch (comp_keys
 174                        ((struct reiserfs_key *)((char *)base + j * width),
 175                         (struct cpu_key *)key)) {
 176                case -1:
 177                        lbound = j + 1;
 178                        continue;
 179                case 1:
 180                        rbound = j - 1;
 181                        continue;
 182                case 0:
 183                        *pos = j;
 184                        return ITEM_FOUND;      /* Key found in the array.  */
 185                }
 186
 187        /*
 188         * bin_search did not find given key, it returns position of key,
 189         * that is minimal and greater than the given one.
 190         */
 191        *pos = lbound;
 192        return ITEM_NOT_FOUND;
 193}
 194
 195
 196/* Minimal possible key. It is never in the tree. */
 197const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
 198
 199/* Maximal possible key. It is never in the tree. */
 200static const struct reiserfs_key MAX_KEY = {
 201        cpu_to_le32(0xffffffff),
 202        cpu_to_le32(0xffffffff),
 203        {{cpu_to_le32(0xffffffff),
 204          cpu_to_le32(0xffffffff)},}
 205};
 206
 207/*
 208 * Get delimiting key of the buffer by looking for it in the buffers in the
 209 * path, starting from the bottom of the path, and going upwards.  We must
 210 * check the path's validity at each step.  If the key is not in the path,
 211 * there is no delimiting key in the tree (buffer is first or last buffer
 212 * in tree), and in this case we return a special key, either MIN_KEY or
 213 * MAX_KEY.
 214 */
 215static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
 216                                                  const struct super_block *sb)
 217{
 218        int position, path_offset = chk_path->path_length;
 219        struct buffer_head *parent;
 220
 221        RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
 222               "PAP-5010: invalid offset in the path");
 223
 224        /* While not higher in path than first element. */
 225        while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
 226
 227                RFALSE(!buffer_uptodate
 228                       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
 229                       "PAP-5020: parent is not uptodate");
 230
 231                /* Parent at the path is not in the tree now. */
 232                if (!B_IS_IN_TREE
 233                    (parent =
 234                     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
 235                        return &MAX_KEY;
 236                /* Check whether position in the parent is correct. */
 237                if ((position =
 238                     PATH_OFFSET_POSITION(chk_path,
 239                                          path_offset)) >
 240                    B_NR_ITEMS(parent))
 241                        return &MAX_KEY;
 242                /* Check whether parent at the path really points to the child. */
 243                if (B_N_CHILD_NUM(parent, position) !=
 244                    PATH_OFFSET_PBUFFER(chk_path,
 245                                        path_offset + 1)->b_blocknr)
 246                        return &MAX_KEY;
 247                /*
 248                 * Return delimiting key if position in the parent
 249                 * is not equal to zero.
 250                 */
 251                if (position)
 252                        return internal_key(parent, position - 1);
 253        }
 254        /* Return MIN_KEY if we are in the root of the buffer tree. */
 255        if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
 256            b_blocknr == SB_ROOT_BLOCK(sb))
 257                return &MIN_KEY;
 258        return &MAX_KEY;
 259}
 260
 261/* Get delimiting key of the buffer at the path and its right neighbor. */
 262inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
 263                                           const struct super_block *sb)
 264{
 265        int position, path_offset = chk_path->path_length;
 266        struct buffer_head *parent;
 267
 268        RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
 269               "PAP-5030: invalid offset in the path");
 270
 271        while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
 272
 273                RFALSE(!buffer_uptodate
 274                       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
 275                       "PAP-5040: parent is not uptodate");
 276
 277                /* Parent at the path is not in the tree now. */
 278                if (!B_IS_IN_TREE
 279                    (parent =
 280                     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
 281                        return &MIN_KEY;
 282                /* Check whether position in the parent is correct. */
 283                if ((position =
 284                     PATH_OFFSET_POSITION(chk_path,
 285                                          path_offset)) >
 286                    B_NR_ITEMS(parent))
 287                        return &MIN_KEY;
 288                /*
 289                 * Check whether parent at the path really points
 290                 * to the child.
 291                 */
 292                if (B_N_CHILD_NUM(parent, position) !=
 293                    PATH_OFFSET_PBUFFER(chk_path,
 294                                        path_offset + 1)->b_blocknr)
 295                        return &MIN_KEY;
 296
 297                /*
 298                 * Return delimiting key if position in the parent
 299                 * is not the last one.
 300                 */
 301                if (position != B_NR_ITEMS(parent))
 302                        return internal_key(parent, position);
 303        }
 304
 305        /* Return MAX_KEY if we are in the root of the buffer tree. */
 306        if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
 307            b_blocknr == SB_ROOT_BLOCK(sb))
 308                return &MAX_KEY;
 309        return &MIN_KEY;
 310}
 311
 312/*
 313 * Check whether a key is contained in the tree rooted from a buffer at a path.
 314 * This works by looking at the left and right delimiting keys for the buffer
 315 * in the last path_element in the path.  These delimiting keys are stored
 316 * at least one level above that buffer in the tree. If the buffer is the
 317 * first or last node in the tree order then one of the delimiting keys may
 318 * be absent, and in this case get_lkey and get_rkey return a special key
 319 * which is MIN_KEY or MAX_KEY.
 320 */
 321static inline int key_in_buffer(
 322                                /* Path which should be checked. */
 323                                struct treepath *chk_path,
 324                                /* Key which should be checked. */
 325                                const struct cpu_key *key,
 326                                struct super_block *sb
 327    )
 328{
 329
 330        RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
 331               || chk_path->path_length > MAX_HEIGHT,
 332               "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
 333               key, chk_path->path_length);
 334        RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
 335               "PAP-5060: device must not be NODEV");
 336
 337        if (comp_keys(get_lkey(chk_path, sb), key) == 1)
 338                /* left delimiting key is bigger, that the key we look for */
 339                return 0;
 340        /*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
 341        if (comp_keys(get_rkey(chk_path, sb), key) != 1)
 342                /* key must be less than right delimitiing key */
 343                return 0;
 344        return 1;
 345}
 346
 347int reiserfs_check_path(struct treepath *p)
 348{
 349        RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
 350               "path not properly relsed");
 351        return 0;
 352}
 353
 354/*
 355 * Drop the reference to each buffer in a path and restore
 356 * dirty bits clean when preparing the buffer for the log.
 357 * This version should only be called from fix_nodes()
 358 */
 359void pathrelse_and_restore(struct super_block *sb,
 360                           struct treepath *search_path)
 361{
 362        int path_offset = search_path->path_length;
 363
 364        RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
 365               "clm-4000: invalid path offset");
 366
 367        while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
 368                struct buffer_head *bh;
 369                bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
 370                reiserfs_restore_prepared_buffer(sb, bh);
 371                brelse(bh);
 372        }
 373        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 374}
 375
 376/* Drop the reference to each buffer in a path */
 377void pathrelse(struct treepath *search_path)
 378{
 379        int path_offset = search_path->path_length;
 380
 381        RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
 382               "PAP-5090: invalid path offset");
 383
 384        while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
 385                brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
 386
 387        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 388}
 389
 390static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 391{
 392        struct block_head *blkh;
 393        struct item_head *ih;
 394        int used_space;
 395        int prev_location;
 396        int i;
 397        int nr;
 398
 399        blkh = (struct block_head *)buf;
 400        if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
 401                reiserfs_warning(NULL, "reiserfs-5080",
 402                                 "this should be caught earlier");
 403                return 0;
 404        }
 405
 406        nr = blkh_nr_item(blkh);
 407        if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
 408                /* item number is too big or too small */
 409                reiserfs_warning(NULL, "reiserfs-5081",
 410                                 "nr_item seems wrong: %z", bh);
 411                return 0;
 412        }
 413        ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
 414        used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
 415
 416        /* free space does not match to calculated amount of use space */
 417        if (used_space != blocksize - blkh_free_space(blkh)) {
 418                reiserfs_warning(NULL, "reiserfs-5082",
 419                                 "free space seems wrong: %z", bh);
 420                return 0;
 421        }
 422        /*
 423         * FIXME: it is_leaf will hit performance too much - we may have
 424         * return 1 here
 425         */
 426
 427        /* check tables of item heads */
 428        ih = (struct item_head *)(buf + BLKH_SIZE);
 429        prev_location = blocksize;
 430        for (i = 0; i < nr; i++, ih++) {
 431                if (le_ih_k_type(ih) == TYPE_ANY) {
 432                        reiserfs_warning(NULL, "reiserfs-5083",
 433                                         "wrong item type for item %h",
 434                                         ih);
 435                        return 0;
 436                }
 437                if (ih_location(ih) >= blocksize
 438                    || ih_location(ih) < IH_SIZE * nr) {
 439                        reiserfs_warning(NULL, "reiserfs-5084",
 440                                         "item location seems wrong: %h",
 441                                         ih);
 442                        return 0;
 443                }
 444                if (ih_item_len(ih) < 1
 445                    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
 446                        reiserfs_warning(NULL, "reiserfs-5085",
 447                                         "item length seems wrong: %h",
 448                                         ih);
 449                        return 0;
 450                }
 451                if (prev_location - ih_location(ih) != ih_item_len(ih)) {
 452                        reiserfs_warning(NULL, "reiserfs-5086",
 453                                         "item location seems wrong "
 454                                         "(second one): %h", ih);
 455                        return 0;
 456                }
 457                prev_location = ih_location(ih);
 458        }
 459
 460        /* one may imagine many more checks */
 461        return 1;
 462}
 463
 464/* returns 1 if buf looks like an internal node, 0 otherwise */
 465static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
 466{
 467        struct block_head *blkh;
 468        int nr;
 469        int used_space;
 470
 471        blkh = (struct block_head *)buf;
 472        nr = blkh_level(blkh);
 473        if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
 474                /* this level is not possible for internal nodes */
 475                reiserfs_warning(NULL, "reiserfs-5087",
 476                                 "this should be caught earlier");
 477                return 0;
 478        }
 479
 480        nr = blkh_nr_item(blkh);
 481        /* for internal which is not root we might check min number of keys */
 482        if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
 483                reiserfs_warning(NULL, "reiserfs-5088",
 484                                 "number of key seems wrong: %z", bh);
 485                return 0;
 486        }
 487
 488        used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
 489        if (used_space != blocksize - blkh_free_space(blkh)) {
 490                reiserfs_warning(NULL, "reiserfs-5089",
 491                                 "free space seems wrong: %z", bh);
 492                return 0;
 493        }
 494
 495        /* one may imagine many more checks */
 496        return 1;
 497}
 498
 499/*
 500 * make sure that bh contains formatted node of reiserfs tree of
 501 * 'level'-th level
 502 */
 503static int is_tree_node(struct buffer_head *bh, int level)
 504{
 505        if (B_LEVEL(bh) != level) {
 506                reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
 507                                 "not match to the expected one %d",
 508                                 B_LEVEL(bh), level);
 509                return 0;
 510        }
 511        if (level == DISK_LEAF_NODE_LEVEL)
 512                return is_leaf(bh->b_data, bh->b_size, bh);
 513
 514        return is_internal(bh->b_data, bh->b_size, bh);
 515}
 516
 517#define SEARCH_BY_KEY_READA 16
 518
 519/*
 520 * The function is NOT SCHEDULE-SAFE!
 521 * It might unlock the write lock if we needed to wait for a block
 522 * to be read. Note that in this case it won't recover the lock to avoid
 523 * high contention resulting from too much lock requests, especially
 524 * the caller (search_by_key) will perform other schedule-unsafe
 525 * operations just after calling this function.
 526 *
 527 * @return depth of lock to be restored after read completes
 528 */
 529static int search_by_key_reada(struct super_block *s,
 530                                struct buffer_head **bh,
 531                                b_blocknr_t *b, int num)
 532{
 533        int i, j;
 534        int depth = -1;
 535
 536        for (i = 0; i < num; i++) {
 537                bh[i] = sb_getblk(s, b[i]);
 538        }
 539        /*
 540         * We are going to read some blocks on which we
 541         * have a reference. It's safe, though we might be
 542         * reading blocks concurrently changed if we release
 543         * the lock. But it's still fine because we check later
 544         * if the tree changed
 545         */
 546        for (j = 0; j < i; j++) {
 547                /*
 548                 * note, this needs attention if we are getting rid of the BKL
 549                 * you have to make sure the prepared bit isn't set on this
 550                 * buffer
 551                 */
 552                if (!buffer_uptodate(bh[j])) {
 553                        if (depth == -1)
 554                                depth = reiserfs_write_unlock_nested(s);
 555                        ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j);
 556                }
 557                brelse(bh[j]);
 558        }
 559        return depth;
 560}
 561
 562/*
 563 * This function fills up the path from the root to the leaf as it
 564 * descends the tree looking for the key.  It uses reiserfs_bread to
 565 * try to find buffers in the cache given their block number.  If it
 566 * does not find them in the cache it reads them from disk.  For each
 567 * node search_by_key finds using reiserfs_bread it then uses
 568 * bin_search to look through that node.  bin_search will find the
 569 * position of the block_number of the next node if it is looking
 570 * through an internal node.  If it is looking through a leaf node
 571 * bin_search will find the position of the item which has key either
 572 * equal to given key, or which is the maximal key less than the given
 573 * key.  search_by_key returns a path that must be checked for the
 574 * correctness of the top of the path but need not be checked for the
 575 * correctness of the bottom of the path
 576 */
 577/*
 578 * search_by_key - search for key (and item) in stree
 579 * @sb: superblock
 580 * @key: pointer to key to search for
 581 * @search_path: Allocated and initialized struct treepath; Returned filled
 582 *               on success.
 583 * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
 584 *              stop at leaf level.
 585 *
 586 * The function is NOT SCHEDULE-SAFE!
 587 */
 588int search_by_key(struct super_block *sb, const struct cpu_key *key,
 589                  struct treepath *search_path, int stop_level)
 590{
 591        b_blocknr_t block_number;
 592        int expected_level;
 593        struct buffer_head *bh;
 594        struct path_element *last_element;
 595        int node_level, retval;
 596        int fs_gen;
 597        struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
 598        b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
 599        int reada_count = 0;
 600
 601#ifdef CONFIG_REISERFS_CHECK
 602        int repeat_counter = 0;
 603#endif
 604
 605        PROC_INFO_INC(sb, search_by_key);
 606
 607        /*
 608         * As we add each node to a path we increase its count.  This means
 609         * that we must be careful to release all nodes in a path before we
 610         * either discard the path struct or re-use the path struct, as we
 611         * do here.
 612         */
 613
 614        pathrelse(search_path);
 615
 616        /*
 617         * With each iteration of this loop we search through the items in the
 618         * current node, and calculate the next current node(next path element)
 619         * for the next iteration of this loop..
 620         */
 621        block_number = SB_ROOT_BLOCK(sb);
 622        expected_level = -1;
 623        while (1) {
 624
 625#ifdef CONFIG_REISERFS_CHECK
 626                if (!(++repeat_counter % 50000))
 627                        reiserfs_warning(sb, "PAP-5100",
 628                                         "%s: there were %d iterations of "
 629                                         "while loop looking for key %K",
 630                                         current->comm, repeat_counter,
 631                                         key);
 632#endif
 633
 634                /* prep path to have another element added to it. */
 635                last_element =
 636                    PATH_OFFSET_PELEMENT(search_path,
 637                                         ++search_path->path_length);
 638                fs_gen = get_generation(sb);
 639
 640                /*
 641                 * Read the next tree node, and set the last element
 642                 * in the path to have a pointer to it.
 643                 */
 644                if ((bh = last_element->pe_buffer =
 645                     sb_getblk(sb, block_number))) {
 646
 647                        /*
 648                         * We'll need to drop the lock if we encounter any
 649                         * buffers that need to be read. If all of them are
 650                         * already up to date, we don't need to drop the lock.
 651                         */
 652                        int depth = -1;
 653
 654                        if (!buffer_uptodate(bh) && reada_count > 1)
 655                                depth = search_by_key_reada(sb, reada_bh,
 656                                                    reada_blocks, reada_count);
 657
 658                        if (!buffer_uptodate(bh) && depth == -1)
 659                                depth = reiserfs_write_unlock_nested(sb);
 660
 661                        ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 662                        wait_on_buffer(bh);
 663
 664                        if (depth != -1)
 665                                reiserfs_write_lock_nested(sb, depth);
 666                        if (!buffer_uptodate(bh))
 667                                goto io_error;
 668                } else {
 669io_error:
 670                        search_path->path_length--;
 671                        pathrelse(search_path);
 672                        return IO_ERROR;
 673                }
 674                reada_count = 0;
 675                if (expected_level == -1)
 676                        expected_level = SB_TREE_HEIGHT(sb);
 677                expected_level--;
 678
 679                /*
 680                 * It is possible that schedule occurred. We must check
 681                 * whether the key to search is still in the tree rooted
 682                 * from the current buffer. If not then repeat search
 683                 * from the root.
 684                 */
 685                if (fs_changed(fs_gen, sb) &&
 686                    (!B_IS_IN_TREE(bh) ||
 687                     B_LEVEL(bh) != expected_level ||
 688                     !key_in_buffer(search_path, key, sb))) {
 689                        PROC_INFO_INC(sb, search_by_key_fs_changed);
 690                        PROC_INFO_INC(sb, search_by_key_restarted);
 691                        PROC_INFO_INC(sb,
 692                                      sbk_restarted[expected_level - 1]);
 693                        pathrelse(search_path);
 694
 695                        /*
 696                         * Get the root block number so that we can
 697                         * repeat the search starting from the root.
 698                         */
 699                        block_number = SB_ROOT_BLOCK(sb);
 700                        expected_level = -1;
 701
 702                        /* repeat search from the root */
 703                        continue;
 704                }
 705
 706                /*
 707                 * only check that the key is in the buffer if key is not
 708                 * equal to the MAX_KEY. Latter case is only possible in
 709                 * "finish_unfinished()" processing during mount.
 710                 */
 711                RFALSE(comp_keys(&MAX_KEY, key) &&
 712                       !key_in_buffer(search_path, key, sb),
 713                       "PAP-5130: key is not in the buffer");
 714#ifdef CONFIG_REISERFS_CHECK
 715                if (REISERFS_SB(sb)->cur_tb) {
 716                        print_cur_tb("5140");
 717                        reiserfs_panic(sb, "PAP-5140",
 718                                       "schedule occurred in do_balance!");
 719                }
 720#endif
 721
 722                /*
 723                 * make sure, that the node contents look like a node of
 724                 * certain level
 725                 */
 726                if (!is_tree_node(bh, expected_level)) {
 727                        reiserfs_error(sb, "vs-5150",
 728                                       "invalid format found in block %ld. "
 729                                       "Fsck?", bh->b_blocknr);
 730                        pathrelse(search_path);
 731                        return IO_ERROR;
 732                }
 733
 734                /* ok, we have acquired next formatted node in the tree */
 735                node_level = B_LEVEL(bh);
 736
 737                PROC_INFO_BH_STAT(sb, bh, node_level - 1);
 738
 739                RFALSE(node_level < stop_level,
 740                       "vs-5152: tree level (%d) is less than stop level (%d)",
 741                       node_level, stop_level);
 742
 743                retval = bin_search(key, item_head(bh, 0),
 744                                      B_NR_ITEMS(bh),
 745                                      (node_level ==
 746                                       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
 747                                      KEY_SIZE,
 748                                      &last_element->pe_position);
 749                if (node_level == stop_level) {
 750                        return retval;
 751                }
 752
 753                /* we are not in the stop level */
 754                /*
 755                 * item has been found, so we choose the pointer which
 756                 * is to the right of the found one
 757                 */
 758                if (retval == ITEM_FOUND)
 759                        last_element->pe_position++;
 760
 761                /*
 762                 * if item was not found we choose the position which is to
 763                 * the left of the found item. This requires no code,
 764                 * bin_search did it already.
 765                 */
 766
 767                /*
 768                 * So we have chosen a position in the current node which is
 769                 * an internal node.  Now we calculate child block number by
 770                 * position in the node.
 771                 */
 772                block_number =
 773                    B_N_CHILD_NUM(bh, last_element->pe_position);
 774
 775                /*
 776                 * if we are going to read leaf nodes, try for read
 777                 * ahead as well
 778                 */
 779                if ((search_path->reada & PATH_READA) &&
 780                    node_level == DISK_LEAF_NODE_LEVEL + 1) {
 781                        int pos = last_element->pe_position;
 782                        int limit = B_NR_ITEMS(bh);
 783                        struct reiserfs_key *le_key;
 784
 785                        if (search_path->reada & PATH_READA_BACK)
 786                                limit = 0;
 787                        while (reada_count < SEARCH_BY_KEY_READA) {
 788                                if (pos == limit)
 789                                        break;
 790                                reada_blocks[reada_count++] =
 791                                    B_N_CHILD_NUM(bh, pos);
 792                                if (search_path->reada & PATH_READA_BACK)
 793                                        pos--;
 794                                else
 795                                        pos++;
 796
 797                                /*
 798                                 * check to make sure we're in the same object
 799                                 */
 800                                le_key = internal_key(bh, pos);
 801                                if (le32_to_cpu(le_key->k_objectid) !=
 802                                    key->on_disk_key.k_objectid) {
 803                                        break;
 804                                }
 805                        }
 806                }
 807        }
 808}
 809
 810/*
 811 * Form the path to an item and position in this item which contains
 812 * file byte defined by key. If there is no such item
 813 * corresponding to the key, we point the path to the item with
 814 * maximal key less than key, and *pos_in_item is set to one
 815 * past the last entry/byte in the item.  If searching for entry in a
 816 * directory item, and it is not found, *pos_in_item is set to one
 817 * entry more than the entry with maximal key which is less than the
 818 * sought key.
 819 *
 820 * Note that if there is no entry in this same node which is one more,
 821 * then we point to an imaginary entry.  for direct items, the
 822 * position is in units of bytes, for indirect items the position is
 823 * in units of blocknr entries, for directory items the position is in
 824 * units of directory entries.
 825 */
 826/* The function is NOT SCHEDULE-SAFE! */
 827int search_for_position_by_key(struct super_block *sb,
 828                               /* Key to search (cpu variable) */
 829                               const struct cpu_key *p_cpu_key,
 830                               /* Filled up by this function. */
 831                               struct treepath *search_path)
 832{
 833        struct item_head *p_le_ih;      /* pointer to on-disk structure */
 834        int blk_size;
 835        loff_t item_offset, offset;
 836        struct reiserfs_dir_entry de;
 837        int retval;
 838
 839        /* If searching for directory entry. */
 840        if (is_direntry_cpu_key(p_cpu_key))
 841                return search_by_entry_key(sb, p_cpu_key, search_path,
 842                                           &de);
 843
 844        /* If not searching for directory entry. */
 845
 846        /* If item is found. */
 847        retval = search_item(sb, p_cpu_key, search_path);
 848        if (retval == IO_ERROR)
 849                return retval;
 850        if (retval == ITEM_FOUND) {
 851
 852                RFALSE(!ih_item_len
 853                       (item_head
 854                        (PATH_PLAST_BUFFER(search_path),
 855                         PATH_LAST_POSITION(search_path))),
 856                       "PAP-5165: item length equals zero");
 857
 858                pos_in_item(search_path) = 0;
 859                return POSITION_FOUND;
 860        }
 861
 862        RFALSE(!PATH_LAST_POSITION(search_path),
 863               "PAP-5170: position equals zero");
 864
 865        /* Item is not found. Set path to the previous item. */
 866        p_le_ih =
 867            item_head(PATH_PLAST_BUFFER(search_path),
 868                           --PATH_LAST_POSITION(search_path));
 869        blk_size = sb->s_blocksize;
 870
 871        if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
 872                return FILE_NOT_FOUND;
 873
 874        /* FIXME: quite ugly this far */
 875
 876        item_offset = le_ih_k_offset(p_le_ih);
 877        offset = cpu_key_k_offset(p_cpu_key);
 878
 879        /* Needed byte is contained in the item pointed to by the path. */
 880        if (item_offset <= offset &&
 881            item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
 882                pos_in_item(search_path) = offset - item_offset;
 883                if (is_indirect_le_ih(p_le_ih)) {
 884                        pos_in_item(search_path) /= blk_size;
 885                }
 886                return POSITION_FOUND;
 887        }
 888
 889        /*
 890         * Needed byte is not contained in the item pointed to by the
 891         * path. Set pos_in_item out of the item.
 892         */
 893        if (is_indirect_le_ih(p_le_ih))
 894                pos_in_item(search_path) =
 895                    ih_item_len(p_le_ih) / UNFM_P_SIZE;
 896        else
 897                pos_in_item(search_path) = ih_item_len(p_le_ih);
 898
 899        return POSITION_NOT_FOUND;
 900}
 901
 902/* Compare given item and item pointed to by the path. */
 903int comp_items(const struct item_head *stored_ih, const struct treepath *path)
 904{
 905        struct buffer_head *bh = PATH_PLAST_BUFFER(path);
 906        struct item_head *ih;
 907
 908        /* Last buffer at the path is not in the tree. */
 909        if (!B_IS_IN_TREE(bh))
 910                return 1;
 911
 912        /* Last path position is invalid. */
 913        if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
 914                return 1;
 915
 916        /* we need only to know, whether it is the same item */
 917        ih = tp_item_head(path);
 918        return memcmp(stored_ih, ih, IH_SIZE);
 919}
 920
 921/* prepare for delete or cut of direct item */
 922static inline int prepare_for_direct_item(struct treepath *path,
 923                                          struct item_head *le_ih,
 924                                          struct inode *inode,
 925                                          loff_t new_file_length, int *cut_size)
 926{
 927        loff_t round_len;
 928
 929        if (new_file_length == max_reiserfs_offset(inode)) {
 930                /* item has to be deleted */
 931                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 932                return M_DELETE;
 933        }
 934        /* new file gets truncated */
 935        if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
 936                round_len = ROUND_UP(new_file_length);
 937                /* this was new_file_length < le_ih ... */
 938                if (round_len < le_ih_k_offset(le_ih)) {
 939                        *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 940                        return M_DELETE;        /* Delete this item. */
 941                }
 942                /* Calculate first position and size for cutting from item. */
 943                pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
 944                *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
 945
 946                return M_CUT;   /* Cut from this item. */
 947        }
 948
 949        /* old file: items may have any length */
 950
 951        if (new_file_length < le_ih_k_offset(le_ih)) {
 952                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 953                return M_DELETE;        /* Delete this item. */
 954        }
 955
 956        /* Calculate first position and size for cutting from item. */
 957        *cut_size = -(ih_item_len(le_ih) -
 958                      (pos_in_item(path) =
 959                       new_file_length + 1 - le_ih_k_offset(le_ih)));
 960        return M_CUT;           /* Cut from this item. */
 961}
 962
 963static inline int prepare_for_direntry_item(struct treepath *path,
 964                                            struct item_head *le_ih,
 965                                            struct inode *inode,
 966                                            loff_t new_file_length,
 967                                            int *cut_size)
 968{
 969        if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
 970            new_file_length == max_reiserfs_offset(inode)) {
 971                RFALSE(ih_entry_count(le_ih) != 2,
 972                       "PAP-5220: incorrect empty directory item (%h)", le_ih);
 973                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 974                /* Delete the directory item containing "." and ".." entry. */
 975                return M_DELETE;
 976        }
 977
 978        if (ih_entry_count(le_ih) == 1) {
 979                /*
 980                 * Delete the directory item such as there is one record only
 981                 * in this item
 982                 */
 983                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 984                return M_DELETE;
 985        }
 986
 987        /* Cut one record from the directory item. */
 988        *cut_size =
 989            -(DEH_SIZE +
 990              entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
 991        return M_CUT;
 992}
 993
 994#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
 995
 996/*
 997 * If the path points to a directory or direct item, calculate mode
 998 * and the size cut, for balance.
 999 * If the path points to an indirect item, remove some number of its
1000 * unformatted nodes.
1001 * In case of file truncate calculate whether this item must be
1002 * deleted/truncated or last unformatted node of this item will be
1003 * converted to a direct item.
1004 * This function returns a determination of what balance mode the
1005 * calling function should employ.
1006 */
1007static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1008                                      struct inode *inode,
1009                                      struct treepath *path,
1010                                      const struct cpu_key *item_key,
1011                                      /*
1012                                       * Number of unformatted nodes
1013                                       * which were removed from end
1014                                       * of the file.
1015                                       */
1016                                      int *removed,
1017                                      int *cut_size,
1018                                      /* MAX_KEY_OFFSET in case of delete. */
1019                                      unsigned long long new_file_length
1020    )
1021{
1022        struct super_block *sb = inode->i_sb;
1023        struct item_head *p_le_ih = tp_item_head(path);
1024        struct buffer_head *bh = PATH_PLAST_BUFFER(path);
1025
1026        BUG_ON(!th->t_trans_id);
1027
1028        /* Stat_data item. */
1029        if (is_statdata_le_ih(p_le_ih)) {
1030
1031                RFALSE(new_file_length != max_reiserfs_offset(inode),
1032                       "PAP-5210: mode must be M_DELETE");
1033
1034                *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1035                return M_DELETE;
1036        }
1037
1038        /* Directory item. */
1039        if (is_direntry_le_ih(p_le_ih))
1040                return prepare_for_direntry_item(path, p_le_ih, inode,
1041                                                 new_file_length,
1042                                                 cut_size);
1043
1044        /* Direct item. */
1045        if (is_direct_le_ih(p_le_ih))
1046                return prepare_for_direct_item(path, p_le_ih, inode,
1047                                               new_file_length, cut_size);
1048
1049        /* Case of an indirect item. */
1050        {
1051            int blk_size = sb->s_blocksize;
1052            struct item_head s_ih;
1053            int need_re_search;
1054            int delete = 0;
1055            int result = M_CUT;
1056            int pos = 0;
1057
1058            if ( new_file_length == max_reiserfs_offset (inode) ) {
1059                /*
1060                 * prepare_for_delete_or_cut() is called by
1061                 * reiserfs_delete_item()
1062                 */
1063                new_file_length = 0;
1064                delete = 1;
1065            }
1066
1067            do {
1068                need_re_search = 0;
1069                *cut_size = 0;
1070                bh = PATH_PLAST_BUFFER(path);
1071                copy_item_head(&s_ih, tp_item_head(path));
1072                pos = I_UNFM_NUM(&s_ih);
1073
1074                while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1075                    __le32 *unfm;
1076                    __u32 block;
1077
1078                    /*
1079                     * Each unformatted block deletion may involve
1080                     * one additional bitmap block into the transaction,
1081                     * thereby the initial journal space reservation
1082                     * might not be enough.
1083                     */
1084                    if (!delete && (*cut_size) != 0 &&
1085                        reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1086                        break;
1087
1088                    unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
1089                    block = get_block_num(unfm, 0);
1090
1091                    if (block != 0) {
1092                        reiserfs_prepare_for_journal(sb, bh, 1);
1093                        put_block_num(unfm, 0, 0);
1094                        journal_mark_dirty(th, bh);
1095                        reiserfs_free_block(th, inode, block, 1);
1096                    }
1097
1098                    reiserfs_cond_resched(sb);
1099
1100                    if (item_moved (&s_ih, path))  {
1101                        need_re_search = 1;
1102                        break;
1103                    }
1104
1105                    pos --;
1106                    (*removed)++;
1107                    (*cut_size) -= UNFM_P_SIZE;
1108
1109                    if (pos == 0) {
1110                        (*cut_size) -= IH_SIZE;
1111                        result = M_DELETE;
1112                        break;
1113                    }
1114                }
1115                /*
1116                 * a trick.  If the buffer has been logged, this will
1117                 * do nothing.  If we've broken the loop without logging
1118                 * it, it will restore the buffer
1119                 */
1120                reiserfs_restore_prepared_buffer(sb, bh);
1121            } while (need_re_search &&
1122                     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1123            pos_in_item(path) = pos * UNFM_P_SIZE;
1124
1125            if (*cut_size == 0) {
1126                /*
1127                 * Nothing was cut. maybe convert last unformatted node to the
1128                 * direct item?
1129                 */
1130                result = M_CONVERT;
1131            }
1132            return result;
1133        }
1134}
1135
1136/* Calculate number of bytes which will be deleted or cut during balance */
1137static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1138{
1139        int del_size;
1140        struct item_head *p_le_ih = tp_item_head(tb->tb_path);
1141
1142        if (is_statdata_le_ih(p_le_ih))
1143                return 0;
1144
1145        del_size =
1146            (mode ==
1147             M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1148        if (is_direntry_le_ih(p_le_ih)) {
1149                /*
1150                 * return EMPTY_DIR_SIZE; We delete emty directories only.
1151                 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1152                 * different empty size.  ick. FIXME, is this right?
1153                 */
1154                return del_size;
1155        }
1156
1157        if (is_indirect_le_ih(p_le_ih))
1158                del_size = (del_size / UNFM_P_SIZE) *
1159                                (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1160        return del_size;
1161}
1162
1163static void init_tb_struct(struct reiserfs_transaction_handle *th,
1164                           struct tree_balance *tb,
1165                           struct super_block *sb,
1166                           struct treepath *path, int size)
1167{
1168
1169        BUG_ON(!th->t_trans_id);
1170
1171        memset(tb, '\0', sizeof(struct tree_balance));
1172        tb->transaction_handle = th;
1173        tb->tb_sb = sb;
1174        tb->tb_path = path;
1175        PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1176        PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1177        tb->insert_size[0] = size;
1178}
1179
1180void padd_item(char *item, int total_length, int length)
1181{
1182        int i;
1183
1184        for (i = total_length; i > length;)
1185                item[--i] = 0;
1186}
1187
1188#ifdef REISERQUOTA_DEBUG
1189char key2type(struct reiserfs_key *ih)
1190{
1191        if (is_direntry_le_key(2, ih))
1192                return 'd';
1193        if (is_direct_le_key(2, ih))
1194                return 'D';
1195        if (is_indirect_le_key(2, ih))
1196                return 'i';
1197        if (is_statdata_le_key(2, ih))
1198                return 's';
1199        return 'u';
1200}
1201
1202char head2type(struct item_head *ih)
1203{
1204        if (is_direntry_le_ih(ih))
1205                return 'd';
1206        if (is_direct_le_ih(ih))
1207                return 'D';
1208        if (is_indirect_le_ih(ih))
1209                return 'i';
1210        if (is_statdata_le_ih(ih))
1211                return 's';
1212        return 'u';
1213}
1214#endif
1215
1216/*
1217 * Delete object item.
1218 * th       - active transaction handle
1219 * path     - path to the deleted item
1220 * item_key - key to search for the deleted item
1221 * indode   - used for updating i_blocks and quotas
1222 * un_bh    - NULL or unformatted node pointer
1223 */
1224int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1225                         struct treepath *path, const struct cpu_key *item_key,
1226                         struct inode *inode, struct buffer_head *un_bh)
1227{
1228        struct super_block *sb = inode->i_sb;
1229        struct tree_balance s_del_balance;
1230        struct item_head s_ih;
1231        struct item_head *q_ih;
1232        int quota_cut_bytes;
1233        int ret_value, del_size, removed;
1234        int depth;
1235
1236#ifdef CONFIG_REISERFS_CHECK
1237        char mode;
1238        int iter = 0;
1239#endif
1240
1241        BUG_ON(!th->t_trans_id);
1242
1243        init_tb_struct(th, &s_del_balance, sb, path,
1244                       0 /*size is unknown */ );
1245
1246        while (1) {
1247                removed = 0;
1248
1249#ifdef CONFIG_REISERFS_CHECK
1250                iter++;
1251                mode =
1252#endif
1253                    prepare_for_delete_or_cut(th, inode, path,
1254                                              item_key, &removed,
1255                                              &del_size,
1256                                              max_reiserfs_offset(inode));
1257
1258                RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1259
1260                copy_item_head(&s_ih, tp_item_head(path));
1261                s_del_balance.insert_size[0] = del_size;
1262
1263                ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1264                if (ret_value != REPEAT_SEARCH)
1265                        break;
1266
1267                PROC_INFO_INC(sb, delete_item_restarted);
1268
1269                /* file system changed, repeat search */
1270                ret_value =
1271                    search_for_position_by_key(sb, item_key, path);
1272                if (ret_value == IO_ERROR)
1273                        break;
1274                if (ret_value == FILE_NOT_FOUND) {
1275                        reiserfs_warning(sb, "vs-5340",
1276                                         "no items of the file %K found",
1277                                         item_key);
1278                        break;
1279                }
1280        }                       /* while (1) */
1281
1282        if (ret_value != CARRY_ON) {
1283                unfix_nodes(&s_del_balance);
1284                return 0;
1285        }
1286
1287        /* reiserfs_delete_item returns item length when success */
1288        ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1289        q_ih = tp_item_head(path);
1290        quota_cut_bytes = ih_item_len(q_ih);
1291
1292        /*
1293         * hack so the quota code doesn't have to guess if the file has a
1294         * tail.  On tail insert, we allocate quota for 1 unformatted node.
1295         * We test the offset because the tail might have been
1296         * split into multiple items, and we only want to decrement for
1297         * the unfm node once
1298         */
1299        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1300                if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1301                        quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1302                } else {
1303                        quota_cut_bytes = 0;
1304                }
1305        }
1306
1307        if (un_bh) {
1308                int off;
1309                char *data;
1310
1311                /*
1312                 * We are in direct2indirect conversion, so move tail contents
1313                 * to the unformatted node
1314                 */
1315                /*
1316                 * note, we do the copy before preparing the buffer because we
1317                 * don't care about the contents of the unformatted node yet.
1318                 * the only thing we really care about is the direct item's
1319                 * data is in the unformatted node.
1320                 *
1321                 * Otherwise, we would have to call
1322                 * reiserfs_prepare_for_journal on the unformatted node,
1323                 * which might schedule, meaning we'd have to loop all the
1324                 * way back up to the start of the while loop.
1325                 *
1326                 * The unformatted node must be dirtied later on.  We can't be
1327                 * sure here if the entire tail has been deleted yet.
1328                 *
1329                 * un_bh is from the page cache (all unformatted nodes are
1330                 * from the page cache) and might be a highmem page.  So, we
1331                 * can't use un_bh->b_data.
1332                 * -clm
1333                 */
1334
1335                data = kmap_atomic(un_bh->b_page);
1336                off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
1337                memcpy(data + off,
1338                       ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
1339                       ret_value);
1340                kunmap_atomic(data);
1341        }
1342
1343        /* Perform balancing after all resources have been collected at once. */
1344        do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1345
1346#ifdef REISERQUOTA_DEBUG
1347        reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1348                       "reiserquota delete_item(): freeing %u, id=%u type=%c",
1349                       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1350#endif
1351        depth = reiserfs_write_unlock_nested(inode->i_sb);
1352        dquot_free_space_nodirty(inode, quota_cut_bytes);
1353        reiserfs_write_lock_nested(inode->i_sb, depth);
1354
1355        /* Return deleted body length */
1356        return ret_value;
1357}
1358
1359/*
1360 * Summary Of Mechanisms For Handling Collisions Between Processes:
1361 *
1362 *  deletion of the body of the object is performed by iput(), with the
1363 *  result that if multiple processes are operating on a file, the
1364 *  deletion of the body of the file is deferred until the last process
1365 *  that has an open inode performs its iput().
1366 *
1367 *  writes and truncates are protected from collisions by use of
1368 *  semaphores.
1369 *
1370 *  creates, linking, and mknod are protected from collisions with other
1371 *  processes by making the reiserfs_add_entry() the last step in the
1372 *  creation, and then rolling back all changes if there was a collision.
1373 *  - Hans
1374*/
1375
1376/* this deletes item which never gets split */
1377void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1378                                struct inode *inode, struct reiserfs_key *key)
1379{
1380        struct super_block *sb = th->t_super;
1381        struct tree_balance tb;
1382        INITIALIZE_PATH(path);
1383        int item_len = 0;
1384        int tb_init = 0;
1385        struct cpu_key cpu_key;
1386        int retval;
1387        int quota_cut_bytes = 0;
1388
1389        BUG_ON(!th->t_trans_id);
1390
1391        le_key2cpu_key(&cpu_key, key);
1392
1393        while (1) {
1394                retval = search_item(th->t_super, &cpu_key, &path);
1395                if (retval == IO_ERROR) {
1396                        reiserfs_error(th->t_super, "vs-5350",
1397                                       "i/o failure occurred trying "
1398                                       "to delete %K", &cpu_key);
1399                        break;
1400                }
1401                if (retval != ITEM_FOUND) {
1402                        pathrelse(&path);
1403                        /*
1404                         * No need for a warning, if there is just no free
1405                         * space to insert '..' item into the
1406                         * newly-created subdir
1407                         */
1408                        if (!
1409                            ((unsigned long long)
1410                             GET_HASH_VALUE(le_key_k_offset
1411                                            (le_key_version(key), key)) == 0
1412                             && (unsigned long long)
1413                             GET_GENERATION_NUMBER(le_key_k_offset
1414                                                   (le_key_version(key),
1415                                                    key)) == 1))
1416                                reiserfs_warning(th->t_super, "vs-5355",
1417                                                 "%k not found", key);
1418                        break;
1419                }
1420                if (!tb_init) {
1421                        tb_init = 1;
1422                        item_len = ih_item_len(tp_item_head(&path));
1423                        init_tb_struct(th, &tb, th->t_super, &path,
1424                                       -(IH_SIZE + item_len));
1425                }
1426                quota_cut_bytes = ih_item_len(tp_item_head(&path));
1427
1428                retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1429                if (retval == REPEAT_SEARCH) {
1430                        PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1431                        continue;
1432                }
1433
1434                if (retval == CARRY_ON) {
1435                        do_balance(&tb, NULL, NULL, M_DELETE);
1436                        /*
1437                         * Should we count quota for item? (we don't
1438                         * count quotas for save-links)
1439                         */
1440                        if (inode) {
1441                                int depth;
1442#ifdef REISERQUOTA_DEBUG
1443                                reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1444                                               "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1445                                               quota_cut_bytes, inode->i_uid,
1446                                               key2type(key));
1447#endif
1448                                depth = reiserfs_write_unlock_nested(sb);
1449                                dquot_free_space_nodirty(inode,
1450                                                         quota_cut_bytes);
1451                                reiserfs_write_lock_nested(sb, depth);
1452                        }
1453                        break;
1454                }
1455
1456                /* IO_ERROR, NO_DISK_SPACE, etc */
1457                reiserfs_warning(th->t_super, "vs-5360",
1458                                 "could not delete %K due to fix_nodes failure",
1459                                 &cpu_key);
1460                unfix_nodes(&tb);
1461                break;
1462        }
1463
1464        reiserfs_check_path(&path);
1465}
1466
1467int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1468                           struct inode *inode)
1469{
1470        int err;
1471        inode->i_size = 0;
1472        BUG_ON(!th->t_trans_id);
1473
1474        /* for directory this deletes item containing "." and ".." */
1475        err =
1476            reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1477        if (err)
1478                return err;
1479
1480#if defined( USE_INODE_GENERATION_COUNTER )
1481        if (!old_format_only(th->t_super)) {
1482                __le32 *inode_generation;
1483
1484                inode_generation =
1485                    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1486                le32_add_cpu(inode_generation, 1);
1487        }
1488/* USE_INODE_GENERATION_COUNTER */
1489#endif
1490        reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1491
1492        return err;
1493}
1494
1495static void unmap_buffers(struct page *page, loff_t pos)
1496{
1497        struct buffer_head *bh;
1498        struct buffer_head *head;
1499        struct buffer_head *next;
1500        unsigned long tail_index;
1501        unsigned long cur_index;
1502
1503        if (page) {
1504                if (page_has_buffers(page)) {
1505                        tail_index = pos & (PAGE_SIZE - 1);
1506                        cur_index = 0;
1507                        head = page_buffers(page);
1508                        bh = head;
1509                        do {
1510                                next = bh->b_this_page;
1511
1512                                /*
1513                                 * we want to unmap the buffers that contain
1514                                 * the tail, and all the buffers after it
1515                                 * (since the tail must be at the end of the
1516                                 * file).  We don't want to unmap file data
1517                                 * before the tail, since it might be dirty
1518                                 * and waiting to reach disk
1519                                 */
1520                                cur_index += bh->b_size;
1521                                if (cur_index > tail_index) {
1522                                        reiserfs_unmap_buffer(bh);
1523                                }
1524                                bh = next;
1525                        } while (bh != head);
1526                }
1527        }
1528}
1529
1530static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1531                                    struct inode *inode,
1532                                    struct page *page,
1533                                    struct treepath *path,
1534                                    const struct cpu_key *item_key,
1535                                    loff_t new_file_size, char *mode)
1536{
1537        struct super_block *sb = inode->i_sb;
1538        int block_size = sb->s_blocksize;
1539        int cut_bytes;
1540        BUG_ON(!th->t_trans_id);
1541        BUG_ON(new_file_size != inode->i_size);
1542
1543        /*
1544         * the page being sent in could be NULL if there was an i/o error
1545         * reading in the last block.  The user will hit problems trying to
1546         * read the file, but for now we just skip the indirect2direct
1547         */
1548        if (atomic_read(&inode->i_count) > 1 ||
1549            !tail_has_to_be_packed(inode) ||
1550            !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1551                /* leave tail in an unformatted node */
1552                *mode = M_SKIP_BALANCING;
1553                cut_bytes =
1554                    block_size - (new_file_size & (block_size - 1));
1555                pathrelse(path);
1556                return cut_bytes;
1557        }
1558
1559        /* Perform the conversion to a direct_item. */
1560        return indirect2direct(th, inode, page, path, item_key,
1561                               new_file_size, mode);
1562}
1563
1564/*
1565 * we did indirect_to_direct conversion. And we have inserted direct
1566 * item successesfully, but there were no disk space to cut unfm
1567 * pointer being converted. Therefore we have to delete inserted
1568 * direct item(s)
1569 */
1570static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1571                                         struct inode *inode, struct treepath *path)
1572{
1573        struct cpu_key tail_key;
1574        int tail_len;
1575        int removed;
1576        BUG_ON(!th->t_trans_id);
1577
1578        make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1579        tail_key.key_length = 4;
1580
1581        tail_len =
1582            (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1583        while (tail_len) {
1584                /* look for the last byte of the tail */
1585                if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1586                    POSITION_NOT_FOUND)
1587                        reiserfs_panic(inode->i_sb, "vs-5615",
1588                                       "found invalid item");
1589                RFALSE(path->pos_in_item !=
1590                       ih_item_len(tp_item_head(path)) - 1,
1591                       "vs-5616: appended bytes found");
1592                PATH_LAST_POSITION(path)--;
1593
1594                removed =
1595                    reiserfs_delete_item(th, path, &tail_key, inode,
1596                                         NULL /*unbh not needed */ );
1597                RFALSE(removed <= 0
1598                       || removed > tail_len,
1599                       "vs-5617: there was tail %d bytes, removed item length %d bytes",
1600                       tail_len, removed);
1601                tail_len -= removed;
1602                set_cpu_key_k_offset(&tail_key,
1603                                     cpu_key_k_offset(&tail_key) - removed);
1604        }
1605        reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1606                         "conversion has been rolled back due to "
1607                         "lack of disk space");
1608        mark_inode_dirty(inode);
1609}
1610
1611/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1612int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1613                           struct treepath *path,
1614                           struct cpu_key *item_key,
1615                           struct inode *inode,
1616                           struct page *page, loff_t new_file_size)
1617{
1618        struct super_block *sb = inode->i_sb;
1619        /*
1620         * Every function which is going to call do_balance must first
1621         * create a tree_balance structure.  Then it must fill up this
1622         * structure by using the init_tb_struct and fix_nodes functions.
1623         * After that we can make tree balancing.
1624         */
1625        struct tree_balance s_cut_balance;
1626        struct item_head *p_le_ih;
1627        int cut_size = 0;       /* Amount to be cut. */
1628        int ret_value = CARRY_ON;
1629        int removed = 0;        /* Number of the removed unformatted nodes. */
1630        int is_inode_locked = 0;
1631        char mode;              /* Mode of the balance. */
1632        int retval2 = -1;
1633        int quota_cut_bytes;
1634        loff_t tail_pos = 0;
1635        int depth;
1636
1637        BUG_ON(!th->t_trans_id);
1638
1639        init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1640                       cut_size);
1641
1642        /*
1643         * Repeat this loop until we either cut the item without needing
1644         * to balance, or we fix_nodes without schedule occurring
1645         */
1646        while (1) {
1647                /*
1648                 * Determine the balance mode, position of the first byte to
1649                 * be cut, and size to be cut.  In case of the indirect item
1650                 * free unformatted nodes which are pointed to by the cut
1651                 * pointers.
1652                 */
1653
1654                mode =
1655                    prepare_for_delete_or_cut(th, inode, path,
1656                                              item_key, &removed,
1657                                              &cut_size, new_file_size);
1658                if (mode == M_CONVERT) {
1659                        /*
1660                         * convert last unformatted node to direct item or
1661                         * leave tail in the unformatted node
1662                         */
1663                        RFALSE(ret_value != CARRY_ON,
1664                               "PAP-5570: can not convert twice");
1665
1666                        ret_value =
1667                            maybe_indirect_to_direct(th, inode, page,
1668                                                     path, item_key,
1669                                                     new_file_size, &mode);
1670                        if (mode == M_SKIP_BALANCING)
1671                                /* tail has been left in the unformatted node */
1672                                return ret_value;
1673
1674                        is_inode_locked = 1;
1675
1676                        /*
1677                         * removing of last unformatted node will
1678                         * change value we have to return to truncate.
1679                         * Save it
1680                         */
1681                        retval2 = ret_value;
1682
1683                        /*
1684                         * So, we have performed the first part of the
1685                         * conversion:
1686                         * inserting the new direct item.  Now we are
1687                         * removing the last unformatted node pointer.
1688                         * Set key to search for it.
1689                         */
1690                        set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1691                        item_key->key_length = 4;
1692                        new_file_size -=
1693                            (new_file_size & (sb->s_blocksize - 1));
1694                        tail_pos = new_file_size;
1695                        set_cpu_key_k_offset(item_key, new_file_size + 1);
1696                        if (search_for_position_by_key
1697                            (sb, item_key,
1698                             path) == POSITION_NOT_FOUND) {
1699                                print_block(PATH_PLAST_BUFFER(path), 3,
1700                                            PATH_LAST_POSITION(path) - 1,
1701                                            PATH_LAST_POSITION(path) + 1);
1702                                reiserfs_panic(sb, "PAP-5580", "item to "
1703                                               "convert does not exist (%K)",
1704                                               item_key);
1705                        }
1706                        continue;
1707                }
1708                if (cut_size == 0) {
1709                        pathrelse(path);
1710                        return 0;
1711                }
1712
1713                s_cut_balance.insert_size[0] = cut_size;
1714
1715                ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1716                if (ret_value != REPEAT_SEARCH)
1717                        break;
1718
1719                PROC_INFO_INC(sb, cut_from_item_restarted);
1720
1721                ret_value =
1722                    search_for_position_by_key(sb, item_key, path);
1723                if (ret_value == POSITION_FOUND)
1724                        continue;
1725
1726                reiserfs_warning(sb, "PAP-5610", "item %K not found",
1727                                 item_key);
1728                unfix_nodes(&s_cut_balance);
1729                return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1730        }                       /* while */
1731
1732        /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1733        if (ret_value != CARRY_ON) {
1734                if (is_inode_locked) {
1735                        /*
1736                         * FIXME: this seems to be not needed: we are always
1737                         * able to cut item
1738                         */
1739                        indirect_to_direct_roll_back(th, inode, path);
1740                }
1741                if (ret_value == NO_DISK_SPACE)
1742                        reiserfs_warning(sb, "reiserfs-5092",
1743                                         "NO_DISK_SPACE");
1744                unfix_nodes(&s_cut_balance);
1745                return -EIO;
1746        }
1747
1748        /* go ahead and perform balancing */
1749
1750        RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1751
1752        /* Calculate number of bytes that need to be cut from the item. */
1753        quota_cut_bytes =
1754            (mode ==
1755             M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
1756            insert_size[0];
1757        if (retval2 == -1)
1758                ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1759        else
1760                ret_value = retval2;
1761
1762        /*
1763         * For direct items, we only change the quota when deleting the last
1764         * item.
1765         */
1766        p_le_ih = tp_item_head(s_cut_balance.tb_path);
1767        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1768                if (mode == M_DELETE &&
1769                    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1770                    1) {
1771                        /* FIXME: this is to keep 3.5 happy */
1772                        REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1773                        quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1774                } else {
1775                        quota_cut_bytes = 0;
1776                }
1777        }
1778#ifdef CONFIG_REISERFS_CHECK
1779        if (is_inode_locked) {
1780                struct item_head *le_ih =
1781                    tp_item_head(s_cut_balance.tb_path);
1782                /*
1783                 * we are going to complete indirect2direct conversion. Make
1784                 * sure, that we exactly remove last unformatted node pointer
1785                 * of the item
1786                 */
1787                if (!is_indirect_le_ih(le_ih))
1788                        reiserfs_panic(sb, "vs-5652",
1789                                       "item must be indirect %h", le_ih);
1790
1791                if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1792                        reiserfs_panic(sb, "vs-5653", "completing "
1793                                       "indirect2direct conversion indirect "
1794                                       "item %h being deleted must be of "
1795                                       "4 byte long", le_ih);
1796
1797                if (mode == M_CUT
1798                    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1799                        reiserfs_panic(sb, "vs-5654", "can not complete "
1800                                       "indirect2direct conversion of %h "
1801                                       "(CUT, insert_size==%d)",
1802                                       le_ih, s_cut_balance.insert_size[0]);
1803                }
1804                /*
1805                 * it would be useful to make sure, that right neighboring
1806                 * item is direct item of this file
1807                 */
1808        }
1809#endif
1810
1811        do_balance(&s_cut_balance, NULL, NULL, mode);
1812        if (is_inode_locked) {
1813                /*
1814                 * we've done an indirect->direct conversion.  when the
1815                 * data block was freed, it was removed from the list of
1816                 * blocks that must be flushed before the transaction
1817                 * commits, make sure to unmap and invalidate it
1818                 */
1819                unmap_buffers(page, tail_pos);
1820                REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1821        }
1822#ifdef REISERQUOTA_DEBUG
1823        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1824                       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1825                       quota_cut_bytes, inode->i_uid, '?');
1826#endif
1827        depth = reiserfs_write_unlock_nested(sb);
1828        dquot_free_space_nodirty(inode, quota_cut_bytes);
1829        reiserfs_write_lock_nested(sb, depth);
1830        return ret_value;
1831}
1832
1833static void truncate_directory(struct reiserfs_transaction_handle *th,
1834                               struct inode *inode)
1835{
1836        BUG_ON(!th->t_trans_id);
1837        if (inode->i_nlink)
1838                reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1839
1840        set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1841        set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1842        reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1843        reiserfs_update_sd(th, inode);
1844        set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1845        set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1846}
1847
1848/*
1849 * Truncate file to the new size. Note, this must be called with a
1850 * transaction already started
1851 */
1852int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1853                         struct inode *inode,   /* ->i_size contains new size */
1854                         struct page *page,     /* up to date for last block */
1855                         /*
1856                          * when it is called by file_release to convert
1857                          * the tail - no timestamps should be updated
1858                          */
1859                         int update_timestamps
1860    )
1861{
1862        INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1863        struct item_head *p_le_ih;      /* Pointer to an item header. */
1864
1865        /* Key to search for a previous file item. */
1866        struct cpu_key s_item_key;
1867        loff_t file_size,       /* Old file size. */
1868         new_file_size; /* New file size. */
1869        int deleted;            /* Number of deleted or truncated bytes. */
1870        int retval;
1871        int err = 0;
1872
1873        BUG_ON(!th->t_trans_id);
1874        if (!
1875            (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1876             || S_ISLNK(inode->i_mode)))
1877                return 0;
1878
1879        /* deletion of directory - no need to update timestamps */
1880        if (S_ISDIR(inode->i_mode)) {
1881                truncate_directory(th, inode);
1882                return 0;
1883        }
1884
1885        /* Get new file size. */
1886        new_file_size = inode->i_size;
1887
1888        /* FIXME: note, that key type is unimportant here */
1889        make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1890                     TYPE_DIRECT, 3);
1891
1892        retval =
1893            search_for_position_by_key(inode->i_sb, &s_item_key,
1894                                       &s_search_path);
1895        if (retval == IO_ERROR) {
1896                reiserfs_error(inode->i_sb, "vs-5657",
1897                               "i/o failure occurred trying to truncate %K",
1898                               &s_item_key);
1899                err = -EIO;
1900                goto out;
1901        }
1902        if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1903                reiserfs_error(inode->i_sb, "PAP-5660",
1904                               "wrong result %d of search for %K", retval,
1905                               &s_item_key);
1906
1907                err = -EIO;
1908                goto out;
1909        }
1910
1911        s_search_path.pos_in_item--;
1912
1913        /* Get real file size (total length of all file items) */
1914        p_le_ih = tp_item_head(&s_search_path);
1915        if (is_statdata_le_ih(p_le_ih))
1916                file_size = 0;
1917        else {
1918                loff_t offset = le_ih_k_offset(p_le_ih);
1919                int bytes =
1920                    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1921
1922                /*
1923                 * this may mismatch with real file size: if last direct item
1924                 * had no padding zeros and last unformatted node had no free
1925                 * space, this file would have this file size
1926                 */
1927                file_size = offset + bytes - 1;
1928        }
1929        /*
1930         * are we doing a full truncate or delete, if so
1931         * kick in the reada code
1932         */
1933        if (new_file_size == 0)
1934                s_search_path.reada = PATH_READA | PATH_READA_BACK;
1935
1936        if (file_size == 0 || file_size < new_file_size) {
1937                goto update_and_out;
1938        }
1939
1940        /* Update key to search for the last file item. */
1941        set_cpu_key_k_offset(&s_item_key, file_size);
1942
1943        do {
1944                /* Cut or delete file item. */
1945                deleted =
1946                    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1947                                           inode, page, new_file_size);
1948                if (deleted < 0) {
1949                        reiserfs_warning(inode->i_sb, "vs-5665",
1950                                         "reiserfs_cut_from_item failed");
1951                        reiserfs_check_path(&s_search_path);
1952                        return 0;
1953                }
1954
1955                RFALSE(deleted > file_size,
1956                       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1957                       deleted, file_size, &s_item_key);
1958
1959                /* Change key to search the last file item. */
1960                file_size -= deleted;
1961
1962                set_cpu_key_k_offset(&s_item_key, file_size);
1963
1964                /*
1965                 * While there are bytes to truncate and previous
1966                 * file item is presented in the tree.
1967                 */
1968
1969                /*
1970                 * This loop could take a really long time, and could log
1971                 * many more blocks than a transaction can hold.  So, we do
1972                 * a polite journal end here, and if the transaction needs
1973                 * ending, we make sure the file is consistent before ending
1974                 * the current trans and starting a new one
1975                 */
1976                if (journal_transaction_should_end(th, 0) ||
1977                    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1978                        pathrelse(&s_search_path);
1979
1980                        if (update_timestamps) {
1981                                inode->i_mtime = current_time(inode);
1982                                inode->i_ctime = current_time(inode);
1983                        }
1984                        reiserfs_update_sd(th, inode);
1985
1986                        err = journal_end(th);
1987                        if (err)
1988                                goto out;
1989                        err = journal_begin(th, inode->i_sb,
1990                                            JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1991                        if (err)
1992                                goto out;
1993                        reiserfs_update_inode_transaction(inode);
1994                }
1995        } while (file_size > ROUND_UP(new_file_size) &&
1996                 search_for_position_by_key(inode->i_sb, &s_item_key,
1997                                            &s_search_path) == POSITION_FOUND);
1998
1999        RFALSE(file_size > ROUND_UP(new_file_size),
2000               "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2001               new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2002
2003update_and_out:
2004        if (update_timestamps) {
2005                /* this is truncate, not file closing */
2006                inode->i_mtime = current_time(inode);
2007                inode->i_ctime = current_time(inode);
2008        }
2009        reiserfs_update_sd(th, inode);
2010
2011out:
2012        pathrelse(&s_search_path);
2013        return err;
2014}
2015
2016#ifdef CONFIG_REISERFS_CHECK
2017/* this makes sure, that we __append__, not overwrite or add holes */
2018static void check_research_for_paste(struct treepath *path,
2019                                     const struct cpu_key *key)
2020{
2021        struct item_head *found_ih = tp_item_head(path);
2022
2023        if (is_direct_le_ih(found_ih)) {
2024                if (le_ih_k_offset(found_ih) +
2025                    op_bytes_number(found_ih,
2026                                    get_last_bh(path)->b_size) !=
2027                    cpu_key_k_offset(key)
2028                    || op_bytes_number(found_ih,
2029                                       get_last_bh(path)->b_size) !=
2030                    pos_in_item(path))
2031                        reiserfs_panic(NULL, "PAP-5720", "found direct item "
2032                                       "%h or position (%d) does not match "
2033                                       "to key %K", found_ih,
2034                                       pos_in_item(path), key);
2035        }
2036        if (is_indirect_le_ih(found_ih)) {
2037                if (le_ih_k_offset(found_ih) +
2038                    op_bytes_number(found_ih,
2039                                    get_last_bh(path)->b_size) !=
2040                    cpu_key_k_offset(key)
2041                    || I_UNFM_NUM(found_ih) != pos_in_item(path)
2042                    || get_ih_free_space(found_ih) != 0)
2043                        reiserfs_panic(NULL, "PAP-5730", "found indirect "
2044                                       "item (%h) or position (%d) does not "
2045                                       "match to key (%K)",
2046                                       found_ih, pos_in_item(path), key);
2047        }
2048}
2049#endif                          /* config reiserfs check */
2050
2051/*
2052 * Paste bytes to the existing item.
2053 * Returns bytes number pasted into the item.
2054 */
2055int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2056                             /* Path to the pasted item. */
2057                             struct treepath *search_path,
2058                             /* Key to search for the needed item. */
2059                             const struct cpu_key *key,
2060                             /* Inode item belongs to */
2061                             struct inode *inode,
2062                             /* Pointer to the bytes to paste. */
2063                             const char *body,
2064                             /* Size of pasted bytes. */
2065                             int pasted_size)
2066{
2067        struct super_block *sb = inode->i_sb;
2068        struct tree_balance s_paste_balance;
2069        int retval;
2070        int fs_gen;
2071        int depth;
2072
2073        BUG_ON(!th->t_trans_id);
2074
2075        fs_gen = get_generation(inode->i_sb);
2076
2077#ifdef REISERQUOTA_DEBUG
2078        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2079                       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2080                       pasted_size, inode->i_uid,
2081                       key2type(&key->on_disk_key));
2082#endif
2083
2084        depth = reiserfs_write_unlock_nested(sb);
2085        retval = dquot_alloc_space_nodirty(inode, pasted_size);
2086        reiserfs_write_lock_nested(sb, depth);
2087        if (retval) {
2088                pathrelse(search_path);
2089                return retval;
2090        }
2091        init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
2092                       pasted_size);
2093#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2094        s_paste_balance.key = key->on_disk_key;
2095#endif
2096
2097        /* DQUOT_* can schedule, must check before the fix_nodes */
2098        if (fs_changed(fs_gen, inode->i_sb)) {
2099                goto search_again;
2100        }
2101
2102        while ((retval =
2103                fix_nodes(M_PASTE, &s_paste_balance, NULL,
2104                          body)) == REPEAT_SEARCH) {
2105search_again:
2106                /* file system changed while we were in the fix_nodes */
2107                PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2108                retval =
2109                    search_for_position_by_key(th->t_super, key,
2110                                               search_path);
2111                if (retval == IO_ERROR) {
2112                        retval = -EIO;
2113                        goto error_out;
2114                }
2115                if (retval == POSITION_FOUND) {
2116                        reiserfs_warning(inode->i_sb, "PAP-5710",
2117                                         "entry or pasted byte (%K) exists",
2118                                         key);
2119                        retval = -EEXIST;
2120                        goto error_out;
2121                }
2122#ifdef CONFIG_REISERFS_CHECK
2123                check_research_for_paste(search_path, key);
2124#endif
2125        }
2126
2127        /*
2128         * Perform balancing after all resources are collected by fix_nodes,
2129         * and accessing them will not risk triggering schedule.
2130         */
2131        if (retval == CARRY_ON) {
2132                do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2133                return 0;
2134        }
2135        retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2136error_out:
2137        /* this also releases the path */
2138        unfix_nodes(&s_paste_balance);
2139#ifdef REISERQUOTA_DEBUG
2140        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2141                       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2142                       pasted_size, inode->i_uid,
2143                       key2type(&key->on_disk_key));
2144#endif
2145        depth = reiserfs_write_unlock_nested(sb);
2146        dquot_free_space_nodirty(inode, pasted_size);
2147        reiserfs_write_lock_nested(sb, depth);
2148        return retval;
2149}
2150
2151/*
2152 * Insert new item into the buffer at the path.
2153 * th   - active transaction handle
2154 * path - path to the inserted item
2155 * ih   - pointer to the item header to insert
2156 * body - pointer to the bytes to insert
2157 */
2158int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2159                         struct treepath *path, const struct cpu_key *key,
2160                         struct item_head *ih, struct inode *inode,
2161                         const char *body)
2162{
2163        struct tree_balance s_ins_balance;
2164        int retval;
2165        int fs_gen = 0;
2166        int quota_bytes = 0;
2167
2168        BUG_ON(!th->t_trans_id);
2169
2170        if (inode) {            /* Do we count quotas for item? */
2171                int depth;
2172                fs_gen = get_generation(inode->i_sb);
2173                quota_bytes = ih_item_len(ih);
2174
2175                /*
2176                 * hack so the quota code doesn't have to guess
2177                 * if the file has a tail, links are always tails,
2178                 * so there's no guessing needed
2179                 */
2180                if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2181                        quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2182#ifdef REISERQUOTA_DEBUG
2183                reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2184                               "reiserquota insert_item(): allocating %u id=%u type=%c",
2185                               quota_bytes, inode->i_uid, head2type(ih));
2186#endif
2187                /*
2188                 * We can't dirty inode here. It would be immediately
2189                 * written but appropriate stat item isn't inserted yet...
2190                 */
2191                depth = reiserfs_write_unlock_nested(inode->i_sb);
2192                retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2193                reiserfs_write_lock_nested(inode->i_sb, depth);
2194                if (retval) {
2195                        pathrelse(path);
2196                        return retval;
2197                }
2198        }
2199        init_tb_struct(th, &s_ins_balance, th->t_super, path,
2200                       IH_SIZE + ih_item_len(ih));
2201#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2202        s_ins_balance.key = key->on_disk_key;
2203#endif
2204        /*
2205         * DQUOT_* can schedule, must check to be sure calling
2206         * fix_nodes is safe
2207         */
2208        if (inode && fs_changed(fs_gen, inode->i_sb)) {
2209                goto search_again;
2210        }
2211
2212        while ((retval =
2213                fix_nodes(M_INSERT, &s_ins_balance, ih,
2214                          body)) == REPEAT_SEARCH) {
2215search_again:
2216                /* file system changed while we were in the fix_nodes */
2217                PROC_INFO_INC(th->t_super, insert_item_restarted);
2218                retval = search_item(th->t_super, key, path);
2219                if (retval == IO_ERROR) {
2220                        retval = -EIO;
2221                        goto error_out;
2222                }
2223                if (retval == ITEM_FOUND) {
2224                        reiserfs_warning(th->t_super, "PAP-5760",
2225                                         "key %K already exists in the tree",
2226                                         key);
2227                        retval = -EEXIST;
2228                        goto error_out;
2229                }
2230        }
2231
2232        /* make balancing after all resources will be collected at a time */
2233        if (retval == CARRY_ON) {
2234                do_balance(&s_ins_balance, ih, body, M_INSERT);
2235                return 0;
2236        }
2237
2238        retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2239error_out:
2240        /* also releases the path */
2241        unfix_nodes(&s_ins_balance);
2242#ifdef REISERQUOTA_DEBUG
2243        if (inode)
2244                reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2245                       "reiserquota insert_item(): freeing %u id=%u type=%c",
2246                       quota_bytes, inode->i_uid, head2type(ih));
2247#endif
2248        if (inode) {
2249                int depth = reiserfs_write_unlock_nested(inode->i_sb);
2250                dquot_free_space_nodirty(inode, quota_bytes);
2251                reiserfs_write_lock_nested(inode->i_sb, depth);
2252        }
2253        return retval;
2254}
2255