linux/fs/reiserfs/stree.c
<<
>>
Prefs
   1/*
   2 *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
   3 */
   4
   5/*
   6 *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
   7 *  Programm System Institute
   8 *  Pereslavl-Zalessky Russia
   9 */
  10
  11#include <linux/time.h>
  12#include <linux/string.h>
  13#include <linux/pagemap.h>
  14#include "reiserfs.h"
  15#include <linux/buffer_head.h>
  16#include <linux/quotaops.h>
  17
  18/* Does the buffer contain a disk block which is in the tree. */
  19inline int B_IS_IN_TREE(const struct buffer_head *bh)
  20{
  21
  22        RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  23               "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  24
  25        return (B_LEVEL(bh) != FREE_LEVEL);
  26}
  27
  28/* to get item head in le form */
  29inline void copy_item_head(struct item_head *to,
  30                           const struct item_head *from)
  31{
  32        memcpy(to, from, IH_SIZE);
  33}
  34
  35/*
  36 * k1 is pointer to on-disk structure which is stored in little-endian
  37 * form. k2 is pointer to cpu variable. For key of items of the same
  38 * object this returns 0.
  39 * Returns: -1 if key1 < key2
  40 * 0 if key1 == key2
  41 * 1 if key1 > key2
  42 */
  43inline int comp_short_keys(const struct reiserfs_key *le_key,
  44                           const struct cpu_key *cpu_key)
  45{
  46        __u32 n;
  47        n = le32_to_cpu(le_key->k_dir_id);
  48        if (n < cpu_key->on_disk_key.k_dir_id)
  49                return -1;
  50        if (n > cpu_key->on_disk_key.k_dir_id)
  51                return 1;
  52        n = le32_to_cpu(le_key->k_objectid);
  53        if (n < cpu_key->on_disk_key.k_objectid)
  54                return -1;
  55        if (n > cpu_key->on_disk_key.k_objectid)
  56                return 1;
  57        return 0;
  58}
  59
  60/*
  61 * k1 is pointer to on-disk structure which is stored in little-endian
  62 * form. k2 is pointer to cpu variable.
  63 * Compare keys using all 4 key fields.
  64 * Returns: -1 if key1 < key2 0
  65 * if key1 = key2 1 if key1 > key2
  66 */
  67static inline int comp_keys(const struct reiserfs_key *le_key,
  68                            const struct cpu_key *cpu_key)
  69{
  70        int retval;
  71
  72        retval = comp_short_keys(le_key, cpu_key);
  73        if (retval)
  74                return retval;
  75        if (le_key_k_offset(le_key_version(le_key), le_key) <
  76            cpu_key_k_offset(cpu_key))
  77                return -1;
  78        if (le_key_k_offset(le_key_version(le_key), le_key) >
  79            cpu_key_k_offset(cpu_key))
  80                return 1;
  81
  82        if (cpu_key->key_length == 3)
  83                return 0;
  84
  85        /* this part is needed only when tail conversion is in progress */
  86        if (le_key_k_type(le_key_version(le_key), le_key) <
  87            cpu_key_k_type(cpu_key))
  88                return -1;
  89
  90        if (le_key_k_type(le_key_version(le_key), le_key) >
  91            cpu_key_k_type(cpu_key))
  92                return 1;
  93
  94        return 0;
  95}
  96
  97inline int comp_short_le_keys(const struct reiserfs_key *key1,
  98                              const struct reiserfs_key *key2)
  99{
 100        __u32 *k1_u32, *k2_u32;
 101        int key_length = REISERFS_SHORT_KEY_LEN;
 102
 103        k1_u32 = (__u32 *) key1;
 104        k2_u32 = (__u32 *) key2;
 105        for (; key_length--; ++k1_u32, ++k2_u32) {
 106                if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
 107                        return -1;
 108                if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
 109                        return 1;
 110        }
 111        return 0;
 112}
 113
 114inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
 115{
 116        int version;
 117        to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
 118        to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
 119
 120        /* find out version of the key */
 121        version = le_key_version(from);
 122        to->version = version;
 123        to->on_disk_key.k_offset = le_key_k_offset(version, from);
 124        to->on_disk_key.k_type = le_key_k_type(version, from);
 125}
 126
 127/*
 128 * this does not say which one is bigger, it only returns 1 if keys
 129 * are not equal, 0 otherwise
 130 */
 131inline int comp_le_keys(const struct reiserfs_key *k1,
 132                        const struct reiserfs_key *k2)
 133{
 134        return memcmp(k1, k2, sizeof(struct reiserfs_key));
 135}
 136
 137/**************************************************************************
 138 *  Binary search toolkit function                                        *
 139 *  Search for an item in the array by the item key                       *
 140 *  Returns:    1 if found,  0 if not found;                              *
 141 *        *pos = number of the searched element if found, else the        *
 142 *        number of the first element that is larger than key.            *
 143 **************************************************************************/
 144/*
 145 * For those not familiar with binary search: lbound is the leftmost item
 146 * that it could be, rbound the rightmost item that it could be.  We examine
 147 * the item halfway between lbound and rbound, and that tells us either
 148 * that we can increase lbound, or decrease rbound, or that we have found it,
 149 * or if lbound <= rbound that there are no possible items, and we have not
 150 * found it. With each examination we cut the number of possible items it
 151 * could be by one more than half rounded down, or we find it.
 152 */
 153static inline int bin_search(const void *key,   /* Key to search for. */
 154                             const void *base,  /* First item in the array. */
 155                             int num,   /* Number of items in the array. */
 156                             /*
 157                              * Item size in the array.  searched. Lest the
 158                              * reader be confused, note that this is crafted
 159                              * as a general function, and when it is applied
 160                              * specifically to the array of item headers in a
 161                              * node, width is actually the item header size
 162                              * not the item size.
 163                              */
 164                             int width,
 165                             int *pos /* Number of the searched for element. */
 166    )
 167{
 168        int rbound, lbound, j;
 169
 170        for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
 171             lbound <= rbound; j = (rbound + lbound) / 2)
 172                switch (comp_keys
 173                        ((struct reiserfs_key *)((char *)base + j * width),
 174                         (struct cpu_key *)key)) {
 175                case -1:
 176                        lbound = j + 1;
 177                        continue;
 178                case 1:
 179                        rbound = j - 1;
 180                        continue;
 181                case 0:
 182                        *pos = j;
 183                        return ITEM_FOUND;      /* Key found in the array.  */
 184                }
 185
 186        /*
 187         * bin_search did not find given key, it returns position of key,
 188         * that is minimal and greater than the given one.
 189         */
 190        *pos = lbound;
 191        return ITEM_NOT_FOUND;
 192}
 193
 194
 195/* Minimal possible key. It is never in the tree. */
 196const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
 197
 198/* Maximal possible key. It is never in the tree. */
 199static const struct reiserfs_key MAX_KEY = {
 200        cpu_to_le32(0xffffffff),
 201        cpu_to_le32(0xffffffff),
 202        {{cpu_to_le32(0xffffffff),
 203          cpu_to_le32(0xffffffff)},}
 204};
 205
 206/*
 207 * Get delimiting key of the buffer by looking for it in the buffers in the
 208 * path, starting from the bottom of the path, and going upwards.  We must
 209 * check the path's validity at each step.  If the key is not in the path,
 210 * there is no delimiting key in the tree (buffer is first or last buffer
 211 * in tree), and in this case we return a special key, either MIN_KEY or
 212 * MAX_KEY.
 213 */
 214static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
 215                                                  const struct super_block *sb)
 216{
 217        int position, path_offset = chk_path->path_length;
 218        struct buffer_head *parent;
 219
 220        RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
 221               "PAP-5010: invalid offset in the path");
 222
 223        /* While not higher in path than first element. */
 224        while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
 225
 226                RFALSE(!buffer_uptodate
 227                       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
 228                       "PAP-5020: parent is not uptodate");
 229
 230                /* Parent at the path is not in the tree now. */
 231                if (!B_IS_IN_TREE
 232                    (parent =
 233                     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
 234                        return &MAX_KEY;
 235                /* Check whether position in the parent is correct. */
 236                if ((position =
 237                     PATH_OFFSET_POSITION(chk_path,
 238                                          path_offset)) >
 239                    B_NR_ITEMS(parent))
 240                        return &MAX_KEY;
 241                /* Check whether parent at the path really points to the child. */
 242                if (B_N_CHILD_NUM(parent, position) !=
 243                    PATH_OFFSET_PBUFFER(chk_path,
 244                                        path_offset + 1)->b_blocknr)
 245                        return &MAX_KEY;
 246                /*
 247                 * Return delimiting key if position in the parent
 248                 * is not equal to zero.
 249                 */
 250                if (position)
 251                        return internal_key(parent, position - 1);
 252        }
 253        /* Return MIN_KEY if we are in the root of the buffer tree. */
 254        if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
 255            b_blocknr == SB_ROOT_BLOCK(sb))
 256                return &MIN_KEY;
 257        return &MAX_KEY;
 258}
 259
 260/* Get delimiting key of the buffer at the path and its right neighbor. */
 261inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
 262                                           const struct super_block *sb)
 263{
 264        int position, path_offset = chk_path->path_length;
 265        struct buffer_head *parent;
 266
 267        RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
 268               "PAP-5030: invalid offset in the path");
 269
 270        while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
 271
 272                RFALSE(!buffer_uptodate
 273                       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
 274                       "PAP-5040: parent is not uptodate");
 275
 276                /* Parent at the path is not in the tree now. */
 277                if (!B_IS_IN_TREE
 278                    (parent =
 279                     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
 280                        return &MIN_KEY;
 281                /* Check whether position in the parent is correct. */
 282                if ((position =
 283                     PATH_OFFSET_POSITION(chk_path,
 284                                          path_offset)) >
 285                    B_NR_ITEMS(parent))
 286                        return &MIN_KEY;
 287                /*
 288                 * Check whether parent at the path really points
 289                 * to the child.
 290                 */
 291                if (B_N_CHILD_NUM(parent, position) !=
 292                    PATH_OFFSET_PBUFFER(chk_path,
 293                                        path_offset + 1)->b_blocknr)
 294                        return &MIN_KEY;
 295
 296                /*
 297                 * Return delimiting key if position in the parent
 298                 * is not the last one.
 299                 */
 300                if (position != B_NR_ITEMS(parent))
 301                        return internal_key(parent, position);
 302        }
 303
 304        /* Return MAX_KEY if we are in the root of the buffer tree. */
 305        if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
 306            b_blocknr == SB_ROOT_BLOCK(sb))
 307                return &MAX_KEY;
 308        return &MIN_KEY;
 309}
 310
 311/*
 312 * Check whether a key is contained in the tree rooted from a buffer at a path.
 313 * This works by looking at the left and right delimiting keys for the buffer
 314 * in the last path_element in the path.  These delimiting keys are stored
 315 * at least one level above that buffer in the tree. If the buffer is the
 316 * first or last node in the tree order then one of the delimiting keys may
 317 * be absent, and in this case get_lkey and get_rkey return a special key
 318 * which is MIN_KEY or MAX_KEY.
 319 */
 320static inline int key_in_buffer(
 321                                /* Path which should be checked. */
 322                                struct treepath *chk_path,
 323                                /* Key which should be checked. */
 324                                const struct cpu_key *key,
 325                                struct super_block *sb
 326    )
 327{
 328
 329        RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
 330               || chk_path->path_length > MAX_HEIGHT,
 331               "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
 332               key, chk_path->path_length);
 333        RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
 334               "PAP-5060: device must not be NODEV");
 335
 336        if (comp_keys(get_lkey(chk_path, sb), key) == 1)
 337                /* left delimiting key is bigger, that the key we look for */
 338                return 0;
 339        /*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
 340        if (comp_keys(get_rkey(chk_path, sb), key) != 1)
 341                /* key must be less than right delimitiing key */
 342                return 0;
 343        return 1;
 344}
 345
 346int reiserfs_check_path(struct treepath *p)
 347{
 348        RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
 349               "path not properly relsed");
 350        return 0;
 351}
 352
 353/*
 354 * Drop the reference to each buffer in a path and restore
 355 * dirty bits clean when preparing the buffer for the log.
 356 * This version should only be called from fix_nodes()
 357 */
 358void pathrelse_and_restore(struct super_block *sb,
 359                           struct treepath *search_path)
 360{
 361        int path_offset = search_path->path_length;
 362
 363        RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
 364               "clm-4000: invalid path offset");
 365
 366        while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
 367                struct buffer_head *bh;
 368                bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
 369                reiserfs_restore_prepared_buffer(sb, bh);
 370                brelse(bh);
 371        }
 372        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 373}
 374
 375/* Drop the reference to each buffer in a path */
 376void pathrelse(struct treepath *search_path)
 377{
 378        int path_offset = search_path->path_length;
 379
 380        RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
 381               "PAP-5090: invalid path offset");
 382
 383        while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
 384                brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
 385
 386        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 387}
 388
 389static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 390{
 391        struct block_head *blkh;
 392        struct item_head *ih;
 393        int used_space;
 394        int prev_location;
 395        int i;
 396        int nr;
 397
 398        blkh = (struct block_head *)buf;
 399        if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
 400                reiserfs_warning(NULL, "reiserfs-5080",
 401                                 "this should be caught earlier");
 402                return 0;
 403        }
 404
 405        nr = blkh_nr_item(blkh);
 406        if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
 407                /* item number is too big or too small */
 408                reiserfs_warning(NULL, "reiserfs-5081",
 409                                 "nr_item seems wrong: %z", bh);
 410                return 0;
 411        }
 412        ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
 413        used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
 414
 415        /* free space does not match to calculated amount of use space */
 416        if (used_space != blocksize - blkh_free_space(blkh)) {
 417                reiserfs_warning(NULL, "reiserfs-5082",
 418                                 "free space seems wrong: %z", bh);
 419                return 0;
 420        }
 421        /*
 422         * FIXME: it is_leaf will hit performance too much - we may have
 423         * return 1 here
 424         */
 425
 426        /* check tables of item heads */
 427        ih = (struct item_head *)(buf + BLKH_SIZE);
 428        prev_location = blocksize;
 429        for (i = 0; i < nr; i++, ih++) {
 430                if (le_ih_k_type(ih) == TYPE_ANY) {
 431                        reiserfs_warning(NULL, "reiserfs-5083",
 432                                         "wrong item type for item %h",
 433                                         ih);
 434                        return 0;
 435                }
 436                if (ih_location(ih) >= blocksize
 437                    || ih_location(ih) < IH_SIZE * nr) {
 438                        reiserfs_warning(NULL, "reiserfs-5084",
 439                                         "item location seems wrong: %h",
 440                                         ih);
 441                        return 0;
 442                }
 443                if (ih_item_len(ih) < 1
 444                    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
 445                        reiserfs_warning(NULL, "reiserfs-5085",
 446                                         "item length seems wrong: %h",
 447                                         ih);
 448                        return 0;
 449                }
 450                if (prev_location - ih_location(ih) != ih_item_len(ih)) {
 451                        reiserfs_warning(NULL, "reiserfs-5086",
 452                                         "item location seems wrong "
 453                                         "(second one): %h", ih);
 454                        return 0;
 455                }
 456                prev_location = ih_location(ih);
 457        }
 458
 459        /* one may imagine many more checks */
 460        return 1;
 461}
 462
 463/* returns 1 if buf looks like an internal node, 0 otherwise */
 464static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
 465{
 466        struct block_head *blkh;
 467        int nr;
 468        int used_space;
 469
 470        blkh = (struct block_head *)buf;
 471        nr = blkh_level(blkh);
 472        if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
 473                /* this level is not possible for internal nodes */
 474                reiserfs_warning(NULL, "reiserfs-5087",
 475                                 "this should be caught earlier");
 476                return 0;
 477        }
 478
 479        nr = blkh_nr_item(blkh);
 480        /* for internal which is not root we might check min number of keys */
 481        if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
 482                reiserfs_warning(NULL, "reiserfs-5088",
 483                                 "number of key seems wrong: %z", bh);
 484                return 0;
 485        }
 486
 487        used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
 488        if (used_space != blocksize - blkh_free_space(blkh)) {
 489                reiserfs_warning(NULL, "reiserfs-5089",
 490                                 "free space seems wrong: %z", bh);
 491                return 0;
 492        }
 493
 494        /* one may imagine many more checks */
 495        return 1;
 496}
 497
 498/*
 499 * make sure that bh contains formatted node of reiserfs tree of
 500 * 'level'-th level
 501 */
 502static int is_tree_node(struct buffer_head *bh, int level)
 503{
 504        if (B_LEVEL(bh) != level) {
 505                reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
 506                                 "not match to the expected one %d",
 507                                 B_LEVEL(bh), level);
 508                return 0;
 509        }
 510        if (level == DISK_LEAF_NODE_LEVEL)
 511                return is_leaf(bh->b_data, bh->b_size, bh);
 512
 513        return is_internal(bh->b_data, bh->b_size, bh);
 514}
 515
 516#define SEARCH_BY_KEY_READA 16
 517
 518/*
 519 * The function is NOT SCHEDULE-SAFE!
 520 * It might unlock the write lock if we needed to wait for a block
 521 * to be read. Note that in this case it won't recover the lock to avoid
 522 * high contention resulting from too much lock requests, especially
 523 * the caller (search_by_key) will perform other schedule-unsafe
 524 * operations just after calling this function.
 525 *
 526 * @return depth of lock to be restored after read completes
 527 */
 528static int search_by_key_reada(struct super_block *s,
 529                                struct buffer_head **bh,
 530                                b_blocknr_t *b, int num)
 531{
 532        int i, j;
 533        int depth = -1;
 534
 535        for (i = 0; i < num; i++) {
 536                bh[i] = sb_getblk(s, b[i]);
 537        }
 538        /*
 539         * We are going to read some blocks on which we
 540         * have a reference. It's safe, though we might be
 541         * reading blocks concurrently changed if we release
 542         * the lock. But it's still fine because we check later
 543         * if the tree changed
 544         */
 545        for (j = 0; j < i; j++) {
 546                /*
 547                 * note, this needs attention if we are getting rid of the BKL
 548                 * you have to make sure the prepared bit isn't set on this
 549                 * buffer
 550                 */
 551                if (!buffer_uptodate(bh[j])) {
 552                        if (depth == -1)
 553                                depth = reiserfs_write_unlock_nested(s);
 554                        ll_rw_block(READA, 1, bh + j);
 555                }
 556                brelse(bh[j]);
 557        }
 558        return depth;
 559}
 560
 561/*
 562 * This function fills up the path from the root to the leaf as it
 563 * descends the tree looking for the key.  It uses reiserfs_bread to
 564 * try to find buffers in the cache given their block number.  If it
 565 * does not find them in the cache it reads them from disk.  For each
 566 * node search_by_key finds using reiserfs_bread it then uses
 567 * bin_search to look through that node.  bin_search will find the
 568 * position of the block_number of the next node if it is looking
 569 * through an internal node.  If it is looking through a leaf node
 570 * bin_search will find the position of the item which has key either
 571 * equal to given key, or which is the maximal key less than the given
 572 * key.  search_by_key returns a path that must be checked for the
 573 * correctness of the top of the path but need not be checked for the
 574 * correctness of the bottom of the path
 575 */
 576/*
 577 * search_by_key - search for key (and item) in stree
 578 * @sb: superblock
 579 * @key: pointer to key to search for
 580 * @search_path: Allocated and initialized struct treepath; Returned filled
 581 *               on success.
 582 * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
 583 *              stop at leaf level.
 584 *
 585 * The function is NOT SCHEDULE-SAFE!
 586 */
 587int search_by_key(struct super_block *sb, const struct cpu_key *key,
 588                  struct treepath *search_path, int stop_level)
 589{
 590        b_blocknr_t block_number;
 591        int expected_level;
 592        struct buffer_head *bh;
 593        struct path_element *last_element;
 594        int node_level, retval;
 595        int right_neighbor_of_leaf_node;
 596        int fs_gen;
 597        struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
 598        b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
 599        int reada_count = 0;
 600
 601#ifdef CONFIG_REISERFS_CHECK
 602        int repeat_counter = 0;
 603#endif
 604
 605        PROC_INFO_INC(sb, search_by_key);
 606
 607        /*
 608         * As we add each node to a path we increase its count.  This means
 609         * that we must be careful to release all nodes in a path before we
 610         * either discard the path struct or re-use the path struct, as we
 611         * do here.
 612         */
 613
 614        pathrelse(search_path);
 615
 616        right_neighbor_of_leaf_node = 0;
 617
 618        /*
 619         * With each iteration of this loop we search through the items in the
 620         * current node, and calculate the next current node(next path element)
 621         * for the next iteration of this loop..
 622         */
 623        block_number = SB_ROOT_BLOCK(sb);
 624        expected_level = -1;
 625        while (1) {
 626
 627#ifdef CONFIG_REISERFS_CHECK
 628                if (!(++repeat_counter % 50000))
 629                        reiserfs_warning(sb, "PAP-5100",
 630                                         "%s: there were %d iterations of "
 631                                         "while loop looking for key %K",
 632                                         current->comm, repeat_counter,
 633                                         key);
 634#endif
 635
 636                /* prep path to have another element added to it. */
 637                last_element =
 638                    PATH_OFFSET_PELEMENT(search_path,
 639                                         ++search_path->path_length);
 640                fs_gen = get_generation(sb);
 641
 642                /*
 643                 * Read the next tree node, and set the last element
 644                 * in the path to have a pointer to it.
 645                 */
 646                if ((bh = last_element->pe_buffer =
 647                     sb_getblk(sb, block_number))) {
 648
 649                        /*
 650                         * We'll need to drop the lock if we encounter any
 651                         * buffers that need to be read. If all of them are
 652                         * already up to date, we don't need to drop the lock.
 653                         */
 654                        int depth = -1;
 655
 656                        if (!buffer_uptodate(bh) && reada_count > 1)
 657                                depth = search_by_key_reada(sb, reada_bh,
 658                                                    reada_blocks, reada_count);
 659
 660                        if (!buffer_uptodate(bh) && depth == -1)
 661                                depth = reiserfs_write_unlock_nested(sb);
 662
 663                        ll_rw_block(READ, 1, &bh);
 664                        wait_on_buffer(bh);
 665
 666                        if (depth != -1)
 667                                reiserfs_write_lock_nested(sb, depth);
 668                        if (!buffer_uptodate(bh))
 669                                goto io_error;
 670                } else {
 671io_error:
 672                        search_path->path_length--;
 673                        pathrelse(search_path);
 674                        return IO_ERROR;
 675                }
 676                reada_count = 0;
 677                if (expected_level == -1)
 678                        expected_level = SB_TREE_HEIGHT(sb);
 679                expected_level--;
 680
 681                /*
 682                 * It is possible that schedule occurred. We must check
 683                 * whether the key to search is still in the tree rooted
 684                 * from the current buffer. If not then repeat search
 685                 * from the root.
 686                 */
 687                if (fs_changed(fs_gen, sb) &&
 688                    (!B_IS_IN_TREE(bh) ||
 689                     B_LEVEL(bh) != expected_level ||
 690                     !key_in_buffer(search_path, key, sb))) {
 691                        PROC_INFO_INC(sb, search_by_key_fs_changed);
 692                        PROC_INFO_INC(sb, search_by_key_restarted);
 693                        PROC_INFO_INC(sb,
 694                                      sbk_restarted[expected_level - 1]);
 695                        pathrelse(search_path);
 696
 697                        /*
 698                         * Get the root block number so that we can
 699                         * repeat the search starting from the root.
 700                         */
 701                        block_number = SB_ROOT_BLOCK(sb);
 702                        expected_level = -1;
 703                        right_neighbor_of_leaf_node = 0;
 704
 705                        /* repeat search from the root */
 706                        continue;
 707                }
 708
 709                /*
 710                 * only check that the key is in the buffer if key is not
 711                 * equal to the MAX_KEY. Latter case is only possible in
 712                 * "finish_unfinished()" processing during mount.
 713                 */
 714                RFALSE(comp_keys(&MAX_KEY, key) &&
 715                       !key_in_buffer(search_path, key, sb),
 716                       "PAP-5130: key is not in the buffer");
 717#ifdef CONFIG_REISERFS_CHECK
 718                if (REISERFS_SB(sb)->cur_tb) {
 719                        print_cur_tb("5140");
 720                        reiserfs_panic(sb, "PAP-5140",
 721                                       "schedule occurred in do_balance!");
 722                }
 723#endif
 724
 725                /*
 726                 * make sure, that the node contents look like a node of
 727                 * certain level
 728                 */
 729                if (!is_tree_node(bh, expected_level)) {
 730                        reiserfs_error(sb, "vs-5150",
 731                                       "invalid format found in block %ld. "
 732                                       "Fsck?", bh->b_blocknr);
 733                        pathrelse(search_path);
 734                        return IO_ERROR;
 735                }
 736
 737                /* ok, we have acquired next formatted node in the tree */
 738                node_level = B_LEVEL(bh);
 739
 740                PROC_INFO_BH_STAT(sb, bh, node_level - 1);
 741
 742                RFALSE(node_level < stop_level,
 743                       "vs-5152: tree level (%d) is less than stop level (%d)",
 744                       node_level, stop_level);
 745
 746                retval = bin_search(key, item_head(bh, 0),
 747                                      B_NR_ITEMS(bh),
 748                                      (node_level ==
 749                                       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
 750                                      KEY_SIZE,
 751                                      &last_element->pe_position);
 752                if (node_level == stop_level) {
 753                        return retval;
 754                }
 755
 756                /* we are not in the stop level */
 757                /*
 758                 * item has been found, so we choose the pointer which
 759                 * is to the right of the found one
 760                 */
 761                if (retval == ITEM_FOUND)
 762                        last_element->pe_position++;
 763
 764                /*
 765                 * if item was not found we choose the position which is to
 766                 * the left of the found item. This requires no code,
 767                 * bin_search did it already.
 768                 */
 769
 770                /*
 771                 * So we have chosen a position in the current node which is
 772                 * an internal node.  Now we calculate child block number by
 773                 * position in the node.
 774                 */
 775                block_number =
 776                    B_N_CHILD_NUM(bh, last_element->pe_position);
 777
 778                /*
 779                 * if we are going to read leaf nodes, try for read
 780                 * ahead as well
 781                 */
 782                if ((search_path->reada & PATH_READA) &&
 783                    node_level == DISK_LEAF_NODE_LEVEL + 1) {
 784                        int pos = last_element->pe_position;
 785                        int limit = B_NR_ITEMS(bh);
 786                        struct reiserfs_key *le_key;
 787
 788                        if (search_path->reada & PATH_READA_BACK)
 789                                limit = 0;
 790                        while (reada_count < SEARCH_BY_KEY_READA) {
 791                                if (pos == limit)
 792                                        break;
 793                                reada_blocks[reada_count++] =
 794                                    B_N_CHILD_NUM(bh, pos);
 795                                if (search_path->reada & PATH_READA_BACK)
 796                                        pos--;
 797                                else
 798                                        pos++;
 799
 800                                /*
 801                                 * check to make sure we're in the same object
 802                                 */
 803                                le_key = internal_key(bh, pos);
 804                                if (le32_to_cpu(le_key->k_objectid) !=
 805                                    key->on_disk_key.k_objectid) {
 806                                        break;
 807                                }
 808                        }
 809                }
 810        }
 811}
 812
 813/*
 814 * Form the path to an item and position in this item which contains
 815 * file byte defined by key. If there is no such item
 816 * corresponding to the key, we point the path to the item with
 817 * maximal key less than key, and *pos_in_item is set to one
 818 * past the last entry/byte in the item.  If searching for entry in a
 819 * directory item, and it is not found, *pos_in_item is set to one
 820 * entry more than the entry with maximal key which is less than the
 821 * sought key.
 822 *
 823 * Note that if there is no entry in this same node which is one more,
 824 * then we point to an imaginary entry.  for direct items, the
 825 * position is in units of bytes, for indirect items the position is
 826 * in units of blocknr entries, for directory items the position is in
 827 * units of directory entries.
 828 */
 829/* The function is NOT SCHEDULE-SAFE! */
 830int search_for_position_by_key(struct super_block *sb,
 831                               /* Key to search (cpu variable) */
 832                               const struct cpu_key *p_cpu_key,
 833                               /* Filled up by this function. */
 834                               struct treepath *search_path)
 835{
 836        struct item_head *p_le_ih;      /* pointer to on-disk structure */
 837        int blk_size;
 838        loff_t item_offset, offset;
 839        struct reiserfs_dir_entry de;
 840        int retval;
 841
 842        /* If searching for directory entry. */
 843        if (is_direntry_cpu_key(p_cpu_key))
 844                return search_by_entry_key(sb, p_cpu_key, search_path,
 845                                           &de);
 846
 847        /* If not searching for directory entry. */
 848
 849        /* If item is found. */
 850        retval = search_item(sb, p_cpu_key, search_path);
 851        if (retval == IO_ERROR)
 852                return retval;
 853        if (retval == ITEM_FOUND) {
 854
 855                RFALSE(!ih_item_len
 856                       (item_head
 857                        (PATH_PLAST_BUFFER(search_path),
 858                         PATH_LAST_POSITION(search_path))),
 859                       "PAP-5165: item length equals zero");
 860
 861                pos_in_item(search_path) = 0;
 862                return POSITION_FOUND;
 863        }
 864
 865        RFALSE(!PATH_LAST_POSITION(search_path),
 866               "PAP-5170: position equals zero");
 867
 868        /* Item is not found. Set path to the previous item. */
 869        p_le_ih =
 870            item_head(PATH_PLAST_BUFFER(search_path),
 871                           --PATH_LAST_POSITION(search_path));
 872        blk_size = sb->s_blocksize;
 873
 874        if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
 875                return FILE_NOT_FOUND;
 876
 877        /* FIXME: quite ugly this far */
 878
 879        item_offset = le_ih_k_offset(p_le_ih);
 880        offset = cpu_key_k_offset(p_cpu_key);
 881
 882        /* Needed byte is contained in the item pointed to by the path. */
 883        if (item_offset <= offset &&
 884            item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
 885                pos_in_item(search_path) = offset - item_offset;
 886                if (is_indirect_le_ih(p_le_ih)) {
 887                        pos_in_item(search_path) /= blk_size;
 888                }
 889                return POSITION_FOUND;
 890        }
 891
 892        /*
 893         * Needed byte is not contained in the item pointed to by the
 894         * path. Set pos_in_item out of the item.
 895         */
 896        if (is_indirect_le_ih(p_le_ih))
 897                pos_in_item(search_path) =
 898                    ih_item_len(p_le_ih) / UNFM_P_SIZE;
 899        else
 900                pos_in_item(search_path) = ih_item_len(p_le_ih);
 901
 902        return POSITION_NOT_FOUND;
 903}
 904
 905/* Compare given item and item pointed to by the path. */
 906int comp_items(const struct item_head *stored_ih, const struct treepath *path)
 907{
 908        struct buffer_head *bh = PATH_PLAST_BUFFER(path);
 909        struct item_head *ih;
 910
 911        /* Last buffer at the path is not in the tree. */
 912        if (!B_IS_IN_TREE(bh))
 913                return 1;
 914
 915        /* Last path position is invalid. */
 916        if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
 917                return 1;
 918
 919        /* we need only to know, whether it is the same item */
 920        ih = tp_item_head(path);
 921        return memcmp(stored_ih, ih, IH_SIZE);
 922}
 923
 924/* unformatted nodes are not logged anymore, ever.  This is safe now */
 925#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
 926
 927/* block can not be forgotten as it is in I/O or held by someone */
 928#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
 929
 930/* prepare for delete or cut of direct item */
 931static inline int prepare_for_direct_item(struct treepath *path,
 932                                          struct item_head *le_ih,
 933                                          struct inode *inode,
 934                                          loff_t new_file_length, int *cut_size)
 935{
 936        loff_t round_len;
 937
 938        if (new_file_length == max_reiserfs_offset(inode)) {
 939                /* item has to be deleted */
 940                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 941                return M_DELETE;
 942        }
 943        /* new file gets truncated */
 944        if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
 945                round_len = ROUND_UP(new_file_length);
 946                /* this was new_file_length < le_ih ... */
 947                if (round_len < le_ih_k_offset(le_ih)) {
 948                        *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 949                        return M_DELETE;        /* Delete this item. */
 950                }
 951                /* Calculate first position and size for cutting from item. */
 952                pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
 953                *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
 954
 955                return M_CUT;   /* Cut from this item. */
 956        }
 957
 958        /* old file: items may have any length */
 959
 960        if (new_file_length < le_ih_k_offset(le_ih)) {
 961                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 962                return M_DELETE;        /* Delete this item. */
 963        }
 964
 965        /* Calculate first position and size for cutting from item. */
 966        *cut_size = -(ih_item_len(le_ih) -
 967                      (pos_in_item(path) =
 968                       new_file_length + 1 - le_ih_k_offset(le_ih)));
 969        return M_CUT;           /* Cut from this item. */
 970}
 971
 972static inline int prepare_for_direntry_item(struct treepath *path,
 973                                            struct item_head *le_ih,
 974                                            struct inode *inode,
 975                                            loff_t new_file_length,
 976                                            int *cut_size)
 977{
 978        if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
 979            new_file_length == max_reiserfs_offset(inode)) {
 980                RFALSE(ih_entry_count(le_ih) != 2,
 981                       "PAP-5220: incorrect empty directory item (%h)", le_ih);
 982                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 983                /* Delete the directory item containing "." and ".." entry. */
 984                return M_DELETE;
 985        }
 986
 987        if (ih_entry_count(le_ih) == 1) {
 988                /*
 989                 * Delete the directory item such as there is one record only
 990                 * in this item
 991                 */
 992                *cut_size = -(IH_SIZE + ih_item_len(le_ih));
 993                return M_DELETE;
 994        }
 995
 996        /* Cut one record from the directory item. */
 997        *cut_size =
 998            -(DEH_SIZE +
 999              entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
1000        return M_CUT;
1001}
1002
1003#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
1004
1005/*
1006 * If the path points to a directory or direct item, calculate mode
1007 * and the size cut, for balance.
1008 * If the path points to an indirect item, remove some number of its
1009 * unformatted nodes.
1010 * In case of file truncate calculate whether this item must be
1011 * deleted/truncated or last unformatted node of this item will be
1012 * converted to a direct item.
1013 * This function returns a determination of what balance mode the
1014 * calling function should employ.
1015 */
1016static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1017                                      struct inode *inode,
1018                                      struct treepath *path,
1019                                      const struct cpu_key *item_key,
1020                                      /*
1021                                       * Number of unformatted nodes
1022                                       * which were removed from end
1023                                       * of the file.
1024                                       */
1025                                      int *removed,
1026                                      int *cut_size,
1027                                      /* MAX_KEY_OFFSET in case of delete. */
1028                                      unsigned long long new_file_length
1029    )
1030{
1031        struct super_block *sb = inode->i_sb;
1032        struct item_head *p_le_ih = tp_item_head(path);
1033        struct buffer_head *bh = PATH_PLAST_BUFFER(path);
1034
1035        BUG_ON(!th->t_trans_id);
1036
1037        /* Stat_data item. */
1038        if (is_statdata_le_ih(p_le_ih)) {
1039
1040                RFALSE(new_file_length != max_reiserfs_offset(inode),
1041                       "PAP-5210: mode must be M_DELETE");
1042
1043                *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1044                return M_DELETE;
1045        }
1046
1047        /* Directory item. */
1048        if (is_direntry_le_ih(p_le_ih))
1049                return prepare_for_direntry_item(path, p_le_ih, inode,
1050                                                 new_file_length,
1051                                                 cut_size);
1052
1053        /* Direct item. */
1054        if (is_direct_le_ih(p_le_ih))
1055                return prepare_for_direct_item(path, p_le_ih, inode,
1056                                               new_file_length, cut_size);
1057
1058        /* Case of an indirect item. */
1059        {
1060            int blk_size = sb->s_blocksize;
1061            struct item_head s_ih;
1062            int need_re_search;
1063            int delete = 0;
1064            int result = M_CUT;
1065            int pos = 0;
1066
1067            if ( new_file_length == max_reiserfs_offset (inode) ) {
1068                /*
1069                 * prepare_for_delete_or_cut() is called by
1070                 * reiserfs_delete_item()
1071                 */
1072                new_file_length = 0;
1073                delete = 1;
1074            }
1075
1076            do {
1077                need_re_search = 0;
1078                *cut_size = 0;
1079                bh = PATH_PLAST_BUFFER(path);
1080                copy_item_head(&s_ih, tp_item_head(path));
1081                pos = I_UNFM_NUM(&s_ih);
1082
1083                while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1084                    __le32 *unfm;
1085                    __u32 block;
1086
1087                    /*
1088                     * Each unformatted block deletion may involve
1089                     * one additional bitmap block into the transaction,
1090                     * thereby the initial journal space reservation
1091                     * might not be enough.
1092                     */
1093                    if (!delete && (*cut_size) != 0 &&
1094                        reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1095                        break;
1096
1097                    unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
1098                    block = get_block_num(unfm, 0);
1099
1100                    if (block != 0) {
1101                        reiserfs_prepare_for_journal(sb, bh, 1);
1102                        put_block_num(unfm, 0, 0);
1103                        journal_mark_dirty(th, bh);
1104                        reiserfs_free_block(th, inode, block, 1);
1105                    }
1106
1107                    reiserfs_cond_resched(sb);
1108
1109                    if (item_moved (&s_ih, path))  {
1110                        need_re_search = 1;
1111                        break;
1112                    }
1113
1114                    pos --;
1115                    (*removed)++;
1116                    (*cut_size) -= UNFM_P_SIZE;
1117
1118                    if (pos == 0) {
1119                        (*cut_size) -= IH_SIZE;
1120                        result = M_DELETE;
1121                        break;
1122                    }
1123                }
1124                /*
1125                 * a trick.  If the buffer has been logged, this will
1126                 * do nothing.  If we've broken the loop without logging
1127                 * it, it will restore the buffer
1128                 */
1129                reiserfs_restore_prepared_buffer(sb, bh);
1130            } while (need_re_search &&
1131                     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1132            pos_in_item(path) = pos * UNFM_P_SIZE;
1133
1134            if (*cut_size == 0) {
1135                /*
1136                 * Nothing was cut. maybe convert last unformatted node to the
1137                 * direct item?
1138                 */
1139                result = M_CONVERT;
1140            }
1141            return result;
1142        }
1143}
1144
1145/* Calculate number of bytes which will be deleted or cut during balance */
1146static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1147{
1148        int del_size;
1149        struct item_head *p_le_ih = tp_item_head(tb->tb_path);
1150
1151        if (is_statdata_le_ih(p_le_ih))
1152                return 0;
1153
1154        del_size =
1155            (mode ==
1156             M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1157        if (is_direntry_le_ih(p_le_ih)) {
1158                /*
1159                 * return EMPTY_DIR_SIZE; We delete emty directories only.
1160                 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1161                 * different empty size.  ick. FIXME, is this right?
1162                 */
1163                return del_size;
1164        }
1165
1166        if (is_indirect_le_ih(p_le_ih))
1167                del_size = (del_size / UNFM_P_SIZE) *
1168                                (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1169        return del_size;
1170}
1171
1172static void init_tb_struct(struct reiserfs_transaction_handle *th,
1173                           struct tree_balance *tb,
1174                           struct super_block *sb,
1175                           struct treepath *path, int size)
1176{
1177
1178        BUG_ON(!th->t_trans_id);
1179
1180        memset(tb, '\0', sizeof(struct tree_balance));
1181        tb->transaction_handle = th;
1182        tb->tb_sb = sb;
1183        tb->tb_path = path;
1184        PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1185        PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1186        tb->insert_size[0] = size;
1187}
1188
1189void padd_item(char *item, int total_length, int length)
1190{
1191        int i;
1192
1193        for (i = total_length; i > length;)
1194                item[--i] = 0;
1195}
1196
1197#ifdef REISERQUOTA_DEBUG
1198char key2type(struct reiserfs_key *ih)
1199{
1200        if (is_direntry_le_key(2, ih))
1201                return 'd';
1202        if (is_direct_le_key(2, ih))
1203                return 'D';
1204        if (is_indirect_le_key(2, ih))
1205                return 'i';
1206        if (is_statdata_le_key(2, ih))
1207                return 's';
1208        return 'u';
1209}
1210
1211char head2type(struct item_head *ih)
1212{
1213        if (is_direntry_le_ih(ih))
1214                return 'd';
1215        if (is_direct_le_ih(ih))
1216                return 'D';
1217        if (is_indirect_le_ih(ih))
1218                return 'i';
1219        if (is_statdata_le_ih(ih))
1220                return 's';
1221        return 'u';
1222}
1223#endif
1224
1225/*
1226 * Delete object item.
1227 * th       - active transaction handle
1228 * path     - path to the deleted item
1229 * item_key - key to search for the deleted item
1230 * indode   - used for updating i_blocks and quotas
1231 * un_bh    - NULL or unformatted node pointer
1232 */
1233int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1234                         struct treepath *path, const struct cpu_key *item_key,
1235                         struct inode *inode, struct buffer_head *un_bh)
1236{
1237        struct super_block *sb = inode->i_sb;
1238        struct tree_balance s_del_balance;
1239        struct item_head s_ih;
1240        struct item_head *q_ih;
1241        int quota_cut_bytes;
1242        int ret_value, del_size, removed;
1243        int depth;
1244
1245#ifdef CONFIG_REISERFS_CHECK
1246        char mode;
1247        int iter = 0;
1248#endif
1249
1250        BUG_ON(!th->t_trans_id);
1251
1252        init_tb_struct(th, &s_del_balance, sb, path,
1253                       0 /*size is unknown */ );
1254
1255        while (1) {
1256                removed = 0;
1257
1258#ifdef CONFIG_REISERFS_CHECK
1259                iter++;
1260                mode =
1261#endif
1262                    prepare_for_delete_or_cut(th, inode, path,
1263                                              item_key, &removed,
1264                                              &del_size,
1265                                              max_reiserfs_offset(inode));
1266
1267                RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1268
1269                copy_item_head(&s_ih, tp_item_head(path));
1270                s_del_balance.insert_size[0] = del_size;
1271
1272                ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1273                if (ret_value != REPEAT_SEARCH)
1274                        break;
1275
1276                PROC_INFO_INC(sb, delete_item_restarted);
1277
1278                /* file system changed, repeat search */
1279                ret_value =
1280                    search_for_position_by_key(sb, item_key, path);
1281                if (ret_value == IO_ERROR)
1282                        break;
1283                if (ret_value == FILE_NOT_FOUND) {
1284                        reiserfs_warning(sb, "vs-5340",
1285                                         "no items of the file %K found",
1286                                         item_key);
1287                        break;
1288                }
1289        }                       /* while (1) */
1290
1291        if (ret_value != CARRY_ON) {
1292                unfix_nodes(&s_del_balance);
1293                return 0;
1294        }
1295
1296        /* reiserfs_delete_item returns item length when success */
1297        ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1298        q_ih = tp_item_head(path);
1299        quota_cut_bytes = ih_item_len(q_ih);
1300
1301        /*
1302         * hack so the quota code doesn't have to guess if the file has a
1303         * tail.  On tail insert, we allocate quota for 1 unformatted node.
1304         * We test the offset because the tail might have been
1305         * split into multiple items, and we only want to decrement for
1306         * the unfm node once
1307         */
1308        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1309                if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1310                        quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1311                } else {
1312                        quota_cut_bytes = 0;
1313                }
1314        }
1315
1316        if (un_bh) {
1317                int off;
1318                char *data;
1319
1320                /*
1321                 * We are in direct2indirect conversion, so move tail contents
1322                 * to the unformatted node
1323                 */
1324                /*
1325                 * note, we do the copy before preparing the buffer because we
1326                 * don't care about the contents of the unformatted node yet.
1327                 * the only thing we really care about is the direct item's
1328                 * data is in the unformatted node.
1329                 *
1330                 * Otherwise, we would have to call
1331                 * reiserfs_prepare_for_journal on the unformatted node,
1332                 * which might schedule, meaning we'd have to loop all the
1333                 * way back up to the start of the while loop.
1334                 *
1335                 * The unformatted node must be dirtied later on.  We can't be
1336                 * sure here if the entire tail has been deleted yet.
1337                 *
1338                 * un_bh is from the page cache (all unformatted nodes are
1339                 * from the page cache) and might be a highmem page.  So, we
1340                 * can't use un_bh->b_data.
1341                 * -clm
1342                 */
1343
1344                data = kmap_atomic(un_bh->b_page);
1345                off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
1346                memcpy(data + off,
1347                       ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
1348                       ret_value);
1349                kunmap_atomic(data);
1350        }
1351
1352        /* Perform balancing after all resources have been collected at once. */
1353        do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1354
1355#ifdef REISERQUOTA_DEBUG
1356        reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1357                       "reiserquota delete_item(): freeing %u, id=%u type=%c",
1358                       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1359#endif
1360        depth = reiserfs_write_unlock_nested(inode->i_sb);
1361        dquot_free_space_nodirty(inode, quota_cut_bytes);
1362        reiserfs_write_lock_nested(inode->i_sb, depth);
1363
1364        /* Return deleted body length */
1365        return ret_value;
1366}
1367
1368/*
1369 * Summary Of Mechanisms For Handling Collisions Between Processes:
1370 *
1371 *  deletion of the body of the object is performed by iput(), with the
1372 *  result that if multiple processes are operating on a file, the
1373 *  deletion of the body of the file is deferred until the last process
1374 *  that has an open inode performs its iput().
1375 *
1376 *  writes and truncates are protected from collisions by use of
1377 *  semaphores.
1378 *
1379 *  creates, linking, and mknod are protected from collisions with other
1380 *  processes by making the reiserfs_add_entry() the last step in the
1381 *  creation, and then rolling back all changes if there was a collision.
1382 *  - Hans
1383*/
1384
1385/* this deletes item which never gets split */
1386void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1387                                struct inode *inode, struct reiserfs_key *key)
1388{
1389        struct super_block *sb = th->t_super;
1390        struct tree_balance tb;
1391        INITIALIZE_PATH(path);
1392        int item_len = 0;
1393        int tb_init = 0;
1394        struct cpu_key cpu_key;
1395        int retval;
1396        int quota_cut_bytes = 0;
1397
1398        BUG_ON(!th->t_trans_id);
1399
1400        le_key2cpu_key(&cpu_key, key);
1401
1402        while (1) {
1403                retval = search_item(th->t_super, &cpu_key, &path);
1404                if (retval == IO_ERROR) {
1405                        reiserfs_error(th->t_super, "vs-5350",
1406                                       "i/o failure occurred trying "
1407                                       "to delete %K", &cpu_key);
1408                        break;
1409                }
1410                if (retval != ITEM_FOUND) {
1411                        pathrelse(&path);
1412                        /*
1413                         * No need for a warning, if there is just no free
1414                         * space to insert '..' item into the
1415                         * newly-created subdir
1416                         */
1417                        if (!
1418                            ((unsigned long long)
1419                             GET_HASH_VALUE(le_key_k_offset
1420                                            (le_key_version(key), key)) == 0
1421                             && (unsigned long long)
1422                             GET_GENERATION_NUMBER(le_key_k_offset
1423                                                   (le_key_version(key),
1424                                                    key)) == 1))
1425                                reiserfs_warning(th->t_super, "vs-5355",
1426                                                 "%k not found", key);
1427                        break;
1428                }
1429                if (!tb_init) {
1430                        tb_init = 1;
1431                        item_len = ih_item_len(tp_item_head(&path));
1432                        init_tb_struct(th, &tb, th->t_super, &path,
1433                                       -(IH_SIZE + item_len));
1434                }
1435                quota_cut_bytes = ih_item_len(tp_item_head(&path));
1436
1437                retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1438                if (retval == REPEAT_SEARCH) {
1439                        PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1440                        continue;
1441                }
1442
1443                if (retval == CARRY_ON) {
1444                        do_balance(&tb, NULL, NULL, M_DELETE);
1445                        /*
1446                         * Should we count quota for item? (we don't
1447                         * count quotas for save-links)
1448                         */
1449                        if (inode) {
1450                                int depth;
1451#ifdef REISERQUOTA_DEBUG
1452                                reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1453                                               "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1454                                               quota_cut_bytes, inode->i_uid,
1455                                               key2type(key));
1456#endif
1457                                depth = reiserfs_write_unlock_nested(sb);
1458                                dquot_free_space_nodirty(inode,
1459                                                         quota_cut_bytes);
1460                                reiserfs_write_lock_nested(sb, depth);
1461                        }
1462                        break;
1463                }
1464
1465                /* IO_ERROR, NO_DISK_SPACE, etc */
1466                reiserfs_warning(th->t_super, "vs-5360",
1467                                 "could not delete %K due to fix_nodes failure",
1468                                 &cpu_key);
1469                unfix_nodes(&tb);
1470                break;
1471        }
1472
1473        reiserfs_check_path(&path);
1474}
1475
1476int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1477                           struct inode *inode)
1478{
1479        int err;
1480        inode->i_size = 0;
1481        BUG_ON(!th->t_trans_id);
1482
1483        /* for directory this deletes item containing "." and ".." */
1484        err =
1485            reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1486        if (err)
1487                return err;
1488
1489#if defined( USE_INODE_GENERATION_COUNTER )
1490        if (!old_format_only(th->t_super)) {
1491                __le32 *inode_generation;
1492
1493                inode_generation =
1494                    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1495                le32_add_cpu(inode_generation, 1);
1496        }
1497/* USE_INODE_GENERATION_COUNTER */
1498#endif
1499        reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1500
1501        return err;
1502}
1503
1504static void unmap_buffers(struct page *page, loff_t pos)
1505{
1506        struct buffer_head *bh;
1507        struct buffer_head *head;
1508        struct buffer_head *next;
1509        unsigned long tail_index;
1510        unsigned long cur_index;
1511
1512        if (page) {
1513                if (page_has_buffers(page)) {
1514                        tail_index = pos & (PAGE_SIZE - 1);
1515                        cur_index = 0;
1516                        head = page_buffers(page);
1517                        bh = head;
1518                        do {
1519                                next = bh->b_this_page;
1520
1521                                /*
1522                                 * we want to unmap the buffers that contain
1523                                 * the tail, and all the buffers after it
1524                                 * (since the tail must be at the end of the
1525                                 * file).  We don't want to unmap file data
1526                                 * before the tail, since it might be dirty
1527                                 * and waiting to reach disk
1528                                 */
1529                                cur_index += bh->b_size;
1530                                if (cur_index > tail_index) {
1531                                        reiserfs_unmap_buffer(bh);
1532                                }
1533                                bh = next;
1534                        } while (bh != head);
1535                }
1536        }
1537}
1538
1539static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1540                                    struct inode *inode,
1541                                    struct page *page,
1542                                    struct treepath *path,
1543                                    const struct cpu_key *item_key,
1544                                    loff_t new_file_size, char *mode)
1545{
1546        struct super_block *sb = inode->i_sb;
1547        int block_size = sb->s_blocksize;
1548        int cut_bytes;
1549        BUG_ON(!th->t_trans_id);
1550        BUG_ON(new_file_size != inode->i_size);
1551
1552        /*
1553         * the page being sent in could be NULL if there was an i/o error
1554         * reading in the last block.  The user will hit problems trying to
1555         * read the file, but for now we just skip the indirect2direct
1556         */
1557        if (atomic_read(&inode->i_count) > 1 ||
1558            !tail_has_to_be_packed(inode) ||
1559            !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1560                /* leave tail in an unformatted node */
1561                *mode = M_SKIP_BALANCING;
1562                cut_bytes =
1563                    block_size - (new_file_size & (block_size - 1));
1564                pathrelse(path);
1565                return cut_bytes;
1566        }
1567
1568        /* Perform the conversion to a direct_item. */
1569        return indirect2direct(th, inode, page, path, item_key,
1570                               new_file_size, mode);
1571}
1572
1573/*
1574 * we did indirect_to_direct conversion. And we have inserted direct
1575 * item successesfully, but there were no disk space to cut unfm
1576 * pointer being converted. Therefore we have to delete inserted
1577 * direct item(s)
1578 */
1579static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1580                                         struct inode *inode, struct treepath *path)
1581{
1582        struct cpu_key tail_key;
1583        int tail_len;
1584        int removed;
1585        BUG_ON(!th->t_trans_id);
1586
1587        make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1588        tail_key.key_length = 4;
1589
1590        tail_len =
1591            (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1592        while (tail_len) {
1593                /* look for the last byte of the tail */
1594                if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1595                    POSITION_NOT_FOUND)
1596                        reiserfs_panic(inode->i_sb, "vs-5615",
1597                                       "found invalid item");
1598                RFALSE(path->pos_in_item !=
1599                       ih_item_len(tp_item_head(path)) - 1,
1600                       "vs-5616: appended bytes found");
1601                PATH_LAST_POSITION(path)--;
1602
1603                removed =
1604                    reiserfs_delete_item(th, path, &tail_key, inode,
1605                                         NULL /*unbh not needed */ );
1606                RFALSE(removed <= 0
1607                       || removed > tail_len,
1608                       "vs-5617: there was tail %d bytes, removed item length %d bytes",
1609                       tail_len, removed);
1610                tail_len -= removed;
1611                set_cpu_key_k_offset(&tail_key,
1612                                     cpu_key_k_offset(&tail_key) - removed);
1613        }
1614        reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1615                         "conversion has been rolled back due to "
1616                         "lack of disk space");
1617        mark_inode_dirty(inode);
1618}
1619
1620/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1621int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1622                           struct treepath *path,
1623                           struct cpu_key *item_key,
1624                           struct inode *inode,
1625                           struct page *page, loff_t new_file_size)
1626{
1627        struct super_block *sb = inode->i_sb;
1628        /*
1629         * Every function which is going to call do_balance must first
1630         * create a tree_balance structure.  Then it must fill up this
1631         * structure by using the init_tb_struct and fix_nodes functions.
1632         * After that we can make tree balancing.
1633         */
1634        struct tree_balance s_cut_balance;
1635        struct item_head *p_le_ih;
1636        int cut_size = 0;       /* Amount to be cut. */
1637        int ret_value = CARRY_ON;
1638        int removed = 0;        /* Number of the removed unformatted nodes. */
1639        int is_inode_locked = 0;
1640        char mode;              /* Mode of the balance. */
1641        int retval2 = -1;
1642        int quota_cut_bytes;
1643        loff_t tail_pos = 0;
1644        int depth;
1645
1646        BUG_ON(!th->t_trans_id);
1647
1648        init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1649                       cut_size);
1650
1651        /*
1652         * Repeat this loop until we either cut the item without needing
1653         * to balance, or we fix_nodes without schedule occurring
1654         */
1655        while (1) {
1656                /*
1657                 * Determine the balance mode, position of the first byte to
1658                 * be cut, and size to be cut.  In case of the indirect item
1659                 * free unformatted nodes which are pointed to by the cut
1660                 * pointers.
1661                 */
1662
1663                mode =
1664                    prepare_for_delete_or_cut(th, inode, path,
1665                                              item_key, &removed,
1666                                              &cut_size, new_file_size);
1667                if (mode == M_CONVERT) {
1668                        /*
1669                         * convert last unformatted node to direct item or
1670                         * leave tail in the unformatted node
1671                         */
1672                        RFALSE(ret_value != CARRY_ON,
1673                               "PAP-5570: can not convert twice");
1674
1675                        ret_value =
1676                            maybe_indirect_to_direct(th, inode, page,
1677                                                     path, item_key,
1678                                                     new_file_size, &mode);
1679                        if (mode == M_SKIP_BALANCING)
1680                                /* tail has been left in the unformatted node */
1681                                return ret_value;
1682
1683                        is_inode_locked = 1;
1684
1685                        /*
1686                         * removing of last unformatted node will
1687                         * change value we have to return to truncate.
1688                         * Save it
1689                         */
1690                        retval2 = ret_value;
1691
1692                        /*
1693                         * So, we have performed the first part of the
1694                         * conversion:
1695                         * inserting the new direct item.  Now we are
1696                         * removing the last unformatted node pointer.
1697                         * Set key to search for it.
1698                         */
1699                        set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1700                        item_key->key_length = 4;
1701                        new_file_size -=
1702                            (new_file_size & (sb->s_blocksize - 1));
1703                        tail_pos = new_file_size;
1704                        set_cpu_key_k_offset(item_key, new_file_size + 1);
1705                        if (search_for_position_by_key
1706                            (sb, item_key,
1707                             path) == POSITION_NOT_FOUND) {
1708                                print_block(PATH_PLAST_BUFFER(path), 3,
1709                                            PATH_LAST_POSITION(path) - 1,
1710                                            PATH_LAST_POSITION(path) + 1);
1711                                reiserfs_panic(sb, "PAP-5580", "item to "
1712                                               "convert does not exist (%K)",
1713                                               item_key);
1714                        }
1715                        continue;
1716                }
1717                if (cut_size == 0) {
1718                        pathrelse(path);
1719                        return 0;
1720                }
1721
1722                s_cut_balance.insert_size[0] = cut_size;
1723
1724                ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1725                if (ret_value != REPEAT_SEARCH)
1726                        break;
1727
1728                PROC_INFO_INC(sb, cut_from_item_restarted);
1729
1730                ret_value =
1731                    search_for_position_by_key(sb, item_key, path);
1732                if (ret_value == POSITION_FOUND)
1733                        continue;
1734
1735                reiserfs_warning(sb, "PAP-5610", "item %K not found",
1736                                 item_key);
1737                unfix_nodes(&s_cut_balance);
1738                return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1739        }                       /* while */
1740
1741        /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1742        if (ret_value != CARRY_ON) {
1743                if (is_inode_locked) {
1744                        /*
1745                         * FIXME: this seems to be not needed: we are always
1746                         * able to cut item
1747                         */
1748                        indirect_to_direct_roll_back(th, inode, path);
1749                }
1750                if (ret_value == NO_DISK_SPACE)
1751                        reiserfs_warning(sb, "reiserfs-5092",
1752                                         "NO_DISK_SPACE");
1753                unfix_nodes(&s_cut_balance);
1754                return -EIO;
1755        }
1756
1757        /* go ahead and perform balancing */
1758
1759        RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1760
1761        /* Calculate number of bytes that need to be cut from the item. */
1762        quota_cut_bytes =
1763            (mode ==
1764             M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
1765            insert_size[0];
1766        if (retval2 == -1)
1767                ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1768        else
1769                ret_value = retval2;
1770
1771        /*
1772         * For direct items, we only change the quota when deleting the last
1773         * item.
1774         */
1775        p_le_ih = tp_item_head(s_cut_balance.tb_path);
1776        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1777                if (mode == M_DELETE &&
1778                    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1779                    1) {
1780                        /* FIXME: this is to keep 3.5 happy */
1781                        REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1782                        quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1783                } else {
1784                        quota_cut_bytes = 0;
1785                }
1786        }
1787#ifdef CONFIG_REISERFS_CHECK
1788        if (is_inode_locked) {
1789                struct item_head *le_ih =
1790                    tp_item_head(s_cut_balance.tb_path);
1791                /*
1792                 * we are going to complete indirect2direct conversion. Make
1793                 * sure, that we exactly remove last unformatted node pointer
1794                 * of the item
1795                 */
1796                if (!is_indirect_le_ih(le_ih))
1797                        reiserfs_panic(sb, "vs-5652",
1798                                       "item must be indirect %h", le_ih);
1799
1800                if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1801                        reiserfs_panic(sb, "vs-5653", "completing "
1802                                       "indirect2direct conversion indirect "
1803                                       "item %h being deleted must be of "
1804                                       "4 byte long", le_ih);
1805
1806                if (mode == M_CUT
1807                    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1808                        reiserfs_panic(sb, "vs-5654", "can not complete "
1809                                       "indirect2direct conversion of %h "
1810                                       "(CUT, insert_size==%d)",
1811                                       le_ih, s_cut_balance.insert_size[0]);
1812                }
1813                /*
1814                 * it would be useful to make sure, that right neighboring
1815                 * item is direct item of this file
1816                 */
1817        }
1818#endif
1819
1820        do_balance(&s_cut_balance, NULL, NULL, mode);
1821        if (is_inode_locked) {
1822                /*
1823                 * we've done an indirect->direct conversion.  when the
1824                 * data block was freed, it was removed from the list of
1825                 * blocks that must be flushed before the transaction
1826                 * commits, make sure to unmap and invalidate it
1827                 */
1828                unmap_buffers(page, tail_pos);
1829                REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1830        }
1831#ifdef REISERQUOTA_DEBUG
1832        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1833                       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1834                       quota_cut_bytes, inode->i_uid, '?');
1835#endif
1836        depth = reiserfs_write_unlock_nested(sb);
1837        dquot_free_space_nodirty(inode, quota_cut_bytes);
1838        reiserfs_write_lock_nested(sb, depth);
1839        return ret_value;
1840}
1841
1842static void truncate_directory(struct reiserfs_transaction_handle *th,
1843                               struct inode *inode)
1844{
1845        BUG_ON(!th->t_trans_id);
1846        if (inode->i_nlink)
1847                reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1848
1849        set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1850        set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1851        reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1852        reiserfs_update_sd(th, inode);
1853        set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1854        set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1855}
1856
1857/*
1858 * Truncate file to the new size. Note, this must be called with a
1859 * transaction already started
1860 */
1861int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1862                         struct inode *inode,   /* ->i_size contains new size */
1863                         struct page *page,     /* up to date for last block */
1864                         /*
1865                          * when it is called by file_release to convert
1866                          * the tail - no timestamps should be updated
1867                          */
1868                         int update_timestamps
1869    )
1870{
1871        INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1872        struct item_head *p_le_ih;      /* Pointer to an item header. */
1873
1874        /* Key to search for a previous file item. */
1875        struct cpu_key s_item_key;
1876        loff_t file_size,       /* Old file size. */
1877         new_file_size; /* New file size. */
1878        int deleted;            /* Number of deleted or truncated bytes. */
1879        int retval;
1880        int err = 0;
1881
1882        BUG_ON(!th->t_trans_id);
1883        if (!
1884            (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1885             || S_ISLNK(inode->i_mode)))
1886                return 0;
1887
1888        /* deletion of directory - no need to update timestamps */
1889        if (S_ISDIR(inode->i_mode)) {
1890                truncate_directory(th, inode);
1891                return 0;
1892        }
1893
1894        /* Get new file size. */
1895        new_file_size = inode->i_size;
1896
1897        /* FIXME: note, that key type is unimportant here */
1898        make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1899                     TYPE_DIRECT, 3);
1900
1901        retval =
1902            search_for_position_by_key(inode->i_sb, &s_item_key,
1903                                       &s_search_path);
1904        if (retval == IO_ERROR) {
1905                reiserfs_error(inode->i_sb, "vs-5657",
1906                               "i/o failure occurred trying to truncate %K",
1907                               &s_item_key);
1908                err = -EIO;
1909                goto out;
1910        }
1911        if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1912                reiserfs_error(inode->i_sb, "PAP-5660",
1913                               "wrong result %d of search for %K", retval,
1914                               &s_item_key);
1915
1916                err = -EIO;
1917                goto out;
1918        }
1919
1920        s_search_path.pos_in_item--;
1921
1922        /* Get real file size (total length of all file items) */
1923        p_le_ih = tp_item_head(&s_search_path);
1924        if (is_statdata_le_ih(p_le_ih))
1925                file_size = 0;
1926        else {
1927                loff_t offset = le_ih_k_offset(p_le_ih);
1928                int bytes =
1929                    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1930
1931                /*
1932                 * this may mismatch with real file size: if last direct item
1933                 * had no padding zeros and last unformatted node had no free
1934                 * space, this file would have this file size
1935                 */
1936                file_size = offset + bytes - 1;
1937        }
1938        /*
1939         * are we doing a full truncate or delete, if so
1940         * kick in the reada code
1941         */
1942        if (new_file_size == 0)
1943                s_search_path.reada = PATH_READA | PATH_READA_BACK;
1944
1945        if (file_size == 0 || file_size < new_file_size) {
1946                goto update_and_out;
1947        }
1948
1949        /* Update key to search for the last file item. */
1950        set_cpu_key_k_offset(&s_item_key, file_size);
1951
1952        do {
1953                /* Cut or delete file item. */
1954                deleted =
1955                    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1956                                           inode, page, new_file_size);
1957                if (deleted < 0) {
1958                        reiserfs_warning(inode->i_sb, "vs-5665",
1959                                         "reiserfs_cut_from_item failed");
1960                        reiserfs_check_path(&s_search_path);
1961                        return 0;
1962                }
1963
1964                RFALSE(deleted > file_size,
1965                       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1966                       deleted, file_size, &s_item_key);
1967
1968                /* Change key to search the last file item. */
1969                file_size -= deleted;
1970
1971                set_cpu_key_k_offset(&s_item_key, file_size);
1972
1973                /*
1974                 * While there are bytes to truncate and previous
1975                 * file item is presented in the tree.
1976                 */
1977
1978                /*
1979                 * This loop could take a really long time, and could log
1980                 * many more blocks than a transaction can hold.  So, we do
1981                 * a polite journal end here, and if the transaction needs
1982                 * ending, we make sure the file is consistent before ending
1983                 * the current trans and starting a new one
1984                 */
1985                if (journal_transaction_should_end(th, 0) ||
1986                    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1987                        pathrelse(&s_search_path);
1988
1989                        if (update_timestamps) {
1990                                inode->i_mtime = CURRENT_TIME_SEC;
1991                                inode->i_ctime = CURRENT_TIME_SEC;
1992                        }
1993                        reiserfs_update_sd(th, inode);
1994
1995                        err = journal_end(th);
1996                        if (err)
1997                                goto out;
1998                        err = journal_begin(th, inode->i_sb,
1999                                            JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
2000                        if (err)
2001                                goto out;
2002                        reiserfs_update_inode_transaction(inode);
2003                }
2004        } while (file_size > ROUND_UP(new_file_size) &&
2005                 search_for_position_by_key(inode->i_sb, &s_item_key,
2006                                            &s_search_path) == POSITION_FOUND);
2007
2008        RFALSE(file_size > ROUND_UP(new_file_size),
2009               "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2010               new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2011
2012update_and_out:
2013        if (update_timestamps) {
2014                /* this is truncate, not file closing */
2015                inode->i_mtime = CURRENT_TIME_SEC;
2016                inode->i_ctime = CURRENT_TIME_SEC;
2017        }
2018        reiserfs_update_sd(th, inode);
2019
2020out:
2021        pathrelse(&s_search_path);
2022        return err;
2023}
2024
2025#ifdef CONFIG_REISERFS_CHECK
2026/* this makes sure, that we __append__, not overwrite or add holes */
2027static void check_research_for_paste(struct treepath *path,
2028                                     const struct cpu_key *key)
2029{
2030        struct item_head *found_ih = tp_item_head(path);
2031
2032        if (is_direct_le_ih(found_ih)) {
2033                if (le_ih_k_offset(found_ih) +
2034                    op_bytes_number(found_ih,
2035                                    get_last_bh(path)->b_size) !=
2036                    cpu_key_k_offset(key)
2037                    || op_bytes_number(found_ih,
2038                                       get_last_bh(path)->b_size) !=
2039                    pos_in_item(path))
2040                        reiserfs_panic(NULL, "PAP-5720", "found direct item "
2041                                       "%h or position (%d) does not match "
2042                                       "to key %K", found_ih,
2043                                       pos_in_item(path), key);
2044        }
2045        if (is_indirect_le_ih(found_ih)) {
2046                if (le_ih_k_offset(found_ih) +
2047                    op_bytes_number(found_ih,
2048                                    get_last_bh(path)->b_size) !=
2049                    cpu_key_k_offset(key)
2050                    || I_UNFM_NUM(found_ih) != pos_in_item(path)
2051                    || get_ih_free_space(found_ih) != 0)
2052                        reiserfs_panic(NULL, "PAP-5730", "found indirect "
2053                                       "item (%h) or position (%d) does not "
2054                                       "match to key (%K)",
2055                                       found_ih, pos_in_item(path), key);
2056        }
2057}
2058#endif                          /* config reiserfs check */
2059
2060/*
2061 * Paste bytes to the existing item.
2062 * Returns bytes number pasted into the item.
2063 */
2064int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2065                             /* Path to the pasted item. */
2066                             struct treepath *search_path,
2067                             /* Key to search for the needed item. */
2068                             const struct cpu_key *key,
2069                             /* Inode item belongs to */
2070                             struct inode *inode,
2071                             /* Pointer to the bytes to paste. */
2072                             const char *body,
2073                             /* Size of pasted bytes. */
2074                             int pasted_size)
2075{
2076        struct super_block *sb = inode->i_sb;
2077        struct tree_balance s_paste_balance;
2078        int retval;
2079        int fs_gen;
2080        int depth;
2081
2082        BUG_ON(!th->t_trans_id);
2083
2084        fs_gen = get_generation(inode->i_sb);
2085
2086#ifdef REISERQUOTA_DEBUG
2087        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2088                       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2089                       pasted_size, inode->i_uid,
2090                       key2type(&key->on_disk_key));
2091#endif
2092
2093        depth = reiserfs_write_unlock_nested(sb);
2094        retval = dquot_alloc_space_nodirty(inode, pasted_size);
2095        reiserfs_write_lock_nested(sb, depth);
2096        if (retval) {
2097                pathrelse(search_path);
2098                return retval;
2099        }
2100        init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
2101                       pasted_size);
2102#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2103        s_paste_balance.key = key->on_disk_key;
2104#endif
2105
2106        /* DQUOT_* can schedule, must check before the fix_nodes */
2107        if (fs_changed(fs_gen, inode->i_sb)) {
2108                goto search_again;
2109        }
2110
2111        while ((retval =
2112                fix_nodes(M_PASTE, &s_paste_balance, NULL,
2113                          body)) == REPEAT_SEARCH) {
2114search_again:
2115                /* file system changed while we were in the fix_nodes */
2116                PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2117                retval =
2118                    search_for_position_by_key(th->t_super, key,
2119                                               search_path);
2120                if (retval == IO_ERROR) {
2121                        retval = -EIO;
2122                        goto error_out;
2123                }
2124                if (retval == POSITION_FOUND) {
2125                        reiserfs_warning(inode->i_sb, "PAP-5710",
2126                                         "entry or pasted byte (%K) exists",
2127                                         key);
2128                        retval = -EEXIST;
2129                        goto error_out;
2130                }
2131#ifdef CONFIG_REISERFS_CHECK
2132                check_research_for_paste(search_path, key);
2133#endif
2134        }
2135
2136        /*
2137         * Perform balancing after all resources are collected by fix_nodes,
2138         * and accessing them will not risk triggering schedule.
2139         */
2140        if (retval == CARRY_ON) {
2141                do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2142                return 0;
2143        }
2144        retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2145error_out:
2146        /* this also releases the path */
2147        unfix_nodes(&s_paste_balance);
2148#ifdef REISERQUOTA_DEBUG
2149        reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2150                       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2151                       pasted_size, inode->i_uid,
2152                       key2type(&key->on_disk_key));
2153#endif
2154        depth = reiserfs_write_unlock_nested(sb);
2155        dquot_free_space_nodirty(inode, pasted_size);
2156        reiserfs_write_lock_nested(sb, depth);
2157        return retval;
2158}
2159
2160/*
2161 * Insert new item into the buffer at the path.
2162 * th   - active transaction handle
2163 * path - path to the inserted item
2164 * ih   - pointer to the item header to insert
2165 * body - pointer to the bytes to insert
2166 */
2167int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2168                         struct treepath *path, const struct cpu_key *key,
2169                         struct item_head *ih, struct inode *inode,
2170                         const char *body)
2171{
2172        struct tree_balance s_ins_balance;
2173        int retval;
2174        int fs_gen = 0;
2175        int quota_bytes = 0;
2176
2177        BUG_ON(!th->t_trans_id);
2178
2179        if (inode) {            /* Do we count quotas for item? */
2180                int depth;
2181                fs_gen = get_generation(inode->i_sb);
2182                quota_bytes = ih_item_len(ih);
2183
2184                /*
2185                 * hack so the quota code doesn't have to guess
2186                 * if the file has a tail, links are always tails,
2187                 * so there's no guessing needed
2188                 */
2189                if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2190                        quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2191#ifdef REISERQUOTA_DEBUG
2192                reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2193                               "reiserquota insert_item(): allocating %u id=%u type=%c",
2194                               quota_bytes, inode->i_uid, head2type(ih));
2195#endif
2196                /*
2197                 * We can't dirty inode here. It would be immediately
2198                 * written but appropriate stat item isn't inserted yet...
2199                 */
2200                depth = reiserfs_write_unlock_nested(inode->i_sb);
2201                retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2202                reiserfs_write_lock_nested(inode->i_sb, depth);
2203                if (retval) {
2204                        pathrelse(path);
2205                        return retval;
2206                }
2207        }
2208        init_tb_struct(th, &s_ins_balance, th->t_super, path,
2209                       IH_SIZE + ih_item_len(ih));
2210#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2211        s_ins_balance.key = key->on_disk_key;
2212#endif
2213        /*
2214         * DQUOT_* can schedule, must check to be sure calling
2215         * fix_nodes is safe
2216         */
2217        if (inode && fs_changed(fs_gen, inode->i_sb)) {
2218                goto search_again;
2219        }
2220
2221        while ((retval =
2222                fix_nodes(M_INSERT, &s_ins_balance, ih,
2223                          body)) == REPEAT_SEARCH) {
2224search_again:
2225                /* file system changed while we were in the fix_nodes */
2226                PROC_INFO_INC(th->t_super, insert_item_restarted);
2227                retval = search_item(th->t_super, key, path);
2228                if (retval == IO_ERROR) {
2229                        retval = -EIO;
2230                        goto error_out;
2231                }
2232                if (retval == ITEM_FOUND) {
2233                        reiserfs_warning(th->t_super, "PAP-5760",
2234                                         "key %K already exists in the tree",
2235                                         key);
2236                        retval = -EEXIST;
2237                        goto error_out;
2238                }
2239        }
2240
2241        /* make balancing after all resources will be collected at a time */
2242        if (retval == CARRY_ON) {
2243                do_balance(&s_ins_balance, ih, body, M_INSERT);
2244                return 0;
2245        }
2246
2247        retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2248error_out:
2249        /* also releases the path */
2250        unfix_nodes(&s_ins_balance);
2251#ifdef REISERQUOTA_DEBUG
2252        reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2253                       "reiserquota insert_item(): freeing %u id=%u type=%c",
2254                       quota_bytes, inode->i_uid, head2type(ih));
2255#endif
2256        if (inode) {
2257                int depth = reiserfs_write_unlock_nested(inode->i_sb);
2258                dquot_free_space_nodirty(inode, quota_bytes);
2259                reiserfs_write_lock_nested(inode->i_sb, depth);
2260        }
2261        return retval;
2262}
2263