linux/fs/ext4/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/namei.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/namei.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 *  Directory entry file type support and forward compatibility hooks
  18 *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
  19 *  Hash Tree Directory indexing (c)
  20 *      Daniel Phillips, 2001
  21 *  Hash Tree Directory indexing porting
  22 *      Christopher Li, 2002
  23 *  Hash Tree Directory indexing cleanup
  24 *      Theodore Ts'o, 2002
  25 */
  26
  27#include <linux/fs.h>
  28#include <linux/pagemap.h>
  29#include <linux/jbd2.h>
  30#include <linux/time.h>
  31#include <linux/fcntl.h>
  32#include <linux/stat.h>
  33#include <linux/string.h>
  34#include <linux/quotaops.h>
  35#include <linux/buffer_head.h>
  36#include <linux/bio.h>
  37#include "ext4.h"
  38#include "ext4_jbd2.h"
  39
  40#include "xattr.h"
  41#include "acl.h"
  42
  43/*
  44 * define how far ahead to read directories while searching them.
  45 */
  46#define NAMEI_RA_CHUNKS  2
  47#define NAMEI_RA_BLOCKS  4
  48#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
  49#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
  50
  51static struct buffer_head *ext4_append(handle_t *handle,
  52                                        struct inode *inode,
  53                                        ext4_lblk_t *block, int *err)
  54{
  55        struct buffer_head *bh;
  56
  57        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
  58
  59        bh = ext4_bread(handle, inode, *block, 1, err);
  60        if (bh) {
  61                inode->i_size += inode->i_sb->s_blocksize;
  62                EXT4_I(inode)->i_disksize = inode->i_size;
  63                *err = ext4_journal_get_write_access(handle, bh);
  64                if (*err) {
  65                        brelse(bh);
  66                        bh = NULL;
  67                }
  68        }
  69        return bh;
  70}
  71
  72#ifndef assert
  73#define assert(test) J_ASSERT(test)
  74#endif
  75
  76#ifdef DX_DEBUG
  77#define dxtrace(command) command
  78#else
  79#define dxtrace(command)
  80#endif
  81
  82struct fake_dirent
  83{
  84        __le32 inode;
  85        __le16 rec_len;
  86        u8 name_len;
  87        u8 file_type;
  88};
  89
  90struct dx_countlimit
  91{
  92        __le16 limit;
  93        __le16 count;
  94};
  95
  96struct dx_entry
  97{
  98        __le32 hash;
  99        __le32 block;
 100};
 101
 102/*
 103 * dx_root_info is laid out so that if it should somehow get overlaid by a
 104 * dirent the two low bits of the hash version will be zero.  Therefore, the
 105 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 106 */
 107
 108struct dx_root
 109{
 110        struct fake_dirent dot;
 111        char dot_name[4];
 112        struct fake_dirent dotdot;
 113        char dotdot_name[4];
 114        struct dx_root_info
 115        {
 116                __le32 reserved_zero;
 117                u8 hash_version;
 118                u8 info_length; /* 8 */
 119                u8 indirect_levels;
 120                u8 unused_flags;
 121        }
 122        info;
 123        struct dx_entry entries[0];
 124};
 125
 126struct dx_node
 127{
 128        struct fake_dirent fake;
 129        struct dx_entry entries[0];
 130};
 131
 132
 133struct dx_frame
 134{
 135        struct buffer_head *bh;
 136        struct dx_entry *entries;
 137        struct dx_entry *at;
 138};
 139
 140struct dx_map_entry
 141{
 142        u32 hash;
 143        u16 offs;
 144        u16 size;
 145};
 146
 147static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
 148static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
 149static inline unsigned dx_get_hash(struct dx_entry *entry);
 150static void dx_set_hash(struct dx_entry *entry, unsigned value);
 151static unsigned dx_get_count(struct dx_entry *entries);
 152static unsigned dx_get_limit(struct dx_entry *entries);
 153static void dx_set_count(struct dx_entry *entries, unsigned value);
 154static void dx_set_limit(struct dx_entry *entries, unsigned value);
 155static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
 156static unsigned dx_node_limit(struct inode *dir);
 157static struct dx_frame *dx_probe(const struct qstr *d_name,
 158                                 struct inode *dir,
 159                                 struct dx_hash_info *hinfo,
 160                                 struct dx_frame *frame,
 161                                 int *err);
 162static void dx_release(struct dx_frame *frames);
 163static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 164                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
 165static void dx_sort_map(struct dx_map_entry *map, unsigned count);
 166static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
 167                struct dx_map_entry *offsets, int count, unsigned blocksize);
 168static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
 169static void dx_insert_block(struct dx_frame *frame,
 170                                        u32 hash, ext4_lblk_t block);
 171static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 172                                 struct dx_frame *frame,
 173                                 struct dx_frame *frames,
 174                                 __u32 *start_hash);
 175static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 176                const struct qstr *d_name,
 177                struct ext4_dir_entry_2 **res_dir,
 178                int *err);
 179static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 180                             struct inode *inode);
 181
 182unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
 183{
 184        unsigned len = le16_to_cpu(dlen);
 185
 186        if (len == EXT4_MAX_REC_LEN || len == 0)
 187                return blocksize;
 188        return (len & 65532) | ((len & 3) << 16);
 189}
 190  
 191__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
 192{
 193        if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
 194                BUG();
 195        if (len < 65536)
 196                return cpu_to_le16(len);
 197        if (len == blocksize) {
 198                if (blocksize == 65536)
 199                        return cpu_to_le16(EXT4_MAX_REC_LEN);
 200                else 
 201                        return cpu_to_le16(0);
 202        }
 203        return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
 204}
 205
 206/*
 207 * p is at least 6 bytes before the end of page
 208 */
 209static inline struct ext4_dir_entry_2 *
 210ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
 211{
 212        return (struct ext4_dir_entry_2 *)((char *)p +
 213                ext4_rec_len_from_disk(p->rec_len, blocksize));
 214}
 215
 216/*
 217 * Future: use high four bits of block for coalesce-on-delete flags
 218 * Mask them off for now.
 219 */
 220
 221static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
 222{
 223        return le32_to_cpu(entry->block) & 0x00ffffff;
 224}
 225
 226static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
 227{
 228        entry->block = cpu_to_le32(value);
 229}
 230
 231static inline unsigned dx_get_hash(struct dx_entry *entry)
 232{
 233        return le32_to_cpu(entry->hash);
 234}
 235
 236static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
 237{
 238        entry->hash = cpu_to_le32(value);
 239}
 240
 241static inline unsigned dx_get_count(struct dx_entry *entries)
 242{
 243        return le16_to_cpu(((struct dx_countlimit *) entries)->count);
 244}
 245
 246static inline unsigned dx_get_limit(struct dx_entry *entries)
 247{
 248        return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
 249}
 250
 251static inline void dx_set_count(struct dx_entry *entries, unsigned value)
 252{
 253        ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
 254}
 255
 256static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
 257{
 258        ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
 259}
 260
 261static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
 262{
 263        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
 264                EXT4_DIR_REC_LEN(2) - infosize;
 265        return entry_space / sizeof(struct dx_entry);
 266}
 267
 268static inline unsigned dx_node_limit(struct inode *dir)
 269{
 270        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 271        return entry_space / sizeof(struct dx_entry);
 272}
 273
 274/*
 275 * Debug
 276 */
 277#ifdef DX_DEBUG
 278static void dx_show_index(char * label, struct dx_entry *entries)
 279{
 280        int i, n = dx_get_count (entries);
 281        printk(KERN_DEBUG "%s index ", label);
 282        for (i = 0; i < n; i++) {
 283                printk("%x->%lu ", i ? dx_get_hash(entries + i) :
 284                                0, (unsigned long)dx_get_block(entries + i));
 285        }
 286        printk("\n");
 287}
 288
 289struct stats
 290{
 291        unsigned names;
 292        unsigned space;
 293        unsigned bcount;
 294};
 295
 296static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
 297                                 int size, int show_names)
 298{
 299        unsigned names = 0, space = 0;
 300        char *base = (char *) de;
 301        struct dx_hash_info h = *hinfo;
 302
 303        printk("names: ");
 304        while ((char *) de < base + size)
 305        {
 306                if (de->inode)
 307                {
 308                        if (show_names)
 309                        {
 310                                int len = de->name_len;
 311                                char *name = de->name;
 312                                while (len--) printk("%c", *name++);
 313                                ext4fs_dirhash(de->name, de->name_len, &h);
 314                                printk(":%x.%u ", h.hash,
 315                                       ((char *) de - base));
 316                        }
 317                        space += EXT4_DIR_REC_LEN(de->name_len);
 318                        names++;
 319                }
 320                de = ext4_next_entry(de, size);
 321        }
 322        printk("(%i)\n", names);
 323        return (struct stats) { names, space, 1 };
 324}
 325
 326struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 327                             struct dx_entry *entries, int levels)
 328{
 329        unsigned blocksize = dir->i_sb->s_blocksize;
 330        unsigned count = dx_get_count(entries), names = 0, space = 0, i;
 331        unsigned bcount = 0;
 332        struct buffer_head *bh;
 333        int err;
 334        printk("%i indexed blocks...\n", count);
 335        for (i = 0; i < count; i++, entries++)
 336        {
 337                ext4_lblk_t block = dx_get_block(entries);
 338                ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
 339                u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
 340                struct stats stats;
 341                printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
 342                if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
 343                stats = levels?
 344                   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
 345                   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
 346                names += stats.names;
 347                space += stats.space;
 348                bcount += stats.bcount;
 349                brelse(bh);
 350        }
 351        if (bcount)
 352                printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 
 353                       levels ? "" : "   ", names, space/bcount,
 354                       (space/bcount)*100/blocksize);
 355        return (struct stats) { names, space, bcount};
 356}
 357#endif /* DX_DEBUG */
 358
 359/*
 360 * Probe for a directory leaf block to search.
 361 *
 362 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 363 * error in the directory index, and the caller should fall back to
 364 * searching the directory normally.  The callers of dx_probe **MUST**
 365 * check for this error code, and make sure it never gets reflected
 366 * back to userspace.
 367 */
 368static struct dx_frame *
 369dx_probe(const struct qstr *d_name, struct inode *dir,
 370         struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
 371{
 372        unsigned count, indirect;
 373        struct dx_entry *at, *entries, *p, *q, *m;
 374        struct dx_root *root;
 375        struct buffer_head *bh;
 376        struct dx_frame *frame = frame_in;
 377        u32 hash;
 378
 379        frame->bh = NULL;
 380        if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
 381                goto fail;
 382        root = (struct dx_root *) bh->b_data;
 383        if (root->info.hash_version != DX_HASH_TEA &&
 384            root->info.hash_version != DX_HASH_HALF_MD4 &&
 385            root->info.hash_version != DX_HASH_LEGACY) {
 386                ext4_warning(dir->i_sb, __func__,
 387                             "Unrecognised inode hash code %d",
 388                             root->info.hash_version);
 389                brelse(bh);
 390                *err = ERR_BAD_DX_DIR;
 391                goto fail;
 392        }
 393        hinfo->hash_version = root->info.hash_version;
 394        if (hinfo->hash_version <= DX_HASH_TEA)
 395                hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 396        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 397        if (d_name)
 398                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
 399        hash = hinfo->hash;
 400
 401        if (root->info.unused_flags & 1) {
 402                ext4_warning(dir->i_sb, __func__,
 403                             "Unimplemented inode hash flags: %#06x",
 404                             root->info.unused_flags);
 405                brelse(bh);
 406                *err = ERR_BAD_DX_DIR;
 407                goto fail;
 408        }
 409
 410        if ((indirect = root->info.indirect_levels) > 1) {
 411                ext4_warning(dir->i_sb, __func__,
 412                             "Unimplemented inode hash depth: %#06x",
 413                             root->info.indirect_levels);
 414                brelse(bh);
 415                *err = ERR_BAD_DX_DIR;
 416                goto fail;
 417        }
 418
 419        entries = (struct dx_entry *) (((char *)&root->info) +
 420                                       root->info.info_length);
 421
 422        if (dx_get_limit(entries) != dx_root_limit(dir,
 423                                                   root->info.info_length)) {
 424                ext4_warning(dir->i_sb, __func__,
 425                             "dx entry: limit != root limit");
 426                brelse(bh);
 427                *err = ERR_BAD_DX_DIR;
 428                goto fail;
 429        }
 430
 431        dxtrace(printk("Look up %x", hash));
 432        while (1)
 433        {
 434                count = dx_get_count(entries);
 435                if (!count || count > dx_get_limit(entries)) {
 436                        ext4_warning(dir->i_sb, __func__,
 437                                     "dx entry: no count or count > limit");
 438                        brelse(bh);
 439                        *err = ERR_BAD_DX_DIR;
 440                        goto fail2;
 441                }
 442
 443                p = entries + 1;
 444                q = entries + count - 1;
 445                while (p <= q)
 446                {
 447                        m = p + (q - p)/2;
 448                        dxtrace(printk("."));
 449                        if (dx_get_hash(m) > hash)
 450                                q = m - 1;
 451                        else
 452                                p = m + 1;
 453                }
 454
 455                if (0) // linear search cross check
 456                {
 457                        unsigned n = count - 1;
 458                        at = entries;
 459                        while (n--)
 460                        {
 461                                dxtrace(printk(","));
 462                                if (dx_get_hash(++at) > hash)
 463                                {
 464                                        at--;
 465                                        break;
 466                                }
 467                        }
 468                        assert (at == p - 1);
 469                }
 470
 471                at = p - 1;
 472                dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
 473                frame->bh = bh;
 474                frame->entries = entries;
 475                frame->at = at;
 476                if (!indirect--) return frame;
 477                if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
 478                        goto fail2;
 479                at = entries = ((struct dx_node *) bh->b_data)->entries;
 480                if (dx_get_limit(entries) != dx_node_limit (dir)) {
 481                        ext4_warning(dir->i_sb, __func__,
 482                                     "dx entry: limit != node limit");
 483                        brelse(bh);
 484                        *err = ERR_BAD_DX_DIR;
 485                        goto fail2;
 486                }
 487                frame++;
 488                frame->bh = NULL;
 489        }
 490fail2:
 491        while (frame >= frame_in) {
 492                brelse(frame->bh);
 493                frame--;
 494        }
 495fail:
 496        if (*err == ERR_BAD_DX_DIR)
 497                ext4_warning(dir->i_sb, __func__,
 498                             "Corrupt dir inode %ld, running e2fsck is "
 499                             "recommended.", dir->i_ino);
 500        return NULL;
 501}
 502
 503static void dx_release (struct dx_frame *frames)
 504{
 505        if (frames[0].bh == NULL)
 506                return;
 507
 508        if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
 509                brelse(frames[1].bh);
 510        brelse(frames[0].bh);
 511}
 512
 513/*
 514 * This function increments the frame pointer to search the next leaf
 515 * block, and reads in the necessary intervening nodes if the search
 516 * should be necessary.  Whether or not the search is necessary is
 517 * controlled by the hash parameter.  If the hash value is even, then
 518 * the search is only continued if the next block starts with that
 519 * hash value.  This is used if we are searching for a specific file.
 520 *
 521 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 522 *
 523 * This function returns 1 if the caller should continue to search,
 524 * or 0 if it should not.  If there is an error reading one of the
 525 * index blocks, it will a negative error code.
 526 *
 527 * If start_hash is non-null, it will be filled in with the starting
 528 * hash of the next page.
 529 */
 530static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 531                                 struct dx_frame *frame,
 532                                 struct dx_frame *frames,
 533                                 __u32 *start_hash)
 534{
 535        struct dx_frame *p;
 536        struct buffer_head *bh;
 537        int err, num_frames = 0;
 538        __u32 bhash;
 539
 540        p = frame;
 541        /*
 542         * Find the next leaf page by incrementing the frame pointer.
 543         * If we run out of entries in the interior node, loop around and
 544         * increment pointer in the parent node.  When we break out of
 545         * this loop, num_frames indicates the number of interior
 546         * nodes need to be read.
 547         */
 548        while (1) {
 549                if (++(p->at) < p->entries + dx_get_count(p->entries))
 550                        break;
 551                if (p == frames)
 552                        return 0;
 553                num_frames++;
 554                p--;
 555        }
 556
 557        /*
 558         * If the hash is 1, then continue only if the next page has a
 559         * continuation hash of any value.  This is used for readdir
 560         * handling.  Otherwise, check to see if the hash matches the
 561         * desired contiuation hash.  If it doesn't, return since
 562         * there's no point to read in the successive index pages.
 563         */
 564        bhash = dx_get_hash(p->at);
 565        if (start_hash)
 566                *start_hash = bhash;
 567        if ((hash & 1) == 0) {
 568                if ((bhash & ~1) != hash)
 569                        return 0;
 570        }
 571        /*
 572         * If the hash is HASH_NB_ALWAYS, we always go to the next
 573         * block so no check is necessary
 574         */
 575        while (num_frames--) {
 576                if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
 577                                      0, &err)))
 578                        return err; /* Failure */
 579                p++;
 580                brelse(p->bh);
 581                p->bh = bh;
 582                p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
 583        }
 584        return 1;
 585}
 586
 587
 588/*
 589 * This function fills a red-black tree with information from a
 590 * directory block.  It returns the number directory entries loaded
 591 * into the tree.  If there is an error it is returned in err.
 592 */
 593static int htree_dirblock_to_tree(struct file *dir_file,
 594                                  struct inode *dir, ext4_lblk_t block,
 595                                  struct dx_hash_info *hinfo,
 596                                  __u32 start_hash, __u32 start_minor_hash)
 597{
 598        struct buffer_head *bh;
 599        struct ext4_dir_entry_2 *de, *top;
 600        int err, count = 0;
 601
 602        dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
 603                                                        (unsigned long)block));
 604        if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
 605                return err;
 606
 607        de = (struct ext4_dir_entry_2 *) bh->b_data;
 608        top = (struct ext4_dir_entry_2 *) ((char *) de +
 609                                           dir->i_sb->s_blocksize -
 610                                           EXT4_DIR_REC_LEN(0));
 611        for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
 612                if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
 613                                        (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
 614                                                +((char *)de - bh->b_data))) {
 615                        /* On error, skip the f_pos to the next block. */
 616                        dir_file->f_pos = (dir_file->f_pos |
 617                                        (dir->i_sb->s_blocksize - 1)) + 1;
 618                        brelse(bh);
 619                        return count;
 620                }
 621                ext4fs_dirhash(de->name, de->name_len, hinfo);
 622                if ((hinfo->hash < start_hash) ||
 623                    ((hinfo->hash == start_hash) &&
 624                     (hinfo->minor_hash < start_minor_hash)))
 625                        continue;
 626                if (de->inode == 0)
 627                        continue;
 628                if ((err = ext4_htree_store_dirent(dir_file,
 629                                   hinfo->hash, hinfo->minor_hash, de)) != 0) {
 630                        brelse(bh);
 631                        return err;
 632                }
 633                count++;
 634        }
 635        brelse(bh);
 636        return count;
 637}
 638
 639
 640/*
 641 * This function fills a red-black tree with information from a
 642 * directory.  We start scanning the directory in hash order, starting
 643 * at start_hash and start_minor_hash.
 644 *
 645 * This function returns the number of entries inserted into the tree,
 646 * or a negative error code.
 647 */
 648int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 649                         __u32 start_minor_hash, __u32 *next_hash)
 650{
 651        struct dx_hash_info hinfo;
 652        struct ext4_dir_entry_2 *de;
 653        struct dx_frame frames[2], *frame;
 654        struct inode *dir;
 655        ext4_lblk_t block;
 656        int count = 0;
 657        int ret, err;
 658        __u32 hashval;
 659
 660        dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 
 661                       start_hash, start_minor_hash));
 662        dir = dir_file->f_path.dentry->d_inode;
 663        if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
 664                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 665                if (hinfo.hash_version <= DX_HASH_TEA)
 666                        hinfo.hash_version +=
 667                                EXT4_SB(dir->i_sb)->s_hash_unsigned;
 668                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 669                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 670                                               start_hash, start_minor_hash);
 671                *next_hash = ~0;
 672                return count;
 673        }
 674        hinfo.hash = start_hash;
 675        hinfo.minor_hash = 0;
 676        frame = dx_probe(NULL, dir, &hinfo, frames, &err);
 677        if (!frame)
 678                return err;
 679
 680        /* Add '.' and '..' from the htree header */
 681        if (!start_hash && !start_minor_hash) {
 682                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
 683                if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
 684                        goto errout;
 685                count++;
 686        }
 687        if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
 688                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
 689                de = ext4_next_entry(de, dir->i_sb->s_blocksize);
 690                if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
 691                        goto errout;
 692                count++;
 693        }
 694
 695        while (1) {
 696                block = dx_get_block(frame->at);
 697                ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
 698                                             start_hash, start_minor_hash);
 699                if (ret < 0) {
 700                        err = ret;
 701                        goto errout;
 702                }
 703                count += ret;
 704                hashval = ~0;
 705                ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
 706                                            frame, frames, &hashval);
 707                *next_hash = hashval;
 708                if (ret < 0) {
 709                        err = ret;
 710                        goto errout;
 711                }
 712                /*
 713                 * Stop if:  (a) there are no more entries, or
 714                 * (b) we have inserted at least one entry and the
 715                 * next hash value is not a continuation
 716                 */
 717                if ((ret == 0) ||
 718                    (count && ((hashval & 1) == 0)))
 719                        break;
 720        }
 721        dx_release(frames);
 722        dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
 723                       "next hash: %x\n", count, *next_hash));
 724        return count;
 725errout:
 726        dx_release(frames);
 727        return (err);
 728}
 729
 730
 731/*
 732 * Directory block splitting, compacting
 733 */
 734
 735/*
 736 * Create map of hash values, offsets, and sizes, stored at end of block.
 737 * Returns number of entries mapped.
 738 */
 739static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 740                       struct dx_hash_info *hinfo,
 741                       struct dx_map_entry *map_tail)
 742{
 743        int count = 0;
 744        char *base = (char *) de;
 745        struct dx_hash_info h = *hinfo;
 746
 747        while ((char *) de < base + blocksize) {
 748                if (de->name_len && de->inode) {
 749                        ext4fs_dirhash(de->name, de->name_len, &h);
 750                        map_tail--;
 751                        map_tail->hash = h.hash;
 752                        map_tail->offs = ((char *) de - base)>>2;
 753                        map_tail->size = le16_to_cpu(de->rec_len);
 754                        count++;
 755                        cond_resched();
 756                }
 757                /* XXX: do we need to check rec_len == 0 case? -Chris */
 758                de = ext4_next_entry(de, blocksize);
 759        }
 760        return count;
 761}
 762
 763/* Sort map by hash value */
 764static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 765{
 766        struct dx_map_entry *p, *q, *top = map + count - 1;
 767        int more;
 768        /* Combsort until bubble sort doesn't suck */
 769        while (count > 2) {
 770                count = count*10/13;
 771                if (count - 9 < 2) /* 9, 10 -> 11 */
 772                        count = 11;
 773                for (p = top, q = p - count; q >= map; p--, q--)
 774                        if (p->hash < q->hash)
 775                                swap(*p, *q);
 776        }
 777        /* Garden variety bubble sort */
 778        do {
 779                more = 0;
 780                q = top;
 781                while (q-- > map) {
 782                        if (q[1].hash >= q[0].hash)
 783                                continue;
 784                        swap(*(q+1), *q);
 785                        more = 1;
 786                }
 787        } while(more);
 788}
 789
 790static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
 791{
 792        struct dx_entry *entries = frame->entries;
 793        struct dx_entry *old = frame->at, *new = old + 1;
 794        int count = dx_get_count(entries);
 795
 796        assert(count < dx_get_limit(entries));
 797        assert(old < entries + count);
 798        memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
 799        dx_set_hash(new, hash);
 800        dx_set_block(new, block);
 801        dx_set_count(entries, count + 1);
 802}
 803
 804static void ext4_update_dx_flag(struct inode *inode)
 805{
 806        if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 807                                     EXT4_FEATURE_COMPAT_DIR_INDEX))
 808                EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL;
 809}
 810
 811/*
 812 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
 813 *
 814 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
 815 * `de != NULL' is guaranteed by caller.
 816 */
 817static inline int ext4_match (int len, const char * const name,
 818                              struct ext4_dir_entry_2 * de)
 819{
 820        if (len != de->name_len)
 821                return 0;
 822        if (!de->inode)
 823                return 0;
 824        return !memcmp(name, de->name, len);
 825}
 826
 827/*
 828 * Returns 0 if not found, -1 on failure, and 1 on success
 829 */
 830static inline int search_dirblock(struct buffer_head *bh,
 831                                  struct inode *dir,
 832                                  const struct qstr *d_name,
 833                                  unsigned int offset,
 834                                  struct ext4_dir_entry_2 ** res_dir)
 835{
 836        struct ext4_dir_entry_2 * de;
 837        char * dlimit;
 838        int de_len;
 839        const char *name = d_name->name;
 840        int namelen = d_name->len;
 841
 842        de = (struct ext4_dir_entry_2 *) bh->b_data;
 843        dlimit = bh->b_data + dir->i_sb->s_blocksize;
 844        while ((char *) de < dlimit) {
 845                /* this code is executed quadratically often */
 846                /* do minimal checking `by hand' */
 847
 848                if ((char *) de + namelen <= dlimit &&
 849                    ext4_match (namelen, name, de)) {
 850                        /* found a match - just to be sure, do a full check */
 851                        if (!ext4_check_dir_entry("ext4_find_entry",
 852                                                  dir, de, bh, offset))
 853                                return -1;
 854                        *res_dir = de;
 855                        return 1;
 856                }
 857                /* prevent looping on a bad block */
 858                de_len = ext4_rec_len_from_disk(de->rec_len,
 859                                                dir->i_sb->s_blocksize);
 860                if (de_len <= 0)
 861                        return -1;
 862                offset += de_len;
 863                de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
 864        }
 865        return 0;
 866}
 867
 868
 869/*
 870 *      ext4_find_entry()
 871 *
 872 * finds an entry in the specified directory with the wanted name. It
 873 * returns the cache buffer in which the entry was found, and the entry
 874 * itself (as a parameter - res_dir). It does NOT read the inode of the
 875 * entry - you'll have to do that yourself if you want to.
 876 *
 877 * The returned buffer_head has ->b_count elevated.  The caller is expected
 878 * to brelse() it when appropriate.
 879 */
 880static struct buffer_head * ext4_find_entry (struct inode *dir,
 881                                        const struct qstr *d_name,
 882                                        struct ext4_dir_entry_2 ** res_dir)
 883{
 884        struct super_block *sb;
 885        struct buffer_head *bh_use[NAMEI_RA_SIZE];
 886        struct buffer_head *bh, *ret = NULL;
 887        ext4_lblk_t start, block, b;
 888        int ra_max = 0;         /* Number of bh's in the readahead
 889                                   buffer, bh_use[] */
 890        int ra_ptr = 0;         /* Current index into readahead
 891                                   buffer */
 892        int num = 0;
 893        ext4_lblk_t  nblocks;
 894        int i, err;
 895        int namelen;
 896
 897        *res_dir = NULL;
 898        sb = dir->i_sb;
 899        namelen = d_name->len;
 900        if (namelen > EXT4_NAME_LEN)
 901                return NULL;
 902        if (is_dx(dir)) {
 903                bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
 904                /*
 905                 * On success, or if the error was file not found,
 906                 * return.  Otherwise, fall back to doing a search the
 907                 * old fashioned way.
 908                 */
 909                if (bh || (err != ERR_BAD_DX_DIR))
 910                        return bh;
 911                dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
 912                               "falling back\n"));
 913        }
 914        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 915        start = EXT4_I(dir)->i_dir_start_lookup;
 916        if (start >= nblocks)
 917                start = 0;
 918        block = start;
 919restart:
 920        do {
 921                /*
 922                 * We deal with the read-ahead logic here.
 923                 */
 924                if (ra_ptr >= ra_max) {
 925                        /* Refill the readahead buffer */
 926                        ra_ptr = 0;
 927                        b = block;
 928                        for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
 929                                /*
 930                                 * Terminate if we reach the end of the
 931                                 * directory and must wrap, or if our
 932                                 * search has finished at this block.
 933                                 */
 934                                if (b >= nblocks || (num && block == start)) {
 935                                        bh_use[ra_max] = NULL;
 936                                        break;
 937                                }
 938                                num++;
 939                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
 940                                bh_use[ra_max] = bh;
 941                                if (bh)
 942                                        ll_rw_block(READ_META, 1, &bh);
 943                        }
 944                }
 945                if ((bh = bh_use[ra_ptr++]) == NULL)
 946                        goto next;
 947                wait_on_buffer(bh);
 948                if (!buffer_uptodate(bh)) {
 949                        /* read error, skip block & hope for the best */
 950                        ext4_error(sb, __func__, "reading directory #%lu "
 951                                   "offset %lu", dir->i_ino,
 952                                   (unsigned long)block);
 953                        brelse(bh);
 954                        goto next;
 955                }
 956                i = search_dirblock(bh, dir, d_name,
 957                            block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
 958                if (i == 1) {
 959                        EXT4_I(dir)->i_dir_start_lookup = block;
 960                        ret = bh;
 961                        goto cleanup_and_exit;
 962                } else {
 963                        brelse(bh);
 964                        if (i < 0)
 965                                goto cleanup_and_exit;
 966                }
 967        next:
 968                if (++block >= nblocks)
 969                        block = 0;
 970        } while (block != start);
 971
 972        /*
 973         * If the directory has grown while we were searching, then
 974         * search the last part of the directory before giving up.
 975         */
 976        block = nblocks;
 977        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 978        if (block < nblocks) {
 979                start = 0;
 980                goto restart;
 981        }
 982
 983cleanup_and_exit:
 984        /* Clean up the read-ahead blocks */
 985        for (; ra_ptr < ra_max; ra_ptr++)
 986                brelse(bh_use[ra_ptr]);
 987        return ret;
 988}
 989
 990static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
 991                       struct ext4_dir_entry_2 **res_dir, int *err)
 992{
 993        struct super_block * sb;
 994        struct dx_hash_info     hinfo;
 995        u32 hash;
 996        struct dx_frame frames[2], *frame;
 997        struct ext4_dir_entry_2 *de, *top;
 998        struct buffer_head *bh;
 999        ext4_lblk_t block;
1000        int retval;
1001        int namelen = d_name->len;
1002        const u8 *name = d_name->name;
1003
1004        sb = dir->i_sb;
1005        /* NFS may look up ".." - look at dx_root directory block */
1006        if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
1007                if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
1008                        return NULL;
1009        } else {
1010                frame = frames;
1011                frame->bh = NULL;                       /* for dx_release() */
1012                frame->at = (struct dx_entry *)frames;  /* hack for zero entry*/
1013                dx_set_block(frame->at, 0);             /* dx_root block is 0 */
1014        }
1015        hash = hinfo.hash;
1016        do {
1017                block = dx_get_block(frame->at);
1018                if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
1019                        goto errout;
1020                de = (struct ext4_dir_entry_2 *) bh->b_data;
1021                top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
1022                                       EXT4_DIR_REC_LEN(0));
1023                for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
1024                        int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
1025                                  + ((char *) de - bh->b_data);
1026
1027                        if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
1028                                brelse(bh);
1029                                *err = ERR_BAD_DX_DIR;
1030                                goto errout;
1031                        }
1032
1033                        if (ext4_match(namelen, name, de)) {
1034                                *res_dir = de;
1035                                dx_release(frames);
1036                                return bh;
1037                        }
1038                }
1039                brelse(bh);
1040                /* Check to see if we should continue to search */
1041                retval = ext4_htree_next_block(dir, hash, frame,
1042                                               frames, NULL);
1043                if (retval < 0) {
1044                        ext4_warning(sb, __func__,
1045                             "error reading index page in directory #%lu",
1046                             dir->i_ino);
1047                        *err = retval;
1048                        goto errout;
1049                }
1050        } while (retval == 1);
1051
1052        *err = -ENOENT;
1053errout:
1054        dxtrace(printk(KERN_DEBUG "%s not found\n", name));
1055        dx_release (frames);
1056        return NULL;
1057}
1058
1059static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1060{
1061        struct inode *inode;
1062        struct ext4_dir_entry_2 *de;
1063        struct buffer_head *bh;
1064
1065        if (dentry->d_name.len > EXT4_NAME_LEN)
1066                return ERR_PTR(-ENAMETOOLONG);
1067
1068        bh = ext4_find_entry(dir, &dentry->d_name, &de);
1069        inode = NULL;
1070        if (bh) {
1071                __u32 ino = le32_to_cpu(de->inode);
1072                brelse(bh);
1073                if (!ext4_valid_inum(dir->i_sb, ino)) {
1074                        ext4_error(dir->i_sb, "ext4_lookup",
1075                                   "bad inode number: %u", ino);
1076                        return ERR_PTR(-EIO);
1077                }
1078                inode = ext4_iget(dir->i_sb, ino);
1079                if (unlikely(IS_ERR(inode))) {
1080                        if (PTR_ERR(inode) == -ESTALE) {
1081                                ext4_error(dir->i_sb, __func__,
1082                                                "deleted inode referenced: %u",
1083                                                ino);
1084                                return ERR_PTR(-EIO);
1085                        } else {
1086                                return ERR_CAST(inode);
1087                        }
1088                }
1089        }
1090        return d_splice_alias(inode, dentry);
1091}
1092
1093
1094struct dentry *ext4_get_parent(struct dentry *child)
1095{
1096        __u32 ino;
1097        struct inode *inode;
1098        static const struct qstr dotdot = {
1099                .name = "..",
1100                .len = 2,
1101        };
1102        struct ext4_dir_entry_2 * de;
1103        struct buffer_head *bh;
1104
1105        bh = ext4_find_entry(child->d_inode, &dotdot, &de);
1106        inode = NULL;
1107        if (!bh)
1108                return ERR_PTR(-ENOENT);
1109        ino = le32_to_cpu(de->inode);
1110        brelse(bh);
1111
1112        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1113                ext4_error(child->d_inode->i_sb, "ext4_get_parent",
1114                           "bad inode number: %u", ino);
1115                return ERR_PTR(-EIO);
1116        }
1117
1118        return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1119}
1120
1121#define S_SHIFT 12
1122static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
1123        [S_IFREG >> S_SHIFT]    = EXT4_FT_REG_FILE,
1124        [S_IFDIR >> S_SHIFT]    = EXT4_FT_DIR,
1125        [S_IFCHR >> S_SHIFT]    = EXT4_FT_CHRDEV,
1126        [S_IFBLK >> S_SHIFT]    = EXT4_FT_BLKDEV,
1127        [S_IFIFO >> S_SHIFT]    = EXT4_FT_FIFO,
1128        [S_IFSOCK >> S_SHIFT]   = EXT4_FT_SOCK,
1129        [S_IFLNK >> S_SHIFT]    = EXT4_FT_SYMLINK,
1130};
1131
1132static inline void ext4_set_de_type(struct super_block *sb,
1133                                struct ext4_dir_entry_2 *de,
1134                                umode_t mode) {
1135        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
1136                de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1137}
1138
1139/*
1140 * Move count entries from end of map between two memory locations.
1141 * Returns pointer to last entry moved.
1142 */
1143static struct ext4_dir_entry_2 *
1144dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1145                unsigned blocksize)
1146{
1147        unsigned rec_len = 0;
1148
1149        while (count--) {
1150                struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
1151                                                (from + (map->offs<<2));
1152                rec_len = EXT4_DIR_REC_LEN(de->name_len);
1153                memcpy (to, de, rec_len);
1154                ((struct ext4_dir_entry_2 *) to)->rec_len =
1155                                ext4_rec_len_to_disk(rec_len, blocksize);
1156                de->inode = 0;
1157                map++;
1158                to += rec_len;
1159        }
1160        return (struct ext4_dir_entry_2 *) (to - rec_len);
1161}
1162
1163/*
1164 * Compact each dir entry in the range to the minimal rec_len.
1165 * Returns pointer to last entry in range.
1166 */
1167static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1168{
1169        struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1170        unsigned rec_len = 0;
1171
1172        prev = to = de;
1173        while ((char*)de < base + blocksize) {
1174                next = ext4_next_entry(de, blocksize);
1175                if (de->inode && de->name_len) {
1176                        rec_len = EXT4_DIR_REC_LEN(de->name_len);
1177                        if (de > to)
1178                                memmove(to, de, rec_len);
1179                        to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1180                        prev = to;
1181                        to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1182                }
1183                de = next;
1184        }
1185        return prev;
1186}
1187
1188/*
1189 * Split a full leaf block to make room for a new dir entry.
1190 * Allocate a new block, and move entries so that they are approx. equally full.
1191 * Returns pointer to de in block into which the new entry will be inserted.
1192 */
1193static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1194                        struct buffer_head **bh,struct dx_frame *frame,
1195                        struct dx_hash_info *hinfo, int *error)
1196{
1197        unsigned blocksize = dir->i_sb->s_blocksize;
1198        unsigned count, continued;
1199        struct buffer_head *bh2;
1200        ext4_lblk_t newblock;
1201        u32 hash2;
1202        struct dx_map_entry *map;
1203        char *data1 = (*bh)->b_data, *data2;
1204        unsigned split, move, size;
1205        struct ext4_dir_entry_2 *de = NULL, *de2;
1206        int     err = 0, i;
1207
1208        bh2 = ext4_append (handle, dir, &newblock, &err);
1209        if (!(bh2)) {
1210                brelse(*bh);
1211                *bh = NULL;
1212                goto errout;
1213        }
1214
1215        BUFFER_TRACE(*bh, "get_write_access");
1216        err = ext4_journal_get_write_access(handle, *bh);
1217        if (err)
1218                goto journal_error;
1219
1220        BUFFER_TRACE(frame->bh, "get_write_access");
1221        err = ext4_journal_get_write_access(handle, frame->bh);
1222        if (err)
1223                goto journal_error;
1224
1225        data2 = bh2->b_data;
1226
1227        /* create map in the end of data2 block */
1228        map = (struct dx_map_entry *) (data2 + blocksize);
1229        count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1230                             blocksize, hinfo, map);
1231        map -= count;
1232        dx_sort_map(map, count);
1233        /* Split the existing block in the middle, size-wise */
1234        size = 0;
1235        move = 0;
1236        for (i = count-1; i >= 0; i--) {
1237                /* is more than half of this entry in 2nd half of the block? */
1238                if (size + map[i].size/2 > blocksize/2)
1239                        break;
1240                size += map[i].size;
1241                move++;
1242        }
1243        /* map index at which we will split */
1244        split = count - move;
1245        hash2 = map[split].hash;
1246        continued = hash2 == map[split - 1].hash;
1247        dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
1248                        (unsigned long)dx_get_block(frame->at),
1249                                        hash2, split, count-split));
1250
1251        /* Fancy dance to stay within two buffers */
1252        de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1253        de = dx_pack_dirents(data1, blocksize);
1254        de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1255                                           blocksize);
1256        de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
1257                                            blocksize);
1258        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1259        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1260
1261        /* Which block gets the new entry? */
1262        if (hinfo->hash >= hash2)
1263        {
1264                swap(*bh, bh2);
1265                de = de2;
1266        }
1267        dx_insert_block(frame, hash2 + continued, newblock);
1268        err = ext4_handle_dirty_metadata(handle, dir, bh2);
1269        if (err)
1270                goto journal_error;
1271        err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
1272        if (err)
1273                goto journal_error;
1274        brelse(bh2);
1275        dxtrace(dx_show_index("frame", frame->entries));
1276        return de;
1277
1278journal_error:
1279        brelse(*bh);
1280        brelse(bh2);
1281        *bh = NULL;
1282        ext4_std_error(dir->i_sb, err);
1283errout:
1284        *error = err;
1285        return NULL;
1286}
1287
1288/*
1289 * Add a new entry into a directory (leaf) block.  If de is non-NULL,
1290 * it points to a directory entry which is guaranteed to be large
1291 * enough for new directory entry.  If de is NULL, then
1292 * add_dirent_to_buf will attempt search the directory block for
1293 * space.  It will return -ENOSPC if no space is available, and -EIO
1294 * and -EEXIST if directory entry already exists.
1295 *
1296 * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
1297 * all other cases bh is released.
1298 */
1299static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1300                             struct inode *inode, struct ext4_dir_entry_2 *de,
1301                             struct buffer_head *bh)
1302{
1303        struct inode    *dir = dentry->d_parent->d_inode;
1304        const char      *name = dentry->d_name.name;
1305        int             namelen = dentry->d_name.len;
1306        unsigned int    offset = 0;
1307        unsigned int    blocksize = dir->i_sb->s_blocksize;
1308        unsigned short  reclen;
1309        int             nlen, rlen, err;
1310        char            *top;
1311
1312        reclen = EXT4_DIR_REC_LEN(namelen);
1313        if (!de) {
1314                de = (struct ext4_dir_entry_2 *)bh->b_data;
1315                top = bh->b_data + blocksize - reclen;
1316                while ((char *) de <= top) {
1317                        if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1318                                                  bh, offset)) {
1319                                brelse(bh);
1320                                return -EIO;
1321                        }
1322                        if (ext4_match(namelen, name, de)) {
1323                                brelse(bh);
1324                                return -EEXIST;
1325                        }
1326                        nlen = EXT4_DIR_REC_LEN(de->name_len);
1327                        rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1328                        if ((de->inode? rlen - nlen: rlen) >= reclen)
1329                                break;
1330                        de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1331                        offset += rlen;
1332                }
1333                if ((char *) de > top)
1334                        return -ENOSPC;
1335        }
1336        BUFFER_TRACE(bh, "get_write_access");
1337        err = ext4_journal_get_write_access(handle, bh);
1338        if (err) {
1339                ext4_std_error(dir->i_sb, err);
1340                brelse(bh);
1341                return err;
1342        }
1343
1344        /* By now the buffer is marked for journaling */
1345        nlen = EXT4_DIR_REC_LEN(de->name_len);
1346        rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1347        if (de->inode) {
1348                struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
1349                de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
1350                de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
1351                de = de1;
1352        }
1353        de->file_type = EXT4_FT_UNKNOWN;
1354        if (inode) {
1355                de->inode = cpu_to_le32(inode->i_ino);
1356                ext4_set_de_type(dir->i_sb, de, inode->i_mode);
1357        } else
1358                de->inode = 0;
1359        de->name_len = namelen;
1360        memcpy(de->name, name, namelen);
1361        /*
1362         * XXX shouldn't update any times until successful
1363         * completion of syscall, but too many callers depend
1364         * on this.
1365         *
1366         * XXX similarly, too many callers depend on
1367         * ext4_new_inode() setting the times, but error
1368         * recovery deletes the inode, so the worst that can
1369         * happen is that the times are slightly out of date
1370         * and/or different from the directory change time.
1371         */
1372        dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1373        ext4_update_dx_flag(dir);
1374        dir->i_version++;
1375        ext4_mark_inode_dirty(handle, dir);
1376        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1377        err = ext4_handle_dirty_metadata(handle, dir, bh);
1378        if (err)
1379                ext4_std_error(dir->i_sb, err);
1380        brelse(bh);
1381        return 0;
1382}
1383
1384/*
1385 * This converts a one block unindexed directory to a 3 block indexed
1386 * directory, and adds the dentry to the indexed directory.
1387 */
1388static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1389                            struct inode *inode, struct buffer_head *bh)
1390{
1391        struct inode    *dir = dentry->d_parent->d_inode;
1392        const char      *name = dentry->d_name.name;
1393        int             namelen = dentry->d_name.len;
1394        struct buffer_head *bh2;
1395        struct dx_root  *root;
1396        struct dx_frame frames[2], *frame;
1397        struct dx_entry *entries;
1398        struct ext4_dir_entry_2 *de, *de2;
1399        char            *data1, *top;
1400        unsigned        len;
1401        int             retval;
1402        unsigned        blocksize;
1403        struct dx_hash_info hinfo;
1404        ext4_lblk_t  block;
1405        struct fake_dirent *fde;
1406
1407        blocksize =  dir->i_sb->s_blocksize;
1408        dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1409        retval = ext4_journal_get_write_access(handle, bh);
1410        if (retval) {
1411                ext4_std_error(dir->i_sb, retval);
1412                brelse(bh);
1413                return retval;
1414        }
1415        root = (struct dx_root *) bh->b_data;
1416
1417        /* The 0th block becomes the root, move the dirents out */
1418        fde = &root->dotdot;
1419        de = (struct ext4_dir_entry_2 *)((char *)fde +
1420                ext4_rec_len_from_disk(fde->rec_len, blocksize));
1421        if ((char *) de >= (((char *) root) + blocksize)) {
1422                ext4_error(dir->i_sb, __func__,
1423                           "invalid rec_len for '..' in inode %lu",
1424                           dir->i_ino);
1425                brelse(bh);
1426                return -EIO;
1427        }
1428        len = ((char *) root) + blocksize - (char *) de;
1429
1430        /* Allocate new block for the 0th block's dirents */
1431        bh2 = ext4_append(handle, dir, &block, &retval);
1432        if (!(bh2)) {
1433                brelse(bh);
1434                return retval;
1435        }
1436        EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
1437        data1 = bh2->b_data;
1438
1439        memcpy (data1, de, len);
1440        de = (struct ext4_dir_entry_2 *) data1;
1441        top = data1 + len;
1442        while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1443                de = de2;
1444        de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1445                                           blocksize);
1446        /* Initialize the root; the dot dirents already exist */
1447        de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1448        de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1449                                           blocksize);
1450        memset (&root->info, 0, sizeof(root->info));
1451        root->info.info_length = sizeof(root->info);
1452        root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1453        entries = root->entries;
1454        dx_set_block(entries, 1);
1455        dx_set_count(entries, 1);
1456        dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1457
1458        /* Initialize as for dx_probe */
1459        hinfo.hash_version = root->info.hash_version;
1460        if (hinfo.hash_version <= DX_HASH_TEA)
1461                hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1462        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1463        ext4fs_dirhash(name, namelen, &hinfo);
1464        frame = frames;
1465        frame->entries = entries;
1466        frame->at = entries;
1467        frame->bh = bh;
1468        bh = bh2;
1469        de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1470        dx_release (frames);
1471        if (!(de))
1472                return retval;
1473
1474        return add_dirent_to_buf(handle, dentry, inode, de, bh);
1475}
1476
1477/*
1478 *      ext4_add_entry()
1479 *
1480 * adds a file entry to the specified directory, using the same
1481 * semantics as ext4_find_entry(). It returns NULL if it failed.
1482 *
1483 * NOTE!! The inode part of 'de' is left at 0 - which means you
1484 * may not sleep between calling this and putting something into
1485 * the entry, as someone else might have used it while you slept.
1486 */
1487static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1488                          struct inode *inode)
1489{
1490        struct inode *dir = dentry->d_parent->d_inode;
1491        struct buffer_head *bh;
1492        struct ext4_dir_entry_2 *de;
1493        struct super_block *sb;
1494        int     retval;
1495        int     dx_fallback=0;
1496        unsigned blocksize;
1497        ext4_lblk_t block, blocks;
1498
1499        sb = dir->i_sb;
1500        blocksize = sb->s_blocksize;
1501        if (!dentry->d_name.len)
1502                return -EINVAL;
1503        if (is_dx(dir)) {
1504                retval = ext4_dx_add_entry(handle, dentry, inode);
1505                if (!retval || (retval != ERR_BAD_DX_DIR))
1506                        return retval;
1507                EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL;
1508                dx_fallback++;
1509                ext4_mark_inode_dirty(handle, dir);
1510        }
1511        blocks = dir->i_size >> sb->s_blocksize_bits;
1512        for (block = 0; block < blocks; block++) {
1513                bh = ext4_bread(handle, dir, block, 0, &retval);
1514                if(!bh)
1515                        return retval;
1516                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1517                if (retval != -ENOSPC)
1518                        return retval;
1519
1520                if (blocks == 1 && !dx_fallback &&
1521                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1522                        return make_indexed_dir(handle, dentry, inode, bh);
1523                brelse(bh);
1524        }
1525        bh = ext4_append(handle, dir, &block, &retval);
1526        if (!bh)
1527                return retval;
1528        de = (struct ext4_dir_entry_2 *) bh->b_data;
1529        de->inode = 0;
1530        de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1531        return add_dirent_to_buf(handle, dentry, inode, de, bh);
1532}
1533
1534/*
1535 * Returns 0 for success, or a negative error value
1536 */
1537static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1538                             struct inode *inode)
1539{
1540        struct dx_frame frames[2], *frame;
1541        struct dx_entry *entries, *at;
1542        struct dx_hash_info hinfo;
1543        struct buffer_head *bh;
1544        struct inode *dir = dentry->d_parent->d_inode;
1545        struct super_block *sb = dir->i_sb;
1546        struct ext4_dir_entry_2 *de;
1547        int err;
1548
1549        frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1550        if (!frame)
1551                return err;
1552        entries = frame->entries;
1553        at = frame->at;
1554
1555        if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
1556                goto cleanup;
1557
1558        BUFFER_TRACE(bh, "get_write_access");
1559        err = ext4_journal_get_write_access(handle, bh);
1560        if (err)
1561                goto journal_error;
1562
1563        err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1564        if (err != -ENOSPC) {
1565                bh = NULL;
1566                goto cleanup;
1567        }
1568
1569        /* Block full, should compress but for now just split */
1570        dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1571                       dx_get_count(entries), dx_get_limit(entries)));
1572        /* Need to split index? */
1573        if (dx_get_count(entries) == dx_get_limit(entries)) {
1574                ext4_lblk_t newblock;
1575                unsigned icount = dx_get_count(entries);
1576                int levels = frame - frames;
1577                struct dx_entry *entries2;
1578                struct dx_node *node2;
1579                struct buffer_head *bh2;
1580
1581                if (levels && (dx_get_count(frames->entries) ==
1582                               dx_get_limit(frames->entries))) {
1583                        ext4_warning(sb, __func__,
1584                                     "Directory index full!");
1585                        err = -ENOSPC;
1586                        goto cleanup;
1587                }
1588                bh2 = ext4_append (handle, dir, &newblock, &err);
1589                if (!(bh2))
1590                        goto cleanup;
1591                node2 = (struct dx_node *)(bh2->b_data);
1592                entries2 = node2->entries;
1593                memset(&node2->fake, 0, sizeof(struct fake_dirent));
1594                node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
1595                                                           sb->s_blocksize);
1596                BUFFER_TRACE(frame->bh, "get_write_access");
1597                err = ext4_journal_get_write_access(handle, frame->bh);
1598                if (err)
1599                        goto journal_error;
1600                if (levels) {
1601                        unsigned icount1 = icount/2, icount2 = icount - icount1;
1602                        unsigned hash2 = dx_get_hash(entries + icount1);
1603                        dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
1604                                       icount1, icount2));
1605
1606                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
1607                        err = ext4_journal_get_write_access(handle,
1608                                                             frames[0].bh);
1609                        if (err)
1610                                goto journal_error;
1611
1612                        memcpy((char *) entries2, (char *) (entries + icount1),
1613                               icount2 * sizeof(struct dx_entry));
1614                        dx_set_count(entries, icount1);
1615                        dx_set_count(entries2, icount2);
1616                        dx_set_limit(entries2, dx_node_limit(dir));
1617
1618                        /* Which index block gets the new entry? */
1619                        if (at - entries >= icount1) {
1620                                frame->at = at = at - entries - icount1 + entries2;
1621                                frame->entries = entries = entries2;
1622                                swap(frame->bh, bh2);
1623                        }
1624                        dx_insert_block(frames + 0, hash2, newblock);
1625                        dxtrace(dx_show_index("node", frames[1].entries));
1626                        dxtrace(dx_show_index("node",
1627                               ((struct dx_node *) bh2->b_data)->entries));
1628                        err = ext4_handle_dirty_metadata(handle, inode, bh2);
1629                        if (err)
1630                                goto journal_error;
1631                        brelse (bh2);
1632                } else {
1633                        dxtrace(printk(KERN_DEBUG
1634                                       "Creating second level index...\n"));
1635                        memcpy((char *) entries2, (char *) entries,
1636                               icount * sizeof(struct dx_entry));
1637                        dx_set_limit(entries2, dx_node_limit(dir));
1638
1639                        /* Set up root */
1640                        dx_set_count(entries, 1);
1641                        dx_set_block(entries + 0, newblock);
1642                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
1643
1644                        /* Add new access path frame */
1645                        frame = frames + 1;
1646                        frame->at = at = at - entries + entries2;
1647                        frame->entries = entries = entries2;
1648                        frame->bh = bh2;
1649                        err = ext4_journal_get_write_access(handle,
1650                                                             frame->bh);
1651                        if (err)
1652                                goto journal_error;
1653                }
1654                ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
1655        }
1656        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
1657        if (!de)
1658                goto cleanup;
1659        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1660        bh = NULL;
1661        goto cleanup;
1662
1663journal_error:
1664        ext4_std_error(dir->i_sb, err);
1665cleanup:
1666        if (bh)
1667                brelse(bh);
1668        dx_release(frames);
1669        return err;
1670}
1671
1672/*
1673 * ext4_delete_entry deletes a directory entry by merging it with the
1674 * previous entry
1675 */
1676static int ext4_delete_entry(handle_t *handle,
1677                             struct inode *dir,
1678                             struct ext4_dir_entry_2 *de_del,
1679                             struct buffer_head *bh)
1680{
1681        struct ext4_dir_entry_2 *de, *pde;
1682        unsigned int blocksize = dir->i_sb->s_blocksize;
1683        int i;
1684
1685        i = 0;
1686        pde = NULL;
1687        de = (struct ext4_dir_entry_2 *) bh->b_data;
1688        while (i < bh->b_size) {
1689                if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i))
1690                        return -EIO;
1691                if (de == de_del)  {
1692                        BUFFER_TRACE(bh, "get_write_access");
1693                        ext4_journal_get_write_access(handle, bh);
1694                        if (pde)
1695                                pde->rec_len = ext4_rec_len_to_disk(
1696                                        ext4_rec_len_from_disk(pde->rec_len,
1697                                                               blocksize) +
1698                                        ext4_rec_len_from_disk(de->rec_len,
1699                                                               blocksize),
1700                                        blocksize);
1701                        else
1702                                de->inode = 0;
1703                        dir->i_version++;
1704                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1705                        ext4_handle_dirty_metadata(handle, dir, bh);
1706                        return 0;
1707                }
1708                i += ext4_rec_len_from_disk(de->rec_len, blocksize);
1709                pde = de;
1710                de = ext4_next_entry(de, blocksize);
1711        }
1712        return -ENOENT;
1713}
1714
1715/*
1716 * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
1717 * since this indicates that nlinks count was previously 1.
1718 */
1719static void ext4_inc_count(handle_t *handle, struct inode *inode)
1720{
1721        inc_nlink(inode);
1722        if (is_dx(inode) && inode->i_nlink > 1) {
1723                /* limit is 16-bit i_links_count */
1724                if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
1725                        inode->i_nlink = 1;
1726                        EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
1727                                              EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
1728                }
1729        }
1730}
1731
1732/*
1733 * If a directory had nlink == 1, then we should let it be 1. This indicates
1734 * directory has >EXT4_LINK_MAX subdirs.
1735 */
1736static void ext4_dec_count(handle_t *handle, struct inode *inode)
1737{
1738        drop_nlink(inode);
1739        if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
1740                inc_nlink(inode);
1741}
1742
1743
1744static int ext4_add_nondir(handle_t *handle,
1745                struct dentry *dentry, struct inode *inode)
1746{
1747        int err = ext4_add_entry(handle, dentry, inode);
1748        if (!err) {
1749                ext4_mark_inode_dirty(handle, inode);
1750                d_instantiate(dentry, inode);
1751                unlock_new_inode(inode);
1752                return 0;
1753        }
1754        drop_nlink(inode);
1755        unlock_new_inode(inode);
1756        iput(inode);
1757        return err;
1758}
1759
1760/*
1761 * By the time this is called, we already have created
1762 * the directory cache entry for the new file, but it
1763 * is so far negative - it has no inode.
1764 *
1765 * If the create succeeds, we fill in the inode information
1766 * with d_instantiate().
1767 */
1768static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1769                       struct nameidata *nd)
1770{
1771        handle_t *handle;
1772        struct inode *inode;
1773        int err, retries = 0;
1774
1775retry:
1776        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1777                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1778                                        2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1779        if (IS_ERR(handle))
1780                return PTR_ERR(handle);
1781
1782        if (IS_DIRSYNC(dir))
1783                ext4_handle_sync(handle);
1784
1785        inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786        err = PTR_ERR(inode);
1787        if (!IS_ERR(inode)) {
1788                inode->i_op = &ext4_file_inode_operations;
1789                inode->i_fop = &ext4_file_operations;
1790                ext4_set_aops(inode);
1791                err = ext4_add_nondir(handle, dentry, inode);
1792        }
1793        ext4_journal_stop(handle);
1794        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
1795                goto retry;
1796        return err;
1797}
1798
1799static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1800                      int mode, dev_t rdev)
1801{
1802        handle_t *handle;
1803        struct inode *inode;
1804        int err, retries = 0;
1805
1806        if (!new_valid_dev(rdev))
1807                return -EINVAL;
1808
1809retry:
1810        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1811                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1812                                        2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1813        if (IS_ERR(handle))
1814                return PTR_ERR(handle);
1815
1816        if (IS_DIRSYNC(dir))
1817                ext4_handle_sync(handle);
1818
1819        inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820        err = PTR_ERR(inode);
1821        if (!IS_ERR(inode)) {
1822                init_special_inode(inode, inode->i_mode, rdev);
1823#ifdef CONFIG_EXT4_FS_XATTR
1824                inode->i_op = &ext4_special_inode_operations;
1825#endif
1826                err = ext4_add_nondir(handle, dentry, inode);
1827        }
1828        ext4_journal_stop(handle);
1829        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
1830                goto retry;
1831        return err;
1832}
1833
1834static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1835{
1836        handle_t *handle;
1837        struct inode *inode;
1838        struct buffer_head *dir_block;
1839        struct ext4_dir_entry_2 *de;
1840        unsigned int blocksize = dir->i_sb->s_blocksize;
1841        int err, retries = 0;
1842
1843        if (EXT4_DIR_LINK_MAX(dir))
1844                return -EMLINK;
1845
1846retry:
1847        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1848                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1849                                        2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1850        if (IS_ERR(handle))
1851                return PTR_ERR(handle);
1852
1853        if (IS_DIRSYNC(dir))
1854                ext4_handle_sync(handle);
1855
1856        inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857                               &dentry->d_name, 0);
1858        err = PTR_ERR(inode);
1859        if (IS_ERR(inode))
1860                goto out_stop;
1861
1862        inode->i_op = &ext4_dir_inode_operations;
1863        inode->i_fop = &ext4_dir_operations;
1864        inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1865        dir_block = ext4_bread(handle, inode, 0, 1, &err);
1866        if (!dir_block)
1867                goto out_clear_inode;
1868        BUFFER_TRACE(dir_block, "get_write_access");
1869        ext4_journal_get_write_access(handle, dir_block);
1870        de = (struct ext4_dir_entry_2 *) dir_block->b_data;
1871        de->inode = cpu_to_le32(inode->i_ino);
1872        de->name_len = 1;
1873        de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
1874                                           blocksize);
1875        strcpy(de->name, ".");
1876        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1877        de = ext4_next_entry(de, blocksize);
1878        de->inode = cpu_to_le32(dir->i_ino);
1879        de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
1880                                           blocksize);
1881        de->name_len = 2;
1882        strcpy(de->name, "..");
1883        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1884        inode->i_nlink = 2;
1885        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1886        ext4_handle_dirty_metadata(handle, dir, dir_block);
1887        brelse(dir_block);
1888        ext4_mark_inode_dirty(handle, inode);
1889        err = ext4_add_entry(handle, dentry, inode);
1890        if (err) {
1891out_clear_inode:
1892                clear_nlink(inode);
1893                unlock_new_inode(inode);
1894                ext4_mark_inode_dirty(handle, inode);
1895                iput(inode);
1896                goto out_stop;
1897        }
1898        ext4_inc_count(handle, dir);
1899        ext4_update_dx_flag(dir);
1900        ext4_mark_inode_dirty(handle, dir);
1901        d_instantiate(dentry, inode);
1902        unlock_new_inode(inode);
1903out_stop:
1904        ext4_journal_stop(handle);
1905        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
1906                goto retry;
1907        return err;
1908}
1909
1910/*
1911 * routine to check that the specified directory is empty (for rmdir)
1912 */
1913static int empty_dir(struct inode *inode)
1914{
1915        unsigned int offset;
1916        struct buffer_head *bh;
1917        struct ext4_dir_entry_2 *de, *de1;
1918        struct super_block *sb;
1919        int err = 0;
1920
1921        sb = inode->i_sb;
1922        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1923            !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
1924                if (err)
1925                        ext4_error(inode->i_sb, __func__,
1926                                   "error %d reading directory #%lu offset 0",
1927                                   err, inode->i_ino);
1928                else
1929                        ext4_warning(inode->i_sb, __func__,
1930                                     "bad directory (dir #%lu) - no data block",
1931                                     inode->i_ino);
1932                return 1;
1933        }
1934        de = (struct ext4_dir_entry_2 *) bh->b_data;
1935        de1 = ext4_next_entry(de, sb->s_blocksize);
1936        if (le32_to_cpu(de->inode) != inode->i_ino ||
1937                        !le32_to_cpu(de1->inode) ||
1938                        strcmp(".", de->name) ||
1939                        strcmp("..", de1->name)) {
1940                ext4_warning(inode->i_sb, "empty_dir",
1941                             "bad directory (dir #%lu) - no `.' or `..'",
1942                             inode->i_ino);
1943                brelse(bh);
1944                return 1;
1945        }
1946        offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
1947                 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
1948        de = ext4_next_entry(de1, sb->s_blocksize);
1949        while (offset < inode->i_size) {
1950                if (!bh ||
1951                        (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
1952                        err = 0;
1953                        brelse(bh);
1954                        bh = ext4_bread(NULL, inode,
1955                                offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1956                        if (!bh) {
1957                                if (err)
1958                                        ext4_error(sb, __func__,
1959                                                   "error %d reading directory"
1960                                                   " #%lu offset %u",
1961                                                   err, inode->i_ino, offset);
1962                                offset += sb->s_blocksize;
1963                                continue;
1964                        }
1965                        de = (struct ext4_dir_entry_2 *) bh->b_data;
1966                }
1967                if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) {
1968                        de = (struct ext4_dir_entry_2 *)(bh->b_data +
1969                                                         sb->s_blocksize);
1970                        offset = (offset | (sb->s_blocksize - 1)) + 1;
1971                        continue;
1972                }
1973                if (le32_to_cpu(de->inode)) {
1974                        brelse(bh);
1975                        return 0;
1976                }
1977                offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
1978                de = ext4_next_entry(de, sb->s_blocksize);
1979        }
1980        brelse(bh);
1981        return 1;
1982}
1983
1984/* ext4_orphan_add() links an unlinked or truncated inode into a list of
1985 * such inodes, starting at the superblock, in case we crash before the
1986 * file is closed/deleted, or in case the inode truncate spans multiple
1987 * transactions and the last transaction is not recovered after a crash.
1988 *
1989 * At filesystem recovery time, we walk this list deleting unlinked
1990 * inodes and truncating linked inodes in ext4_orphan_cleanup().
1991 */
1992int ext4_orphan_add(handle_t *handle, struct inode *inode)
1993{
1994        struct super_block *sb = inode->i_sb;
1995        struct ext4_iloc iloc;
1996        int err = 0, rc;
1997
1998        if (!ext4_handle_valid(handle))
1999                return 0;
2000
2001        mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2002        if (!list_empty(&EXT4_I(inode)->i_orphan))
2003                goto out_unlock;
2004
2005        /* Orphan handling is only valid for files with data blocks
2006         * being truncated, or files being unlinked. */
2007
2008        /* @@@ FIXME: Observation from aviro:
2009         * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
2010         * here (on s_orphan_lock), so race with ext4_link() which might bump
2011         * ->i_nlink. For, say it, character device. Not a regular file,
2012         * not a directory, not a symlink and ->i_nlink > 0.
2013         *
2014         * tytso, 4/25/2009: I'm not sure how that could happen;
2015         * shouldn't the fs core protect us from these sort of
2016         * unlink()/link() races?
2017         */
2018        J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2019                  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2020
2021        BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
2022        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
2023        if (err)
2024                goto out_unlock;
2025
2026        err = ext4_reserve_inode_write(handle, inode, &iloc);
2027        if (err)
2028                goto out_unlock;
2029
2030        /* Insert this inode at the head of the on-disk orphan list... */
2031        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2032        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2033        err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
2034        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2035        if (!err)
2036                err = rc;
2037
2038        /* Only add to the head of the in-memory list if all the
2039         * previous operations succeeded.  If the orphan_add is going to
2040         * fail (possibly taking the journal offline), we can't risk
2041         * leaving the inode on the orphan list: stray orphan-list
2042         * entries can cause panics at unmount time.
2043         *
2044         * This is safe: on error we're going to ignore the orphan list
2045         * anyway on the next recovery. */
2046        if (!err)
2047                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2048
2049        jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2050        jbd_debug(4, "orphan inode %lu will point to %d\n",
2051                        inode->i_ino, NEXT_ORPHAN(inode));
2052out_unlock:
2053        mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2054        ext4_std_error(inode->i_sb, err);
2055        return err;
2056}
2057
2058/*
2059 * ext4_orphan_del() removes an unlinked or truncated inode from the list
2060 * of such inodes stored on disk, because it is finally being cleaned up.
2061 */
2062int ext4_orphan_del(handle_t *handle, struct inode *inode)
2063{
2064        struct list_head *prev;
2065        struct ext4_inode_info *ei = EXT4_I(inode);
2066        struct ext4_sb_info *sbi;
2067        __u32 ino_next;
2068        struct ext4_iloc iloc;
2069        int err = 0;
2070
2071        /* ext4_handle_valid() assumes a valid handle_t pointer */
2072        if (handle && !ext4_handle_valid(handle))
2073                return 0;
2074
2075        mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2076        if (list_empty(&ei->i_orphan))
2077                goto out;
2078
2079        ino_next = NEXT_ORPHAN(inode);
2080        prev = ei->i_orphan.prev;
2081        sbi = EXT4_SB(inode->i_sb);
2082
2083        jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2084
2085        list_del_init(&ei->i_orphan);
2086
2087        /* If we're on an error path, we may not have a valid
2088         * transaction handle with which to update the orphan list on
2089         * disk, but we still need to remove the inode from the linked
2090         * list in memory. */
2091        if (sbi->s_journal && !handle)
2092                goto out;
2093
2094        err = ext4_reserve_inode_write(handle, inode, &iloc);
2095        if (err)
2096                goto out_err;
2097
2098        if (prev == &sbi->s_orphan) {
2099                jbd_debug(4, "superblock will point to %u\n", ino_next);
2100                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2101                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2102                if (err)
2103                        goto out_brelse;
2104                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2105                err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
2106        } else {
2107                struct ext4_iloc iloc2;
2108                struct inode *i_prev =
2109                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2110
2111                jbd_debug(4, "orphan inode %lu will point to %u\n",
2112                          i_prev->i_ino, ino_next);
2113                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2114                if (err)
2115                        goto out_brelse;
2116                NEXT_ORPHAN(i_prev) = ino_next;
2117                err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2118        }
2119        if (err)
2120                goto out_brelse;
2121        NEXT_ORPHAN(inode) = 0;
2122        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2123
2124out_err:
2125        ext4_std_error(inode->i_sb, err);
2126out:
2127        mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2128        return err;
2129
2130out_brelse:
2131        brelse(iloc.bh);
2132        goto out_err;
2133}
2134
2135static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2136{
2137        int retval;
2138        struct inode *inode;
2139        struct buffer_head *bh;
2140        struct ext4_dir_entry_2 *de;
2141        handle_t *handle;
2142
2143        /* Initialize quotas before so that eventual writes go in
2144         * separate transaction */
2145        vfs_dq_init(dentry->d_inode);
2146        handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2147        if (IS_ERR(handle))
2148                return PTR_ERR(handle);
2149
2150        retval = -ENOENT;
2151        bh = ext4_find_entry(dir, &dentry->d_name, &de);
2152        if (!bh)
2153                goto end_rmdir;
2154
2155        if (IS_DIRSYNC(dir))
2156                ext4_handle_sync(handle);
2157
2158        inode = dentry->d_inode;
2159
2160        retval = -EIO;
2161        if (le32_to_cpu(de->inode) != inode->i_ino)
2162                goto end_rmdir;
2163
2164        retval = -ENOTEMPTY;
2165        if (!empty_dir(inode))
2166                goto end_rmdir;
2167
2168        retval = ext4_delete_entry(handle, dir, de, bh);
2169        if (retval)
2170                goto end_rmdir;
2171        if (!EXT4_DIR_LINK_EMPTY(inode))
2172                ext4_warning(inode->i_sb, "ext4_rmdir",
2173                             "empty directory has too many links (%d)",
2174                             inode->i_nlink);
2175        inode->i_version++;
2176        clear_nlink(inode);
2177        /* There's no need to set i_disksize: the fact that i_nlink is
2178         * zero will ensure that the right thing happens during any
2179         * recovery. */
2180        inode->i_size = 0;
2181        ext4_orphan_add(handle, inode);
2182        inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
2183        ext4_mark_inode_dirty(handle, inode);
2184        ext4_dec_count(handle, dir);
2185        ext4_update_dx_flag(dir);
2186        ext4_mark_inode_dirty(handle, dir);
2187
2188end_rmdir:
2189        ext4_journal_stop(handle);
2190        brelse(bh);
2191        return retval;
2192}
2193
2194static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2195{
2196        int retval;
2197        struct inode *inode;
2198        struct buffer_head *bh;
2199        struct ext4_dir_entry_2 *de;
2200        handle_t *handle;
2201
2202        /* Initialize quotas before so that eventual writes go
2203         * in separate transaction */
2204        vfs_dq_init(dentry->d_inode);
2205        handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2206        if (IS_ERR(handle))
2207                return PTR_ERR(handle);
2208
2209        if (IS_DIRSYNC(dir))
2210                ext4_handle_sync(handle);
2211
2212        retval = -ENOENT;
2213        bh = ext4_find_entry(dir, &dentry->d_name, &de);
2214        if (!bh)
2215                goto end_unlink;
2216
2217        inode = dentry->d_inode;
2218
2219        retval = -EIO;
2220        if (le32_to_cpu(de->inode) != inode->i_ino)
2221                goto end_unlink;
2222
2223        if (!inode->i_nlink) {
2224                ext4_warning(inode->i_sb, "ext4_unlink",
2225                             "Deleting nonexistent file (%lu), %d",
2226                             inode->i_ino, inode->i_nlink);
2227                inode->i_nlink = 1;
2228        }
2229        retval = ext4_delete_entry(handle, dir, de, bh);
2230        if (retval)
2231                goto end_unlink;
2232        dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2233        ext4_update_dx_flag(dir);
2234        ext4_mark_inode_dirty(handle, dir);
2235        drop_nlink(inode);
2236        if (!inode->i_nlink)
2237                ext4_orphan_add(handle, inode);
2238        inode->i_ctime = ext4_current_time(inode);
2239        ext4_mark_inode_dirty(handle, inode);
2240        retval = 0;
2241
2242end_unlink:
2243        ext4_journal_stop(handle);
2244        brelse(bh);
2245        return retval;
2246}
2247
2248static int ext4_symlink(struct inode *dir,
2249                        struct dentry *dentry, const char *symname)
2250{
2251        handle_t *handle;
2252        struct inode *inode;
2253        int l, err, retries = 0;
2254
2255        l = strlen(symname)+1;
2256        if (l > dir->i_sb->s_blocksize)
2257                return -ENAMETOOLONG;
2258
2259retry:
2260        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2261                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2262                                        2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
2263        if (IS_ERR(handle))
2264                return PTR_ERR(handle);
2265
2266        if (IS_DIRSYNC(dir))
2267                ext4_handle_sync(handle);
2268
2269        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2270                               &dentry->d_name, 0);
2271        err = PTR_ERR(inode);
2272        if (IS_ERR(inode))
2273                goto out_stop;
2274
2275        if (l > sizeof(EXT4_I(inode)->i_data)) {
2276                inode->i_op = &ext4_symlink_inode_operations;
2277                ext4_set_aops(inode);
2278                /*
2279                 * page_symlink() calls into ext4_prepare/commit_write.
2280                 * We have a transaction open.  All is sweetness.  It also sets
2281                 * i_size in generic_commit_write().
2282                 */
2283                err = __page_symlink(inode, symname, l, 1);
2284                if (err) {
2285                        clear_nlink(inode);
2286                        unlock_new_inode(inode);
2287                        ext4_mark_inode_dirty(handle, inode);
2288                        iput(inode);
2289                        goto out_stop;
2290                }
2291        } else {
2292                /* clear the extent format for fast symlink */
2293                EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2294                inode->i_op = &ext4_fast_symlink_inode_operations;
2295                memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2296                inode->i_size = l-1;
2297        }
2298        EXT4_I(inode)->i_disksize = inode->i_size;
2299        err = ext4_add_nondir(handle, dentry, inode);
2300out_stop:
2301        ext4_journal_stop(handle);
2302        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2303                goto retry;
2304        return err;
2305}
2306
2307static int ext4_link(struct dentry *old_dentry,
2308                     struct inode *dir, struct dentry *dentry)
2309{
2310        handle_t *handle;
2311        struct inode *inode = old_dentry->d_inode;
2312        int err, retries = 0;
2313
2314        if (inode->i_nlink >= EXT4_LINK_MAX)
2315                return -EMLINK;
2316
2317        /*
2318         * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
2319         * otherwise has the potential to corrupt the orphan inode list.
2320         */
2321        if (inode->i_nlink == 0)
2322                return -ENOENT;
2323
2324retry:
2325        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2326                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS);
2327        if (IS_ERR(handle))
2328                return PTR_ERR(handle);
2329
2330        if (IS_DIRSYNC(dir))
2331                ext4_handle_sync(handle);
2332
2333        inode->i_ctime = ext4_current_time(inode);
2334        ext4_inc_count(handle, inode);
2335        atomic_inc(&inode->i_count);
2336
2337        err = ext4_add_entry(handle, dentry, inode);
2338        if (!err) {
2339                ext4_mark_inode_dirty(handle, inode);
2340                d_instantiate(dentry, inode);
2341        } else {
2342                drop_nlink(inode);
2343                iput(inode);
2344        }
2345        ext4_journal_stop(handle);
2346        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2347                goto retry;
2348        return err;
2349}
2350
/*
 * PARENT_INO - the ->inode field of the entry that ext4_next_entry() yields
 * for the directory entry at the start of @buffer (@size is handed through to
 * ext4_next_entry() unchanged).  The expansion is an lvalue: ext4_rename()
 * both compares it against the old parent's inode number and assigns the new
 * parent's inode number to it.  The stored value is little-endian, so
 * callers convert with le32_to_cpu()/cpu_to_le32().
 */
#define PARENT_INO(buffer, size)					\
	(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), (size))->inode)
2353
2354/*
2355 * Anybody can rename anything with this: the permission checks are left to the
2356 * higher-level routines.
2357 */
2358static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2359                       struct inode *new_dir, struct dentry *new_dentry)
2360{
2361        handle_t *handle;
2362        struct inode *old_inode, *new_inode;
2363        struct buffer_head *old_bh, *new_bh, *dir_bh;
2364        struct ext4_dir_entry_2 *old_de, *new_de;
2365        int retval, force_da_alloc = 0;
2366
2367        old_bh = new_bh = dir_bh = NULL;
2368
2369        /* Initialize quotas before so that eventual writes go
2370         * in separate transaction */
2371        if (new_dentry->d_inode)
2372                vfs_dq_init(new_dentry->d_inode);
2373        handle = ext4_journal_start(old_dir, 2 *
2374                                        EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2375                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
2376        if (IS_ERR(handle))
2377                return PTR_ERR(handle);
2378
2379        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2380                ext4_handle_sync(handle);
2381
2382        old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
2383        /*
2384         *  Check for inode number is _not_ due to possible IO errors.
2385         *  We might rmdir the source, keep it as pwd of some process
2386         *  and merrily kill the link to whatever was created under the
2387         *  same name. Goodbye sticky bit ;-<
2388         */
2389        old_inode = old_dentry->d_inode;
2390        retval = -ENOENT;
2391        if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
2392                goto end_rename;
2393
2394        new_inode = new_dentry->d_inode;
2395        new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
2396        if (new_bh) {
2397                if (!new_inode) {
2398                        brelse(new_bh);
2399                        new_bh = NULL;
2400                }
2401        }
2402        if (S_ISDIR(old_inode->i_mode)) {
2403                if (new_inode) {
2404                        retval = -ENOTEMPTY;
2405                        if (!empty_dir(new_inode))
2406                                goto end_rename;
2407                }
2408                retval = -EIO;
2409                dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
2410                if (!dir_bh)
2411                        goto end_rename;
2412                if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
2413                                old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
2414                        goto end_rename;
2415                retval = -EMLINK;
2416                if (!new_inode && new_dir != old_dir &&
2417                    EXT4_DIR_LINK_MAX(new_dir))
2418                        goto end_rename;
2419        }
2420        if (!new_bh) {
2421                retval = ext4_add_entry(handle, new_dentry, old_inode);
2422                if (retval)
2423                        goto end_rename;
2424        } else {
2425                BUFFER_TRACE(new_bh, "get write access");
2426                ext4_journal_get_write_access(handle, new_bh);
2427                new_de->inode = cpu_to_le32(old_inode->i_ino);
2428                if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2429                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
2430                        new_de->file_type = old_de->file_type;
2431                new_dir->i_version++;
2432                new_dir->i_ctime = new_dir->i_mtime =
2433                                        ext4_current_time(new_dir);
2434                ext4_mark_inode_dirty(handle, new_dir);
2435                BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
2436                ext4_handle_dirty_metadata(handle, new_dir, new_bh);
2437                brelse(new_bh);
2438                new_bh = NULL;
2439        }
2440
2441        /*
2442         * Like most other Unix systems, set the ctime for inodes on a
2443         * rename.
2444         */
2445        old_inode->i_ctime = ext4_current_time(old_inode);
2446        ext4_mark_inode_dirty(handle, old_inode);
2447
2448        /*
2449         * ok, that's it
2450         */
2451        if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
2452            old_de->name_len != old_dentry->d_name.len ||
2453            strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
2454            (retval = ext4_delete_entry(handle, old_dir,
2455                                        old_de, old_bh)) == -ENOENT) {
2456                /* old_de could have moved from under us during htree split, so
2457                 * make sure that we are deleting the right entry.  We might
2458                 * also be pointing to a stale entry in the unused part of
2459                 * old_bh so just checking inum and the name isn't enough. */
2460                struct buffer_head *old_bh2;
2461                struct ext4_dir_entry_2 *old_de2;
2462
2463                old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
2464                if (old_bh2) {
2465                        retval = ext4_delete_entry(handle, old_dir,
2466                                                   old_de2, old_bh2);
2467                        brelse(old_bh2);
2468                }
2469        }
2470        if (retval) {
2471                ext4_warning(old_dir->i_sb, "ext4_rename",
2472                                "Deleting old file (%lu), %d, error=%d",
2473                                old_dir->i_ino, old_dir->i_nlink, retval);
2474        }
2475
2476        if (new_inode) {
2477                ext4_dec_count(handle, new_inode);
2478                new_inode->i_ctime = ext4_current_time(new_inode);
2479        }
2480        old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
2481        ext4_update_dx_flag(old_dir);
2482        if (dir_bh) {
2483                BUFFER_TRACE(dir_bh, "get_write_access");
2484                ext4_journal_get_write_access(handle, dir_bh);
2485                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2486                                                cpu_to_le32(new_dir->i_ino);
2487                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2488                ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
2489                ext4_dec_count(handle, old_dir);
2490                if (new_inode) {
2491                        /* checked empty_dir above, can't have another parent,
2492                         * ext4_dec_count() won't work for many-linked dirs */
2493                        new_inode->i_nlink = 0;
2494                } else {
2495                        ext4_inc_count(handle, new_dir);
2496                        ext4_update_dx_flag(new_dir);
2497                        ext4_mark_inode_dirty(handle, new_dir);
2498                }
2499        }
2500        ext4_mark_inode_dirty(handle, old_dir);
2501        if (new_inode) {
2502                ext4_mark_inode_dirty(handle, new_inode);
2503                if (!new_inode->i_nlink)
2504                        ext4_orphan_add(handle, new_inode);
2505                if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
2506                        force_da_alloc = 1;
2507        }
2508        retval = 0;
2509
2510end_rename:
2511        brelse(dir_bh);
2512        brelse(old_bh);
2513        brelse(new_bh);
2514        ext4_journal_stop(handle);
2515        if (retval == 0 && force_da_alloc)
2516                ext4_alloc_da_blocks(old_inode);
2517        return retval;
2518}
2519
2520/*
2521 * directories can handle most operations...
2522 */
2523const struct inode_operations ext4_dir_inode_operations = {
2524        .create         = ext4_create,
2525        .lookup         = ext4_lookup,
2526        .link           = ext4_link,
2527        .unlink         = ext4_unlink,
2528        .symlink        = ext4_symlink,
2529        .mkdir          = ext4_mkdir,
2530        .rmdir          = ext4_rmdir,
2531        .mknod          = ext4_mknod,
2532        .rename         = ext4_rename,
2533        .setattr        = ext4_setattr,
2534#ifdef CONFIG_EXT4_FS_XATTR
2535        .setxattr       = generic_setxattr,
2536        .getxattr       = generic_getxattr,
2537        .listxattr      = ext4_listxattr,
2538        .removexattr    = generic_removexattr,
2539#endif
2540        .check_acl      = ext4_check_acl,
2541        .fiemap         = ext4_fiemap,
2542};
2543
2544const struct inode_operations ext4_special_inode_operations = {
2545        .setattr        = ext4_setattr,
2546#ifdef CONFIG_EXT4_FS_XATTR
2547        .setxattr       = generic_setxattr,
2548        .getxattr       = generic_getxattr,
2549        .listxattr      = ext4_listxattr,
2550        .removexattr    = generic_removexattr,
2551#endif
2552        .check_acl      = ext4_check_acl,
2553};
2554