linux/fs/ext4/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/namei.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/namei.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 *  Directory entry file type support and forward compatibility hooks
  18 *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
  19 *  Hash Tree Directory indexing (c)
  20 *      Daniel Phillips, 2001
  21 *  Hash Tree Directory indexing porting
  22 *      Christopher Li, 2002
  23 *  Hash Tree Directory indexing cleanup
  24 *      Theodore Ts'o, 2002
  25 */
  26
  27#include <linux/fs.h>
  28#include <linux/pagemap.h>
  29#include <linux/jbd2.h>
  30#include <linux/time.h>
  31#include <linux/fcntl.h>
  32#include <linux/stat.h>
  33#include <linux/string.h>
  34#include <linux/quotaops.h>
  35#include <linux/buffer_head.h>
  36#include <linux/bio.h>
  37#include "ext4.h"
  38#include "ext4_jbd2.h"
  39
  40#include "xattr.h"
  41#include "acl.h"
  42
  43#include <trace/events/ext4.h>
  44/*
  45 * define how far ahead to read directories while searching them.
  46 */
  47#define NAMEI_RA_CHUNKS  2
  48#define NAMEI_RA_BLOCKS  4
  49#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
  50
  51static struct buffer_head *ext4_append(handle_t *handle,
  52                                        struct inode *inode,
  53                                        ext4_lblk_t *block)
  54{
  55        struct buffer_head *bh;
  56        int err = 0;
  57
  58        if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
  59                     ((inode->i_size >> 10) >=
  60                      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
  61                return ERR_PTR(-ENOSPC);
  62
  63        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
  64
  65        bh = ext4_bread(handle, inode, *block, 1, &err);
  66        if (!bh)
  67                return ERR_PTR(err);
  68        inode->i_size += inode->i_sb->s_blocksize;
  69        EXT4_I(inode)->i_disksize = inode->i_size;
  70        err = ext4_journal_get_write_access(handle, bh);
  71        if (err) {
  72                brelse(bh);
  73                ext4_std_error(inode->i_sb, err);
  74                return ERR_PTR(err);
  75        }
  76        return bh;
  77}
  78
  79static int ext4_dx_csum_verify(struct inode *inode,
  80                               struct ext4_dir_entry *dirent);
  81
  82typedef enum {
  83        EITHER, INDEX, DIRENT
  84} dirblock_type_t;
  85
  86#define ext4_read_dirblock(inode, block, type) \
  87        __ext4_read_dirblock((inode), (block), (type), __LINE__)
  88
  89static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
  90                                              ext4_lblk_t block,
  91                                              dirblock_type_t type,
  92                                              unsigned int line)
  93{
  94        struct buffer_head *bh;
  95        struct ext4_dir_entry *dirent;
  96        int err = 0, is_dx_block = 0;
  97
  98        bh = ext4_bread(NULL, inode, block, 0, &err);
  99        if (!bh) {
 100                if (err == 0) {
 101                        ext4_error_inode(inode, __func__, line, block,
 102                                               "Directory hole found");
 103                        return ERR_PTR(-EIO);
 104                }
 105                __ext4_warning(inode->i_sb, __func__, line,
 106                               "error reading directory block "
 107                               "(ino %lu, block %lu)", inode->i_ino,
 108                               (unsigned long) block);
 109                return ERR_PTR(err);
 110        }
 111        dirent = (struct ext4_dir_entry *) bh->b_data;
 112        /* Determine whether or not we have an index block */
 113        if (is_dx(inode)) {
 114                if (block == 0)
 115                        is_dx_block = 1;
 116                else if (ext4_rec_len_from_disk(dirent->rec_len,
 117                                                inode->i_sb->s_blocksize) ==
 118                         inode->i_sb->s_blocksize)
 119                        is_dx_block = 1;
 120        }
 121        if (!is_dx_block && type == INDEX) {
 122                ext4_error_inode(inode, __func__, line, block,
 123                       "directory leaf block found instead of index block");
 124                return ERR_PTR(-EIO);
 125        }
 126        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 127                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
 128            buffer_verified(bh))
 129                return bh;
 130
 131        /*
 132         * An empty leaf block can get mistaken for a index block; for
 133         * this reason, we can only check the index checksum when the
 134         * caller is sure it should be an index block.
 135         */
 136        if (is_dx_block && type == INDEX) {
 137                if (ext4_dx_csum_verify(inode, dirent))
 138                        set_buffer_verified(bh);
 139                else {
 140                        ext4_error_inode(inode, __func__, line, block,
 141                                "Directory index failed checksum");
 142                        brelse(bh);
 143                        return ERR_PTR(-EIO);
 144                }
 145        }
 146        if (!is_dx_block) {
 147                if (ext4_dirent_csum_verify(inode, dirent))
 148                        set_buffer_verified(bh);
 149                else {
 150                        ext4_error_inode(inode, __func__, line, block,
 151                                "Directory block failed checksum");
 152                        brelse(bh);
 153                        return ERR_PTR(-EIO);
 154                }
 155        }
 156        return bh;
 157}
 158
 159#ifndef assert
 160#define assert(test) J_ASSERT(test)
 161#endif
 162
 163#ifdef DX_DEBUG
 164#define dxtrace(command) command
 165#else
 166#define dxtrace(command)
 167#endif
 168
 169struct fake_dirent
 170{
 171        __le32 inode;
 172        __le16 rec_len;
 173        u8 name_len;
 174        u8 file_type;
 175};
 176
 177struct dx_countlimit
 178{
 179        __le16 limit;
 180        __le16 count;
 181};
 182
 183struct dx_entry
 184{
 185        __le32 hash;
 186        __le32 block;
 187};
 188
 189/*
 190 * dx_root_info is laid out so that if it should somehow get overlaid by a
 191 * dirent the two low bits of the hash version will be zero.  Therefore, the
 192 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 193 */
 194
 195struct dx_root
 196{
 197        struct fake_dirent dot;
 198        char dot_name[4];
 199        struct fake_dirent dotdot;
 200        char dotdot_name[4];
 201        struct dx_root_info
 202        {
 203                __le32 reserved_zero;
 204                u8 hash_version;
 205                u8 info_length; /* 8 */
 206                u8 indirect_levels;
 207                u8 unused_flags;
 208        }
 209        info;
 210        struct dx_entry entries[0];
 211};
 212
 213struct dx_node
 214{
 215        struct fake_dirent fake;
 216        struct dx_entry entries[0];
 217};
 218
 219
 220struct dx_frame
 221{
 222        struct buffer_head *bh;
 223        struct dx_entry *entries;
 224        struct dx_entry *at;
 225};
 226
 227struct dx_map_entry
 228{
 229        u32 hash;
 230        u16 offs;
 231        u16 size;
 232};
 233
 234/*
 235 * This goes at the end of each htree block.
 236 */
 237struct dx_tail {
 238        u32 dt_reserved;
 239        __le32 dt_checksum;     /* crc32c(uuid+inum+dirblock) */
 240};
 241
 242static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
 243static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
 244static inline unsigned dx_get_hash(struct dx_entry *entry);
 245static void dx_set_hash(struct dx_entry *entry, unsigned value);
 246static unsigned dx_get_count(struct dx_entry *entries);
 247static unsigned dx_get_limit(struct dx_entry *entries);
 248static void dx_set_count(struct dx_entry *entries, unsigned value);
 249static void dx_set_limit(struct dx_entry *entries, unsigned value);
 250static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
 251static unsigned dx_node_limit(struct inode *dir);
 252static struct dx_frame *dx_probe(const struct qstr *d_name,
 253                                 struct inode *dir,
 254                                 struct dx_hash_info *hinfo,
 255                                 struct dx_frame *frame,
 256                                 int *err);
 257static void dx_release(struct dx_frame *frames);
 258static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 259                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
 260static void dx_sort_map(struct dx_map_entry *map, unsigned count);
 261static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
 262                struct dx_map_entry *offsets, int count, unsigned blocksize);
 263static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
 264static void dx_insert_block(struct dx_frame *frame,
 265                                        u32 hash, ext4_lblk_t block);
 266static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 267                                 struct dx_frame *frame,
 268                                 struct dx_frame *frames,
 269                                 __u32 *start_hash);
 270static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 271                const struct qstr *d_name,
 272                struct ext4_dir_entry_2 **res_dir,
 273                int *err);
 274static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 275                             struct inode *inode);
 276
 277/* checksumming functions */
 278void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
 279                            unsigned int blocksize)
 280{
 281        memset(t, 0, sizeof(struct ext4_dir_entry_tail));
 282        t->det_rec_len = ext4_rec_len_to_disk(
 283                        sizeof(struct ext4_dir_entry_tail), blocksize);
 284        t->det_reserved_ft = EXT4_FT_DIR_CSUM;
 285}
 286
 287/* Walk through a dirent block to find a checksum "dirent" at the tail */
 288static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
 289                                                   struct ext4_dir_entry *de)
 290{
 291        struct ext4_dir_entry_tail *t;
 292
 293#ifdef PARANOID
 294        struct ext4_dir_entry *d, *top;
 295
 296        d = de;
 297        top = (struct ext4_dir_entry *)(((void *)de) +
 298                (EXT4_BLOCK_SIZE(inode->i_sb) -
 299                sizeof(struct ext4_dir_entry_tail)));
 300        while (d < top && d->rec_len)
 301                d = (struct ext4_dir_entry *)(((void *)d) +
 302                    le16_to_cpu(d->rec_len));
 303
 304        if (d != top)
 305                return NULL;
 306
 307        t = (struct ext4_dir_entry_tail *)d;
 308#else
 309        t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
 310#endif
 311
 312        if (t->det_reserved_zero1 ||
 313            le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
 314            t->det_reserved_zero2 ||
 315            t->det_reserved_ft != EXT4_FT_DIR_CSUM)
 316                return NULL;
 317
 318        return t;
 319}
 320
 321static __le32 ext4_dirent_csum(struct inode *inode,
 322                               struct ext4_dir_entry *dirent, int size)
 323{
 324        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 325        struct ext4_inode_info *ei = EXT4_I(inode);
 326        __u32 csum;
 327
 328        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
 329        return cpu_to_le32(csum);
 330}
 331
 332static void warn_no_space_for_csum(struct inode *inode)
 333{
 334        ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
 335                     "checksum.  Please run e2fsck -D.", inode->i_ino);
 336}
 337
 338int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
 339{
 340        struct ext4_dir_entry_tail *t;
 341
 342        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 343                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 344                return 1;
 345
 346        t = get_dirent_tail(inode, dirent);
 347        if (!t) {
 348                warn_no_space_for_csum(inode);
 349                return 0;
 350        }
 351
 352        if (t->det_checksum != ext4_dirent_csum(inode, dirent,
 353                                                (void *)t - (void *)dirent))
 354                return 0;
 355
 356        return 1;
 357}
 358
 359static void ext4_dirent_csum_set(struct inode *inode,
 360                                 struct ext4_dir_entry *dirent)
 361{
 362        struct ext4_dir_entry_tail *t;
 363
 364        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 365                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 366                return;
 367
 368        t = get_dirent_tail(inode, dirent);
 369        if (!t) {
 370                warn_no_space_for_csum(inode);
 371                return;
 372        }
 373
 374        t->det_checksum = ext4_dirent_csum(inode, dirent,
 375                                           (void *)t - (void *)dirent);
 376}
 377
 378int ext4_handle_dirty_dirent_node(handle_t *handle,
 379                                  struct inode *inode,
 380                                  struct buffer_head *bh)
 381{
 382        ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
 383        return ext4_handle_dirty_metadata(handle, inode, bh);
 384}
 385
 386static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
 387                                               struct ext4_dir_entry *dirent,
 388                                               int *offset)
 389{
 390        struct ext4_dir_entry *dp;
 391        struct dx_root_info *root;
 392        int count_offset;
 393
 394        if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
 395                count_offset = 8;
 396        else if (le16_to_cpu(dirent->rec_len) == 12) {
 397                dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
 398                if (le16_to_cpu(dp->rec_len) !=
 399                    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
 400                        return NULL;
 401                root = (struct dx_root_info *)(((void *)dp + 12));
 402                if (root->reserved_zero ||
 403                    root->info_length != sizeof(struct dx_root_info))
 404                        return NULL;
 405                count_offset = 32;
 406        } else
 407                return NULL;
 408
 409        if (offset)
 410                *offset = count_offset;
 411        return (struct dx_countlimit *)(((void *)dirent) + count_offset);
 412}
 413
 414static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
 415                           int count_offset, int count, struct dx_tail *t)
 416{
 417        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 418        struct ext4_inode_info *ei = EXT4_I(inode);
 419        __u32 csum;
 420        __le32 save_csum;
 421        int size;
 422
 423        size = count_offset + (count * sizeof(struct dx_entry));
 424        save_csum = t->dt_checksum;
 425        t->dt_checksum = 0;
 426        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
 427        csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
 428        t->dt_checksum = save_csum;
 429
 430        return cpu_to_le32(csum);
 431}
 432
 433static int ext4_dx_csum_verify(struct inode *inode,
 434                               struct ext4_dir_entry *dirent)
 435{
 436        struct dx_countlimit *c;
 437        struct dx_tail *t;
 438        int count_offset, limit, count;
 439
 440        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 441                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 442                return 1;
 443
 444        c = get_dx_countlimit(inode, dirent, &count_offset);
 445        if (!c) {
 446                EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
 447                return 1;
 448        }
 449        limit = le16_to_cpu(c->limit);
 450        count = le16_to_cpu(c->count);
 451        if (count_offset + (limit * sizeof(struct dx_entry)) >
 452            EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
 453                warn_no_space_for_csum(inode);
 454                return 1;
 455        }
 456        t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
 457
 458        if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
 459                                            count, t))
 460                return 0;
 461        return 1;
 462}
 463
 464static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
 465{
 466        struct dx_countlimit *c;
 467        struct dx_tail *t;
 468        int count_offset, limit, count;
 469
 470        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 471                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 472                return;
 473
 474        c = get_dx_countlimit(inode, dirent, &count_offset);
 475        if (!c) {
 476                EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
 477                return;
 478        }
 479        limit = le16_to_cpu(c->limit);
 480        count = le16_to_cpu(c->count);
 481        if (count_offset + (limit * sizeof(struct dx_entry)) >
 482            EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
 483                warn_no_space_for_csum(inode);
 484                return;
 485        }
 486        t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
 487
 488        t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
 489}
 490
 491static inline int ext4_handle_dirty_dx_node(handle_t *handle,
 492                                            struct inode *inode,
 493                                            struct buffer_head *bh)
 494{
 495        ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
 496        return ext4_handle_dirty_metadata(handle, inode, bh);
 497}
 498
 499/*
 500 * p is at least 6 bytes before the end of page
 501 */
 502static inline struct ext4_dir_entry_2 *
 503ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
 504{
 505        return (struct ext4_dir_entry_2 *)((char *)p +
 506                ext4_rec_len_from_disk(p->rec_len, blocksize));
 507}
 508
 509/*
 510 * Future: use high four bits of block for coalesce-on-delete flags
 511 * Mask them off for now.
 512 */
 513
 514static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
 515{
 516        return le32_to_cpu(entry->block) & 0x00ffffff;
 517}
 518
 519static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
 520{
 521        entry->block = cpu_to_le32(value);
 522}
 523
 524static inline unsigned dx_get_hash(struct dx_entry *entry)
 525{
 526        return le32_to_cpu(entry->hash);
 527}
 528
 529static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
 530{
 531        entry->hash = cpu_to_le32(value);
 532}
 533
 534static inline unsigned dx_get_count(struct dx_entry *entries)
 535{
 536        return le16_to_cpu(((struct dx_countlimit *) entries)->count);
 537}
 538
 539static inline unsigned dx_get_limit(struct dx_entry *entries)
 540{
 541        return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
 542}
 543
 544static inline void dx_set_count(struct dx_entry *entries, unsigned value)
 545{
 546        ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
 547}
 548
 549static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
 550{
 551        ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
 552}
 553
 554static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
 555{
 556        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
 557                EXT4_DIR_REC_LEN(2) - infosize;
 558
 559        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
 560                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 561                entry_space -= sizeof(struct dx_tail);
 562        return entry_space / sizeof(struct dx_entry);
 563}
 564
 565static inline unsigned dx_node_limit(struct inode *dir)
 566{
 567        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 568
 569        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
 570                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 571                entry_space -= sizeof(struct dx_tail);
 572        return entry_space / sizeof(struct dx_entry);
 573}
 574
 575/*
 576 * Debug
 577 */
 578#ifdef DX_DEBUG
 579static void dx_show_index(char * label, struct dx_entry *entries)
 580{
 581        int i, n = dx_get_count (entries);
 582        printk(KERN_DEBUG "%s index ", label);
 583        for (i = 0; i < n; i++) {
 584                printk("%x->%lu ", i ? dx_get_hash(entries + i) :
 585                                0, (unsigned long)dx_get_block(entries + i));
 586        }
 587        printk("\n");
 588}
 589
 590struct stats
 591{
 592        unsigned names;
 593        unsigned space;
 594        unsigned bcount;
 595};
 596
 597static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
 598                                 int size, int show_names)
 599{
 600        unsigned names = 0, space = 0;
 601        char *base = (char *) de;
 602        struct dx_hash_info h = *hinfo;
 603
 604        printk("names: ");
 605        while ((char *) de < base + size)
 606        {
 607                if (de->inode)
 608                {
 609                        if (show_names)
 610                        {
 611                                int len = de->name_len;
 612                                char *name = de->name;
 613                                while (len--) printk("%c", *name++);
 614                                ext4fs_dirhash(de->name, de->name_len, &h);
 615                                printk(":%x.%u ", h.hash,
 616                                       (unsigned) ((char *) de - base));
 617                        }
 618                        space += EXT4_DIR_REC_LEN(de->name_len);
 619                        names++;
 620                }
 621                de = ext4_next_entry(de, size);
 622        }
 623        printk("(%i)\n", names);
 624        return (struct stats) { names, space, 1 };
 625}
 626
 627struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 628                             struct dx_entry *entries, int levels)
 629{
 630        unsigned blocksize = dir->i_sb->s_blocksize;
 631        unsigned count = dx_get_count(entries), names = 0, space = 0, i;
 632        unsigned bcount = 0;
 633        struct buffer_head *bh;
 634        int err;
 635        printk("%i indexed blocks...\n", count);
 636        for (i = 0; i < count; i++, entries++)
 637        {
 638                ext4_lblk_t block = dx_get_block(entries);
 639                ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
 640                u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
 641                struct stats stats;
 642                printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
 643                if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
 644                stats = levels?
 645                   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
 646                   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
 647                names += stats.names;
 648                space += stats.space;
 649                bcount += stats.bcount;
 650                brelse(bh);
 651        }
 652        if (bcount)
 653                printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
 654                       levels ? "" : "   ", names, space/bcount,
 655                       (space/bcount)*100/blocksize);
 656        return (struct stats) { names, space, bcount};
 657}
 658#endif /* DX_DEBUG */
 659
 660/*
 661 * Probe for a directory leaf block to search.
 662 *
 663 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 664 * error in the directory index, and the caller should fall back to
 665 * searching the directory normally.  The callers of dx_probe **MUST**
 666 * check for this error code, and make sure it never gets reflected
 667 * back to userspace.
 668 */
 669static struct dx_frame *
 670dx_probe(const struct qstr *d_name, struct inode *dir,
 671         struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
 672{
 673        unsigned count, indirect;
 674        struct dx_entry *at, *entries, *p, *q, *m;
 675        struct dx_root *root;
 676        struct buffer_head *bh;
 677        struct dx_frame *frame = frame_in;
 678        u32 hash;
 679
 680        frame->bh = NULL;
 681        bh = ext4_read_dirblock(dir, 0, INDEX);
 682        if (IS_ERR(bh)) {
 683                *err = PTR_ERR(bh);
 684                goto fail;
 685        }
 686        root = (struct dx_root *) bh->b_data;
 687        if (root->info.hash_version != DX_HASH_TEA &&
 688            root->info.hash_version != DX_HASH_HALF_MD4 &&
 689            root->info.hash_version != DX_HASH_LEGACY) {
 690                ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
 691                             root->info.hash_version);
 692                brelse(bh);
 693                *err = ERR_BAD_DX_DIR;
 694                goto fail;
 695        }
 696        hinfo->hash_version = root->info.hash_version;
 697        if (hinfo->hash_version <= DX_HASH_TEA)
 698                hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 699        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 700        if (d_name)
 701                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
 702        hash = hinfo->hash;
 703
 704        if (root->info.unused_flags & 1) {
 705                ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
 706                             root->info.unused_flags);
 707                brelse(bh);
 708                *err = ERR_BAD_DX_DIR;
 709                goto fail;
 710        }
 711
 712        if ((indirect = root->info.indirect_levels) > 1) {
 713                ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
 714                             root->info.indirect_levels);
 715                brelse(bh);
 716                *err = ERR_BAD_DX_DIR;
 717                goto fail;
 718        }
 719
 720        entries = (struct dx_entry *) (((char *)&root->info) +
 721                                       root->info.info_length);
 722
 723        if (dx_get_limit(entries) != dx_root_limit(dir,
 724                                                   root->info.info_length)) {
 725                ext4_warning(dir->i_sb, "dx entry: limit != root limit");
 726                brelse(bh);
 727                *err = ERR_BAD_DX_DIR;
 728                goto fail;
 729        }
 730
 731        dxtrace(printk("Look up %x", hash));
 732        while (1)
 733        {
 734                count = dx_get_count(entries);
 735                if (!count || count > dx_get_limit(entries)) {
 736                        ext4_warning(dir->i_sb,
 737                                     "dx entry: no count or count > limit");
 738                        brelse(bh);
 739                        *err = ERR_BAD_DX_DIR;
 740                        goto fail2;
 741                }
 742
 743                p = entries + 1;
 744                q = entries + count - 1;
 745                while (p <= q)
 746                {
 747                        m = p + (q - p)/2;
 748                        dxtrace(printk("."));
 749                        if (dx_get_hash(m) > hash)
 750                                q = m - 1;
 751                        else
 752                                p = m + 1;
 753                }
 754
 755                if (0) // linear search cross check
 756                {
 757                        unsigned n = count - 1;
 758                        at = entries;
 759                        while (n--)
 760                        {
 761                                dxtrace(printk(","));
 762                                if (dx_get_hash(++at) > hash)
 763                                {
 764                                        at--;
 765                                        break;
 766                                }
 767                        }
 768                        assert (at == p - 1);
 769                }
 770
 771                at = p - 1;
 772                dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
 773                frame->bh = bh;
 774                frame->entries = entries;
 775                frame->at = at;
 776                if (!indirect--) return frame;
 777                bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
 778                if (IS_ERR(bh)) {
 779                        *err = PTR_ERR(bh);
 780                        goto fail2;
 781                }
 782                entries = ((struct dx_node *) bh->b_data)->entries;
 783
 784                if (dx_get_limit(entries) != dx_node_limit (dir)) {
 785                        ext4_warning(dir->i_sb,
 786                                     "dx entry: limit != node limit");
 787                        brelse(bh);
 788                        *err = ERR_BAD_DX_DIR;
 789                        goto fail2;
 790                }
 791                frame++;
 792                frame->bh = NULL;
 793        }
 794fail2:
 795        while (frame >= frame_in) {
 796                brelse(frame->bh);
 797                frame--;
 798        }
 799fail:
 800        if (*err == ERR_BAD_DX_DIR)
 801                ext4_warning(dir->i_sb,
 802                             "Corrupt dir inode %lu, running e2fsck is "
 803                             "recommended.", dir->i_ino);
 804        return NULL;
 805}
 806
 807static void dx_release (struct dx_frame *frames)
 808{
 809        if (frames[0].bh == NULL)
 810                return;
 811
 812        if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
 813                brelse(frames[1].bh);
 814        brelse(frames[0].bh);
 815}
 816
/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
 834static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 835                                 struct dx_frame *frame,
 836                                 struct dx_frame *frames,
 837                                 __u32 *start_hash)
 838{
 839        struct dx_frame *p;
 840        struct buffer_head *bh;
 841        int num_frames = 0;
 842        __u32 bhash;
 843
 844        p = frame;
 845        /*
 846         * Find the next leaf page by incrementing the frame pointer.
 847         * If we run out of entries in the interior node, loop around and
 848         * increment pointer in the parent node.  When we break out of
 849         * this loop, num_frames indicates the number of interior
 850         * nodes need to be read.
 851         */
 852        while (1) {
 853                if (++(p->at) < p->entries + dx_get_count(p->entries))
 854                        break;
 855                if (p == frames)
 856                        return 0;
 857                num_frames++;
 858                p--;
 859        }
 860
 861        /*
 862         * If the hash is 1, then continue only if the next page has a
 863         * continuation hash of any value.  This is used for readdir
 864         * handling.  Otherwise, check to see if the hash matches the
 865         * desired contiuation hash.  If it doesn't, return since
 866         * there's no point to read in the successive index pages.
 867         */
 868        bhash = dx_get_hash(p->at);
 869        if (start_hash)
 870                *start_hash = bhash;
 871        if ((hash & 1) == 0) {
 872                if ((bhash & ~1) != hash)
 873                        return 0;
 874        }
 875        /*
 876         * If the hash is HASH_NB_ALWAYS, we always go to the next
 877         * block so no check is necessary
 878         */
 879        while (num_frames--) {
 880                bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
 881                if (IS_ERR(bh))
 882                        return PTR_ERR(bh);
 883                p++;
 884                brelse(p->bh);
 885                p->bh = bh;
 886                p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
 887        }
 888        return 1;
 889}
 890
 891
 892/*
 893 * This function fills a red-black tree with information from a
 894 * directory block.  It returns the number directory entries loaded
 895 * into the tree.  If there is an error it is returned in err.
 896 */
 897static int htree_dirblock_to_tree(struct file *dir_file,
 898                                  struct inode *dir, ext4_lblk_t block,
 899                                  struct dx_hash_info *hinfo,
 900                                  __u32 start_hash, __u32 start_minor_hash)
 901{
 902        struct buffer_head *bh;
 903        struct ext4_dir_entry_2 *de, *top;
 904        int err = 0, count = 0;
 905
 906        dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
 907                                                        (unsigned long)block));
 908        bh = ext4_read_dirblock(dir, block, DIRENT);
 909        if (IS_ERR(bh))
 910                return PTR_ERR(bh);
 911
 912        de = (struct ext4_dir_entry_2 *) bh->b_data;
 913        top = (struct ext4_dir_entry_2 *) ((char *) de +
 914                                           dir->i_sb->s_blocksize -
 915                                           EXT4_DIR_REC_LEN(0));
 916        for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
 917                if (ext4_check_dir_entry(dir, NULL, de, bh,
 918                                bh->b_data, bh->b_size,
 919                                (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
 920                                         + ((char *)de - bh->b_data))) {
 921                        /* silently ignore the rest of the block */
 922                        break;
 923                }
 924                ext4fs_dirhash(de->name, de->name_len, hinfo);
 925                if ((hinfo->hash < start_hash) ||
 926                    ((hinfo->hash == start_hash) &&
 927                     (hinfo->minor_hash < start_minor_hash)))
 928                        continue;
 929                if (de->inode == 0)
 930                        continue;
 931                if ((err = ext4_htree_store_dirent(dir_file,
 932                                   hinfo->hash, hinfo->minor_hash, de)) != 0) {
 933                        brelse(bh);
 934                        return err;
 935                }
 936                count++;
 937        }
 938        brelse(bh);
 939        return count;
 940}
 941
 942
 943/*
 944 * This function fills a red-black tree with information from a
 945 * directory.  We start scanning the directory in hash order, starting
 946 * at start_hash and start_minor_hash.
 947 *
 948 * This function returns the number of entries inserted into the tree,
 949 * or a negative error code.
 950 */
 951int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 952                         __u32 start_minor_hash, __u32 *next_hash)
 953{
 954        struct dx_hash_info hinfo;
 955        struct ext4_dir_entry_2 *de;
 956        struct dx_frame frames[2], *frame;
 957        struct inode *dir;
 958        ext4_lblk_t block;
 959        int count = 0;
 960        int ret, err;
 961        __u32 hashval;
 962
 963        dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
 964                       start_hash, start_minor_hash));
 965        dir = file_inode(dir_file);
 966        if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
 967                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 968                if (hinfo.hash_version <= DX_HASH_TEA)
 969                        hinfo.hash_version +=
 970                                EXT4_SB(dir->i_sb)->s_hash_unsigned;
 971                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 972                if (ext4_has_inline_data(dir)) {
 973                        int has_inline_data = 1;
 974                        count = htree_inlinedir_to_tree(dir_file, dir, 0,
 975                                                        &hinfo, start_hash,
 976                                                        start_minor_hash,
 977                                                        &has_inline_data);
 978                        if (has_inline_data) {
 979                                *next_hash = ~0;
 980                                return count;
 981                        }
 982                }
 983                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 984                                               start_hash, start_minor_hash);
 985                *next_hash = ~0;
 986                return count;
 987        }
 988        hinfo.hash = start_hash;
 989        hinfo.minor_hash = 0;
 990        frame = dx_probe(NULL, dir, &hinfo, frames, &err);
 991        if (!frame)
 992                return err;
 993
 994        /* Add '.' and '..' from the htree header */
 995        if (!start_hash && !start_minor_hash) {
 996                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
 997                if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
 998                        goto errout;
 999                count++;
1000        }
1001        if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1002                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1003                de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1004                if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1005                        goto errout;
1006                count++;
1007        }
1008
1009        while (1) {
1010                block = dx_get_block(frame->at);
1011                ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1012                                             start_hash, start_minor_hash);
1013                if (ret < 0) {
1014                        err = ret;
1015                        goto errout;
1016                }
1017                count += ret;
1018                hashval = ~0;
1019                ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1020                                            frame, frames, &hashval);
1021                *next_hash = hashval;
1022                if (ret < 0) {
1023                        err = ret;
1024                        goto errout;
1025                }
1026                /*
1027                 * Stop if:  (a) there are no more entries, or
1028                 * (b) we have inserted at least one entry and the
1029                 * next hash value is not a continuation
1030                 */
1031                if ((ret == 0) ||
1032                    (count && ((hashval & 1) == 0)))
1033                        break;
1034        }
1035        dx_release(frames);
1036        dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1037                       "next hash: %x\n", count, *next_hash));
1038        return count;
1039errout:
1040        dx_release(frames);
1041        return (err);
1042}
1043
1044static inline int search_dirblock(struct buffer_head *bh,
1045                                  struct inode *dir,
1046                                  const struct qstr *d_name,
1047                                  unsigned int offset,
1048                                  struct ext4_dir_entry_2 **res_dir)
1049{
1050        return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1051                          d_name, offset, res_dir);
1052}
1053
1054/*
1055 * Directory block splitting, compacting
1056 */
1057
1058/*
1059 * Create map of hash values, offsets, and sizes, stored at end of block.
1060 * Returns number of entries mapped.
1061 */
1062static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
1063                       struct dx_hash_info *hinfo,
1064                       struct dx_map_entry *map_tail)
1065{
1066        int count = 0;
1067        char *base = (char *) de;
1068        struct dx_hash_info h = *hinfo;
1069
1070        while ((char *) de < base + blocksize) {
1071                if (de->name_len && de->inode) {
1072                        ext4fs_dirhash(de->name, de->name_len, &h);
1073                        map_tail--;
1074                        map_tail->hash = h.hash;
1075                        map_tail->offs = ((char *) de - base)>>2;
1076                        map_tail->size = le16_to_cpu(de->rec_len);
1077                        count++;
1078                        cond_resched();
1079                }
1080                /* XXX: do we need to check rec_len == 0 case? -Chris */
1081                de = ext4_next_entry(de, blocksize);
1082        }
1083        return count;
1084}
1085
1086/* Sort map by hash value */
1087static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1088{
1089        struct dx_map_entry *p, *q, *top = map + count - 1;
1090        int more;
1091        /* Combsort until bubble sort doesn't suck */
1092        while (count > 2) {
1093                count = count*10/13;
1094                if (count - 9 < 2) /* 9, 10 -> 11 */
1095                        count = 11;
1096                for (p = top, q = p - count; q >= map; p--, q--)
1097                        if (p->hash < q->hash)
1098                                swap(*p, *q);
1099        }
1100        /* Garden variety bubble sort */
1101        do {
1102                more = 0;
1103                q = top;
1104                while (q-- > map) {
1105                        if (q[1].hash >= q[0].hash)
1106                                continue;
1107                        swap(*(q+1), *q);
1108                        more = 1;
1109                }
1110        } while(more);
1111}
1112
1113static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1114{
1115        struct dx_entry *entries = frame->entries;
1116        struct dx_entry *old = frame->at, *new = old + 1;
1117        int count = dx_get_count(entries);
1118
1119        assert(count < dx_get_limit(entries));
1120        assert(old < entries + count);
1121        memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1122        dx_set_hash(new, hash);
1123        dx_set_block(new, block);
1124        dx_set_count(entries, count + 1);
1125}
1126
1127/*
1128 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1129 *
1130 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1131 * `de != NULL' is guaranteed by caller.
1132 */
1133static inline int ext4_match (int len, const char * const name,
1134                              struct ext4_dir_entry_2 * de)
1135{
1136        if (len != de->name_len)
1137                return 0;
1138        if (!de->inode)
1139                return 0;
1140        return !memcmp(name, de->name, len);
1141}
1142
1143/*
1144 * Returns 0 if not found, -1 on failure, and 1 on success
1145 */
1146int search_dir(struct buffer_head *bh,
1147               char *search_buf,
1148               int buf_size,
1149               struct inode *dir,
1150               const struct qstr *d_name,
1151               unsigned int offset,
1152               struct ext4_dir_entry_2 **res_dir)
1153{
1154        struct ext4_dir_entry_2 * de;
1155        char * dlimit;
1156        int de_len;
1157        const char *name = d_name->name;
1158        int namelen = d_name->len;
1159
1160        de = (struct ext4_dir_entry_2 *)search_buf;
1161        dlimit = search_buf + buf_size;
1162        while ((char *) de < dlimit) {
1163                /* this code is executed quadratically often */
1164                /* do minimal checking `by hand' */
1165
1166                if ((char *) de + namelen <= dlimit &&
1167                    ext4_match (namelen, name, de)) {
1168                        /* found a match - just to be sure, do a full check */
1169                        if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
1170                                                 bh->b_size, offset))
1171                                return -1;
1172                        *res_dir = de;
1173                        return 1;
1174                }
1175                /* prevent looping on a bad block */
1176                de_len = ext4_rec_len_from_disk(de->rec_len,
1177                                                dir->i_sb->s_blocksize);
1178                if (de_len <= 0)
1179                        return -1;
1180                offset += de_len;
1181                de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1182        }
1183        return 0;
1184}
1185
1186static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1187                               struct ext4_dir_entry *de)
1188{
1189        struct super_block *sb = dir->i_sb;
1190
1191        if (!is_dx(dir))
1192                return 0;
1193        if (block == 0)
1194                return 1;
1195        if (de->inode == 0 &&
1196            ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1197                        sb->s_blocksize)
1198                return 1;
1199        return 0;
1200}
1201
1202/*
1203 *      ext4_find_entry()
1204 *
1205 * finds an entry in the specified directory with the wanted name. It
1206 * returns the cache buffer in which the entry was found, and the entry
1207 * itself (as a parameter - res_dir). It does NOT read the inode of the
1208 * entry - you'll have to do that yourself if you want to.
1209 *
1210 * The returned buffer_head has ->b_count elevated.  The caller is expected
1211 * to brelse() it when appropriate.
1212 */
1213static struct buffer_head * ext4_find_entry (struct inode *dir,
1214                                        const struct qstr *d_name,
1215                                        struct ext4_dir_entry_2 **res_dir,
1216                                        int *inlined)
1217{
1218        struct super_block *sb;
1219        struct buffer_head *bh_use[NAMEI_RA_SIZE];
1220        struct buffer_head *bh, *ret = NULL;
1221        ext4_lblk_t start, block, b;
1222        const u8 *name = d_name->name;
1223        int ra_max = 0;         /* Number of bh's in the readahead
1224                                   buffer, bh_use[] */
1225        int ra_ptr = 0;         /* Current index into readahead
1226                                   buffer */
1227        int num = 0;
1228        ext4_lblk_t  nblocks;
1229        int i, err;
1230        int namelen;
1231
1232        *res_dir = NULL;
1233        sb = dir->i_sb;
1234        namelen = d_name->len;
1235        if (namelen > EXT4_NAME_LEN)
1236                return NULL;
1237
1238        if (ext4_has_inline_data(dir)) {
1239                int has_inline_data = 1;
1240                ret = ext4_find_inline_entry(dir, d_name, res_dir,
1241                                             &has_inline_data);
1242                if (has_inline_data) {
1243                        if (inlined)
1244                                *inlined = 1;
1245                        return ret;
1246                }
1247        }
1248
1249        if ((namelen <= 2) && (name[0] == '.') &&
1250            (name[1] == '.' || name[1] == '\0')) {
1251                /*
1252                 * "." or ".." will only be in the first block
1253                 * NFS may look up ".."; "." should be handled by the VFS
1254                 */
1255                block = start = 0;
1256                nblocks = 1;
1257                goto restart;
1258        }
1259        if (is_dx(dir)) {
1260                bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1261                /*
1262                 * On success, or if the error was file not found,
1263                 * return.  Otherwise, fall back to doing a search the
1264                 * old fashioned way.
1265                 */
1266                if (bh || (err != ERR_BAD_DX_DIR))
1267                        return bh;
1268                dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1269                               "falling back\n"));
1270        }
1271        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1272        start = EXT4_I(dir)->i_dir_start_lookup;
1273        if (start >= nblocks)
1274                start = 0;
1275        block = start;
1276restart:
1277        do {
1278                /*
1279                 * We deal with the read-ahead logic here.
1280                 */
1281                if (ra_ptr >= ra_max) {
1282                        /* Refill the readahead buffer */
1283                        ra_ptr = 0;
1284                        b = block;
1285                        for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
1286                                /*
1287                                 * Terminate if we reach the end of the
1288                                 * directory and must wrap, or if our
1289                                 * search has finished at this block.
1290                                 */
1291                                if (b >= nblocks || (num && block == start)) {
1292                                        bh_use[ra_max] = NULL;
1293                                        break;
1294                                }
1295                                num++;
1296                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
1297                                bh_use[ra_max] = bh;
1298                                if (bh)
1299                                        ll_rw_block(READ | REQ_META | REQ_PRIO,
1300                                                    1, &bh);
1301                        }
1302                }
1303                if ((bh = bh_use[ra_ptr++]) == NULL)
1304                        goto next;
1305                wait_on_buffer(bh);
1306                if (!buffer_uptodate(bh)) {
1307                        /* read error, skip block & hope for the best */
1308                        EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
1309                                         (unsigned long) block);
1310                        brelse(bh);
1311                        goto next;
1312                }
1313                if (!buffer_verified(bh) &&
1314                    !is_dx_internal_node(dir, block,
1315                                         (struct ext4_dir_entry *)bh->b_data) &&
1316                    !ext4_dirent_csum_verify(dir,
1317                                (struct ext4_dir_entry *)bh->b_data)) {
1318                        EXT4_ERROR_INODE(dir, "checksumming directory "
1319                                         "block %lu", (unsigned long)block);
1320                        brelse(bh);
1321                        goto next;
1322                }
1323                set_buffer_verified(bh);
1324                i = search_dirblock(bh, dir, d_name,
1325                            block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1326                if (i == 1) {
1327                        EXT4_I(dir)->i_dir_start_lookup = block;
1328                        ret = bh;
1329                        goto cleanup_and_exit;
1330                } else {
1331                        brelse(bh);
1332                        if (i < 0)
1333                                goto cleanup_and_exit;
1334                }
1335        next:
1336                if (++block >= nblocks)
1337                        block = 0;
1338        } while (block != start);
1339
1340        /*
1341         * If the directory has grown while we were searching, then
1342         * search the last part of the directory before giving up.
1343         */
1344        block = nblocks;
1345        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1346        if (block < nblocks) {
1347                start = 0;
1348                goto restart;
1349        }
1350
1351cleanup_and_exit:
1352        /* Clean up the read-ahead blocks */
1353        for (; ra_ptr < ra_max; ra_ptr++)
1354                brelse(bh_use[ra_ptr]);
1355        return ret;
1356}
1357
1358static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1359                       struct ext4_dir_entry_2 **res_dir, int *err)
1360{
1361        struct super_block * sb = dir->i_sb;
1362        struct dx_hash_info     hinfo;
1363        struct dx_frame frames[2], *frame;
1364        struct buffer_head *bh;
1365        ext4_lblk_t block;
1366        int retval;
1367
1368        if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
1369                return NULL;
1370        do {
1371                block = dx_get_block(frame->at);
1372                bh = ext4_read_dirblock(dir, block, DIRENT);
1373                if (IS_ERR(bh)) {
1374                        *err = PTR_ERR(bh);
1375                        goto errout;
1376                }
1377                retval = search_dirblock(bh, dir, d_name,
1378                                         block << EXT4_BLOCK_SIZE_BITS(sb),
1379                                         res_dir);
1380                if (retval == 1) {      /* Success! */
1381                        dx_release(frames);
1382                        return bh;
1383                }
1384                brelse(bh);
1385                if (retval == -1) {
1386                        *err = ERR_BAD_DX_DIR;
1387                        goto errout;
1388                }
1389
1390                /* Check to see if we should continue to search */
1391                retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1392                                               frames, NULL);
1393                if (retval < 0) {
1394                        ext4_warning(sb,
1395                             "error reading index page in directory #%lu",
1396                             dir->i_ino);
1397                        *err = retval;
1398                        goto errout;
1399                }
1400        } while (retval == 1);
1401
1402        *err = -ENOENT;
1403errout:
1404        dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1405        dx_release (frames);
1406        return NULL;
1407}
1408
1409static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1410{
1411        struct inode *inode;
1412        struct ext4_dir_entry_2 *de;
1413        struct buffer_head *bh;
1414
1415        if (dentry->d_name.len > EXT4_NAME_LEN)
1416                return ERR_PTR(-ENAMETOOLONG);
1417
1418        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1419        inode = NULL;
1420        if (bh) {
1421                __u32 ino = le32_to_cpu(de->inode);
1422                brelse(bh);
1423                if (!ext4_valid_inum(dir->i_sb, ino)) {
1424                        EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1425                        return ERR_PTR(-EIO);
1426                }
1427                if (unlikely(ino == dir->i_ino)) {
1428                        EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir",
1429                                         dentry->d_name.len,
1430                                         dentry->d_name.name);
1431                        return ERR_PTR(-EIO);
1432                }
1433                inode = ext4_iget(dir->i_sb, ino);
1434                if (inode == ERR_PTR(-ESTALE)) {
1435                        EXT4_ERROR_INODE(dir,
1436                                         "deleted inode referenced: %u",
1437                                         ino);
1438                        return ERR_PTR(-EIO);
1439                }
1440        }
1441        return d_splice_alias(inode, dentry);
1442}
1443
1444
1445struct dentry *ext4_get_parent(struct dentry *child)
1446{
1447        __u32 ino;
1448        static const struct qstr dotdot = QSTR_INIT("..", 2);
1449        struct ext4_dir_entry_2 * de;
1450        struct buffer_head *bh;
1451
1452        bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1453        if (!bh)
1454                return ERR_PTR(-ENOENT);
1455        ino = le32_to_cpu(de->inode);
1456        brelse(bh);
1457
1458        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1459                EXT4_ERROR_INODE(child->d_inode,
1460                                 "bad parent inode number: %u", ino);
1461                return ERR_PTR(-EIO);
1462        }
1463
1464        return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1465}
1466
1467/*
1468 * Move count entries from end of map between two memory locations.
1469 * Returns pointer to last entry moved.
1470 */
1471static struct ext4_dir_entry_2 *
1472dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1473                unsigned blocksize)
1474{
1475        unsigned rec_len = 0;
1476
1477        while (count--) {
1478                struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1479                                                (from + (map->offs<<2));
1480                rec_len = EXT4_DIR_REC_LEN(de->name_len);
1481                memcpy (to, de, rec_len);
1482                ((struct ext4_dir_entry_2 *) to)->rec_len =
1483                                ext4_rec_len_to_disk(rec_len, blocksize);
1484                de->inode = 0;
1485                map++;
1486                to += rec_len;
1487        }
1488        return (struct ext4_dir_entry_2 *) (to - rec_len);
1489}
1490
1491/*
1492 * Compact each dir entry in the range to the minimal rec_len.
1493 * Returns pointer to last entry in range.
1494 */
1495static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1496{
1497        struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1498        unsigned rec_len = 0;
1499
1500        prev = to = de;
1501        while ((char*)de < base + blocksize) {
1502                next = ext4_next_entry(de, blocksize);
1503                if (de->inode && de->name_len) {
1504                        rec_len = EXT4_DIR_REC_LEN(de->name_len);
1505                        if (de > to)
1506                                memmove(to, de, rec_len);
1507                        to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1508                        prev = to;
1509                        to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1510                }
1511                de = next;
1512        }
1513        return prev;
1514}
1515
1516/*
1517 * Split a full leaf block to make room for a new dir entry.
1518 * Allocate a new block, and move entries so that they are approx. equally full.
1519 * Returns pointer to de in block into which the new entry will be inserted.
1520 */
1521static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1522                        struct buffer_head **bh,struct dx_frame *frame,
1523                        struct dx_hash_info *hinfo, int *error)
1524{
1525        unsigned blocksize = dir->i_sb->s_blocksize;
1526        unsigned count, continued;
1527        struct buffer_head *bh2;
1528        ext4_lblk_t newblock;
1529        u32 hash2;
1530        struct dx_map_entry *map;
1531        char *data1 = (*bh)->b_data, *data2;
1532        unsigned split, move, size;
1533        struct ext4_dir_entry_2 *de = NULL, *de2;
1534        struct ext4_dir_entry_tail *t;
1535        int     csum_size = 0;
1536        int     err = 0, i;
1537
1538        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
1539                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1540                csum_size = sizeof(struct ext4_dir_entry_tail);
1541
1542        bh2 = ext4_append(handle, dir, &newblock);
1543        if (IS_ERR(bh2)) {
1544                brelse(*bh);
1545                *bh = NULL;
1546                *error = PTR_ERR(bh2);
1547                return NULL;
1548        }
1549
1550        BUFFER_TRACE(*bh, "get_write_access");
1551        err = ext4_journal_get_write_access(handle, *bh);
1552        if (err)
1553                goto journal_error;
1554
1555        BUFFER_TRACE(frame->bh, "get_write_access");
1556        err = ext4_journal_get_write_access(handle, frame->bh);
1557        if (err)
1558                goto journal_error;
1559
1560        data2 = bh2->b_data;
1561
1562        /* create map in the end of data2 block */
1563        map = (struct dx_map_entry *) (data2 + blocksize);
1564        count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1565                             blocksize, hinfo, map);
1566        map -= count;
1567        dx_sort_map(map, count);
1568        /* Split the existing block in the middle, size-wise */
1569        size = 0;
1570        move = 0;
1571        for (i = count-1; i >= 0; i--) {
1572                /* is more than half of this entry in 2nd half of the block? */
1573                if (size + map[i].size/2 > blocksize/2)
1574                        break;
1575                size += map[i].size;
1576                move++;
1577        }
1578        /* map index at which we will split */
1579        split = count - move;
1580        hash2 = map[split].hash;
1581        continued = hash2 == map[split - 1].hash;
1582        dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
1583                        (unsigned long)dx_get_block(frame->at),
1584                                        hash2, split, count-split));
1585
1586        /* Fancy dance to stay within two buffers */
1587        de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1588        de = dx_pack_dirents(data1, blocksize);
1589        de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1590                                           (char *) de,
1591                                           blocksize);
1592        de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
1593                                            (char *) de2,
1594                                            blocksize);
1595        if (csum_size) {
1596                t = EXT4_DIRENT_TAIL(data2, blocksize);
1597                initialize_dirent_tail(t, blocksize);
1598
1599                t = EXT4_DIRENT_TAIL(data1, blocksize);
1600                initialize_dirent_tail(t, blocksize);
1601        }
1602
1603        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1604        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1605
1606        /* Which block gets the new entry? */
1607        if (hinfo->hash >= hash2)
1608        {
1609                swap(*bh, bh2);
1610                de = de2;
1611        }
1612        dx_insert_block(frame, hash2 + continued, newblock);
1613        err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
1614        if (err)
1615                goto journal_error;
1616        err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1617        if (err)
1618                goto journal_error;
1619        brelse(bh2);
1620        dxtrace(dx_show_index("frame", frame->entries));
1621        return de;
1622
1623journal_error:
1624        brelse(*bh);
1625        brelse(bh2);
1626        *bh = NULL;
1627        ext4_std_error(dir->i_sb, err);
1628        *error = err;
1629        return NULL;
1630}
1631
1632int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1633                      struct buffer_head *bh,
1634                      void *buf, int buf_size,
1635                      const char *name, int namelen,
1636                      struct ext4_dir_entry_2 **dest_de)
1637{
1638        struct ext4_dir_entry_2 *de;
1639        unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
1640        int nlen, rlen;
1641        unsigned int offset = 0;
1642        char *top;
1643
1644        de = (struct ext4_dir_entry_2 *)buf;
1645        top = buf + buf_size - reclen;
1646        while ((char *) de <= top) {
1647                if (ext4_check_dir_entry(dir, NULL, de, bh,
1648                                         buf, buf_size, offset))
1649                        return -EIO;
1650                if (ext4_match(namelen, name, de))
1651                        return -EEXIST;
1652                nlen = EXT4_DIR_REC_LEN(de->name_len);
1653                rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1654                if ((de->inode ? rlen - nlen : rlen) >= reclen)
1655                        break;
1656                de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1657                offset += rlen;
1658        }
1659        if ((char *) de > top)
1660                return -ENOSPC;
1661
1662        *dest_de = de;
1663        return 0;
1664}
1665
1666void ext4_insert_dentry(struct inode *inode,
1667                        struct ext4_dir_entry_2 *de,
1668                        int buf_size,
1669                        const char *name, int namelen)
1670{
1671
1672        int nlen, rlen;
1673
1674        nlen = EXT4_DIR_REC_LEN(de->name_len);
1675        rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1676        if (de->inode) {
1677                struct ext4_dir_entry_2 *de1 =
1678                                (struct ext4_dir_entry_2 *)((char *)de + nlen);
1679                de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1680                de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1681                de = de1;
1682        }
1683        de->file_type = EXT4_FT_UNKNOWN;
1684        de->inode = cpu_to_le32(inode->i_ino);
1685        ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1686        de->name_len = namelen;
1687        memcpy(de->name, name, namelen);
1688}
1689/*
1690 * Add a new entry into a directory (leaf) block.  If de is non-NULL,
1691 * it points to a directory entry which is guaranteed to be large
1692 * enough for new directory entry.  If de is NULL, then
1693 * add_dirent_to_buf will attempt search the directory block for
1694 * space.  It will return -ENOSPC if no space is available, and -EIO
1695 * and -EEXIST if directory entry already exists.
1696 */
1697static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1698                             struct inode *inode, struct ext4_dir_entry_2 *de,
1699                             struct buffer_head *bh)
1700{
1701        struct inode    *dir = dentry->d_parent->d_inode;
1702        const char      *name = dentry->d_name.name;
1703        int             namelen = dentry->d_name.len;
1704        unsigned int    blocksize = dir->i_sb->s_blocksize;
1705        int             csum_size = 0;
1706        int             err;
1707
1708        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1709                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1710                csum_size = sizeof(struct ext4_dir_entry_tail);
1711
1712        if (!de) {
1713                err = ext4_find_dest_de(dir, inode,
1714                                        bh, bh->b_data, blocksize - csum_size,
1715                                        name, namelen, &de);
1716                if (err)
1717                        return err;
1718        }
1719        BUFFER_TRACE(bh, "get_write_access");
1720        err = ext4_journal_get_write_access(handle, bh);
1721        if (err) {
1722                ext4_std_error(dir->i_sb, err);
1723                return err;
1724        }
1725
1726        /* By now the buffer is marked for journaling */
1727        ext4_insert_dentry(inode, de, blocksize, name, namelen);
1728
1729        /*
1730         * XXX shouldn't update any times until successful
1731         * completion of syscall, but too many callers depend
1732         * on this.
1733         *
1734         * XXX similarly, too many callers depend on
1735         * ext4_new_inode() setting the times, but error
1736         * recovery deletes the inode, so the worst that can
1737         * happen is that the times are slightly out of date
1738         * and/or different from the directory change time.
1739         */
1740        dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1741        ext4_update_dx_flag(dir);
1742        dir->i_version++;
1743        ext4_mark_inode_dirty(handle, dir);
1744        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1745        err = ext4_handle_dirty_dirent_node(handle, dir, bh);
1746        if (err)
1747                ext4_std_error(dir->i_sb, err);
1748        return 0;
1749}
1750
1751/*
1752 * This converts a one block unindexed directory to a 3 block indexed
1753 * directory, and adds the dentry to the indexed directory.
1754 */
1755static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1756                            struct inode *inode, struct buffer_head *bh)
1757{
1758        struct inode    *dir = dentry->d_parent->d_inode;
1759        const char      *name = dentry->d_name.name;
1760        int             namelen = dentry->d_name.len;
1761        struct buffer_head *bh2;
1762        struct dx_root  *root;
1763        struct dx_frame frames[2], *frame;
1764        struct dx_entry *entries;
1765        struct ext4_dir_entry_2 *de, *de2;
1766        struct ext4_dir_entry_tail *t;
1767        char            *data1, *top;
1768        unsigned        len;
1769        int             retval;
1770        unsigned        blocksize;
1771        struct dx_hash_info hinfo;
1772        ext4_lblk_t  block;
1773        struct fake_dirent *fde;
1774        int             csum_size = 0;
1775
1776        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1777                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1778                csum_size = sizeof(struct ext4_dir_entry_tail);
1779
1780        blocksize =  dir->i_sb->s_blocksize;
1781        dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1782        retval = ext4_journal_get_write_access(handle, bh);
1783        if (retval) {
1784                ext4_std_error(dir->i_sb, retval);
1785                brelse(bh);
1786                return retval;
1787        }
1788        root = (struct dx_root *) bh->b_data;
1789
1790        /* The 0th block becomes the root, move the dirents out */
1791        fde = &root->dotdot;
1792        de = (struct ext4_dir_entry_2 *)((char *)fde +
1793                ext4_rec_len_from_disk(fde->rec_len, blocksize));
1794        if ((char *) de >= (((char *) root) + blocksize)) {
1795                EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
1796                brelse(bh);
1797                return -EIO;
1798        }
1799        len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1800
1801        /* Allocate new block for the 0th block's dirents */
1802        bh2 = ext4_append(handle, dir, &block);
1803        if (IS_ERR(bh2)) {
1804                brelse(bh);
1805                return PTR_ERR(bh2);
1806        }
1807        ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1808        data1 = bh2->b_data;
1809
1810        memcpy (data1, de, len);
1811        de = (struct ext4_dir_entry_2 *) data1;
1812        top = data1 + len;
1813        while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1814                de = de2;
1815        de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1816                                           (char *) de,
1817                                           blocksize);
1818
1819        if (csum_size) {
1820                t = EXT4_DIRENT_TAIL(data1, blocksize);
1821                initialize_dirent_tail(t, blocksize);
1822        }
1823
1824        /* Initialize the root; the dot dirents already exist */
1825        de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1826        de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1827                                           blocksize);
1828        memset (&root->info, 0, sizeof(root->info));
1829        root->info.info_length = sizeof(root->info);
1830        root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1831        entries = root->entries;
1832        dx_set_block(entries, 1);
1833        dx_set_count(entries, 1);
1834        dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1835
1836        /* Initialize as for dx_probe */
1837        hinfo.hash_version = root->info.hash_version;
1838        if (hinfo.hash_version <= DX_HASH_TEA)
1839                hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1840        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1841        ext4fs_dirhash(name, namelen, &hinfo);
1842        frame = frames;
1843        frame->entries = entries;
1844        frame->at = entries;
1845        frame->bh = bh;
1846        bh = bh2;
1847
1848        ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1849        ext4_handle_dirty_dirent_node(handle, dir, bh);
1850
1851        de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1852        if (!de) {
1853                /*
1854                 * Even if the block split failed, we have to properly write
1855                 * out all the changes we did so far. Otherwise we can end up
1856                 * with corrupted filesystem.
1857                 */
1858                ext4_mark_inode_dirty(handle, dir);
1859                dx_release(frames);
1860                return retval;
1861        }
1862        dx_release(frames);
1863
1864        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1865        brelse(bh);
1866        return retval;
1867}
1868
1869/*
1870 *      ext4_add_entry()
1871 *
1872 * adds a file entry to the specified directory, using the same
1873 * semantics as ext4_find_entry(). It returns NULL if it failed.
1874 *
1875 * NOTE!! The inode part of 'de' is left at 0 - which means you
1876 * may not sleep between calling this and putting something into
1877 * the entry, as someone else might have used it while you slept.
1878 */
1879static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1880                          struct inode *inode)
1881{
1882        struct inode *dir = dentry->d_parent->d_inode;
1883        struct buffer_head *bh;
1884        struct ext4_dir_entry_2 *de;
1885        struct ext4_dir_entry_tail *t;
1886        struct super_block *sb;
1887        int     retval;
1888        int     dx_fallback=0;
1889        unsigned blocksize;
1890        ext4_lblk_t block, blocks;
1891        int     csum_size = 0;
1892
1893        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1894                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1895                csum_size = sizeof(struct ext4_dir_entry_tail);
1896
1897        sb = dir->i_sb;
1898        blocksize = sb->s_blocksize;
1899        if (!dentry->d_name.len)
1900                return -EINVAL;
1901
1902        if (ext4_has_inline_data(dir)) {
1903                retval = ext4_try_add_inline_entry(handle, dentry, inode);
1904                if (retval < 0)
1905                        return retval;
1906                if (retval == 1) {
1907                        retval = 0;
1908                        return retval;
1909                }
1910        }
1911
1912        if (is_dx(dir)) {
1913                retval = ext4_dx_add_entry(handle, dentry, inode);
1914                if (!retval || (retval != ERR_BAD_DX_DIR))
1915                        return retval;
1916                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
1917                dx_fallback++;
1918                ext4_mark_inode_dirty(handle, dir);
1919        }
1920        blocks = dir->i_size >> sb->s_blocksize_bits;
1921        for (block = 0; block < blocks; block++) {
1922                bh = ext4_read_dirblock(dir, block, DIRENT);
1923                if (IS_ERR(bh))
1924                        return PTR_ERR(bh);
1925
1926                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1927                if (retval != -ENOSPC) {
1928                        brelse(bh);
1929                        return retval;
1930                }
1931
1932                if (blocks == 1 && !dx_fallback &&
1933                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1934                        return make_indexed_dir(handle, dentry, inode, bh);
1935                brelse(bh);
1936        }
1937        bh = ext4_append(handle, dir, &block);
1938        if (IS_ERR(bh))
1939                return PTR_ERR(bh);
1940        de = (struct ext4_dir_entry_2 *) bh->b_data;
1941        de->inode = 0;
1942        de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
1943
1944        if (csum_size) {
1945                t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
1946                initialize_dirent_tail(t, blocksize);
1947        }
1948
1949        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1950        brelse(bh);
1951        if (retval == 0)
1952                ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
1953        return retval;
1954}
1955
1956/*
1957 * Returns 0 for success, or a negative error value
1958 */
1959static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1960                             struct inode *inode)
1961{
1962        struct dx_frame frames[2], *frame;
1963        struct dx_entry *entries, *at;
1964        struct dx_hash_info hinfo;
1965        struct buffer_head *bh;
1966        struct inode *dir = dentry->d_parent->d_inode;
1967        struct super_block *sb = dir->i_sb;
1968        struct ext4_dir_entry_2 *de;
1969        int err;
1970
1971        frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1972        if (!frame)
1973                return err;
1974        entries = frame->entries;
1975        at = frame->at;
1976        bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
1977        if (IS_ERR(bh)) {
1978                err = PTR_ERR(bh);
1979                bh = NULL;
1980                goto cleanup;
1981        }
1982
1983        BUFFER_TRACE(bh, "get_write_access");
1984        err = ext4_journal_get_write_access(handle, bh);
1985        if (err)
1986                goto journal_error;
1987
1988        err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1989        if (err != -ENOSPC)
1990                goto cleanup;
1991
1992        /* Block full, should compress but for now just split */
1993        dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1994                       dx_get_count(entries), dx_get_limit(entries)));
1995        /* Need to split index? */
1996        if (dx_get_count(entries) == dx_get_limit(entries)) {
1997                ext4_lblk_t newblock;
1998                unsigned icount = dx_get_count(entries);
1999                int levels = frame - frames;
2000                struct dx_entry *entries2;
2001                struct dx_node *node2;
2002                struct buffer_head *bh2;
2003
2004                if (levels && (dx_get_count(frames->entries) ==
2005                               dx_get_limit(frames->entries))) {
2006                        ext4_warning(sb, "Directory index full!");
2007                        err = -ENOSPC;
2008                        goto cleanup;
2009                }
2010                bh2 = ext4_append(handle, dir, &newblock);
2011                if (IS_ERR(bh2)) {
2012                        err = PTR_ERR(bh2);
2013                        goto cleanup;
2014                }
2015                node2 = (struct dx_node *)(bh2->b_data);
2016                entries2 = node2->entries;
2017                memset(&node2->fake, 0, sizeof(struct fake_dirent));
2018                node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2019                                                           sb->s_blocksize);
2020                BUFFER_TRACE(frame->bh, "get_write_access");
2021                err = ext4_journal_get_write_access(handle, frame->bh);
2022                if (err)
2023                        goto journal_error;
2024                if (levels) {
2025                        unsigned icount1 = icount/2, icount2 = icount - icount1;
2026                        unsigned hash2 = dx_get_hash(entries + icount1);
2027                        dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2028                                       icount1, icount2));
2029
2030                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2031                        err = ext4_journal_get_write_access(handle,
2032                                                             frames[0].bh);
2033                        if (err)
2034                                goto journal_error;
2035
2036                        memcpy((char *) entries2, (char *) (entries + icount1),
2037                               icount2 * sizeof(struct dx_entry));
2038                        dx_set_count(entries, icount1);
2039                        dx_set_count(entries2, icount2);
2040                        dx_set_limit(entries2, dx_node_limit(dir));
2041
2042                        /* Which index block gets the new entry? */
2043                        if (at - entries >= icount1) {
2044                                frame->at = at = at - entries - icount1 + entries2;
2045                                frame->entries = entries = entries2;
2046                                swap(frame->bh, bh2);
2047                        }
2048                        dx_insert_block(frames + 0, hash2, newblock);
2049                        dxtrace(dx_show_index("node", frames[1].entries));
2050                        dxtrace(dx_show_index("node",
2051                               ((struct dx_node *) bh2->b_data)->entries));
2052                        err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2053                        if (err)
2054                                goto journal_error;
2055                        brelse (bh2);
2056                } else {
2057                        dxtrace(printk(KERN_DEBUG
2058                                       "Creating second level index...\n"));
2059                        memcpy((char *) entries2, (char *) entries,
2060                               icount * sizeof(struct dx_entry));
2061                        dx_set_limit(entries2, dx_node_limit(dir));
2062
2063                        /* Set up root */
2064                        dx_set_count(entries, 1);
2065                        dx_set_block(entries + 0, newblock);
2066                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
2067
2068                        /* Add new access path frame */
2069                        frame = frames + 1;
2070                        frame->at = at = at - entries + entries2;
2071                        frame->entries = entries = entries2;
2072                        frame->bh = bh2;
2073                        err = ext4_journal_get_write_access(handle,
2074                                                             frame->bh);
2075                        if (err)
2076                                goto journal_error;
2077                }
2078                err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
2079                if (err) {
2080                        ext4_std_error(inode->i_sb, err);
2081                        goto cleanup;
2082                }
2083        }
2084        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
2085        if (!de)
2086                goto cleanup;
2087        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
2088        goto cleanup;
2089
2090journal_error:
2091        ext4_std_error(dir->i_sb, err);
2092cleanup:
2093        brelse(bh);
2094        dx_release(frames);
2095        return err;
2096}
2097
2098/*
2099 * ext4_generic_delete_entry deletes a directory entry by merging it
2100 * with the previous entry
2101 */
2102int ext4_generic_delete_entry(handle_t *handle,
2103                              struct inode *dir,
2104                              struct ext4_dir_entry_2 *de_del,
2105                              struct buffer_head *bh,
2106                              void *entry_buf,
2107                              int buf_size,
2108                              int csum_size)
2109{
2110        struct ext4_dir_entry_2 *de, *pde;
2111        unsigned int blocksize = dir->i_sb->s_blocksize;
2112        int i;
2113
2114        i = 0;
2115        pde = NULL;
2116        de = (struct ext4_dir_entry_2 *)entry_buf;
2117        while (i < buf_size - csum_size) {
2118                if (ext4_check_dir_entry(dir, NULL, de, bh,
2119                                         bh->b_data, bh->b_size, i))
2120                        return -EIO;
2121                if (de == de_del)  {
2122                        if (pde)
2123                                pde->rec_len = ext4_rec_len_to_disk(
2124                                        ext4_rec_len_from_disk(pde->rec_len,
2125                                                               blocksize) +
2126                                        ext4_rec_len_from_disk(de->rec_len,
2127                                                               blocksize),
2128                                        blocksize);
2129                        else
2130                                de->inode = 0;
2131                        dir->i_version++;
2132                        return 0;
2133                }
2134                i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2135                pde = de;
2136                de = ext4_next_entry(de, blocksize);
2137        }
2138        return -ENOENT;
2139}
2140
2141static int ext4_delete_entry(handle_t *handle,
2142                             struct inode *dir,
2143                             struct ext4_dir_entry_2 *de_del,
2144                             struct buffer_head *bh)
2145{
2146        int err, csum_size = 0;
2147
2148        if (ext4_has_inline_data(dir)) {
2149                int has_inline_data = 1;
2150                err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2151                                               &has_inline_data);
2152                if (has_inline_data)
2153                        return err;
2154        }
2155
2156        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2157                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2158                csum_size = sizeof(struct ext4_dir_entry_tail);
2159
2160        BUFFER_TRACE(bh, "get_write_access");
2161        err = ext4_journal_get_write_access(handle, bh);
2162        if (unlikely(err))
2163                goto out;
2164
2165        err = ext4_generic_delete_entry(handle, dir, de_del,
2166                                        bh, bh->b_data,
2167                                        dir->i_sb->s_blocksize, csum_size);
2168        if (err)
2169                goto out;
2170
2171        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2172        err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2173        if (unlikely(err))
2174                goto out;
2175
2176        return 0;
2177out:
2178        if (err != -ENOENT)
2179                ext4_std_error(dir->i_sb, err);
2180        return err;
2181}
2182
2183/*
2184 * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
2185 * since this indicates that nlinks count was previously 1.
2186 */
2187static void ext4_inc_count(handle_t *handle, struct inode *inode)
2188{
2189        inc_nlink(inode);
2190        if (is_dx(inode) && inode->i_nlink > 1) {
2191                /* limit is 16-bit i_links_count */
2192                if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
2193                        set_nlink(inode, 1);
2194                        EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
2195                                              EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
2196                }
2197        }
2198}
2199
2200/*
2201 * If a directory had nlink == 1, then we should let it be 1. This indicates
2202 * directory has >EXT4_LINK_MAX subdirs.
2203 */
2204static void ext4_dec_count(handle_t *handle, struct inode *inode)
2205{
2206        if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2207                drop_nlink(inode);
2208}
2209
2210
2211static int ext4_add_nondir(handle_t *handle,
2212                struct dentry *dentry, struct inode *inode)
2213{
2214        int err = ext4_add_entry(handle, dentry, inode);
2215        if (!err) {
2216                ext4_mark_inode_dirty(handle, inode);
2217                unlock_new_inode(inode);
2218                d_instantiate(dentry, inode);
2219                return 0;
2220        }
2221        drop_nlink(inode);
2222        unlock_new_inode(inode);
2223        iput(inode);
2224        return err;
2225}
2226
2227/*
2228 * By the time this is called, we already have created
2229 * the directory cache entry for the new file, but it
2230 * is so far negative - it has no inode.
2231 *
2232 * If the create succeeds, we fill in the inode information
2233 * with d_instantiate().
2234 */
2235static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2236                       bool excl)
2237{
2238        handle_t *handle;
2239        struct inode *inode;
2240        int err, credits, retries = 0;
2241
2242        dquot_initialize(dir);
2243
2244        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2245                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2246retry:
2247        inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2248                                            NULL, EXT4_HT_DIR, credits);
2249        handle = ext4_journal_current_handle();
2250        err = PTR_ERR(inode);
2251        if (!IS_ERR(inode)) {
2252                inode->i_op = &ext4_file_inode_operations;
2253                inode->i_fop = &ext4_file_operations;
2254                ext4_set_aops(inode);
2255                err = ext4_add_nondir(handle, dentry, inode);
2256                if (!err && IS_DIRSYNC(dir))
2257                        ext4_handle_sync(handle);
2258        }
2259        if (handle)
2260                ext4_journal_stop(handle);
2261        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2262                goto retry;
2263        return err;
2264}
2265
2266static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2267                      umode_t mode, dev_t rdev)
2268{
2269        handle_t *handle;
2270        struct inode *inode;
2271        int err, credits, retries = 0;
2272
2273        if (!new_valid_dev(rdev))
2274                return -EINVAL;
2275
2276        dquot_initialize(dir);
2277
2278        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2279                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2280retry:
2281        inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2282                                            NULL, EXT4_HT_DIR, credits);
2283        handle = ext4_journal_current_handle();
2284        err = PTR_ERR(inode);
2285        if (!IS_ERR(inode)) {
2286                init_special_inode(inode, inode->i_mode, rdev);
2287                inode->i_op = &ext4_special_inode_operations;
2288                err = ext4_add_nondir(handle, dentry, inode);
2289                if (!err && IS_DIRSYNC(dir))
2290                        ext4_handle_sync(handle);
2291        }
2292        if (handle)
2293                ext4_journal_stop(handle);
2294        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2295                goto retry;
2296        return err;
2297}
2298
2299static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2300{
2301        handle_t *handle;
2302        struct inode *inode;
2303        int err, retries = 0;
2304
2305        dquot_initialize(dir);
2306
2307retry:
2308        inode = ext4_new_inode_start_handle(dir, mode,
2309                                            NULL, 0, NULL,
2310                                            EXT4_HT_DIR,
2311                        EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2312                          4 + EXT4_XATTR_TRANS_BLOCKS);
2313        handle = ext4_journal_current_handle();
2314        err = PTR_ERR(inode);
2315        if (!IS_ERR(inode)) {
2316                inode->i_op = &ext4_file_inode_operations;
2317                inode->i_fop = &ext4_file_operations;
2318                ext4_set_aops(inode);
2319                d_tmpfile(dentry, inode);
2320                err = ext4_orphan_add(handle, inode);
2321                if (err)
2322                        goto err_drop_inode;
2323                mark_inode_dirty(inode);
2324                unlock_new_inode(inode);
2325        }
2326        if (handle)
2327                ext4_journal_stop(handle);
2328        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2329                goto retry;
2330        return err;
2331err_drop_inode:
2332        ext4_journal_stop(handle);
2333        unlock_new_inode(inode);
2334        iput(inode);
2335        return err;
2336}
2337
2338struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2339                          struct ext4_dir_entry_2 *de,
2340                          int blocksize, int csum_size,
2341                          unsigned int parent_ino, int dotdot_real_len)
2342{
2343        de->inode = cpu_to_le32(inode->i_ino);
2344        de->name_len = 1;
2345        de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2346                                           blocksize);
2347        strcpy(de->name, ".");
2348        ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2349
2350        de = ext4_next_entry(de, blocksize);
2351        de->inode = cpu_to_le32(parent_ino);
2352        de->name_len = 2;
2353        if (!dotdot_real_len)
2354                de->rec_len = ext4_rec_len_to_disk(blocksize -
2355                                        (csum_size + EXT4_DIR_REC_LEN(1)),
2356                                        blocksize);
2357        else
2358                de->rec_len = ext4_rec_len_to_disk(
2359                                EXT4_DIR_REC_LEN(de->name_len), blocksize);
2360        strcpy(de->name, "..");
2361        ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2362
2363        return ext4_next_entry(de, blocksize);
2364}
2365
2366static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2367                             struct inode *inode)
2368{
2369        struct buffer_head *dir_block = NULL;
2370        struct ext4_dir_entry_2 *de;
2371        struct ext4_dir_entry_tail *t;
2372        ext4_lblk_t block = 0;
2373        unsigned int blocksize = dir->i_sb->s_blocksize;
2374        int csum_size = 0;
2375        int err;
2376
2377        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2378                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2379                csum_size = sizeof(struct ext4_dir_entry_tail);
2380
2381        if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2382                err = ext4_try_create_inline_dir(handle, dir, inode);
2383                if (err < 0 && err != -ENOSPC)
2384                        goto out;
2385                if (!err)
2386                        goto out;
2387        }
2388
2389        inode->i_size = 0;
2390        dir_block = ext4_append(handle, inode, &block);
2391        if (IS_ERR(dir_block))
2392                return PTR_ERR(dir_block);
2393        BUFFER_TRACE(dir_block, "get_write_access");
2394        err = ext4_journal_get_write_access(handle, dir_block);
2395        if (err)
2396                goto out;
2397        de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2398        ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2399        set_nlink(inode, 2);
2400        if (csum_size) {
2401                t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2402                initialize_dirent_tail(t, blocksize);
2403        }
2404
2405        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2406        err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2407        if (err)
2408                goto out;
2409        set_buffer_verified(dir_block);
2410out:
2411        brelse(dir_block);
2412        return err;
2413}
2414
2415static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2416{
2417        handle_t *handle;
2418        struct inode *inode;
2419        int err, credits, retries = 0;
2420
2421        if (EXT4_DIR_LINK_MAX(dir))
2422                return -EMLINK;
2423
2424        dquot_initialize(dir);
2425
2426        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2427                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2428retry:
2429        inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2430                                            &dentry->d_name,
2431                                            0, NULL, EXT4_HT_DIR, credits);
2432        handle = ext4_journal_current_handle();
2433        err = PTR_ERR(inode);
2434        if (IS_ERR(inode))
2435                goto out_stop;
2436
2437        inode->i_op = &ext4_dir_inode_operations;
2438        inode->i_fop = &ext4_dir_operations;
2439        err = ext4_init_new_dir(handle, dir, inode);
2440        if (err)
2441                goto out_clear_inode;
2442        err = ext4_mark_inode_dirty(handle, inode);
2443        if (!err)
2444                err = ext4_add_entry(handle, dentry, inode);
2445        if (err) {
2446out_clear_inode:
2447                clear_nlink(inode);
2448                unlock_new_inode(inode);
2449                ext4_mark_inode_dirty(handle, inode);
2450                iput(inode);
2451                goto out_stop;
2452        }
2453        ext4_inc_count(handle, dir);
2454        ext4_update_dx_flag(dir);
2455        err = ext4_mark_inode_dirty(handle, dir);
2456        if (err)
2457                goto out_clear_inode;
2458        unlock_new_inode(inode);
2459        d_instantiate(dentry, inode);
2460        if (IS_DIRSYNC(dir))
2461                ext4_handle_sync(handle);
2462
2463out_stop:
2464        if (handle)
2465                ext4_journal_stop(handle);
2466        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2467                goto retry;
2468        return err;
2469}
2470
2471/*
2472 * routine to check that the specified directory is empty (for rmdir)
2473 */
2474static int empty_dir(struct inode *inode)
2475{
2476        unsigned int offset;
2477        struct buffer_head *bh;
2478        struct ext4_dir_entry_2 *de, *de1;
2479        struct super_block *sb;
2480        int err = 0;
2481
2482        if (ext4_has_inline_data(inode)) {
2483                int has_inline_data = 1;
2484
2485                err = empty_inline_dir(inode, &has_inline_data);
2486                if (has_inline_data)
2487                        return err;
2488        }
2489
2490        sb = inode->i_sb;
2491        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2492                EXT4_ERROR_INODE(inode, "invalid size");
2493                return 1;
2494        }
2495        bh = ext4_read_dirblock(inode, 0, EITHER);
2496        if (IS_ERR(bh))
2497                return 1;
2498
2499        de = (struct ext4_dir_entry_2 *) bh->b_data;
2500        de1 = ext4_next_entry(de, sb->s_blocksize);
2501        if (le32_to_cpu(de->inode) != inode->i_ino ||
2502                        !le32_to_cpu(de1->inode) ||
2503                        strcmp(".", de->name) ||
2504                        strcmp("..", de1->name)) {
2505                ext4_warning(inode->i_sb,
2506                             "bad directory (dir #%lu) - no `.' or `..'",
2507                             inode->i_ino);
2508                brelse(bh);
2509                return 1;
2510        }
2511        offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
2512                 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
2513        de = ext4_next_entry(de1, sb->s_blocksize);
2514        while (offset < inode->i_size) {
2515                if (!bh ||
2516                    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2517                        unsigned int lblock;
2518                        err = 0;
2519                        brelse(bh);
2520                        lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2521                        bh = ext4_read_dirblock(inode, lblock, EITHER);
2522                        if (IS_ERR(bh))
2523                                return 1;
2524                        de = (struct ext4_dir_entry_2 *) bh->b_data;
2525                }
2526                if (ext4_check_dir_entry(inode, NULL, de, bh,
2527                                         bh->b_data, bh->b_size, offset)) {
2528                        de = (struct ext4_dir_entry_2 *)(bh->b_data +
2529                                                         sb->s_blocksize);
2530                        offset = (offset | (sb->s_blocksize - 1)) + 1;
2531                        continue;
2532                }
2533                if (le32_to_cpu(de->inode)) {
2534                        brelse(bh);
2535                        return 0;
2536                }
2537                offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2538                de = ext4_next_entry(de, sb->s_blocksize);
2539        }
2540        brelse(bh);
2541        return 1;
2542}
2543
2544/* ext4_orphan_add() links an unlinked or truncated inode into a list of
2545 * such inodes, starting at the superblock, in case we crash before the
2546 * file is closed/deleted, or in case the inode truncate spans multiple
2547 * transactions and the last transaction is not recovered after a crash.
2548 *
2549 * At filesystem recovery time, we walk this list deleting unlinked
2550 * inodes and truncating linked inodes in ext4_orphan_cleanup().
2551 */
2552int ext4_orphan_add(handle_t *handle, struct inode *inode)
2553{
2554        struct super_block *sb = inode->i_sb;
2555        struct ext4_iloc iloc;
2556        int err = 0, rc;
2557
2558        if (!EXT4_SB(sb)->s_journal)
2559                return 0;
2560
2561        mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2562        if (!list_empty(&EXT4_I(inode)->i_orphan))
2563                goto out_unlock;
2564
2565        /*
2566         * Orphan handling is only valid for files with data blocks
2567         * being truncated, or files being unlinked. Note that we either
2568         * hold i_mutex, or the inode can not be referenced from outside,
2569         * so i_nlink should not be bumped due to race
2570         */
2571        J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2572                  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2573
2574        BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
2575        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
2576        if (err)
2577                goto out_unlock;
2578
2579        err = ext4_reserve_inode_write(handle, inode, &iloc);
2580        if (err)
2581                goto out_unlock;
2582        /*
2583         * Due to previous errors inode may be already a part of on-disk
2584         * orphan list. If so skip on-disk list modification.
2585         */
2586        if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
2587                (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
2588                        goto mem_insert;
2589
2590        /* Insert this inode at the head of the on-disk orphan list... */
2591        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2592        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2593        err = ext4_handle_dirty_super(handle, sb);
2594        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2595        if (!err)
2596                err = rc;
2597
2598        /* Only add to the head of the in-memory list if all the
2599         * previous operations succeeded.  If the orphan_add is going to
2600         * fail (possibly taking the journal offline), we can't risk
2601         * leaving the inode on the orphan list: stray orphan-list
2602         * entries can cause panics at unmount time.
2603         *
2604         * This is safe: on error we're going to ignore the orphan list
2605         * anyway on the next recovery. */
2606mem_insert:
2607        if (!err)
2608                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2609
2610        jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2611        jbd_debug(4, "orphan inode %lu will point to %d\n",
2612                        inode->i_ino, NEXT_ORPHAN(inode));
2613out_unlock:
2614        mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2615        ext4_std_error(inode->i_sb, err);
2616        return err;
2617}
2618
2619/*
2620 * ext4_orphan_del() removes an unlinked or truncated inode from the list
2621 * of such inodes stored on disk, because it is finally being cleaned up.
2622 */
2623int ext4_orphan_del(handle_t *handle, struct inode *inode)
2624{
2625        struct list_head *prev;
2626        struct ext4_inode_info *ei = EXT4_I(inode);
2627        struct ext4_sb_info *sbi;
2628        __u32 ino_next;
2629        struct ext4_iloc iloc;
2630        int err = 0;
2631
2632        if ((!EXT4_SB(inode->i_sb)->s_journal) &&
2633            !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
2634                return 0;
2635
2636        mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2637        if (list_empty(&ei->i_orphan))
2638                goto out;
2639
2640        ino_next = NEXT_ORPHAN(inode);
2641        prev = ei->i_orphan.prev;
2642        sbi = EXT4_SB(inode->i_sb);
2643
2644        jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2645
2646        list_del_init(&ei->i_orphan);
2647
2648        /* If we're on an error path, we may not have a valid
2649         * transaction handle with which to update the orphan list on
2650         * disk, but we still need to remove the inode from the linked
2651         * list in memory. */
2652        if (!handle)
2653                goto out;
2654
2655        err = ext4_reserve_inode_write(handle, inode, &iloc);
2656        if (err)
2657                goto out_err;
2658
2659        if (prev == &sbi->s_orphan) {
2660                jbd_debug(4, "superblock will point to %u\n", ino_next);
2661                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2662                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2663                if (err)
2664                        goto out_brelse;
2665                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2666                err = ext4_handle_dirty_super(handle, inode->i_sb);
2667        } else {
2668                struct ext4_iloc iloc2;
2669                struct inode *i_prev =
2670                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2671
2672                jbd_debug(4, "orphan inode %lu will point to %u\n",
2673                          i_prev->i_ino, ino_next);
2674                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2675                if (err)
2676                        goto out_brelse;
2677                NEXT_ORPHAN(i_prev) = ino_next;
2678                err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2679        }
2680        if (err)
2681                goto out_brelse;
2682        NEXT_ORPHAN(inode) = 0;
2683        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2684
2685out_err:
2686        ext4_std_error(inode->i_sb, err);
2687out:
2688        mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2689        return err;
2690
2691out_brelse:
2692        brelse(iloc.bh);
2693        goto out_err;
2694}
2695
2696static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2697{
2698        int retval;
2699        struct inode *inode;
2700        struct buffer_head *bh;
2701        struct ext4_dir_entry_2 *de;
2702        handle_t *handle = NULL;
2703
2704        /* Initialize quotas before so that eventual writes go in
2705         * separate transaction */
2706        dquot_initialize(dir);
2707        dquot_initialize(dentry->d_inode);
2708
2709        retval = -ENOENT;
2710        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2711        if (!bh)
2712                goto end_rmdir;
2713
2714        inode = dentry->d_inode;
2715
2716        retval = -EIO;
2717        if (le32_to_cpu(de->inode) != inode->i_ino)
2718                goto end_rmdir;
2719
2720        retval = -ENOTEMPTY;
2721        if (!empty_dir(inode))
2722                goto end_rmdir;
2723
2724        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2725                                    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2726        if (IS_ERR(handle)) {
2727                retval = PTR_ERR(handle);
2728                handle = NULL;
2729                goto end_rmdir;
2730        }
2731
2732        if (IS_DIRSYNC(dir))
2733                ext4_handle_sync(handle);
2734
2735        retval = ext4_delete_entry(handle, dir, de, bh);
2736        if (retval)
2737                goto end_rmdir;
2738        if (!EXT4_DIR_LINK_EMPTY(inode))
2739                ext4_warning(inode->i_sb,
2740                             "empty directory has too many links (%d)",
2741                             inode->i_nlink);
2742        inode->i_version++;
2743        clear_nlink(inode);
2744        /* There's no need to set i_disksize: the fact that i_nlink is
2745         * zero will ensure that the right thing happens during any
2746         * recovery. */
2747        inode->i_size = 0;
2748        ext4_orphan_add(handle, inode);
2749        inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
2750        ext4_mark_inode_dirty(handle, inode);
2751        ext4_dec_count(handle, dir);
2752        ext4_update_dx_flag(dir);
2753        ext4_mark_inode_dirty(handle, dir);
2754
2755end_rmdir:
2756        brelse(bh);
2757        if (handle)
2758                ext4_journal_stop(handle);
2759        return retval;
2760}
2761
2762static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2763{
2764        int retval;
2765        struct inode *inode;
2766        struct buffer_head *bh;
2767        struct ext4_dir_entry_2 *de;
2768        handle_t *handle = NULL;
2769
2770        trace_ext4_unlink_enter(dir, dentry);
2771        /* Initialize quotas before so that eventual writes go
2772         * in separate transaction */
2773        dquot_initialize(dir);
2774        dquot_initialize(dentry->d_inode);
2775
2776        retval = -ENOENT;
2777        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2778        if (!bh)
2779                goto end_unlink;
2780
2781        inode = dentry->d_inode;
2782
2783        retval = -EIO;
2784        if (le32_to_cpu(de->inode) != inode->i_ino)
2785                goto end_unlink;
2786
2787        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2788                                    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2789        if (IS_ERR(handle)) {
2790                retval = PTR_ERR(handle);
2791                handle = NULL;
2792                goto end_unlink;
2793        }
2794
2795        if (IS_DIRSYNC(dir))
2796                ext4_handle_sync(handle);
2797
2798        if (!inode->i_nlink) {
2799                ext4_warning(inode->i_sb,
2800                             "Deleting nonexistent file (%lu), %d",
2801                             inode->i_ino, inode->i_nlink);
2802                set_nlink(inode, 1);
2803        }
2804        retval = ext4_delete_entry(handle, dir, de, bh);
2805        if (retval)
2806                goto end_unlink;
2807        dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2808        ext4_update_dx_flag(dir);
2809        ext4_mark_inode_dirty(handle, dir);
2810        drop_nlink(inode);
2811        if (!inode->i_nlink)
2812                ext4_orphan_add(handle, inode);
2813        inode->i_ctime = ext4_current_time(inode);
2814        ext4_mark_inode_dirty(handle, inode);
2815        retval = 0;
2816
2817end_unlink:
2818        brelse(bh);
2819        if (handle)
2820                ext4_journal_stop(handle);
2821        trace_ext4_unlink_exit(dentry, retval);
2822        return retval;
2823}
2824
2825static int ext4_symlink(struct inode *dir,
2826                        struct dentry *dentry, const char *symname)
2827{
2828        handle_t *handle;
2829        struct inode *inode;
2830        int l, err, retries = 0;
2831        int credits;
2832
2833        l = strlen(symname)+1;
2834        if (l > dir->i_sb->s_blocksize)
2835                return -ENAMETOOLONG;
2836
2837        dquot_initialize(dir);
2838
2839        if (l > EXT4_N_BLOCKS * 4) {
2840                /*
2841                 * For non-fast symlinks, we just allocate inode and put it on
2842                 * orphan list in the first transaction => we need bitmap,
2843                 * group descriptor, sb, inode block, quota blocks, and
2844                 * possibly selinux xattr blocks.
2845                 */
2846                credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2847                          EXT4_XATTR_TRANS_BLOCKS;
2848        } else {
2849                /*
2850                 * Fast symlink. We have to add entry to directory
2851                 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
2852                 * allocate new inode (bitmap, group descriptor, inode block,
2853                 * quota blocks, sb is already counted in previous macros).
2854                 */
2855                credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2856                          EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
2857        }
2858retry:
2859        inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2860                                            &dentry->d_name, 0, NULL,
2861                                            EXT4_HT_DIR, credits);
2862        handle = ext4_journal_current_handle();
2863        err = PTR_ERR(inode);
2864        if (IS_ERR(inode))
2865                goto out_stop;
2866
2867        if (l > EXT4_N_BLOCKS * 4) {
2868                inode->i_op = &ext4_symlink_inode_operations;
2869                ext4_set_aops(inode);
2870                /*
2871                 * We cannot call page_symlink() with transaction started
2872                 * because it calls into ext4_write_begin() which can wait
2873                 * for transaction commit if we are running out of space
2874                 * and thus we deadlock. So we have to stop transaction now
2875                 * and restart it when symlink contents is written.
2876                 * 
2877                 * To keep fs consistent in case of crash, we have to put inode
2878                 * to orphan list in the mean time.
2879                 */
2880                drop_nlink(inode);
2881                err = ext4_orphan_add(handle, inode);
2882                ext4_journal_stop(handle);
2883                if (err)
2884                        goto err_drop_inode;
2885                err = __page_symlink(inode, symname, l, 1);
2886                if (err)
2887                        goto err_drop_inode;
2888                /*
2889                 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2890                 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2891                 */
2892                handle = ext4_journal_start(dir, EXT4_HT_DIR,
2893                                EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2894                                EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2895                if (IS_ERR(handle)) {
2896                        err = PTR_ERR(handle);
2897                        goto err_drop_inode;
2898                }
2899                set_nlink(inode, 1);
2900                err = ext4_orphan_del(handle, inode);
2901                if (err) {
2902                        ext4_journal_stop(handle);
2903                        clear_nlink(inode);
2904                        goto err_drop_inode;
2905                }
2906        } else {
2907                /* clear the extent format for fast symlink */
2908                ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
2909                inode->i_op = &ext4_fast_symlink_inode_operations;
2910                memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2911                inode->i_size = l-1;
2912        }
2913        EXT4_I(inode)->i_disksize = inode->i_size;
2914        err = ext4_add_nondir(handle, dentry, inode);
2915        if (!err && IS_DIRSYNC(dir))
2916                ext4_handle_sync(handle);
2917
2918out_stop:
2919        if (handle)
2920                ext4_journal_stop(handle);
2921        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2922                goto retry;
2923        return err;
2924err_drop_inode:
2925        unlock_new_inode(inode);
2926        iput(inode);
2927        return err;
2928}
2929
2930static int ext4_link(struct dentry *old_dentry,
2931                     struct inode *dir, struct dentry *dentry)
2932{
2933        handle_t *handle;
2934        struct inode *inode = old_dentry->d_inode;
2935        int err, retries = 0;
2936
2937        if (inode->i_nlink >= EXT4_LINK_MAX)
2938                return -EMLINK;
2939
2940        dquot_initialize(dir);
2941
2942retry:
2943        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2944                (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2945                 EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
2946        if (IS_ERR(handle))
2947                return PTR_ERR(handle);
2948
2949        if (IS_DIRSYNC(dir))
2950                ext4_handle_sync(handle);
2951
2952        inode->i_ctime = ext4_current_time(inode);
2953        ext4_inc_count(handle, inode);
2954        ihold(inode);
2955
2956        err = ext4_add_entry(handle, dentry, inode);
2957        if (!err) {
2958                ext4_mark_inode_dirty(handle, inode);
2959                /* this can happen only for tmpfile being
2960                 * linked the first time
2961                 */
2962                if (inode->i_nlink == 1)
2963                        ext4_orphan_del(handle, inode);
2964                d_instantiate(dentry, inode);
2965        } else {
2966                drop_nlink(inode);
2967                iput(inode);
2968        }
2969        ext4_journal_stop(handle);
2970        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2971                goto retry;
2972        return err;
2973}
2974
2975
2976/*
2977 * Try to find buffer head where contains the parent block.
2978 * It should be the inode block if it is inlined or the 1st block
2979 * if it is a normal dir.
2980 */
2981static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2982                                        struct inode *inode,
2983                                        int *retval,
2984                                        struct ext4_dir_entry_2 **parent_de,
2985                                        int *inlined)
2986{
2987        struct buffer_head *bh;
2988
2989        if (!ext4_has_inline_data(inode)) {
2990                bh = ext4_read_dirblock(inode, 0, EITHER);
2991                if (IS_ERR(bh)) {
2992                        *retval = PTR_ERR(bh);
2993                        return NULL;
2994                }
2995                *parent_de = ext4_next_entry(
2996                                        (struct ext4_dir_entry_2 *)bh->b_data,
2997                                        inode->i_sb->s_blocksize);
2998                return bh;
2999        }
3000
3001        *inlined = 1;
3002        return ext4_get_first_inline_block(inode, parent_de, retval);
3003}
3004
3005/*
3006 * Anybody can rename anything with this: the permission checks are left to the
3007 * higher-level routines.
3008 */
3009static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3010                       struct inode *new_dir, struct dentry *new_dentry)
3011{
3012        handle_t *handle;
3013        struct inode *old_inode, *new_inode;
3014        struct buffer_head *old_bh, *new_bh, *dir_bh;
3015        struct ext4_dir_entry_2 *old_de, *new_de;
3016        int retval, force_da_alloc = 0;
3017        int inlined = 0, new_inlined = 0;
3018        struct ext4_dir_entry_2 *parent_de;
3019
3020        dquot_initialize(old_dir);
3021        dquot_initialize(new_dir);
3022
3023        old_bh = new_bh = dir_bh = NULL;
3024
3025        /* Initialize quotas before so that eventual writes go
3026         * in separate transaction */
3027        if (new_dentry->d_inode)
3028                dquot_initialize(new_dentry->d_inode);
3029        handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3030                (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3031                 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3032        if (IS_ERR(handle))
3033                return PTR_ERR(handle);
3034
3035        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
3036                ext4_handle_sync(handle);
3037
3038        old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
3039        /*
3040         *  Check for inode number is _not_ due to possible IO errors.
3041         *  We might rmdir the source, keep it as pwd of some process
3042         *  and merrily kill the link to whatever was created under the
3043         *  same name. Goodbye sticky bit ;-<
3044         */
3045        old_inode = old_dentry->d_inode;
3046        retval = -ENOENT;
3047        if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
3048                goto end_rename;
3049
3050        new_inode = new_dentry->d_inode;
3051        new_bh = ext4_find_entry(new_dir, &new_dentry->d_name,
3052                                 &new_de, &new_inlined);
3053        if (new_bh) {
3054                if (!new_inode) {
3055                        brelse(new_bh);
3056                        new_bh = NULL;
3057                }
3058        }
3059        if (S_ISDIR(old_inode->i_mode)) {
3060                if (new_inode) {
3061                        retval = -ENOTEMPTY;
3062                        if (!empty_dir(new_inode))
3063                                goto end_rename;
3064                }
3065                retval = -EIO;
3066                dir_bh = ext4_get_first_dir_block(handle, old_inode,
3067                                                  &retval, &parent_de,
3068                                                  &inlined);
3069                if (!dir_bh)
3070                        goto end_rename;
3071                if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
3072                        goto end_rename;
3073                retval = -EMLINK;
3074                if (!new_inode && new_dir != old_dir &&
3075                    EXT4_DIR_LINK_MAX(new_dir))
3076                        goto end_rename;
3077                BUFFER_TRACE(dir_bh, "get_write_access");
3078                retval = ext4_journal_get_write_access(handle, dir_bh);
3079                if (retval)
3080                        goto end_rename;
3081        }
3082        if (!new_bh) {
3083                retval = ext4_add_entry(handle, new_dentry, old_inode);
3084                if (retval)
3085                        goto end_rename;
3086        } else {
3087                BUFFER_TRACE(new_bh, "get write access");
3088                retval = ext4_journal_get_write_access(handle, new_bh);
3089                if (retval)
3090                        goto end_rename;
3091                new_de->inode = cpu_to_le32(old_inode->i_ino);
3092                if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
3093                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
3094                        new_de->file_type = old_de->file_type;
3095                new_dir->i_version++;
3096                new_dir->i_ctime = new_dir->i_mtime =
3097                                        ext4_current_time(new_dir);
3098                ext4_mark_inode_dirty(handle, new_dir);
3099                BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
3100                if (!new_inlined) {
3101                        retval = ext4_handle_dirty_dirent_node(handle,
3102                                                               new_dir, new_bh);
3103                        if (unlikely(retval)) {
3104                                ext4_std_error(new_dir->i_sb, retval);
3105                                goto end_rename;
3106                        }
3107                }
3108                brelse(new_bh);
3109                new_bh = NULL;
3110        }
3111
3112        /*
3113         * Like most other Unix systems, set the ctime for inodes on a
3114         * rename.
3115         */
3116        old_inode->i_ctime = ext4_current_time(old_inode);
3117        ext4_mark_inode_dirty(handle, old_inode);
3118
3119        /*
3120         * ok, that's it
3121         */
3122        if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
3123            old_de->name_len != old_dentry->d_name.len ||
3124            strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
3125            (retval = ext4_delete_entry(handle, old_dir,
3126                                        old_de, old_bh)) == -ENOENT) {
3127                /* old_de could have moved from under us during htree split, so
3128                 * make sure that we are deleting the right entry.  We might
3129                 * also be pointing to a stale entry in the unused part of
3130                 * old_bh so just checking inum and the name isn't enough. */
3131                struct buffer_head *old_bh2;
3132                struct ext4_dir_entry_2 *old_de2;
3133
3134                old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name,
3135                                          &old_de2, NULL);
3136                if (old_bh2) {
3137                        retval = ext4_delete_entry(handle, old_dir,
3138                                                   old_de2, old_bh2);
3139                        brelse(old_bh2);
3140                }
3141        }
3142        if (retval) {
3143                ext4_warning(old_dir->i_sb,
3144                                "Deleting old file (%lu), %d, error=%d",
3145                                old_dir->i_ino, old_dir->i_nlink, retval);
3146        }
3147
3148        if (new_inode) {
3149                ext4_dec_count(handle, new_inode);
3150                new_inode->i_ctime = ext4_current_time(new_inode);
3151        }
3152        old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
3153        ext4_update_dx_flag(old_dir);
3154        if (dir_bh) {
3155                parent_de->inode = cpu_to_le32(new_dir->i_ino);
3156                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
3157                if (!inlined) {
3158                        if (is_dx(old_inode)) {
3159                                retval = ext4_handle_dirty_dx_node(handle,
3160                                                                   old_inode,
3161                                                                   dir_bh);
3162                        } else {
3163                                retval = ext4_handle_dirty_dirent_node(handle,
3164                                                        old_inode, dir_bh);
3165                        }
3166                } else {
3167                        retval = ext4_mark_inode_dirty(handle, old_inode);
3168                }
3169                if (retval) {
3170                        ext4_std_error(old_dir->i_sb, retval);
3171                        goto end_rename;
3172                }
3173                ext4_dec_count(handle, old_dir);
3174                if (new_inode) {
3175                        /* checked empty_dir above, can't have another parent,
3176                         * ext4_dec_count() won't work for many-linked dirs */
3177                        clear_nlink(new_inode);
3178                } else {
3179                        ext4_inc_count(handle, new_dir);
3180                        ext4_update_dx_flag(new_dir);
3181                        ext4_mark_inode_dirty(handle, new_dir);
3182                }
3183        }
3184        ext4_mark_inode_dirty(handle, old_dir);
3185        if (new_inode) {
3186                ext4_mark_inode_dirty(handle, new_inode);
3187                if (!new_inode->i_nlink)
3188                        ext4_orphan_add(handle, new_inode);
3189                if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3190                        force_da_alloc = 1;
3191        }
3192        retval = 0;
3193
3194end_rename:
3195        brelse(dir_bh);
3196        brelse(old_bh);
3197        brelse(new_bh);
3198        ext4_journal_stop(handle);
3199        if (retval == 0 && force_da_alloc)
3200                ext4_alloc_da_blocks(old_inode);
3201        return retval;
3202}
3203
3204/*
3205 * directories can handle most operations...
3206 */
3207const struct inode_operations ext4_dir_inode_operations = {
3208        .create         = ext4_create,
3209        .lookup         = ext4_lookup,
3210        .link           = ext4_link,
3211        .unlink         = ext4_unlink,
3212        .symlink        = ext4_symlink,
3213        .mkdir          = ext4_mkdir,
3214        .rmdir          = ext4_rmdir,
3215        .mknod          = ext4_mknod,
3216        .tmpfile        = ext4_tmpfile,
3217        .rename         = ext4_rename,
3218        .setattr        = ext4_setattr,
3219        .setxattr       = generic_setxattr,
3220        .getxattr       = generic_getxattr,
3221        .listxattr      = ext4_listxattr,
3222        .removexattr    = generic_removexattr,
3223        .get_acl        = ext4_get_acl,
3224        .fiemap         = ext4_fiemap,
3225};
3226
3227const struct inode_operations ext4_special_inode_operations = {
3228        .setattr        = ext4_setattr,
3229        .setxattr       = generic_setxattr,
3230        .getxattr       = generic_getxattr,
3231        .listxattr      = ext4_listxattr,
3232        .removexattr    = generic_removexattr,
3233        .get_acl        = ext4_get_acl,
3234};
3235