linux/fs/ext4/namei.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/namei.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/namei.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 *  Directory entry file type support and forward compatibility hooks
  18 *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
  19 *  Hash Tree Directory indexing (c)
  20 *      Daniel Phillips, 2001
  21 *  Hash Tree Directory indexing porting
  22 *      Christopher Li, 2002
  23 *  Hash Tree Directory indexing cleanup
  24 *      Theodore Ts'o, 2002
  25 */
  26
  27#include <linux/fs.h>
  28#include <linux/pagemap.h>
  29#include <linux/jbd2.h>
  30#include <linux/time.h>
  31#include <linux/fcntl.h>
  32#include <linux/stat.h>
  33#include <linux/string.h>
  34#include <linux/quotaops.h>
  35#include <linux/buffer_head.h>
  36#include <linux/bio.h>
  37#include "ext4.h"
  38#include "ext4_jbd2.h"
  39
  40#include "xattr.h"
  41#include "acl.h"
  42
  43#include <trace/events/ext4.h>
  44/*
  45 * define how far ahead to read directories while searching them.
  46 */
  47#define NAMEI_RA_CHUNKS  2
  48#define NAMEI_RA_BLOCKS  4
  49#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
  50
  51static struct buffer_head *ext4_append(handle_t *handle,
  52                                        struct inode *inode,
  53                                        ext4_lblk_t *block)
  54{
  55        struct buffer_head *bh;
  56        int err = 0;
  57
  58        if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
  59                     ((inode->i_size >> 10) >=
  60                      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
  61                return ERR_PTR(-ENOSPC);
  62
  63        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
  64
  65        bh = ext4_bread(handle, inode, *block, 1, &err);
  66        if (!bh)
  67                return ERR_PTR(err);
  68        inode->i_size += inode->i_sb->s_blocksize;
  69        EXT4_I(inode)->i_disksize = inode->i_size;
  70        err = ext4_journal_get_write_access(handle, bh);
  71        if (err) {
  72                brelse(bh);
  73                ext4_std_error(inode->i_sb, err);
  74                return ERR_PTR(err);
  75        }
  76        return bh;
  77}
  78
  79static int ext4_dx_csum_verify(struct inode *inode,
  80                               struct ext4_dir_entry *dirent);
  81
  82typedef enum {
  83        EITHER, INDEX, DIRENT
  84} dirblock_type_t;
  85
  86#define ext4_read_dirblock(inode, block, type) \
  87        __ext4_read_dirblock((inode), (block), (type), __LINE__)
  88
  89static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
  90                                              ext4_lblk_t block,
  91                                              dirblock_type_t type,
  92                                              unsigned int line)
  93{
  94        struct buffer_head *bh;
  95        struct ext4_dir_entry *dirent;
  96        int err = 0, is_dx_block = 0;
  97
  98        bh = ext4_bread(NULL, inode, block, 0, &err);
  99        if (!bh) {
 100                if (err == 0) {
 101                        ext4_error_inode(inode, __func__, line, block,
 102                                               "Directory hole found");
 103                        return ERR_PTR(-EIO);
 104                }
 105                __ext4_warning(inode->i_sb, __func__, line,
 106                               "error reading directory block "
 107                               "(ino %lu, block %lu)", inode->i_ino,
 108                               (unsigned long) block);
 109                return ERR_PTR(err);
 110        }
 111        dirent = (struct ext4_dir_entry *) bh->b_data;
 112        /* Determine whether or not we have an index block */
 113        if (is_dx(inode)) {
 114                if (block == 0)
 115                        is_dx_block = 1;
 116                else if (ext4_rec_len_from_disk(dirent->rec_len,
 117                                                inode->i_sb->s_blocksize) ==
 118                         inode->i_sb->s_blocksize)
 119                        is_dx_block = 1;
 120        }
 121        if (!is_dx_block && type == INDEX) {
 122                ext4_error_inode(inode, __func__, line, block,
 123                       "directory leaf block found instead of index block");
 124                return ERR_PTR(-EIO);
 125        }
 126        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 127                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
 128            buffer_verified(bh))
 129                return bh;
 130
 131        /*
 132         * An empty leaf block can get mistaken for a index block; for
 133         * this reason, we can only check the index checksum when the
 134         * caller is sure it should be an index block.
 135         */
 136        if (is_dx_block && type == INDEX) {
 137                if (ext4_dx_csum_verify(inode, dirent))
 138                        set_buffer_verified(bh);
 139                else {
 140                        ext4_error_inode(inode, __func__, line, block,
 141                                "Directory index failed checksum");
 142                        brelse(bh);
 143                        return ERR_PTR(-EIO);
 144                }
 145        }
 146        if (!is_dx_block) {
 147                if (ext4_dirent_csum_verify(inode, dirent))
 148                        set_buffer_verified(bh);
 149                else {
 150                        ext4_error_inode(inode, __func__, line, block,
 151                                "Directory block failed checksum");
 152                        brelse(bh);
 153                        return ERR_PTR(-EIO);
 154                }
 155        }
 156        return bh;
 157}
 158
 159#ifndef assert
 160#define assert(test) J_ASSERT(test)
 161#endif
 162
 163#ifdef DX_DEBUG
 164#define dxtrace(command) command
 165#else
 166#define dxtrace(command)
 167#endif
 168
 169struct fake_dirent
 170{
 171        __le32 inode;
 172        __le16 rec_len;
 173        u8 name_len;
 174        u8 file_type;
 175};
 176
 177struct dx_countlimit
 178{
 179        __le16 limit;
 180        __le16 count;
 181};
 182
 183struct dx_entry
 184{
 185        __le32 hash;
 186        __le32 block;
 187};
 188
 189/*
 190 * dx_root_info is laid out so that if it should somehow get overlaid by a
 191 * dirent the two low bits of the hash version will be zero.  Therefore, the
 192 * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
 193 */
 194
 195struct dx_root
 196{
 197        struct fake_dirent dot;
 198        char dot_name[4];
 199        struct fake_dirent dotdot;
 200        char dotdot_name[4];
 201        struct dx_root_info
 202        {
 203                __le32 reserved_zero;
 204                u8 hash_version;
 205                u8 info_length; /* 8 */
 206                u8 indirect_levels;
 207                u8 unused_flags;
 208        }
 209        info;
 210        struct dx_entry entries[0];
 211};
 212
 213struct dx_node
 214{
 215        struct fake_dirent fake;
 216        struct dx_entry entries[0];
 217};
 218
 219
 220struct dx_frame
 221{
 222        struct buffer_head *bh;
 223        struct dx_entry *entries;
 224        struct dx_entry *at;
 225};
 226
 227struct dx_map_entry
 228{
 229        u32 hash;
 230        u16 offs;
 231        u16 size;
 232};
 233
 234/*
 235 * This goes at the end of each htree block.
 236 */
 237struct dx_tail {
 238        u32 dt_reserved;
 239        __le32 dt_checksum;     /* crc32c(uuid+inum+dirblock) */
 240};
 241
 242static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
 243static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
 244static inline unsigned dx_get_hash(struct dx_entry *entry);
 245static void dx_set_hash(struct dx_entry *entry, unsigned value);
 246static unsigned dx_get_count(struct dx_entry *entries);
 247static unsigned dx_get_limit(struct dx_entry *entries);
 248static void dx_set_count(struct dx_entry *entries, unsigned value);
 249static void dx_set_limit(struct dx_entry *entries, unsigned value);
 250static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
 251static unsigned dx_node_limit(struct inode *dir);
 252static struct dx_frame *dx_probe(const struct qstr *d_name,
 253                                 struct inode *dir,
 254                                 struct dx_hash_info *hinfo,
 255                                 struct dx_frame *frame,
 256                                 int *err);
 257static void dx_release(struct dx_frame *frames);
 258static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 259                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
 260static void dx_sort_map(struct dx_map_entry *map, unsigned count);
 261static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
 262                struct dx_map_entry *offsets, int count, unsigned blocksize);
 263static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
 264static void dx_insert_block(struct dx_frame *frame,
 265                                        u32 hash, ext4_lblk_t block);
 266static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 267                                 struct dx_frame *frame,
 268                                 struct dx_frame *frames,
 269                                 __u32 *start_hash);
 270static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 271                const struct qstr *d_name,
 272                struct ext4_dir_entry_2 **res_dir,
 273                int *err);
 274static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 275                             struct inode *inode);
 276
 277/* checksumming functions */
 278void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
 279                            unsigned int blocksize)
 280{
 281        memset(t, 0, sizeof(struct ext4_dir_entry_tail));
 282        t->det_rec_len = ext4_rec_len_to_disk(
 283                        sizeof(struct ext4_dir_entry_tail), blocksize);
 284        t->det_reserved_ft = EXT4_FT_DIR_CSUM;
 285}
 286
 287/* Walk through a dirent block to find a checksum "dirent" at the tail */
 288static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
 289                                                   struct ext4_dir_entry *de)
 290{
 291        struct ext4_dir_entry_tail *t;
 292
 293#ifdef PARANOID
 294        struct ext4_dir_entry *d, *top;
 295
 296        d = de;
 297        top = (struct ext4_dir_entry *)(((void *)de) +
 298                (EXT4_BLOCK_SIZE(inode->i_sb) -
 299                sizeof(struct ext4_dir_entry_tail)));
 300        while (d < top && d->rec_len)
 301                d = (struct ext4_dir_entry *)(((void *)d) +
 302                    le16_to_cpu(d->rec_len));
 303
 304        if (d != top)
 305                return NULL;
 306
 307        t = (struct ext4_dir_entry_tail *)d;
 308#else
 309        t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
 310#endif
 311
 312        if (t->det_reserved_zero1 ||
 313            le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
 314            t->det_reserved_zero2 ||
 315            t->det_reserved_ft != EXT4_FT_DIR_CSUM)
 316                return NULL;
 317
 318        return t;
 319}
 320
 321static __le32 ext4_dirent_csum(struct inode *inode,
 322                               struct ext4_dir_entry *dirent, int size)
 323{
 324        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 325        struct ext4_inode_info *ei = EXT4_I(inode);
 326        __u32 csum;
 327
 328        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
 329        return cpu_to_le32(csum);
 330}
 331
 332static void warn_no_space_for_csum(struct inode *inode)
 333{
 334        ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for "
 335                     "checksum.  Please run e2fsck -D.", inode->i_ino);
 336}
 337
 338int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
 339{
 340        struct ext4_dir_entry_tail *t;
 341
 342        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 343                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 344                return 1;
 345
 346        t = get_dirent_tail(inode, dirent);
 347        if (!t) {
 348                warn_no_space_for_csum(inode);
 349                return 0;
 350        }
 351
 352        if (t->det_checksum != ext4_dirent_csum(inode, dirent,
 353                                                (void *)t - (void *)dirent))
 354                return 0;
 355
 356        return 1;
 357}
 358
 359static void ext4_dirent_csum_set(struct inode *inode,
 360                                 struct ext4_dir_entry *dirent)
 361{
 362        struct ext4_dir_entry_tail *t;
 363
 364        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 365                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 366                return;
 367
 368        t = get_dirent_tail(inode, dirent);
 369        if (!t) {
 370                warn_no_space_for_csum(inode);
 371                return;
 372        }
 373
 374        t->det_checksum = ext4_dirent_csum(inode, dirent,
 375                                           (void *)t - (void *)dirent);
 376}
 377
 378int ext4_handle_dirty_dirent_node(handle_t *handle,
 379                                  struct inode *inode,
 380                                  struct buffer_head *bh)
 381{
 382        ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
 383        return ext4_handle_dirty_metadata(handle, inode, bh);
 384}
 385
 386static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
 387                                               struct ext4_dir_entry *dirent,
 388                                               int *offset)
 389{
 390        struct ext4_dir_entry *dp;
 391        struct dx_root_info *root;
 392        int count_offset;
 393
 394        if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
 395                count_offset = 8;
 396        else if (le16_to_cpu(dirent->rec_len) == 12) {
 397                dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
 398                if (le16_to_cpu(dp->rec_len) !=
 399                    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
 400                        return NULL;
 401                root = (struct dx_root_info *)(((void *)dp + 12));
 402                if (root->reserved_zero ||
 403                    root->info_length != sizeof(struct dx_root_info))
 404                        return NULL;
 405                count_offset = 32;
 406        } else
 407                return NULL;
 408
 409        if (offset)
 410                *offset = count_offset;
 411        return (struct dx_countlimit *)(((void *)dirent) + count_offset);
 412}
 413
 414static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
 415                           int count_offset, int count, struct dx_tail *t)
 416{
 417        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 418        struct ext4_inode_info *ei = EXT4_I(inode);
 419        __u32 csum;
 420        __le32 save_csum;
 421        int size;
 422
 423        size = count_offset + (count * sizeof(struct dx_entry));
 424        save_csum = t->dt_checksum;
 425        t->dt_checksum = 0;
 426        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
 427        csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
 428        t->dt_checksum = save_csum;
 429
 430        return cpu_to_le32(csum);
 431}
 432
 433static int ext4_dx_csum_verify(struct inode *inode,
 434                               struct ext4_dir_entry *dirent)
 435{
 436        struct dx_countlimit *c;
 437        struct dx_tail *t;
 438        int count_offset, limit, count;
 439
 440        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 441                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 442                return 1;
 443
 444        c = get_dx_countlimit(inode, dirent, &count_offset);
 445        if (!c) {
 446                EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
 447                return 1;
 448        }
 449        limit = le16_to_cpu(c->limit);
 450        count = le16_to_cpu(c->count);
 451        if (count_offset + (limit * sizeof(struct dx_entry)) >
 452            EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
 453                warn_no_space_for_csum(inode);
 454                return 1;
 455        }
 456        t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
 457
 458        if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
 459                                            count, t))
 460                return 0;
 461        return 1;
 462}
 463
 464static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
 465{
 466        struct dx_countlimit *c;
 467        struct dx_tail *t;
 468        int count_offset, limit, count;
 469
 470        if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 471                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 472                return;
 473
 474        c = get_dx_countlimit(inode, dirent, &count_offset);
 475        if (!c) {
 476                EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
 477                return;
 478        }
 479        limit = le16_to_cpu(c->limit);
 480        count = le16_to_cpu(c->count);
 481        if (count_offset + (limit * sizeof(struct dx_entry)) >
 482            EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
 483                warn_no_space_for_csum(inode);
 484                return;
 485        }
 486        t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
 487
 488        t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
 489}
 490
 491static inline int ext4_handle_dirty_dx_node(handle_t *handle,
 492                                            struct inode *inode,
 493                                            struct buffer_head *bh)
 494{
 495        ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
 496        return ext4_handle_dirty_metadata(handle, inode, bh);
 497}
 498
 499/*
 500 * p is at least 6 bytes before the end of page
 501 */
 502static inline struct ext4_dir_entry_2 *
 503ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
 504{
 505        return (struct ext4_dir_entry_2 *)((char *)p +
 506                ext4_rec_len_from_disk(p->rec_len, blocksize));
 507}
 508
 509/*
 510 * Future: use high four bits of block for coalesce-on-delete flags
 511 * Mask them off for now.
 512 */
 513
 514static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
 515{
 516        return le32_to_cpu(entry->block) & 0x00ffffff;
 517}
 518
 519static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
 520{
 521        entry->block = cpu_to_le32(value);
 522}
 523
 524static inline unsigned dx_get_hash(struct dx_entry *entry)
 525{
 526        return le32_to_cpu(entry->hash);
 527}
 528
 529static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
 530{
 531        entry->hash = cpu_to_le32(value);
 532}
 533
 534static inline unsigned dx_get_count(struct dx_entry *entries)
 535{
 536        return le16_to_cpu(((struct dx_countlimit *) entries)->count);
 537}
 538
 539static inline unsigned dx_get_limit(struct dx_entry *entries)
 540{
 541        return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
 542}
 543
 544static inline void dx_set_count(struct dx_entry *entries, unsigned value)
 545{
 546        ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
 547}
 548
 549static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
 550{
 551        ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
 552}
 553
 554static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
 555{
 556        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
 557                EXT4_DIR_REC_LEN(2) - infosize;
 558
 559        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
 560                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 561                entry_space -= sizeof(struct dx_tail);
 562        return entry_space / sizeof(struct dx_entry);
 563}
 564
 565static inline unsigned dx_node_limit(struct inode *dir)
 566{
 567        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 568
 569        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
 570                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 571                entry_space -= sizeof(struct dx_tail);
 572        return entry_space / sizeof(struct dx_entry);
 573}
 574
 575/*
 576 * Debug
 577 */
 578#ifdef DX_DEBUG
 579static void dx_show_index(char * label, struct dx_entry *entries)
 580{
 581        int i, n = dx_get_count (entries);
 582        printk(KERN_DEBUG "%s index ", label);
 583        for (i = 0; i < n; i++) {
 584                printk("%x->%lu ", i ? dx_get_hash(entries + i) :
 585                                0, (unsigned long)dx_get_block(entries + i));
 586        }
 587        printk("\n");
 588}
 589
 590struct stats
 591{
 592        unsigned names;
 593        unsigned space;
 594        unsigned bcount;
 595};
 596
 597static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
 598                                 int size, int show_names)
 599{
 600        unsigned names = 0, space = 0;
 601        char *base = (char *) de;
 602        struct dx_hash_info h = *hinfo;
 603
 604        printk("names: ");
 605        while ((char *) de < base + size)
 606        {
 607                if (de->inode)
 608                {
 609                        if (show_names)
 610                        {
 611                                int len = de->name_len;
 612                                char *name = de->name;
 613                                while (len--) printk("%c", *name++);
 614                                ext4fs_dirhash(de->name, de->name_len, &h);
 615                                printk(":%x.%u ", h.hash,
 616                                       (unsigned) ((char *) de - base));
 617                        }
 618                        space += EXT4_DIR_REC_LEN(de->name_len);
 619                        names++;
 620                }
 621                de = ext4_next_entry(de, size);
 622        }
 623        printk("(%i)\n", names);
 624        return (struct stats) { names, space, 1 };
 625}
 626
 627struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 628                             struct dx_entry *entries, int levels)
 629{
 630        unsigned blocksize = dir->i_sb->s_blocksize;
 631        unsigned count = dx_get_count(entries), names = 0, space = 0, i;
 632        unsigned bcount = 0;
 633        struct buffer_head *bh;
 634        int err;
 635        printk("%i indexed blocks...\n", count);
 636        for (i = 0; i < count; i++, entries++)
 637        {
 638                ext4_lblk_t block = dx_get_block(entries);
 639                ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
 640                u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
 641                struct stats stats;
 642                printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
 643                if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
 644                stats = levels?
 645                   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
 646                   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
 647                names += stats.names;
 648                space += stats.space;
 649                bcount += stats.bcount;
 650                brelse(bh);
 651        }
 652        if (bcount)
 653                printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
 654                       levels ? "" : "   ", names, space/bcount,
 655                       (space/bcount)*100/blocksize);
 656        return (struct stats) { names, space, bcount};
 657}
 658#endif /* DX_DEBUG */
 659
 660/*
 661 * Probe for a directory leaf block to search.
 662 *
 663 * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
 664 * error in the directory index, and the caller should fall back to
 665 * searching the directory normally.  The callers of dx_probe **MUST**
 666 * check for this error code, and make sure it never gets reflected
 667 * back to userspace.
 668 */
 669static struct dx_frame *
 670dx_probe(const struct qstr *d_name, struct inode *dir,
 671         struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
 672{
 673        unsigned count, indirect;
 674        struct dx_entry *at, *entries, *p, *q, *m;
 675        struct dx_root *root;
 676        struct buffer_head *bh;
 677        struct dx_frame *frame = frame_in;
 678        u32 hash;
 679
 680        frame->bh = NULL;
 681        bh = ext4_read_dirblock(dir, 0, INDEX);
 682        if (IS_ERR(bh)) {
 683                *err = PTR_ERR(bh);
 684                goto fail;
 685        }
 686        root = (struct dx_root *) bh->b_data;
 687        if (root->info.hash_version != DX_HASH_TEA &&
 688            root->info.hash_version != DX_HASH_HALF_MD4 &&
 689            root->info.hash_version != DX_HASH_LEGACY) {
 690                ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
 691                             root->info.hash_version);
 692                brelse(bh);
 693                *err = ERR_BAD_DX_DIR;
 694                goto fail;
 695        }
 696        hinfo->hash_version = root->info.hash_version;
 697        if (hinfo->hash_version <= DX_HASH_TEA)
 698                hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 699        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 700        if (d_name)
 701                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
 702        hash = hinfo->hash;
 703
 704        if (root->info.unused_flags & 1) {
 705                ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
 706                             root->info.unused_flags);
 707                brelse(bh);
 708                *err = ERR_BAD_DX_DIR;
 709                goto fail;
 710        }
 711
 712        if ((indirect = root->info.indirect_levels) > 1) {
 713                ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
 714                             root->info.indirect_levels);
 715                brelse(bh);
 716                *err = ERR_BAD_DX_DIR;
 717                goto fail;
 718        }
 719
 720        entries = (struct dx_entry *) (((char *)&root->info) +
 721                                       root->info.info_length);
 722
 723        if (dx_get_limit(entries) != dx_root_limit(dir,
 724                                                   root->info.info_length)) {
 725                ext4_warning(dir->i_sb, "dx entry: limit != root limit");
 726                brelse(bh);
 727                *err = ERR_BAD_DX_DIR;
 728                goto fail;
 729        }
 730
 731        dxtrace(printk("Look up %x", hash));
 732        while (1)
 733        {
 734                count = dx_get_count(entries);
 735                if (!count || count > dx_get_limit(entries)) {
 736                        ext4_warning(dir->i_sb,
 737                                     "dx entry: no count or count > limit");
 738                        brelse(bh);
 739                        *err = ERR_BAD_DX_DIR;
 740                        goto fail2;
 741                }
 742
 743                p = entries + 1;
 744                q = entries + count - 1;
 745                while (p <= q)
 746                {
 747                        m = p + (q - p)/2;
 748                        dxtrace(printk("."));
 749                        if (dx_get_hash(m) > hash)
 750                                q = m - 1;
 751                        else
 752                                p = m + 1;
 753                }
 754
 755                if (0) // linear search cross check
 756                {
 757                        unsigned n = count - 1;
 758                        at = entries;
 759                        while (n--)
 760                        {
 761                                dxtrace(printk(","));
 762                                if (dx_get_hash(++at) > hash)
 763                                {
 764                                        at--;
 765                                        break;
 766                                }
 767                        }
 768                        assert (at == p - 1);
 769                }
 770
 771                at = p - 1;
 772                dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
 773                frame->bh = bh;
 774                frame->entries = entries;
 775                frame->at = at;
 776                if (!indirect--) return frame;
 777                bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
 778                if (IS_ERR(bh)) {
 779                        *err = PTR_ERR(bh);
 780                        goto fail2;
 781                }
 782                entries = ((struct dx_node *) bh->b_data)->entries;
 783
 784                if (dx_get_limit(entries) != dx_node_limit (dir)) {
 785                        ext4_warning(dir->i_sb,
 786                                     "dx entry: limit != node limit");
 787                        brelse(bh);
 788                        *err = ERR_BAD_DX_DIR;
 789                        goto fail2;
 790                }
 791                frame++;
 792                frame->bh = NULL;
 793        }
 794fail2:
 795        while (frame >= frame_in) {
 796                brelse(frame->bh);
 797                frame--;
 798        }
 799fail:
 800        if (*err == ERR_BAD_DX_DIR)
 801                ext4_warning(dir->i_sb,
 802                             "Corrupt dir inode %lu, running e2fsck is "
 803                             "recommended.", dir->i_ino);
 804        return NULL;
 805}
 806
 807static void dx_release (struct dx_frame *frames)
 808{
 809        if (frames[0].bh == NULL)
 810                return;
 811
 812        if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
 813                brelse(frames[1].bh);
 814        brelse(frames[0].bh);
 815}
 816
 817/*
 818 * This function increments the frame pointer to search the next leaf
 819 * block, and reads in the necessary intervening nodes if the search
 820 * should be necessary.  Whether or not the search is necessary is
 821 * controlled by the hash parameter.  If the hash value is even, then
 822 * the search is only continued if the next block starts with that
 823 * hash value.  This is used if we are searching for a specific file.
 824 *
 825 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 826 *
 827 * This function returns 1 if the caller should continue to search,
 828 * or 0 if it should not.  If there is an error reading one of the
 829 * index blocks, it will return a negative error code.
 830 *
 831 * If start_hash is non-null, it will be filled in with the starting
 832 * hash of the next page.
 833 */
 834static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 835                                 struct dx_frame *frame,
 836                                 struct dx_frame *frames,
 837                                 __u32 *start_hash)
 838{
 839        struct dx_frame *p;
 840        struct buffer_head *bh;
 841        int num_frames = 0;
 842        __u32 bhash;
 843
 844        p = frame;
 845        /*
 846         * Find the next leaf page by incrementing the frame pointer.
 847         * If we run out of entries in the interior node, loop around and
 848         * increment pointer in the parent node.  When we break out of
 849         * this loop, num_frames indicates the number of interior
 850         * nodes need to be read.
 851         */
 852        while (1) {
 853                if (++(p->at) < p->entries + dx_get_count(p->entries))
 854                        break;
 855                if (p == frames)
 856                        return 0;
 857                num_frames++;
 858                p--;
 859        }
 860
 861        /*
 862         * If the hash is 1, then continue only if the next page has a
 863         * continuation hash of any value.  This is used for readdir
 864         * handling.  Otherwise, check to see if the hash matches the
 865         * desired contiuation hash.  If it doesn't, return since
 866         * there's no point to read in the successive index pages.
 867         */
 868        bhash = dx_get_hash(p->at);
 869        if (start_hash)
 870                *start_hash = bhash;
 871        if ((hash & 1) == 0) {
 872                if ((bhash & ~1) != hash)
 873                        return 0;
 874        }
 875        /*
 876         * If the hash is HASH_NB_ALWAYS, we always go to the next
 877         * block so no check is necessary
 878         */
 879        while (num_frames--) {
 880                bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
 881                if (IS_ERR(bh))
 882                        return PTR_ERR(bh);
 883                p++;
 884                brelse(p->bh);
 885                p->bh = bh;
 886                p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
 887        }
 888        return 1;
 889}
 890
 891
 892/*
 893 * This function fills a red-black tree with information from a
 894 * directory block.  It returns the number directory entries loaded
 895 * into the tree.  If there is an error it is returned in err.
 896 */
 897static int htree_dirblock_to_tree(struct file *dir_file,
 898                                  struct inode *dir, ext4_lblk_t block,
 899                                  struct dx_hash_info *hinfo,
 900                                  __u32 start_hash, __u32 start_minor_hash)
 901{
 902        struct buffer_head *bh;
 903        struct ext4_dir_entry_2 *de, *top;
 904        int err = 0, count = 0;
 905
 906        dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
 907                                                        (unsigned long)block));
 908        bh = ext4_read_dirblock(dir, block, DIRENT);
 909        if (IS_ERR(bh))
 910                return PTR_ERR(bh);
 911
 912        de = (struct ext4_dir_entry_2 *) bh->b_data;
 913        top = (struct ext4_dir_entry_2 *) ((char *) de +
 914                                           dir->i_sb->s_blocksize -
 915                                           EXT4_DIR_REC_LEN(0));
 916        for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
 917                if (ext4_check_dir_entry(dir, NULL, de, bh,
 918                                bh->b_data, bh->b_size,
 919                                (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
 920                                         + ((char *)de - bh->b_data))) {
 921                        /* silently ignore the rest of the block */
 922                        break;
 923                }
 924                ext4fs_dirhash(de->name, de->name_len, hinfo);
 925                if ((hinfo->hash < start_hash) ||
 926                    ((hinfo->hash == start_hash) &&
 927                     (hinfo->minor_hash < start_minor_hash)))
 928                        continue;
 929                if (de->inode == 0)
 930                        continue;
 931                if ((err = ext4_htree_store_dirent(dir_file,
 932                                   hinfo->hash, hinfo->minor_hash, de)) != 0) {
 933                        brelse(bh);
 934                        return err;
 935                }
 936                count++;
 937        }
 938        brelse(bh);
 939        return count;
 940}
 941
 942
 943/*
 944 * This function fills a red-black tree with information from a
 945 * directory.  We start scanning the directory in hash order, starting
 946 * at start_hash and start_minor_hash.
 947 *
 948 * This function returns the number of entries inserted into the tree,
 949 * or a negative error code.
 950 */
 951int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 952                         __u32 start_minor_hash, __u32 *next_hash)
 953{
 954        struct dx_hash_info hinfo;
 955        struct ext4_dir_entry_2 *de;
 956        struct dx_frame frames[2], *frame;
 957        struct inode *dir;
 958        ext4_lblk_t block;
 959        int count = 0;
 960        int ret, err;
 961        __u32 hashval;
 962
 963        dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
 964                       start_hash, start_minor_hash));
 965        dir = file_inode(dir_file);
 966        if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
 967                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 968                if (hinfo.hash_version <= DX_HASH_TEA)
 969                        hinfo.hash_version +=
 970                                EXT4_SB(dir->i_sb)->s_hash_unsigned;
 971                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 972                if (ext4_has_inline_data(dir)) {
 973                        int has_inline_data = 1;
 974                        count = htree_inlinedir_to_tree(dir_file, dir, 0,
 975                                                        &hinfo, start_hash,
 976                                                        start_minor_hash,
 977                                                        &has_inline_data);
 978                        if (has_inline_data) {
 979                                *next_hash = ~0;
 980                                return count;
 981                        }
 982                }
 983                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 984                                               start_hash, start_minor_hash);
 985                *next_hash = ~0;
 986                return count;
 987        }
 988        hinfo.hash = start_hash;
 989        hinfo.minor_hash = 0;
 990        frame = dx_probe(NULL, dir, &hinfo, frames, &err);
 991        if (!frame)
 992                return err;
 993
 994        /* Add '.' and '..' from the htree header */
 995        if (!start_hash && !start_minor_hash) {
 996                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
 997                if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
 998                        goto errout;
 999                count++;
1000        }
1001        if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1002                de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1003                de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1004                if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
1005                        goto errout;
1006                count++;
1007        }
1008
1009        while (1) {
1010                block = dx_get_block(frame->at);
1011                ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1012                                             start_hash, start_minor_hash);
1013                if (ret < 0) {
1014                        err = ret;
1015                        goto errout;
1016                }
1017                count += ret;
1018                hashval = ~0;
1019                ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1020                                            frame, frames, &hashval);
1021                *next_hash = hashval;
1022                if (ret < 0) {
1023                        err = ret;
1024                        goto errout;
1025                }
1026                /*
1027                 * Stop if:  (a) there are no more entries, or
1028                 * (b) we have inserted at least one entry and the
1029                 * next hash value is not a continuation
1030                 */
1031                if ((ret == 0) ||
1032                    (count && ((hashval & 1) == 0)))
1033                        break;
1034        }
1035        dx_release(frames);
1036        dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1037                       "next hash: %x\n", count, *next_hash));
1038        return count;
1039errout:
1040        dx_release(frames);
1041        return (err);
1042}
1043
1044static inline int search_dirblock(struct buffer_head *bh,
1045                                  struct inode *dir,
1046                                  const struct qstr *d_name,
1047                                  unsigned int offset,
1048                                  struct ext4_dir_entry_2 **res_dir)
1049{
1050        return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1051                          d_name, offset, res_dir);
1052}
1053
1054/*
1055 * Directory block splitting, compacting
1056 */
1057
1058/*
1059 * Create map of hash values, offsets, and sizes, stored at end of block.
1060 * Returns number of entries mapped.
1061 */
1062static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
1063                       struct dx_hash_info *hinfo,
1064                       struct dx_map_entry *map_tail)
1065{
1066        int count = 0;
1067        char *base = (char *) de;
1068        struct dx_hash_info h = *hinfo;
1069
1070        while ((char *) de < base + blocksize) {
1071                if (de->name_len && de->inode) {
1072                        ext4fs_dirhash(de->name, de->name_len, &h);
1073                        map_tail--;
1074                        map_tail->hash = h.hash;
1075                        map_tail->offs = ((char *) de - base)>>2;
1076                        map_tail->size = le16_to_cpu(de->rec_len);
1077                        count++;
1078                        cond_resched();
1079                }
1080                /* XXX: do we need to check rec_len == 0 case? -Chris */
1081                de = ext4_next_entry(de, blocksize);
1082        }
1083        return count;
1084}
1085
1086/* Sort map by hash value */
1087static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1088{
1089        struct dx_map_entry *p, *q, *top = map + count - 1;
1090        int more;
1091        /* Combsort until bubble sort doesn't suck */
1092        while (count > 2) {
1093                count = count*10/13;
1094                if (count - 9 < 2) /* 9, 10 -> 11 */
1095                        count = 11;
1096                for (p = top, q = p - count; q >= map; p--, q--)
1097                        if (p->hash < q->hash)
1098                                swap(*p, *q);
1099        }
1100        /* Garden variety bubble sort */
1101        do {
1102                more = 0;
1103                q = top;
1104                while (q-- > map) {
1105                        if (q[1].hash >= q[0].hash)
1106                                continue;
1107                        swap(*(q+1), *q);
1108                        more = 1;
1109                }
1110        } while(more);
1111}
1112
1113static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1114{
1115        struct dx_entry *entries = frame->entries;
1116        struct dx_entry *old = frame->at, *new = old + 1;
1117        int count = dx_get_count(entries);
1118
1119        assert(count < dx_get_limit(entries));
1120        assert(old < entries + count);
1121        memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1122        dx_set_hash(new, hash);
1123        dx_set_block(new, block);
1124        dx_set_count(entries, count + 1);
1125}
1126
1127/*
1128 * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1129 *
1130 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1131 * `de != NULL' is guaranteed by caller.
1132 */
1133static inline int ext4_match (int len, const char * const name,
1134                              struct ext4_dir_entry_2 * de)
1135{
1136        if (len != de->name_len)
1137                return 0;
1138        if (!de->inode)
1139                return 0;
1140        return !memcmp(name, de->name, len);
1141}
1142
1143/*
1144 * Returns 0 if not found, -1 on failure, and 1 on success
1145 */
1146int search_dir(struct buffer_head *bh,
1147               char *search_buf,
1148               int buf_size,
1149               struct inode *dir,
1150               const struct qstr *d_name,
1151               unsigned int offset,
1152               struct ext4_dir_entry_2 **res_dir)
1153{
1154        struct ext4_dir_entry_2 * de;
1155        char * dlimit;
1156        int de_len;
1157        const char *name = d_name->name;
1158        int namelen = d_name->len;
1159
1160        de = (struct ext4_dir_entry_2 *)search_buf;
1161        dlimit = search_buf + buf_size;
1162        while ((char *) de < dlimit) {
1163                /* this code is executed quadratically often */
1164                /* do minimal checking `by hand' */
1165
1166                if ((char *) de + namelen <= dlimit &&
1167                    ext4_match (namelen, name, de)) {
1168                        /* found a match - just to be sure, do a full check */
1169                        if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
1170                                                 bh->b_size, offset))
1171                                return -1;
1172                        *res_dir = de;
1173                        return 1;
1174                }
1175                /* prevent looping on a bad block */
1176                de_len = ext4_rec_len_from_disk(de->rec_len,
1177                                                dir->i_sb->s_blocksize);
1178                if (de_len <= 0)
1179                        return -1;
1180                offset += de_len;
1181                de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1182        }
1183        return 0;
1184}
1185
1186static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1187                               struct ext4_dir_entry *de)
1188{
1189        struct super_block *sb = dir->i_sb;
1190
1191        if (!is_dx(dir))
1192                return 0;
1193        if (block == 0)
1194                return 1;
1195        if (de->inode == 0 &&
1196            ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1197                        sb->s_blocksize)
1198                return 1;
1199        return 0;
1200}
1201
1202/*
1203 *      ext4_find_entry()
1204 *
1205 * finds an entry in the specified directory with the wanted name. It
1206 * returns the cache buffer in which the entry was found, and the entry
1207 * itself (as a parameter - res_dir). It does NOT read the inode of the
1208 * entry - you'll have to do that yourself if you want to.
1209 *
1210 * The returned buffer_head has ->b_count elevated.  The caller is expected
1211 * to brelse() it when appropriate.
1212 */
1213static struct buffer_head * ext4_find_entry (struct inode *dir,
1214                                        const struct qstr *d_name,
1215                                        struct ext4_dir_entry_2 **res_dir,
1216                                        int *inlined)
1217{
1218        struct super_block *sb;
1219        struct buffer_head *bh_use[NAMEI_RA_SIZE];
1220        struct buffer_head *bh, *ret = NULL;
1221        ext4_lblk_t start, block, b;
1222        const u8 *name = d_name->name;
1223        int ra_max = 0;         /* Number of bh's in the readahead
1224                                   buffer, bh_use[] */
1225        int ra_ptr = 0;         /* Current index into readahead
1226                                   buffer */
1227        int num = 0;
1228        ext4_lblk_t  nblocks;
1229        int i, err;
1230        int namelen;
1231
1232        *res_dir = NULL;
1233        sb = dir->i_sb;
1234        namelen = d_name->len;
1235        if (namelen > EXT4_NAME_LEN)
1236                return NULL;
1237
1238        if (ext4_has_inline_data(dir)) {
1239                int has_inline_data = 1;
1240                ret = ext4_find_inline_entry(dir, d_name, res_dir,
1241                                             &has_inline_data);
1242                if (has_inline_data) {
1243                        if (inlined)
1244                                *inlined = 1;
1245                        return ret;
1246                }
1247        }
1248
1249        if ((namelen <= 2) && (name[0] == '.') &&
1250            (name[1] == '.' || name[1] == '\0')) {
1251                /*
1252                 * "." or ".." will only be in the first block
1253                 * NFS may look up ".."; "." should be handled by the VFS
1254                 */
1255                block = start = 0;
1256                nblocks = 1;
1257                goto restart;
1258        }
1259        if (is_dx(dir)) {
1260                bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
1261                /*
1262                 * On success, or if the error was file not found,
1263                 * return.  Otherwise, fall back to doing a search the
1264                 * old fashioned way.
1265                 */
1266                if (bh || (err != ERR_BAD_DX_DIR))
1267                        return bh;
1268                dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1269                               "falling back\n"));
1270        }
1271        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1272        start = EXT4_I(dir)->i_dir_start_lookup;
1273        if (start >= nblocks)
1274                start = 0;
1275        block = start;
1276restart:
1277        do {
1278                /*
1279                 * We deal with the read-ahead logic here.
1280                 */
1281                if (ra_ptr >= ra_max) {
1282                        /* Refill the readahead buffer */
1283                        ra_ptr = 0;
1284                        b = block;
1285                        for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
1286                                /*
1287                                 * Terminate if we reach the end of the
1288                                 * directory and must wrap, or if our
1289                                 * search has finished at this block.
1290                                 */
1291                                if (b >= nblocks || (num && block == start)) {
1292                                        bh_use[ra_max] = NULL;
1293                                        break;
1294                                }
1295                                num++;
1296                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
1297                                bh_use[ra_max] = bh;
1298                                if (bh)
1299                                        ll_rw_block(READ | REQ_META | REQ_PRIO,
1300                                                    1, &bh);
1301                        }
1302                }
1303                if ((bh = bh_use[ra_ptr++]) == NULL)
1304                        goto next;
1305                wait_on_buffer(bh);
1306                if (!buffer_uptodate(bh)) {
1307                        /* read error, skip block & hope for the best */
1308                        EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
1309                                         (unsigned long) block);
1310                        brelse(bh);
1311                        goto next;
1312                }
1313                if (!buffer_verified(bh) &&
1314                    !is_dx_internal_node(dir, block,
1315                                         (struct ext4_dir_entry *)bh->b_data) &&
1316                    !ext4_dirent_csum_verify(dir,
1317                                (struct ext4_dir_entry *)bh->b_data)) {
1318                        EXT4_ERROR_INODE(dir, "checksumming directory "
1319                                         "block %lu", (unsigned long)block);
1320                        brelse(bh);
1321                        goto next;
1322                }
1323                set_buffer_verified(bh);
1324                i = search_dirblock(bh, dir, d_name,
1325                            block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1326                if (i == 1) {
1327                        EXT4_I(dir)->i_dir_start_lookup = block;
1328                        ret = bh;
1329                        goto cleanup_and_exit;
1330                } else {
1331                        brelse(bh);
1332                        if (i < 0)
1333                                goto cleanup_and_exit;
1334                }
1335        next:
1336                if (++block >= nblocks)
1337                        block = 0;
1338        } while (block != start);
1339
1340        /*
1341         * If the directory has grown while we were searching, then
1342         * search the last part of the directory before giving up.
1343         */
1344        block = nblocks;
1345        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1346        if (block < nblocks) {
1347                start = 0;
1348                goto restart;
1349        }
1350
1351cleanup_and_exit:
1352        /* Clean up the read-ahead blocks */
1353        for (; ra_ptr < ra_max; ra_ptr++)
1354                brelse(bh_use[ra_ptr]);
1355        return ret;
1356}
1357
1358static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1359                       struct ext4_dir_entry_2 **res_dir, int *err)
1360{
1361        struct super_block * sb = dir->i_sb;
1362        struct dx_hash_info     hinfo;
1363        struct dx_frame frames[2], *frame;
1364        struct buffer_head *bh;
1365        ext4_lblk_t block;
1366        int retval;
1367
1368        if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
1369                return NULL;
1370        do {
1371                block = dx_get_block(frame->at);
1372                bh = ext4_read_dirblock(dir, block, DIRENT);
1373                if (IS_ERR(bh)) {
1374                        *err = PTR_ERR(bh);
1375                        goto errout;
1376                }
1377                retval = search_dirblock(bh, dir, d_name,
1378                                         block << EXT4_BLOCK_SIZE_BITS(sb),
1379                                         res_dir);
1380                if (retval == 1) {      /* Success! */
1381                        dx_release(frames);
1382                        return bh;
1383                }
1384                brelse(bh);
1385                if (retval == -1) {
1386                        *err = ERR_BAD_DX_DIR;
1387                        goto errout;
1388                }
1389
1390                /* Check to see if we should continue to search */
1391                retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1392                                               frames, NULL);
1393                if (retval < 0) {
1394                        ext4_warning(sb,
1395                             "error reading index page in directory #%lu",
1396                             dir->i_ino);
1397                        *err = retval;
1398                        goto errout;
1399                }
1400        } while (retval == 1);
1401
1402        *err = -ENOENT;
1403errout:
1404        dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1405        dx_release (frames);
1406        return NULL;
1407}
1408
1409static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1410{
1411        struct inode *inode;
1412        struct ext4_dir_entry_2 *de;
1413        struct buffer_head *bh;
1414
1415        if (dentry->d_name.len > EXT4_NAME_LEN)
1416                return ERR_PTR(-ENAMETOOLONG);
1417
1418        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1419        inode = NULL;
1420        if (bh) {
1421                __u32 ino = le32_to_cpu(de->inode);
1422                brelse(bh);
1423                if (!ext4_valid_inum(dir->i_sb, ino)) {
1424                        EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1425                        return ERR_PTR(-EIO);
1426                }
1427                if (unlikely(ino == dir->i_ino)) {
1428                        EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1429                                         dentry);
1430                        return ERR_PTR(-EIO);
1431                }
1432                inode = ext4_iget(dir->i_sb, ino);
1433                if (inode == ERR_PTR(-ESTALE)) {
1434                        EXT4_ERROR_INODE(dir,
1435                                         "deleted inode referenced: %u",
1436                                         ino);
1437                        return ERR_PTR(-EIO);
1438                }
1439        }
1440        return d_splice_alias(inode, dentry);
1441}
1442
1443
1444struct dentry *ext4_get_parent(struct dentry *child)
1445{
1446        __u32 ino;
1447        static const struct qstr dotdot = QSTR_INIT("..", 2);
1448        struct ext4_dir_entry_2 * de;
1449        struct buffer_head *bh;
1450
1451        bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1452        if (!bh)
1453                return ERR_PTR(-ENOENT);
1454        ino = le32_to_cpu(de->inode);
1455        brelse(bh);
1456
1457        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1458                EXT4_ERROR_INODE(child->d_inode,
1459                                 "bad parent inode number: %u", ino);
1460                return ERR_PTR(-EIO);
1461        }
1462
1463        return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1464}
1465
1466/*
1467 * Move count entries from end of map between two memory locations.
1468 * Returns pointer to last entry moved.
1469 */
1470static struct ext4_dir_entry_2 *
1471dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1472                unsigned blocksize)
1473{
1474        unsigned rec_len = 0;
1475
1476        while (count--) {
1477                struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1478                                                (from + (map->offs<<2));
1479                rec_len = EXT4_DIR_REC_LEN(de->name_len);
1480                memcpy (to, de, rec_len);
1481                ((struct ext4_dir_entry_2 *) to)->rec_len =
1482                                ext4_rec_len_to_disk(rec_len, blocksize);
1483                de->inode = 0;
1484                map++;
1485                to += rec_len;
1486        }
1487        return (struct ext4_dir_entry_2 *) (to - rec_len);
1488}
1489
1490/*
1491 * Compact each dir entry in the range to the minimal rec_len.
1492 * Returns pointer to last entry in range.
1493 */
1494static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1495{
1496        struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1497        unsigned rec_len = 0;
1498
1499        prev = to = de;
1500        while ((char*)de < base + blocksize) {
1501                next = ext4_next_entry(de, blocksize);
1502                if (de->inode && de->name_len) {
1503                        rec_len = EXT4_DIR_REC_LEN(de->name_len);
1504                        if (de > to)
1505                                memmove(to, de, rec_len);
1506                        to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1507                        prev = to;
1508                        to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1509                }
1510                de = next;
1511        }
1512        return prev;
1513}
1514
1515/*
1516 * Split a full leaf block to make room for a new dir entry.
1517 * Allocate a new block, and move entries so that they are approx. equally full.
1518 * Returns pointer to de in block into which the new entry will be inserted.
1519 */
1520static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1521                        struct buffer_head **bh,struct dx_frame *frame,
1522                        struct dx_hash_info *hinfo, int *error)
1523{
1524        unsigned blocksize = dir->i_sb->s_blocksize;
1525        unsigned count, continued;
1526        struct buffer_head *bh2;
1527        ext4_lblk_t newblock;
1528        u32 hash2;
1529        struct dx_map_entry *map;
1530        char *data1 = (*bh)->b_data, *data2;
1531        unsigned split, move, size;
1532        struct ext4_dir_entry_2 *de = NULL, *de2;
1533        struct ext4_dir_entry_tail *t;
1534        int     csum_size = 0;
1535        int     err = 0, i;
1536
1537        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
1538                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1539                csum_size = sizeof(struct ext4_dir_entry_tail);
1540
1541        bh2 = ext4_append(handle, dir, &newblock);
1542        if (IS_ERR(bh2)) {
1543                brelse(*bh);
1544                *bh = NULL;
1545                *error = PTR_ERR(bh2);
1546                return NULL;
1547        }
1548
1549        BUFFER_TRACE(*bh, "get_write_access");
1550        err = ext4_journal_get_write_access(handle, *bh);
1551        if (err)
1552                goto journal_error;
1553
1554        BUFFER_TRACE(frame->bh, "get_write_access");
1555        err = ext4_journal_get_write_access(handle, frame->bh);
1556        if (err)
1557                goto journal_error;
1558
1559        data2 = bh2->b_data;
1560
1561        /* create map in the end of data2 block */
1562        map = (struct dx_map_entry *) (data2 + blocksize);
1563        count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1564                             blocksize, hinfo, map);
1565        map -= count;
1566        dx_sort_map(map, count);
1567        /* Split the existing block in the middle, size-wise */
1568        size = 0;
1569        move = 0;
1570        for (i = count-1; i >= 0; i--) {
1571                /* is more than half of this entry in 2nd half of the block? */
1572                if (size + map[i].size/2 > blocksize/2)
1573                        break;
1574                size += map[i].size;
1575                move++;
1576        }
1577        /* map index at which we will split */
1578        split = count - move;
1579        hash2 = map[split].hash;
1580        continued = hash2 == map[split - 1].hash;
1581        dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
1582                        (unsigned long)dx_get_block(frame->at),
1583                                        hash2, split, count-split));
1584
1585        /* Fancy dance to stay within two buffers */
1586        de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1587        de = dx_pack_dirents(data1, blocksize);
1588        de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1589                                           (char *) de,
1590                                           blocksize);
1591        de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
1592                                            (char *) de2,
1593                                            blocksize);
1594        if (csum_size) {
1595                t = EXT4_DIRENT_TAIL(data2, blocksize);
1596                initialize_dirent_tail(t, blocksize);
1597
1598                t = EXT4_DIRENT_TAIL(data1, blocksize);
1599                initialize_dirent_tail(t, blocksize);
1600        }
1601
1602        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1603        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1604
1605        /* Which block gets the new entry? */
1606        if (hinfo->hash >= hash2)
1607        {
1608                swap(*bh, bh2);
1609                de = de2;
1610        }
1611        dx_insert_block(frame, hash2 + continued, newblock);
1612        err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
1613        if (err)
1614                goto journal_error;
1615        err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1616        if (err)
1617                goto journal_error;
1618        brelse(bh2);
1619        dxtrace(dx_show_index("frame", frame->entries));
1620        return de;
1621
1622journal_error:
1623        brelse(*bh);
1624        brelse(bh2);
1625        *bh = NULL;
1626        ext4_std_error(dir->i_sb, err);
1627        *error = err;
1628        return NULL;
1629}
1630
1631int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1632                      struct buffer_head *bh,
1633                      void *buf, int buf_size,
1634                      const char *name, int namelen,
1635                      struct ext4_dir_entry_2 **dest_de)
1636{
1637        struct ext4_dir_entry_2 *de;
1638        unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
1639        int nlen, rlen;
1640        unsigned int offset = 0;
1641        char *top;
1642
1643        de = (struct ext4_dir_entry_2 *)buf;
1644        top = buf + buf_size - reclen;
1645        while ((char *) de <= top) {
1646                if (ext4_check_dir_entry(dir, NULL, de, bh,
1647                                         buf, buf_size, offset))
1648                        return -EIO;
1649                if (ext4_match(namelen, name, de))
1650                        return -EEXIST;
1651                nlen = EXT4_DIR_REC_LEN(de->name_len);
1652                rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1653                if ((de->inode ? rlen - nlen : rlen) >= reclen)
1654                        break;
1655                de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1656                offset += rlen;
1657        }
1658        if ((char *) de > top)
1659                return -ENOSPC;
1660
1661        *dest_de = de;
1662        return 0;
1663}
1664
1665void ext4_insert_dentry(struct inode *inode,
1666                        struct ext4_dir_entry_2 *de,
1667                        int buf_size,
1668                        const char *name, int namelen)
1669{
1670
1671        int nlen, rlen;
1672
1673        nlen = EXT4_DIR_REC_LEN(de->name_len);
1674        rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1675        if (de->inode) {
1676                struct ext4_dir_entry_2 *de1 =
1677                                (struct ext4_dir_entry_2 *)((char *)de + nlen);
1678                de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1679                de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1680                de = de1;
1681        }
1682        de->file_type = EXT4_FT_UNKNOWN;
1683        de->inode = cpu_to_le32(inode->i_ino);
1684        ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1685        de->name_len = namelen;
1686        memcpy(de->name, name, namelen);
1687}
1688/*
1689 * Add a new entry into a directory (leaf) block.  If de is non-NULL,
1690 * it points to a directory entry which is guaranteed to be large
1691 * enough for new directory entry.  If de is NULL, then
1692 * add_dirent_to_buf will attempt search the directory block for
1693 * space.  It will return -ENOSPC if no space is available, and -EIO
1694 * and -EEXIST if directory entry already exists.
1695 */
1696static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1697                             struct inode *inode, struct ext4_dir_entry_2 *de,
1698                             struct buffer_head *bh)
1699{
1700        struct inode    *dir = dentry->d_parent->d_inode;
1701        const char      *name = dentry->d_name.name;
1702        int             namelen = dentry->d_name.len;
1703        unsigned int    blocksize = dir->i_sb->s_blocksize;
1704        int             csum_size = 0;
1705        int             err;
1706
1707        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1708                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1709                csum_size = sizeof(struct ext4_dir_entry_tail);
1710
1711        if (!de) {
1712                err = ext4_find_dest_de(dir, inode,
1713                                        bh, bh->b_data, blocksize - csum_size,
1714                                        name, namelen, &de);
1715                if (err)
1716                        return err;
1717        }
1718        BUFFER_TRACE(bh, "get_write_access");
1719        err = ext4_journal_get_write_access(handle, bh);
1720        if (err) {
1721                ext4_std_error(dir->i_sb, err);
1722                return err;
1723        }
1724
1725        /* By now the buffer is marked for journaling */
1726        ext4_insert_dentry(inode, de, blocksize, name, namelen);
1727
1728        /*
1729         * XXX shouldn't update any times until successful
1730         * completion of syscall, but too many callers depend
1731         * on this.
1732         *
1733         * XXX similarly, too many callers depend on
1734         * ext4_new_inode() setting the times, but error
1735         * recovery deletes the inode, so the worst that can
1736         * happen is that the times are slightly out of date
1737         * and/or different from the directory change time.
1738         */
1739        dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1740        ext4_update_dx_flag(dir);
1741        dir->i_version++;
1742        ext4_mark_inode_dirty(handle, dir);
1743        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1744        err = ext4_handle_dirty_dirent_node(handle, dir, bh);
1745        if (err)
1746                ext4_std_error(dir->i_sb, err);
1747        return 0;
1748}
1749
1750/*
1751 * This converts a one block unindexed directory to a 3 block indexed
1752 * directory, and adds the dentry to the indexed directory.
1753 */
1754static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1755                            struct inode *inode, struct buffer_head *bh)
1756{
1757        struct inode    *dir = dentry->d_parent->d_inode;
1758        const char      *name = dentry->d_name.name;
1759        int             namelen = dentry->d_name.len;
1760        struct buffer_head *bh2;
1761        struct dx_root  *root;
1762        struct dx_frame frames[2], *frame;
1763        struct dx_entry *entries;
1764        struct ext4_dir_entry_2 *de, *de2;
1765        struct ext4_dir_entry_tail *t;
1766        char            *data1, *top;
1767        unsigned        len;
1768        int             retval;
1769        unsigned        blocksize;
1770        struct dx_hash_info hinfo;
1771        ext4_lblk_t  block;
1772        struct fake_dirent *fde;
1773        int             csum_size = 0;
1774
1775        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1776                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1777                csum_size = sizeof(struct ext4_dir_entry_tail);
1778
1779        blocksize =  dir->i_sb->s_blocksize;
1780        dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1781        retval = ext4_journal_get_write_access(handle, bh);
1782        if (retval) {
1783                ext4_std_error(dir->i_sb, retval);
1784                brelse(bh);
1785                return retval;
1786        }
1787        root = (struct dx_root *) bh->b_data;
1788
1789        /* The 0th block becomes the root, move the dirents out */
1790        fde = &root->dotdot;
1791        de = (struct ext4_dir_entry_2 *)((char *)fde +
1792                ext4_rec_len_from_disk(fde->rec_len, blocksize));
1793        if ((char *) de >= (((char *) root) + blocksize)) {
1794                EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
1795                brelse(bh);
1796                return -EIO;
1797        }
1798        len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1799
1800        /* Allocate new block for the 0th block's dirents */
1801        bh2 = ext4_append(handle, dir, &block);
1802        if (IS_ERR(bh2)) {
1803                brelse(bh);
1804                return PTR_ERR(bh2);
1805        }
1806        ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1807        data1 = bh2->b_data;
1808
1809        memcpy (data1, de, len);
1810        de = (struct ext4_dir_entry_2 *) data1;
1811        top = data1 + len;
1812        while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1813                de = de2;
1814        de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1815                                           (char *) de,
1816                                           blocksize);
1817
1818        if (csum_size) {
1819                t = EXT4_DIRENT_TAIL(data1, blocksize);
1820                initialize_dirent_tail(t, blocksize);
1821        }
1822
1823        /* Initialize the root; the dot dirents already exist */
1824        de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1825        de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1826                                           blocksize);
1827        memset (&root->info, 0, sizeof(root->info));
1828        root->info.info_length = sizeof(root->info);
1829        root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1830        entries = root->entries;
1831        dx_set_block(entries, 1);
1832        dx_set_count(entries, 1);
1833        dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1834
1835        /* Initialize as for dx_probe */
1836        hinfo.hash_version = root->info.hash_version;
1837        if (hinfo.hash_version <= DX_HASH_TEA)
1838                hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1839        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1840        ext4fs_dirhash(name, namelen, &hinfo);
1841        frame = frames;
1842        frame->entries = entries;
1843        frame->at = entries;
1844        frame->bh = bh;
1845        bh = bh2;
1846
1847        ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1848        ext4_handle_dirty_dirent_node(handle, dir, bh);
1849
1850        de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1851        if (!de) {
1852                /*
1853                 * Even if the block split failed, we have to properly write
1854                 * out all the changes we did so far. Otherwise we can end up
1855                 * with corrupted filesystem.
1856                 */
1857                ext4_mark_inode_dirty(handle, dir);
1858                dx_release(frames);
1859                return retval;
1860        }
1861        dx_release(frames);
1862
1863        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1864        brelse(bh);
1865        return retval;
1866}
1867
1868/*
1869 *      ext4_add_entry()
1870 *
1871 * adds a file entry to the specified directory, using the same
1872 * semantics as ext4_find_entry(). It returns NULL if it failed.
1873 *
1874 * NOTE!! The inode part of 'de' is left at 0 - which means you
1875 * may not sleep between calling this and putting something into
1876 * the entry, as someone else might have used it while you slept.
1877 */
1878static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1879                          struct inode *inode)
1880{
1881        struct inode *dir = dentry->d_parent->d_inode;
1882        struct buffer_head *bh;
1883        struct ext4_dir_entry_2 *de;
1884        struct ext4_dir_entry_tail *t;
1885        struct super_block *sb;
1886        int     retval;
1887        int     dx_fallback=0;
1888        unsigned blocksize;
1889        ext4_lblk_t block, blocks;
1890        int     csum_size = 0;
1891
1892        if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1893                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1894                csum_size = sizeof(struct ext4_dir_entry_tail);
1895
1896        sb = dir->i_sb;
1897        blocksize = sb->s_blocksize;
1898        if (!dentry->d_name.len)
1899                return -EINVAL;
1900
1901        if (ext4_has_inline_data(dir)) {
1902                retval = ext4_try_add_inline_entry(handle, dentry, inode);
1903                if (retval < 0)
1904                        return retval;
1905                if (retval == 1) {
1906                        retval = 0;
1907                        return retval;
1908                }
1909        }
1910
1911        if (is_dx(dir)) {
1912                retval = ext4_dx_add_entry(handle, dentry, inode);
1913                if (!retval || (retval != ERR_BAD_DX_DIR))
1914                        return retval;
1915                ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
1916                dx_fallback++;
1917                ext4_mark_inode_dirty(handle, dir);
1918        }
1919        blocks = dir->i_size >> sb->s_blocksize_bits;
1920        for (block = 0; block < blocks; block++) {
1921                bh = ext4_read_dirblock(dir, block, DIRENT);
1922                if (IS_ERR(bh))
1923                        return PTR_ERR(bh);
1924
1925                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1926                if (retval != -ENOSPC) {
1927                        brelse(bh);
1928                        return retval;
1929                }
1930
1931                if (blocks == 1 && !dx_fallback &&
1932                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1933                        return make_indexed_dir(handle, dentry, inode, bh);
1934                brelse(bh);
1935        }
1936        bh = ext4_append(handle, dir, &block);
1937        if (IS_ERR(bh))
1938                return PTR_ERR(bh);
1939        de = (struct ext4_dir_entry_2 *) bh->b_data;
1940        de->inode = 0;
1941        de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
1942
1943        if (csum_size) {
1944                t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
1945                initialize_dirent_tail(t, blocksize);
1946        }
1947
1948        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1949        brelse(bh);
1950        if (retval == 0)
1951                ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
1952        return retval;
1953}
1954
1955/*
1956 * Returns 0 for success, or a negative error value
1957 */
1958static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1959                             struct inode *inode)
1960{
1961        struct dx_frame frames[2], *frame;
1962        struct dx_entry *entries, *at;
1963        struct dx_hash_info hinfo;
1964        struct buffer_head *bh;
1965        struct inode *dir = dentry->d_parent->d_inode;
1966        struct super_block *sb = dir->i_sb;
1967        struct ext4_dir_entry_2 *de;
1968        int err;
1969
1970        frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1971        if (!frame)
1972                return err;
1973        entries = frame->entries;
1974        at = frame->at;
1975        bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
1976        if (IS_ERR(bh)) {
1977                err = PTR_ERR(bh);
1978                bh = NULL;
1979                goto cleanup;
1980        }
1981
1982        BUFFER_TRACE(bh, "get_write_access");
1983        err = ext4_journal_get_write_access(handle, bh);
1984        if (err)
1985                goto journal_error;
1986
1987        err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1988        if (err != -ENOSPC)
1989                goto cleanup;
1990
1991        /* Block full, should compress but for now just split */
1992        dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1993                       dx_get_count(entries), dx_get_limit(entries)));
1994        /* Need to split index? */
1995        if (dx_get_count(entries) == dx_get_limit(entries)) {
1996                ext4_lblk_t newblock;
1997                unsigned icount = dx_get_count(entries);
1998                int levels = frame - frames;
1999                struct dx_entry *entries2;
2000                struct dx_node *node2;
2001                struct buffer_head *bh2;
2002
2003                if (levels && (dx_get_count(frames->entries) ==
2004                               dx_get_limit(frames->entries))) {
2005                        ext4_warning(sb, "Directory index full!");
2006                        err = -ENOSPC;
2007                        goto cleanup;
2008                }
2009                bh2 = ext4_append(handle, dir, &newblock);
2010                if (IS_ERR(bh2)) {
2011                        err = PTR_ERR(bh2);
2012                        goto cleanup;
2013                }
2014                node2 = (struct dx_node *)(bh2->b_data);
2015                entries2 = node2->entries;
2016                memset(&node2->fake, 0, sizeof(struct fake_dirent));
2017                node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2018                                                           sb->s_blocksize);
2019                BUFFER_TRACE(frame->bh, "get_write_access");
2020                err = ext4_journal_get_write_access(handle, frame->bh);
2021                if (err)
2022                        goto journal_error;
2023                if (levels) {
2024                        unsigned icount1 = icount/2, icount2 = icount - icount1;
2025                        unsigned hash2 = dx_get_hash(entries + icount1);
2026                        dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2027                                       icount1, icount2));
2028
2029                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2030                        err = ext4_journal_get_write_access(handle,
2031                                                             frames[0].bh);
2032                        if (err)
2033                                goto journal_error;
2034
2035                        memcpy((char *) entries2, (char *) (entries + icount1),
2036                               icount2 * sizeof(struct dx_entry));
2037                        dx_set_count(entries, icount1);
2038                        dx_set_count(entries2, icount2);
2039                        dx_set_limit(entries2, dx_node_limit(dir));
2040
2041                        /* Which index block gets the new entry? */
2042                        if (at - entries >= icount1) {
2043                                frame->at = at = at - entries - icount1 + entries2;
2044                                frame->entries = entries = entries2;
2045                                swap(frame->bh, bh2);
2046                        }
2047                        dx_insert_block(frames + 0, hash2, newblock);
2048                        dxtrace(dx_show_index("node", frames[1].entries));
2049                        dxtrace(dx_show_index("node",
2050                               ((struct dx_node *) bh2->b_data)->entries));
2051                        err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2052                        if (err)
2053                                goto journal_error;
2054                        brelse (bh2);
2055                } else {
2056                        dxtrace(printk(KERN_DEBUG
2057                                       "Creating second level index...\n"));
2058                        memcpy((char *) entries2, (char *) entries,
2059                               icount * sizeof(struct dx_entry));
2060                        dx_set_limit(entries2, dx_node_limit(dir));
2061
2062                        /* Set up root */
2063                        dx_set_count(entries, 1);
2064                        dx_set_block(entries + 0, newblock);
2065                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
2066
2067                        /* Add new access path frame */
2068                        frame = frames + 1;
2069                        frame->at = at = at - entries + entries2;
2070                        frame->entries = entries = entries2;
2071                        frame->bh = bh2;
2072                        err = ext4_journal_get_write_access(handle,
2073                                                             frame->bh);
2074                        if (err)
2075                                goto journal_error;
2076                }
2077                err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
2078                if (err) {
2079                        ext4_std_error(inode->i_sb, err);
2080                        goto cleanup;
2081                }
2082        }
2083        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
2084        if (!de)
2085                goto cleanup;
2086        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
2087        goto cleanup;
2088
2089journal_error:
2090        ext4_std_error(dir->i_sb, err);
2091cleanup:
2092        brelse(bh);
2093        dx_release(frames);
2094        return err;
2095}
2096
2097/*
2098 * ext4_generic_delete_entry deletes a directory entry by merging it
2099 * with the previous entry
2100 */
2101int ext4_generic_delete_entry(handle_t *handle,
2102                              struct inode *dir,
2103                              struct ext4_dir_entry_2 *de_del,
2104                              struct buffer_head *bh,
2105                              void *entry_buf,
2106                              int buf_size,
2107                              int csum_size)
2108{
2109        struct ext4_dir_entry_2 *de, *pde;
2110        unsigned int blocksize = dir->i_sb->s_blocksize;
2111        int i;
2112
2113        i = 0;
2114        pde = NULL;
2115        de = (struct ext4_dir_entry_2 *)entry_buf;
2116        while (i < buf_size - csum_size) {
2117                if (ext4_check_dir_entry(dir, NULL, de, bh,
2118                                         bh->b_data, bh->b_size, i))
2119                        return -EIO;
2120                if (de == de_del)  {
2121                        if (pde)
2122                                pde->rec_len = ext4_rec_len_to_disk(
2123                                        ext4_rec_len_from_disk(pde->rec_len,
2124                                                               blocksize) +
2125                                        ext4_rec_len_from_disk(de->rec_len,
2126                                                               blocksize),
2127                                        blocksize);
2128                        else
2129                                de->inode = 0;
2130                        dir->i_version++;
2131                        return 0;
2132                }
2133                i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2134                pde = de;
2135                de = ext4_next_entry(de, blocksize);
2136        }
2137        return -ENOENT;
2138}
2139
2140static int ext4_delete_entry(handle_t *handle,
2141                             struct inode *dir,
2142                             struct ext4_dir_entry_2 *de_del,
2143                             struct buffer_head *bh)
2144{
2145        int err, csum_size = 0;
2146
2147        if (ext4_has_inline_data(dir)) {
2148                int has_inline_data = 1;
2149                err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2150                                               &has_inline_data);
2151                if (has_inline_data)
2152                        return err;
2153        }
2154
2155        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2156                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2157                csum_size = sizeof(struct ext4_dir_entry_tail);
2158
2159        BUFFER_TRACE(bh, "get_write_access");
2160        err = ext4_journal_get_write_access(handle, bh);
2161        if (unlikely(err))
2162                goto out;
2163
2164        err = ext4_generic_delete_entry(handle, dir, de_del,
2165                                        bh, bh->b_data,
2166                                        dir->i_sb->s_blocksize, csum_size);
2167        if (err)
2168                goto out;
2169
2170        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2171        err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2172        if (unlikely(err))
2173                goto out;
2174
2175        return 0;
2176out:
2177        if (err != -ENOENT)
2178                ext4_std_error(dir->i_sb, err);
2179        return err;
2180}
2181
2182/*
2183 * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
2184 * since this indicates that nlinks count was previously 1.
2185 */
2186static void ext4_inc_count(handle_t *handle, struct inode *inode)
2187{
2188        inc_nlink(inode);
2189        if (is_dx(inode) && inode->i_nlink > 1) {
2190                /* limit is 16-bit i_links_count */
2191                if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
2192                        set_nlink(inode, 1);
2193                        EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
2194                                              EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
2195                }
2196        }
2197}
2198
2199/*
2200 * If a directory had nlink == 1, then we should let it be 1. This indicates
2201 * directory has >EXT4_LINK_MAX subdirs.
2202 */
2203static void ext4_dec_count(handle_t *handle, struct inode *inode)
2204{
2205        if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2206                drop_nlink(inode);
2207}
2208
2209
2210static int ext4_add_nondir(handle_t *handle,
2211                struct dentry *dentry, struct inode *inode)
2212{
2213        int err = ext4_add_entry(handle, dentry, inode);
2214        if (!err) {
2215                ext4_mark_inode_dirty(handle, inode);
2216                unlock_new_inode(inode);
2217                d_instantiate(dentry, inode);
2218                return 0;
2219        }
2220        drop_nlink(inode);
2221        unlock_new_inode(inode);
2222        iput(inode);
2223        return err;
2224}
2225
2226/*
2227 * By the time this is called, we already have created
2228 * the directory cache entry for the new file, but it
2229 * is so far negative - it has no inode.
2230 *
2231 * If the create succeeds, we fill in the inode information
2232 * with d_instantiate().
2233 */
2234static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2235                       bool excl)
2236{
2237        handle_t *handle;
2238        struct inode *inode;
2239        int err, credits, retries = 0;
2240
2241        dquot_initialize(dir);
2242
2243        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2244                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2245retry:
2246        inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2247                                            NULL, EXT4_HT_DIR, credits);
2248        handle = ext4_journal_current_handle();
2249        err = PTR_ERR(inode);
2250        if (!IS_ERR(inode)) {
2251                inode->i_op = &ext4_file_inode_operations;
2252                inode->i_fop = &ext4_file_operations;
2253                ext4_set_aops(inode);
2254                err = ext4_add_nondir(handle, dentry, inode);
2255                if (!err && IS_DIRSYNC(dir))
2256                        ext4_handle_sync(handle);
2257        }
2258        if (handle)
2259                ext4_journal_stop(handle);
2260        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2261                goto retry;
2262        return err;
2263}
2264
2265static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2266                      umode_t mode, dev_t rdev)
2267{
2268        handle_t *handle;
2269        struct inode *inode;
2270        int err, credits, retries = 0;
2271
2272        if (!new_valid_dev(rdev))
2273                return -EINVAL;
2274
2275        dquot_initialize(dir);
2276
2277        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2278                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2279retry:
2280        inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2281                                            NULL, EXT4_HT_DIR, credits);
2282        handle = ext4_journal_current_handle();
2283        err = PTR_ERR(inode);
2284        if (!IS_ERR(inode)) {
2285                init_special_inode(inode, inode->i_mode, rdev);
2286                inode->i_op = &ext4_special_inode_operations;
2287                err = ext4_add_nondir(handle, dentry, inode);
2288                if (!err && IS_DIRSYNC(dir))
2289                        ext4_handle_sync(handle);
2290        }
2291        if (handle)
2292                ext4_journal_stop(handle);
2293        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2294                goto retry;
2295        return err;
2296}
2297
2298static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2299{
2300        handle_t *handle;
2301        struct inode *inode;
2302        int err, retries = 0;
2303
2304        dquot_initialize(dir);
2305
2306retry:
2307        inode = ext4_new_inode_start_handle(dir, mode,
2308                                            NULL, 0, NULL,
2309                                            EXT4_HT_DIR,
2310                        EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2311                          4 + EXT4_XATTR_TRANS_BLOCKS);
2312        handle = ext4_journal_current_handle();
2313        err = PTR_ERR(inode);
2314        if (!IS_ERR(inode)) {
2315                inode->i_op = &ext4_file_inode_operations;
2316                inode->i_fop = &ext4_file_operations;
2317                ext4_set_aops(inode);
2318                d_tmpfile(dentry, inode);
2319                err = ext4_orphan_add(handle, inode);
2320                if (err)
2321                        goto err_unlock_inode;
2322                mark_inode_dirty(inode);
2323                unlock_new_inode(inode);
2324        }
2325        if (handle)
2326                ext4_journal_stop(handle);
2327        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2328                goto retry;
2329        return err;
2330err_unlock_inode:
2331        ext4_journal_stop(handle);
2332        unlock_new_inode(inode);
2333        return err;
2334}
2335
2336struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2337                          struct ext4_dir_entry_2 *de,
2338                          int blocksize, int csum_size,
2339                          unsigned int parent_ino, int dotdot_real_len)
2340{
2341        de->inode = cpu_to_le32(inode->i_ino);
2342        de->name_len = 1;
2343        de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2344                                           blocksize);
2345        strcpy(de->name, ".");
2346        ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2347
2348        de = ext4_next_entry(de, blocksize);
2349        de->inode = cpu_to_le32(parent_ino);
2350        de->name_len = 2;
2351        if (!dotdot_real_len)
2352                de->rec_len = ext4_rec_len_to_disk(blocksize -
2353                                        (csum_size + EXT4_DIR_REC_LEN(1)),
2354                                        blocksize);
2355        else
2356                de->rec_len = ext4_rec_len_to_disk(
2357                                EXT4_DIR_REC_LEN(de->name_len), blocksize);
2358        strcpy(de->name, "..");
2359        ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2360
2361        return ext4_next_entry(de, blocksize);
2362}
2363
2364static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2365                             struct inode *inode)
2366{
2367        struct buffer_head *dir_block = NULL;
2368        struct ext4_dir_entry_2 *de;
2369        struct ext4_dir_entry_tail *t;
2370        ext4_lblk_t block = 0;
2371        unsigned int blocksize = dir->i_sb->s_blocksize;
2372        int csum_size = 0;
2373        int err;
2374
2375        if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2376                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2377                csum_size = sizeof(struct ext4_dir_entry_tail);
2378
2379        if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2380                err = ext4_try_create_inline_dir(handle, dir, inode);
2381                if (err < 0 && err != -ENOSPC)
2382                        goto out;
2383                if (!err)
2384                        goto out;
2385        }
2386
2387        inode->i_size = 0;
2388        dir_block = ext4_append(handle, inode, &block);
2389        if (IS_ERR(dir_block))
2390                return PTR_ERR(dir_block);
2391        BUFFER_TRACE(dir_block, "get_write_access");
2392        err = ext4_journal_get_write_access(handle, dir_block);
2393        if (err)
2394                goto out;
2395        de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2396        ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2397        set_nlink(inode, 2);
2398        if (csum_size) {
2399                t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2400                initialize_dirent_tail(t, blocksize);
2401        }
2402
2403        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2404        err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2405        if (err)
2406                goto out;
2407        set_buffer_verified(dir_block);
2408out:
2409        brelse(dir_block);
2410        return err;
2411}
2412
2413static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2414{
2415        handle_t *handle;
2416        struct inode *inode;
2417        int err, credits, retries = 0;
2418
2419        if (EXT4_DIR_LINK_MAX(dir))
2420                return -EMLINK;
2421
2422        dquot_initialize(dir);
2423
2424        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2425                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2426retry:
2427        inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2428                                            &dentry->d_name,
2429                                            0, NULL, EXT4_HT_DIR, credits);
2430        handle = ext4_journal_current_handle();
2431        err = PTR_ERR(inode);
2432        if (IS_ERR(inode))
2433                goto out_stop;
2434
2435        inode->i_op = &ext4_dir_inode_operations;
2436        inode->i_fop = &ext4_dir_operations;
2437        err = ext4_init_new_dir(handle, dir, inode);
2438        if (err)
2439                goto out_clear_inode;
2440        err = ext4_mark_inode_dirty(handle, inode);
2441        if (!err)
2442                err = ext4_add_entry(handle, dentry, inode);
2443        if (err) {
2444out_clear_inode:
2445                clear_nlink(inode);
2446                unlock_new_inode(inode);
2447                ext4_mark_inode_dirty(handle, inode);
2448                iput(inode);
2449                goto out_stop;
2450        }
2451        ext4_inc_count(handle, dir);
2452        ext4_update_dx_flag(dir);
2453        err = ext4_mark_inode_dirty(handle, dir);
2454        if (err)
2455                goto out_clear_inode;
2456        unlock_new_inode(inode);
2457        d_instantiate(dentry, inode);
2458        if (IS_DIRSYNC(dir))
2459                ext4_handle_sync(handle);
2460
2461out_stop:
2462        if (handle)
2463                ext4_journal_stop(handle);
2464        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2465                goto retry;
2466        return err;
2467}
2468
2469/*
2470 * routine to check that the specified directory is empty (for rmdir)
2471 */
2472static int empty_dir(struct inode *inode)
2473{
2474        unsigned int offset;
2475        struct buffer_head *bh;
2476        struct ext4_dir_entry_2 *de, *de1;
2477        struct super_block *sb;
2478        int err = 0;
2479
2480        if (ext4_has_inline_data(inode)) {
2481                int has_inline_data = 1;
2482
2483                err = empty_inline_dir(inode, &has_inline_data);
2484                if (has_inline_data)
2485                        return err;
2486        }
2487
2488        sb = inode->i_sb;
2489        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2490                EXT4_ERROR_INODE(inode, "invalid size");
2491                return 1;
2492        }
2493        bh = ext4_read_dirblock(inode, 0, EITHER);
2494        if (IS_ERR(bh))
2495                return 1;
2496
2497        de = (struct ext4_dir_entry_2 *) bh->b_data;
2498        de1 = ext4_next_entry(de, sb->s_blocksize);
2499        if (le32_to_cpu(de->inode) != inode->i_ino ||
2500                        !le32_to_cpu(de1->inode) ||
2501                        strcmp(".", de->name) ||
2502                        strcmp("..", de1->name)) {
2503                ext4_warning(inode->i_sb,
2504                             "bad directory (dir #%lu) - no `.' or `..'",
2505                             inode->i_ino);
2506                brelse(bh);
2507                return 1;
2508        }
2509        offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
2510                 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
2511        de = ext4_next_entry(de1, sb->s_blocksize);
2512        while (offset < inode->i_size) {
2513                if (!bh ||
2514                    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2515                        unsigned int lblock;
2516                        err = 0;
2517                        brelse(bh);
2518                        lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2519                        bh = ext4_read_dirblock(inode, lblock, EITHER);
2520                        if (IS_ERR(bh))
2521                                return 1;
2522                        de = (struct ext4_dir_entry_2 *) bh->b_data;
2523                }
2524                if (ext4_check_dir_entry(inode, NULL, de, bh,
2525                                         bh->b_data, bh->b_size, offset)) {
2526                        de = (struct ext4_dir_entry_2 *)(bh->b_data +
2527                                                         sb->s_blocksize);
2528                        offset = (offset | (sb->s_blocksize - 1)) + 1;
2529                        continue;
2530                }
2531                if (le32_to_cpu(de->inode)) {
2532                        brelse(bh);
2533                        return 0;
2534                }
2535                offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2536                de = ext4_next_entry(de, sb->s_blocksize);
2537        }
2538        brelse(bh);
2539        return 1;
2540}
2541
2542/* ext4_orphan_add() links an unlinked or truncated inode into a list of
2543 * such inodes, starting at the superblock, in case we crash before the
2544 * file is closed/deleted, or in case the inode truncate spans multiple
2545 * transactions and the last transaction is not recovered after a crash.
2546 *
2547 * At filesystem recovery time, we walk this list deleting unlinked
2548 * inodes and truncating linked inodes in ext4_orphan_cleanup().
2549 */
2550int ext4_orphan_add(handle_t *handle, struct inode *inode)
2551{
2552        struct super_block *sb = inode->i_sb;
2553        struct ext4_iloc iloc;
2554        int err = 0, rc;
2555
2556        if (!EXT4_SB(sb)->s_journal)
2557                return 0;
2558
2559        mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2560        if (!list_empty(&EXT4_I(inode)->i_orphan))
2561                goto out_unlock;
2562
2563        /*
2564         * Orphan handling is only valid for files with data blocks
2565         * being truncated, or files being unlinked. Note that we either
2566         * hold i_mutex, or the inode can not be referenced from outside,
2567         * so i_nlink should not be bumped due to race
2568         */
2569        J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2570                  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2571
2572        BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
2573        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
2574        if (err)
2575                goto out_unlock;
2576
2577        err = ext4_reserve_inode_write(handle, inode, &iloc);
2578        if (err)
2579                goto out_unlock;
2580        /*
2581         * Due to previous errors inode may be already a part of on-disk
2582         * orphan list. If so skip on-disk list modification.
2583         */
2584        if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
2585                (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
2586                        goto mem_insert;
2587
2588        /* Insert this inode at the head of the on-disk orphan list... */
2589        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2590        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2591        err = ext4_handle_dirty_super(handle, sb);
2592        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2593        if (!err)
2594                err = rc;
2595
2596        /* Only add to the head of the in-memory list if all the
2597         * previous operations succeeded.  If the orphan_add is going to
2598         * fail (possibly taking the journal offline), we can't risk
2599         * leaving the inode on the orphan list: stray orphan-list
2600         * entries can cause panics at unmount time.
2601         *
2602         * This is safe: on error we're going to ignore the orphan list
2603         * anyway on the next recovery. */
2604mem_insert:
2605        if (!err)
2606                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2607
2608        jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2609        jbd_debug(4, "orphan inode %lu will point to %d\n",
2610                        inode->i_ino, NEXT_ORPHAN(inode));
2611out_unlock:
2612        mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2613        ext4_std_error(inode->i_sb, err);
2614        return err;
2615}
2616
2617/*
2618 * ext4_orphan_del() removes an unlinked or truncated inode from the list
2619 * of such inodes stored on disk, because it is finally being cleaned up.
2620 */
2621int ext4_orphan_del(handle_t *handle, struct inode *inode)
2622{
2623        struct list_head *prev;
2624        struct ext4_inode_info *ei = EXT4_I(inode);
2625        struct ext4_sb_info *sbi;
2626        __u32 ino_next;
2627        struct ext4_iloc iloc;
2628        int err = 0;
2629
2630        if ((!EXT4_SB(inode->i_sb)->s_journal) &&
2631            !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
2632                return 0;
2633
2634        mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2635        if (list_empty(&ei->i_orphan))
2636                goto out;
2637
2638        ino_next = NEXT_ORPHAN(inode);
2639        prev = ei->i_orphan.prev;
2640        sbi = EXT4_SB(inode->i_sb);
2641
2642        jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2643
2644        list_del_init(&ei->i_orphan);
2645
2646        /* If we're on an error path, we may not have a valid
2647         * transaction handle with which to update the orphan list on
2648         * disk, but we still need to remove the inode from the linked
2649         * list in memory. */
2650        if (!handle)
2651                goto out;
2652
2653        err = ext4_reserve_inode_write(handle, inode, &iloc);
2654        if (err)
2655                goto out_err;
2656
2657        if (prev == &sbi->s_orphan) {
2658                jbd_debug(4, "superblock will point to %u\n", ino_next);
2659                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2660                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2661                if (err)
2662                        goto out_brelse;
2663                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2664                err = ext4_handle_dirty_super(handle, inode->i_sb);
2665        } else {
2666                struct ext4_iloc iloc2;
2667                struct inode *i_prev =
2668                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2669
2670                jbd_debug(4, "orphan inode %lu will point to %u\n",
2671                          i_prev->i_ino, ino_next);
2672                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2673                if (err)
2674                        goto out_brelse;
2675                NEXT_ORPHAN(i_prev) = ino_next;
2676                err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2677        }
2678        if (err)
2679                goto out_brelse;
2680        NEXT_ORPHAN(inode) = 0;
2681        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2682
2683out_err:
2684        ext4_std_error(inode->i_sb, err);
2685out:
2686        mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2687        return err;
2688
2689out_brelse:
2690        brelse(iloc.bh);
2691        goto out_err;
2692}
2693
2694static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2695{
2696        int retval;
2697        struct inode *inode;
2698        struct buffer_head *bh;
2699        struct ext4_dir_entry_2 *de;
2700        handle_t *handle = NULL;
2701
2702        /* Initialize quotas before so that eventual writes go in
2703         * separate transaction */
2704        dquot_initialize(dir);
2705        dquot_initialize(dentry->d_inode);
2706
2707        retval = -ENOENT;
2708        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2709        if (!bh)
2710                goto end_rmdir;
2711
2712        inode = dentry->d_inode;
2713
2714        retval = -EIO;
2715        if (le32_to_cpu(de->inode) != inode->i_ino)
2716                goto end_rmdir;
2717
2718        retval = -ENOTEMPTY;
2719        if (!empty_dir(inode))
2720                goto end_rmdir;
2721
2722        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2723                                    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2724        if (IS_ERR(handle)) {
2725                retval = PTR_ERR(handle);
2726                handle = NULL;
2727                goto end_rmdir;
2728        }
2729
2730        if (IS_DIRSYNC(dir))
2731                ext4_handle_sync(handle);
2732
2733        retval = ext4_delete_entry(handle, dir, de, bh);
2734        if (retval)
2735                goto end_rmdir;
2736        if (!EXT4_DIR_LINK_EMPTY(inode))
2737                ext4_warning(inode->i_sb,
2738                             "empty directory has too many links (%d)",
2739                             inode->i_nlink);
2740        inode->i_version++;
2741        clear_nlink(inode);
2742        /* There's no need to set i_disksize: the fact that i_nlink is
2743         * zero will ensure that the right thing happens during any
2744         * recovery. */
2745        inode->i_size = 0;
2746        ext4_orphan_add(handle, inode);
2747        inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
2748        ext4_mark_inode_dirty(handle, inode);
2749        ext4_dec_count(handle, dir);
2750        ext4_update_dx_flag(dir);
2751        ext4_mark_inode_dirty(handle, dir);
2752
2753end_rmdir:
2754        brelse(bh);
2755        if (handle)
2756                ext4_journal_stop(handle);
2757        return retval;
2758}
2759
2760static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2761{
2762        int retval;
2763        struct inode *inode;
2764        struct buffer_head *bh;
2765        struct ext4_dir_entry_2 *de;
2766        handle_t *handle = NULL;
2767
2768        trace_ext4_unlink_enter(dir, dentry);
2769        /* Initialize quotas before so that eventual writes go
2770         * in separate transaction */
2771        dquot_initialize(dir);
2772        dquot_initialize(dentry->d_inode);
2773
2774        retval = -ENOENT;
2775        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2776        if (!bh)
2777                goto end_unlink;
2778
2779        inode = dentry->d_inode;
2780
2781        retval = -EIO;
2782        if (le32_to_cpu(de->inode) != inode->i_ino)
2783                goto end_unlink;
2784
2785        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2786                                    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2787        if (IS_ERR(handle)) {
2788                retval = PTR_ERR(handle);
2789                handle = NULL;
2790                goto end_unlink;
2791        }
2792
2793        if (IS_DIRSYNC(dir))
2794                ext4_handle_sync(handle);
2795
2796        if (!inode->i_nlink) {
2797                ext4_warning(inode->i_sb,
2798                             "Deleting nonexistent file (%lu), %d",
2799                             inode->i_ino, inode->i_nlink);
2800                set_nlink(inode, 1);
2801        }
2802        retval = ext4_delete_entry(handle, dir, de, bh);
2803        if (retval)
2804                goto end_unlink;
2805        dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2806        ext4_update_dx_flag(dir);
2807        ext4_mark_inode_dirty(handle, dir);
2808        drop_nlink(inode);
2809        if (!inode->i_nlink)
2810                ext4_orphan_add(handle, inode);
2811        inode->i_ctime = ext4_current_time(inode);
2812        ext4_mark_inode_dirty(handle, inode);
2813        retval = 0;
2814
2815end_unlink:
2816        brelse(bh);
2817        if (handle)
2818                ext4_journal_stop(handle);
2819        trace_ext4_unlink_exit(dentry, retval);
2820        return retval;
2821}
2822
2823static int ext4_symlink(struct inode *dir,
2824                        struct dentry *dentry, const char *symname)
2825{
2826        handle_t *handle;
2827        struct inode *inode;
2828        int l, err, retries = 0;
2829        int credits;
2830
2831        l = strlen(symname)+1;
2832        if (l > dir->i_sb->s_blocksize)
2833                return -ENAMETOOLONG;
2834
2835        dquot_initialize(dir);
2836
2837        if (l > EXT4_N_BLOCKS * 4) {
2838                /*
2839                 * For non-fast symlinks, we just allocate inode and put it on
2840                 * orphan list in the first transaction => we need bitmap,
2841                 * group descriptor, sb, inode block, quota blocks, and
2842                 * possibly selinux xattr blocks.
2843                 */
2844                credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2845                          EXT4_XATTR_TRANS_BLOCKS;
2846        } else {
2847                /*
2848                 * Fast symlink. We have to add entry to directory
2849                 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
2850                 * allocate new inode (bitmap, group descriptor, inode block,
2851                 * quota blocks, sb is already counted in previous macros).
2852                 */
2853                credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2854                          EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
2855        }
2856retry:
2857        inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2858                                            &dentry->d_name, 0, NULL,
2859                                            EXT4_HT_DIR, credits);
2860        handle = ext4_journal_current_handle();
2861        err = PTR_ERR(inode);
2862        if (IS_ERR(inode))
2863                goto out_stop;
2864
2865        if (l > EXT4_N_BLOCKS * 4) {
2866                inode->i_op = &ext4_symlink_inode_operations;
2867                ext4_set_aops(inode);
2868                /*
2869                 * We cannot call page_symlink() with transaction started
2870                 * because it calls into ext4_write_begin() which can wait
2871                 * for transaction commit if we are running out of space
2872                 * and thus we deadlock. So we have to stop transaction now
2873                 * and restart it when symlink contents is written.
2874                 * 
2875                 * To keep fs consistent in case of crash, we have to put inode
2876                 * to orphan list in the mean time.
2877                 */
2878                drop_nlink(inode);
2879                err = ext4_orphan_add(handle, inode);
2880                ext4_journal_stop(handle);
2881                if (err)
2882                        goto err_drop_inode;
2883                err = __page_symlink(inode, symname, l, 1);
2884                if (err)
2885                        goto err_drop_inode;
2886                /*
2887                 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2888                 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2889                 */
2890                handle = ext4_journal_start(dir, EXT4_HT_DIR,
2891                                EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2892                                EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2893                if (IS_ERR(handle)) {
2894                        err = PTR_ERR(handle);
2895                        goto err_drop_inode;
2896                }
2897                set_nlink(inode, 1);
2898                err = ext4_orphan_del(handle, inode);
2899                if (err) {
2900                        ext4_journal_stop(handle);
2901                        clear_nlink(inode);
2902                        goto err_drop_inode;
2903                }
2904        } else {
2905                /* clear the extent format for fast symlink */
2906                ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
2907                inode->i_op = &ext4_fast_symlink_inode_operations;
2908                memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2909                inode->i_size = l-1;
2910        }
2911        EXT4_I(inode)->i_disksize = inode->i_size;
2912        err = ext4_add_nondir(handle, dentry, inode);
2913        if (!err && IS_DIRSYNC(dir))
2914                ext4_handle_sync(handle);
2915
2916out_stop:
2917        if (handle)
2918                ext4_journal_stop(handle);
2919        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2920                goto retry;
2921        return err;
2922err_drop_inode:
2923        unlock_new_inode(inode);
2924        iput(inode);
2925        return err;
2926}
2927
2928static int ext4_link(struct dentry *old_dentry,
2929                     struct inode *dir, struct dentry *dentry)
2930{
2931        handle_t *handle;
2932        struct inode *inode = old_dentry->d_inode;
2933        int err, retries = 0;
2934
2935        if (inode->i_nlink >= EXT4_LINK_MAX)
2936                return -EMLINK;
2937
2938        dquot_initialize(dir);
2939
2940retry:
2941        handle = ext4_journal_start(dir, EXT4_HT_DIR,
2942                (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2943                 EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
2944        if (IS_ERR(handle))
2945                return PTR_ERR(handle);
2946
2947        if (IS_DIRSYNC(dir))
2948                ext4_handle_sync(handle);
2949
2950        inode->i_ctime = ext4_current_time(inode);
2951        ext4_inc_count(handle, inode);
2952        ihold(inode);
2953
2954        err = ext4_add_entry(handle, dentry, inode);
2955        if (!err) {
2956                ext4_mark_inode_dirty(handle, inode);
2957                /* this can happen only for tmpfile being
2958                 * linked the first time
2959                 */
2960                if (inode->i_nlink == 1)
2961                        ext4_orphan_del(handle, inode);
2962                d_instantiate(dentry, inode);
2963        } else {
2964                drop_nlink(inode);
2965                iput(inode);
2966        }
2967        ext4_journal_stop(handle);
2968        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2969                goto retry;
2970        return err;
2971}
2972
2973
2974/*
2975 * Try to find buffer head where contains the parent block.
2976 * It should be the inode block if it is inlined or the 1st block
2977 * if it is a normal dir.
2978 */
2979static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2980                                        struct inode *inode,
2981                                        int *retval,
2982                                        struct ext4_dir_entry_2 **parent_de,
2983                                        int *inlined)
2984{
2985        struct buffer_head *bh;
2986
2987        if (!ext4_has_inline_data(inode)) {
2988                bh = ext4_read_dirblock(inode, 0, EITHER);
2989                if (IS_ERR(bh)) {
2990                        *retval = PTR_ERR(bh);
2991                        return NULL;
2992                }
2993                *parent_de = ext4_next_entry(
2994                                        (struct ext4_dir_entry_2 *)bh->b_data,
2995                                        inode->i_sb->s_blocksize);
2996                return bh;
2997        }
2998
2999        *inlined = 1;
3000        return ext4_get_first_inline_block(inode, parent_de, retval);
3001}
3002
/*
 * One side (source or destination) of a rename operation.
 */
struct ext4_renament {
	struct inode *dir;		/* directory holding the name */
	struct dentry *dentry;		/* the name itself */
	struct inode *inode;		/* inode the name refers to */
	bool is_dir;
	int dir_nlink_delta;		/* pending link-count change for dir */

	/* directory entry for "dentry" inside "dir" */
	struct buffer_head *bh;		/* buffer containing de */
	struct ext4_dir_entry_2 *de;
	int inlined;			/* non-zero: bh is not a dirent block */

	/* ".." entry inside "inode" when it is a directory */
	struct buffer_head *dir_bh;	/* buffer containing parent_de */
	struct ext4_dir_entry_2 *parent_de;
	int dir_inlined;		/* non-zero: inode has inline data */
};
3020
3021static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3022{
3023        int retval;
3024
3025        ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3026                                              &retval, &ent->parent_de,
3027                                              &ent->dir_inlined);
3028        if (!ent->dir_bh)
3029                return retval;
3030        if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3031                return -EIO;
3032        BUFFER_TRACE(ent->dir_bh, "get_write_access");
3033        return ext4_journal_get_write_access(handle, ent->dir_bh);
3034}
3035
3036static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3037                                  unsigned dir_ino)
3038{
3039        int retval;
3040
3041        ent->parent_de->inode = cpu_to_le32(dir_ino);
3042        BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3043        if (!ent->dir_inlined) {
3044                if (is_dx(ent->inode)) {
3045                        retval = ext4_handle_dirty_dx_node(handle,
3046                                                           ent->inode,
3047                                                           ent->dir_bh);
3048                } else {
3049                        retval = ext4_handle_dirty_dirent_node(handle,
3050                                                               ent->inode,
3051                                                               ent->dir_bh);
3052                }
3053        } else {
3054                retval = ext4_mark_inode_dirty(handle, ent->inode);
3055        }
3056        if (retval) {
3057                ext4_std_error(ent->dir->i_sb, retval);
3058                return retval;
3059        }
3060        return 0;
3061}
3062
3063static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3064                       unsigned ino, unsigned file_type)
3065{
3066        int retval;
3067
3068        BUFFER_TRACE(ent->bh, "get write access");
3069        retval = ext4_journal_get_write_access(handle, ent->bh);
3070        if (retval)
3071                return retval;
3072        ent->de->inode = cpu_to_le32(ino);
3073        if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb,
3074                                      EXT4_FEATURE_INCOMPAT_FILETYPE))
3075                ent->de->file_type = file_type;
3076        ent->dir->i_version++;
3077        ent->dir->i_ctime = ent->dir->i_mtime =
3078                ext4_current_time(ent->dir);
3079        ext4_mark_inode_dirty(handle, ent->dir);
3080        BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3081        if (!ent->inlined) {
3082                retval = ext4_handle_dirty_dirent_node(handle,
3083                                                       ent->dir, ent->bh);
3084                if (unlikely(retval)) {
3085                        ext4_std_error(ent->dir->i_sb, retval);
3086                        return retval;
3087                }
3088        }
3089        brelse(ent->bh);
3090        ent->bh = NULL;
3091
3092        return 0;
3093}
3094
3095static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3096                                  const struct qstr *d_name)
3097{
3098        int retval = -ENOENT;
3099        struct buffer_head *bh;
3100        struct ext4_dir_entry_2 *de;
3101
3102        bh = ext4_find_entry(dir, d_name, &de, NULL);
3103        if (bh) {
3104                retval = ext4_delete_entry(handle, dir, de, bh);
3105                brelse(bh);
3106        }
3107        return retval;
3108}
3109
3110static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
3111{
3112        int retval;
3113        /*
3114         * ent->de could have moved from under us during htree split, so make
3115         * sure that we are deleting the right entry.  We might also be pointing
3116         * to a stale entry in the unused part of ent->bh so just checking inum
3117         * and the name isn't enough.
3118         */
3119        if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3120            ent->de->name_len != ent->dentry->d_name.len ||
3121            strncmp(ent->de->name, ent->dentry->d_name.name,
3122                    ent->de->name_len)) {
3123                retval = ext4_find_delete_entry(handle, ent->dir,
3124                                                &ent->dentry->d_name);
3125        } else {
3126                retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3127                if (retval == -ENOENT) {
3128                        retval = ext4_find_delete_entry(handle, ent->dir,
3129                                                        &ent->dentry->d_name);
3130                }
3131        }
3132
3133        if (retval) {
3134                ext4_warning(ent->dir->i_sb,
3135                                "Deleting old file (%lu), %d, error=%d",
3136                                ent->dir->i_ino, ent->dir->i_nlink, retval);
3137        }
3138}
3139
3140static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3141{
3142        if (ent->dir_nlink_delta) {
3143                if (ent->dir_nlink_delta == -1)
3144                        ext4_dec_count(handle, ent->dir);
3145                else
3146                        ext4_inc_count(handle, ent->dir);
3147                ext4_mark_inode_dirty(handle, ent->dir);
3148        }
3149}
3150
/*
 * Anybody can rename anything with this: the permission checks are left to the
 * higher-level routines.
 *
 * n.b.  old_{dentry,inode} refers to the source dentry/inode
 * while new_{dentry,inode} refers to the destination dentry/inode.
 * This comes from rename(const char *oldpath, const char *newpath)
 */
3159static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3160                       struct inode *new_dir, struct dentry *new_dentry)
3161{
3162        handle_t *handle = NULL;
3163        struct ext4_renament old = {
3164                .dir = old_dir,
3165                .dentry = old_dentry,
3166                .inode = old_dentry->d_inode,
3167        };
3168        struct ext4_renament new = {
3169                .dir = new_dir,
3170                .dentry = new_dentry,
3171                .inode = new_dentry->d_inode,
3172        };
3173        int retval;
3174
3175        dquot_initialize(old.dir);
3176        dquot_initialize(new.dir);
3177
3178        /* Initialize quotas before so that eventual writes go
3179         * in separate transaction */
3180        if (new.inode)
3181                dquot_initialize(new.inode);
3182
3183        old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3184        /*
3185         *  Check for inode number is _not_ due to possible IO errors.
3186         *  We might rmdir the source, keep it as pwd of some process
3187         *  and merrily kill the link to whatever was created under the
3188         *  same name. Goodbye sticky bit ;-<
3189         */
3190        retval = -ENOENT;
3191        if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3192                goto end_rename;
3193
3194        new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3195                                 &new.de, &new.inlined);
3196        if (new.bh) {
3197                if (!new.inode) {
3198                        brelse(new.bh);
3199                        new.bh = NULL;
3200                }
3201        }
3202        if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3203                ext4_alloc_da_blocks(old.inode);
3204
3205        handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
3206                (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3207                 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3208        if (IS_ERR(handle))
3209                return PTR_ERR(handle);
3210
3211        if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3212                ext4_handle_sync(handle);
3213
3214        if (S_ISDIR(old.inode->i_mode)) {
3215                if (new.inode) {
3216                        retval = -ENOTEMPTY;
3217                        if (!empty_dir(new.inode))
3218                                goto end_rename;
3219                } else {
3220                        retval = -EMLINK;
3221                        if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3222                                goto end_rename;
3223                }
3224                retval = ext4_rename_dir_prepare(handle, &old);
3225                if (retval)
3226                        goto end_rename;
3227        }
3228        if (!new.bh) {
3229                retval = ext4_add_entry(handle, new.dentry, old.inode);
3230                if (retval)
3231                        goto end_rename;
3232        } else {
3233                retval = ext4_setent(handle, &new,
3234                                     old.inode->i_ino, old.de->file_type);
3235                if (retval)
3236                        goto end_rename;
3237        }
3238
3239        /*
3240         * Like most other Unix systems, set the ctime for inodes on a
3241         * rename.
3242         */
3243        old.inode->i_ctime = ext4_current_time(old.inode);
3244        ext4_mark_inode_dirty(handle, old.inode);
3245
3246        /*
3247         * ok, that's it
3248         */
3249        ext4_rename_delete(handle, &old);
3250
3251        if (new.inode) {
3252                ext4_dec_count(handle, new.inode);
3253                new.inode->i_ctime = ext4_current_time(new.inode);
3254        }
3255        old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir);
3256        ext4_update_dx_flag(old.dir);
3257        if (old.dir_bh) {
3258                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3259                if (retval)
3260                        goto end_rename;
3261
3262                ext4_dec_count(handle, old.dir);
3263                if (new.inode) {
3264                        /* checked empty_dir above, can't have another parent,
3265                         * ext4_dec_count() won't work for many-linked dirs */
3266                        clear_nlink(new.inode);
3267                } else {
3268                        ext4_inc_count(handle, new.dir);
3269                        ext4_update_dx_flag(new.dir);
3270                        ext4_mark_inode_dirty(handle, new.dir);
3271                }
3272        }
3273        ext4_mark_inode_dirty(handle, old.dir);
3274        if (new.inode) {
3275                ext4_mark_inode_dirty(handle, new.inode);
3276                if (!new.inode->i_nlink)
3277                        ext4_orphan_add(handle, new.inode);
3278        }
3279        retval = 0;
3280
3281end_rename:
3282        brelse(old.dir_bh);
3283        brelse(old.bh);
3284        brelse(new.bh);
3285        if (handle)
3286                ext4_journal_stop(handle);
3287        return retval;
3288}
3289
3290static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3291                             struct inode *new_dir, struct dentry *new_dentry)
3292{
3293        handle_t *handle = NULL;
3294        struct ext4_renament old = {
3295                .dir = old_dir,
3296                .dentry = old_dentry,
3297                .inode = old_dentry->d_inode,
3298        };
3299        struct ext4_renament new = {
3300                .dir = new_dir,
3301                .dentry = new_dentry,
3302                .inode = new_dentry->d_inode,
3303        };
3304        u8 new_file_type;
3305        int retval;
3306
3307        dquot_initialize(old.dir);
3308        dquot_initialize(new.dir);
3309
3310        old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
3311                                 &old.de, &old.inlined);
3312        /*
3313         *  Check for inode number is _not_ due to possible IO errors.
3314         *  We might rmdir the source, keep it as pwd of some process
3315         *  and merrily kill the link to whatever was created under the
3316         *  same name. Goodbye sticky bit ;-<
3317         */
3318        retval = -ENOENT;
3319        if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3320                goto end_rename;
3321
3322        new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3323                                 &new.de, &new.inlined);
3324
3325        /* RENAME_EXCHANGE case: old *and* new must both exist */
3326        if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
3327                goto end_rename;
3328
3329        handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
3330                (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3331                 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3332        if (IS_ERR(handle))
3333                return PTR_ERR(handle);
3334
3335        if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3336                ext4_handle_sync(handle);
3337
3338        if (S_ISDIR(old.inode->i_mode)) {
3339                old.is_dir = true;
3340                retval = ext4_rename_dir_prepare(handle, &old);
3341                if (retval)
3342                        goto end_rename;
3343        }
3344        if (S_ISDIR(new.inode->i_mode)) {
3345                new.is_dir = true;
3346                retval = ext4_rename_dir_prepare(handle, &new);
3347                if (retval)
3348                        goto end_rename;
3349        }
3350
3351        /*
3352         * Other than the special case of overwriting a directory, parents'
3353         * nlink only needs to be modified if this is a cross directory rename.
3354         */
3355        if (old.dir != new.dir && old.is_dir != new.is_dir) {
3356                old.dir_nlink_delta = old.is_dir ? -1 : 1;
3357                new.dir_nlink_delta = -old.dir_nlink_delta;
3358                retval = -EMLINK;
3359                if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
3360                    (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
3361                        goto end_rename;
3362        }
3363
3364        new_file_type = new.de->file_type;
3365        retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
3366        if (retval)
3367                goto end_rename;
3368
3369        retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
3370        if (retval)
3371                goto end_rename;
3372
3373        /*
3374         * Like most other Unix systems, set the ctime for inodes on a
3375         * rename.
3376         */
3377        old.inode->i_ctime = ext4_current_time(old.inode);
3378        new.inode->i_ctime = ext4_current_time(new.inode);
3379        ext4_mark_inode_dirty(handle, old.inode);
3380        ext4_mark_inode_dirty(handle, new.inode);
3381
3382        if (old.dir_bh) {
3383                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3384                if (retval)
3385                        goto end_rename;
3386        }
3387        if (new.dir_bh) {
3388                retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
3389                if (retval)
3390                        goto end_rename;
3391        }
3392        ext4_update_dir_count(handle, &old);
3393        ext4_update_dir_count(handle, &new);
3394        retval = 0;
3395
3396end_rename:
3397        brelse(old.dir_bh);
3398        brelse(new.dir_bh);
3399        brelse(old.bh);
3400        brelse(new.bh);
3401        if (handle)
3402                ext4_journal_stop(handle);
3403        return retval;
3404}
3405
3406static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
3407                        struct inode *new_dir, struct dentry *new_dentry,
3408                        unsigned int flags)
3409{
3410        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
3411                return -EINVAL;
3412
3413        if (flags & RENAME_EXCHANGE) {
3414                return ext4_cross_rename(old_dir, old_dentry,
3415                                         new_dir, new_dentry);
3416        }
3417        /*
3418         * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE"
3419         * is equivalent to regular rename.
3420         */
3421        return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
3422}
3423
3424/*
3425 * directories can handle most operations...
3426 */
3427const struct inode_operations ext4_dir_inode_operations = {
3428        .create         = ext4_create,
3429        .lookup         = ext4_lookup,
3430        .link           = ext4_link,
3431        .unlink         = ext4_unlink,
3432        .symlink        = ext4_symlink,
3433        .mkdir          = ext4_mkdir,
3434        .rmdir          = ext4_rmdir,
3435        .mknod          = ext4_mknod,
3436        .tmpfile        = ext4_tmpfile,
3437        .rename         = ext4_rename,
3438        .rename2        = ext4_rename2,
3439        .setattr        = ext4_setattr,
3440        .setxattr       = generic_setxattr,
3441        .getxattr       = generic_getxattr,
3442        .listxattr      = ext4_listxattr,
3443        .removexattr    = generic_removexattr,
3444        .get_acl        = ext4_get_acl,
3445        .set_acl        = ext4_set_acl,
3446        .fiemap         = ext4_fiemap,
3447};
3448
3449const struct inode_operations ext4_special_inode_operations = {
3450        .setattr        = ext4_setattr,
3451        .setxattr       = generic_setxattr,
3452        .getxattr       = generic_getxattr,
3453        .listxattr      = ext4_listxattr,
3454        .removexattr    = generic_removexattr,
3455        .get_acl        = ext4_get_acl,
3456        .set_acl        = ext4_set_acl,
3457};
3458