linux/fs/gfs2/dir.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10/*
  11 * Implements Extendible Hashing as described in:
  12 *   "Extendible Hashing" by Fagin, et al in
  13 *     __ACM Trans. on Database Systems__, Sept 1979.
  14 *
  15 *
  16 * Here's the layout of dirents which is essentially the same as that of ext2
  17 * within a single block. The field de_name_len is the number of bytes
  18 * actually required for the name (no null terminator). The field de_rec_len
  19 * is the number of bytes allocated to the dirent. The offset of the next
  20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
  21 * deleted, the preceding dirent inherits its allocated space, ie
  22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
  23 * by adding de_rec_len to the current dirent, this essentially causes the
  24 * deleted dirent to get jumped over when iterating through all the dirents.
  25 *
  26 * When deleting the first dirent in a block, there is no previous dirent so
  27 * the field de_ino is set to zero to designate it as deleted. When allocating
  28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
  29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
  30 * dirent is allocated. Otherwise it must go through all the 'used' dirents
  31 * searching for one in which the amount of total space minus the amount of
  32 * used space will provide enough space for the new dirent.
  33 *
  34 * There are two types of blocks in which dirents reside. In a stuffed dinode,
  35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
  36 * the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
  37 * beginning of the leaf block. The dirents reside in leaves when
  38 *
  39 * dip->i_diskflags & GFS2_DIF_EXHASH is true
  40 *
  41 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
  42 *
  43 * When the dirents are in leaves, the actual contents of the directory file are
  44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
  45 * dirents are NOT in the directory file itself. There can be more than one
  46 * block pointer in the array that points to the same leaf. In fact, when a
  47 * directory is first converted from linear to exhash, all of the pointers
  48 * point to the same leaf.
  49 *
  50 * When a leaf is completely full, the size of the hash table can be
  51 * doubled unless it is already at the maximum size which is hard coded into
  52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
  53 * but never before the maximum hash table size has been reached.
  54 */
  55
  56#include <linux/slab.h>
  57#include <linux/spinlock.h>
  58#include <linux/buffer_head.h>
  59#include <linux/sort.h>
  60#include <linux/gfs2_ondisk.h>
  61#include <linux/crc32.h>
  62#include <linux/vmalloc.h>
  63
  64#include "gfs2.h"
  65#include "incore.h"
  66#include "dir.h"
  67#include "glock.h"
  68#include "inode.h"
  69#include "meta_io.h"
  70#include "quota.h"
  71#include "rgrp.h"
  72#include "trans.h"
  73#include "bmap.h"
  74#include "util.h"
  75
  76#define IS_LEAF     1 /* Hashed (leaf) directory */
  77#define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
  78
  79#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
  80#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
  81
  82struct qstr gfs2_qdot __read_mostly;
  83struct qstr gfs2_qdotdot __read_mostly;
  84
  85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
  86                            const struct qstr *name, void *opaque);
  87
  88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
  89                            struct buffer_head **bhp)
  90{
  91        struct buffer_head *bh;
  92
  93        bh = gfs2_meta_new(ip->i_gl, block);
  94        gfs2_trans_add_bh(ip->i_gl, bh, 1);
  95        gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
  96        gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
  97        *bhp = bh;
  98        return 0;
  99}
 100
 101static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
 102                                        struct buffer_head **bhp)
 103{
 104        struct buffer_head *bh;
 105        int error;
 106
 107        error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
 108        if (error)
 109                return error;
 110        if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
 111                brelse(bh);
 112                return -EIO;
 113        }
 114        *bhp = bh;
 115        return 0;
 116}
 117
 118static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 119                                  unsigned int offset, unsigned int size)
 120{
 121        struct buffer_head *dibh;
 122        int error;
 123
 124        error = gfs2_meta_inode_buffer(ip, &dibh);
 125        if (error)
 126                return error;
 127
 128        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 129        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 130        if (ip->i_inode.i_size < offset + size)
 131                i_size_write(&ip->i_inode, offset + size);
 132        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 133        gfs2_dinode_out(ip, dibh->b_data);
 134
 135        brelse(dibh);
 136
 137        return size;
 138}
 139
 140
 141
 142/**
 143 * gfs2_dir_write_data - Write directory information to the inode
 144 * @ip: The GFS2 inode
 145 * @buf: The buffer containing information to be written
 146 * @offset: The file offset to start writing at
 147 * @size: The amount of data to write
 148 *
 149 * Returns: The number of bytes correctly written or error code
 150 */
 151static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 152                               u64 offset, unsigned int size)
 153{
 154        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 155        struct buffer_head *dibh;
 156        u64 lblock, dblock;
 157        u32 extlen = 0;
 158        unsigned int o;
 159        int copied = 0;
 160        int error = 0;
 161        int new = 0;
 162
 163        if (!size)
 164                return 0;
 165
 166        if (gfs2_is_stuffed(ip) &&
 167            offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
 168                return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
 169                                              size);
 170
 171        if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 172                return -EINVAL;
 173
 174        if (gfs2_is_stuffed(ip)) {
 175                error = gfs2_unstuff_dinode(ip, NULL);
 176                if (error)
 177                        return error;
 178        }
 179
 180        lblock = offset;
 181        o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 182
 183        while (copied < size) {
 184                unsigned int amount;
 185                struct buffer_head *bh;
 186
 187                amount = size - copied;
 188                if (amount > sdp->sd_sb.sb_bsize - o)
 189                        amount = sdp->sd_sb.sb_bsize - o;
 190
 191                if (!extlen) {
 192                        new = 1;
 193                        error = gfs2_extent_map(&ip->i_inode, lblock, &new,
 194                                                &dblock, &extlen);
 195                        if (error)
 196                                goto fail;
 197                        error = -EIO;
 198                        if (gfs2_assert_withdraw(sdp, dblock))
 199                                goto fail;
 200                }
 201
 202                if (amount == sdp->sd_jbsize || new)
 203                        error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
 204                else
 205                        error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
 206
 207                if (error)
 208                        goto fail;
 209
 210                gfs2_trans_add_bh(ip->i_gl, bh, 1);
 211                memcpy(bh->b_data + o, buf, amount);
 212                brelse(bh);
 213
 214                buf += amount;
 215                copied += amount;
 216                lblock++;
 217                dblock++;
 218                extlen--;
 219
 220                o = sizeof(struct gfs2_meta_header);
 221        }
 222
 223out:
 224        error = gfs2_meta_inode_buffer(ip, &dibh);
 225        if (error)
 226                return error;
 227
 228        if (ip->i_inode.i_size < offset + copied)
 229                i_size_write(&ip->i_inode, offset + copied);
 230        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 231
 232        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 233        gfs2_dinode_out(ip, dibh->b_data);
 234        brelse(dibh);
 235
 236        return copied;
 237fail:
 238        if (copied)
 239                goto out;
 240        return error;
 241}
 242
 243static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
 244                                 u64 offset, unsigned int size)
 245{
 246        struct buffer_head *dibh;
 247        int error;
 248
 249        error = gfs2_meta_inode_buffer(ip, &dibh);
 250        if (!error) {
 251                offset += sizeof(struct gfs2_dinode);
 252                memcpy(buf, dibh->b_data + offset, size);
 253                brelse(dibh);
 254        }
 255
 256        return (error) ? error : size;
 257}
 258
 259
 260/**
 261 * gfs2_dir_read_data - Read a data from a directory inode
 262 * @ip: The GFS2 Inode
 263 * @buf: The buffer to place result into
 264 * @offset: File offset to begin jdata_readng from
 265 * @size: Amount of data to transfer
 266 *
 267 * Returns: The amount of data actually copied or the error
 268 */
 269static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
 270                              unsigned int size, unsigned ra)
 271{
 272        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 273        u64 lblock, dblock;
 274        u32 extlen = 0;
 275        unsigned int o;
 276        int copied = 0;
 277        int error = 0;
 278        u64 disksize = i_size_read(&ip->i_inode);
 279
 280        if (offset >= disksize)
 281                return 0;
 282
 283        if (offset + size > disksize)
 284                size = disksize - offset;
 285
 286        if (!size)
 287                return 0;
 288
 289        if (gfs2_is_stuffed(ip))
 290                return gfs2_dir_read_stuffed(ip, buf, offset, size);
 291
 292        if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 293                return -EINVAL;
 294
 295        lblock = offset;
 296        o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 297
 298        while (copied < size) {
 299                unsigned int amount;
 300                struct buffer_head *bh;
 301                int new;
 302
 303                amount = size - copied;
 304                if (amount > sdp->sd_sb.sb_bsize - o)
 305                        amount = sdp->sd_sb.sb_bsize - o;
 306
 307                if (!extlen) {
 308                        new = 0;
 309                        error = gfs2_extent_map(&ip->i_inode, lblock, &new,
 310                                                &dblock, &extlen);
 311                        if (error || !dblock)
 312                                goto fail;
 313                        BUG_ON(extlen < 1);
 314                        if (!ra)
 315                                extlen = 1;
 316                        bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
 317                } else {
 318                        error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
 319                        if (error)
 320                                goto fail;
 321                }
 322                error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
 323                if (error) {
 324                        brelse(bh);
 325                        goto fail;
 326                }
 327                dblock++;
 328                extlen--;
 329                memcpy(buf, bh->b_data + o, amount);
 330                brelse(bh);
 331                buf += amount;
 332                copied += amount;
 333                lblock++;
 334                o = sizeof(struct gfs2_meta_header);
 335        }
 336
 337        return copied;
 338fail:
 339        return (copied) ? copied : error;
 340}
 341
 342static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
 343{
 344        return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
 345}
 346
 347static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
 348                                     const struct qstr *name, int ret)
 349{
 350        if (!gfs2_dirent_sentinel(dent) &&
 351            be32_to_cpu(dent->de_hash) == name->hash &&
 352            be16_to_cpu(dent->de_name_len) == name->len &&
 353            memcmp(dent+1, name->name, name->len) == 0)
 354                return ret;
 355        return 0;
 356}
 357
 358static int gfs2_dirent_find(const struct gfs2_dirent *dent,
 359                            const struct qstr *name,
 360                            void *opaque)
 361{
 362        return __gfs2_dirent_find(dent, name, 1);
 363}
 364
 365static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
 366                            const struct qstr *name,
 367                            void *opaque)
 368{
 369        return __gfs2_dirent_find(dent, name, 2);
 370}
 371
 372/*
 373 * name->name holds ptr to start of block.
 374 * name->len holds size of block.
 375 */
 376static int gfs2_dirent_last(const struct gfs2_dirent *dent,
 377                            const struct qstr *name,
 378                            void *opaque)
 379{
 380        const char *start = name->name;
 381        const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
 382        if (name->len == (end - start))
 383                return 1;
 384        return 0;
 385}
 386
 387static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
 388                                  const struct qstr *name,
 389                                  void *opaque)
 390{
 391        unsigned required = GFS2_DIRENT_SIZE(name->len);
 392        unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 393        unsigned totlen = be16_to_cpu(dent->de_rec_len);
 394
 395        if (gfs2_dirent_sentinel(dent))
 396                actual = 0;
 397        if (totlen - actual >= required)
 398                return 1;
 399        return 0;
 400}
 401
 402struct dirent_gather {
 403        const struct gfs2_dirent **pdent;
 404        unsigned offset;
 405};
 406
 407static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
 408                              const struct qstr *name,
 409                              void *opaque)
 410{
 411        struct dirent_gather *g = opaque;
 412        if (!gfs2_dirent_sentinel(dent)) {
 413                g->pdent[g->offset++] = dent;
 414        }
 415        return 0;
 416}
 417
 418/*
 419 * Other possible things to check:
 420 * - Inode located within filesystem size (and on valid block)
 421 * - Valid directory entry type
 422 * Not sure how heavy-weight we want to make this... could also check
 423 * hash is correct for example, but that would take a lot of extra time.
 424 * For now the most important thing is to check that the various sizes
 425 * are correct.
 426 */
 427static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
 428                             unsigned int size, unsigned int len, int first)
 429{
 430        const char *msg = "gfs2_dirent too small";
 431        if (unlikely(size < sizeof(struct gfs2_dirent)))
 432                goto error;
 433        msg = "gfs2_dirent misaligned";
 434        if (unlikely(offset & 0x7))
 435                goto error;
 436        msg = "gfs2_dirent points beyond end of block";
 437        if (unlikely(offset + size > len))
 438                goto error;
 439        msg = "zero inode number";
 440        if (unlikely(!first && gfs2_dirent_sentinel(dent)))
 441                goto error;
 442        msg = "name length is greater than space in dirent";
 443        if (!gfs2_dirent_sentinel(dent) &&
 444            unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
 445                     size))
 446                goto error;
 447        return 0;
 448error:
 449        printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
 450               first ? "first in block" : "not first in block");
 451        return -EIO;
 452}
 453
 454static int gfs2_dirent_offset(const void *buf)
 455{
 456        const struct gfs2_meta_header *h = buf;
 457        int offset;
 458
 459        BUG_ON(buf == NULL);
 460
 461        switch(be32_to_cpu(h->mh_type)) {
 462        case GFS2_METATYPE_LF:
 463                offset = sizeof(struct gfs2_leaf);
 464                break;
 465        case GFS2_METATYPE_DI:
 466                offset = sizeof(struct gfs2_dinode);
 467                break;
 468        default:
 469                goto wrong_type;
 470        }
 471        return offset;
 472wrong_type:
 473        printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
 474               be32_to_cpu(h->mh_type));
 475        return -1;
 476}
 477
 478static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
 479                                            unsigned int len, gfs2_dscan_t scan,
 480                                            const struct qstr *name,
 481                                            void *opaque)
 482{
 483        struct gfs2_dirent *dent, *prev;
 484        unsigned offset;
 485        unsigned size;
 486        int ret = 0;
 487
 488        ret = gfs2_dirent_offset(buf);
 489        if (ret < 0)
 490                goto consist_inode;
 491
 492        offset = ret;
 493        prev = NULL;
 494        dent = buf + offset;
 495        size = be16_to_cpu(dent->de_rec_len);
 496        if (gfs2_check_dirent(dent, offset, size, len, 1))
 497                goto consist_inode;
 498        do {
 499                ret = scan(dent, name, opaque);
 500                if (ret)
 501                        break;
 502                offset += size;
 503                if (offset == len)
 504                        break;
 505                prev = dent;
 506                dent = buf + offset;
 507                size = be16_to_cpu(dent->de_rec_len);
 508                if (gfs2_check_dirent(dent, offset, size, len, 0))
 509                        goto consist_inode;
 510        } while(1);
 511
 512        switch(ret) {
 513        case 0:
 514                return NULL;
 515        case 1:
 516                return dent;
 517        case 2:
 518                return prev ? prev : dent;
 519        default:
 520                BUG_ON(ret > 0);
 521                return ERR_PTR(ret);
 522        }
 523
 524consist_inode:
 525        gfs2_consist_inode(GFS2_I(inode));
 526        return ERR_PTR(-EIO);
 527}
 528
 529static int dirent_check_reclen(struct gfs2_inode *dip,
 530                               const struct gfs2_dirent *d, const void *end_p)
 531{
 532        const void *ptr = d;
 533        u16 rec_len = be16_to_cpu(d->de_rec_len);
 534
 535        if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
 536                goto broken;
 537        ptr += rec_len;
 538        if (ptr < end_p)
 539                return rec_len;
 540        if (ptr == end_p)
 541                return -ENOENT;
 542broken:
 543        gfs2_consist_inode(dip);
 544        return -EIO;
 545}
 546
 547/**
 548 * dirent_next - Next dirent
 549 * @dip: the directory
 550 * @bh: The buffer
 551 * @dent: Pointer to list of dirents
 552 *
 553 * Returns: 0 on success, error code otherwise
 554 */
 555
 556static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
 557                       struct gfs2_dirent **dent)
 558{
 559        struct gfs2_dirent *cur = *dent, *tmp;
 560        char *bh_end = bh->b_data + bh->b_size;
 561        int ret;
 562
 563        ret = dirent_check_reclen(dip, cur, bh_end);
 564        if (ret < 0)
 565                return ret;
 566
 567        tmp = (void *)cur + ret;
 568        ret = dirent_check_reclen(dip, tmp, bh_end);
 569        if (ret == -EIO)
 570                return ret;
 571
 572        /* Only the first dent could ever have de_inum.no_addr == 0 */
 573        if (gfs2_dirent_sentinel(tmp)) {
 574                gfs2_consist_inode(dip);
 575                return -EIO;
 576        }
 577
 578        *dent = tmp;
 579        return 0;
 580}
 581
 582/**
 583 * dirent_del - Delete a dirent
 584 * @dip: The GFS2 inode
 585 * @bh: The buffer
 586 * @prev: The previous dirent
 587 * @cur: The current dirent
 588 *
 589 */
 590
 591static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 592                       struct gfs2_dirent *prev, struct gfs2_dirent *cur)
 593{
 594        u16 cur_rec_len, prev_rec_len;
 595
 596        if (gfs2_dirent_sentinel(cur)) {
 597                gfs2_consist_inode(dip);
 598                return;
 599        }
 600
 601        gfs2_trans_add_bh(dip->i_gl, bh, 1);
 602
 603        /* If there is no prev entry, this is the first entry in the block.
 604           The de_rec_len is already as big as it needs to be.  Just zero
 605           out the inode number and return.  */
 606
 607        if (!prev) {
 608                cur->de_inum.no_addr = 0;
 609                cur->de_inum.no_formal_ino = 0;
 610                return;
 611        }
 612
 613        /*  Combine this dentry with the previous one.  */
 614
 615        prev_rec_len = be16_to_cpu(prev->de_rec_len);
 616        cur_rec_len = be16_to_cpu(cur->de_rec_len);
 617
 618        if ((char *)prev + prev_rec_len != (char *)cur)
 619                gfs2_consist_inode(dip);
 620        if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
 621                gfs2_consist_inode(dip);
 622
 623        prev_rec_len += cur_rec_len;
 624        prev->de_rec_len = cpu_to_be16(prev_rec_len);
 625}
 626
 627/*
 628 * Takes a dent from which to grab space as an argument. Returns the
 629 * newly created dent.
 630 */
 631static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
 632                                            struct gfs2_dirent *dent,
 633                                            const struct qstr *name,
 634                                            struct buffer_head *bh)
 635{
 636        struct gfs2_inode *ip = GFS2_I(inode);
 637        struct gfs2_dirent *ndent;
 638        unsigned offset = 0, totlen;
 639
 640        if (!gfs2_dirent_sentinel(dent))
 641                offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 642        totlen = be16_to_cpu(dent->de_rec_len);
 643        BUG_ON(offset + name->len > totlen);
 644        gfs2_trans_add_bh(ip->i_gl, bh, 1);
 645        ndent = (struct gfs2_dirent *)((char *)dent + offset);
 646        dent->de_rec_len = cpu_to_be16(offset);
 647        gfs2_qstr2dirent(name, totlen - offset, ndent);
 648        return ndent;
 649}
 650
 651static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
 652                                             struct buffer_head *bh,
 653                                             const struct qstr *name)
 654{
 655        struct gfs2_dirent *dent;
 656        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
 657                                gfs2_dirent_find_space, name, NULL);
 658        if (!dent || IS_ERR(dent))
 659                return dent;
 660        return gfs2_init_dirent(inode, dent, name, bh);
 661}
 662
 663static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
 664                    struct buffer_head **bhp)
 665{
 666        int error;
 667
 668        error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp);
 669        if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
 670                /* printk(KERN_INFO "block num=%llu\n", leaf_no); */
 671                error = -EIO;
 672        }
 673
 674        return error;
 675}
 676
 677/**
 678 * get_leaf_nr - Get a leaf number associated with the index
 679 * @dip: The GFS2 inode
 680 * @index:
 681 * @leaf_out:
 682 *
 683 * Returns: 0 on success, error code otherwise
 684 */
 685
 686static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
 687                       u64 *leaf_out)
 688{
 689        __be64 leaf_no;
 690        int error;
 691
 692        error = gfs2_dir_read_data(dip, (char *)&leaf_no,
 693                                    index * sizeof(__be64),
 694                                    sizeof(__be64), 0);
 695        if (error != sizeof(u64))
 696                return (error < 0) ? error : -EIO;
 697
 698        *leaf_out = be64_to_cpu(leaf_no);
 699
 700        return 0;
 701}
 702
 703static int get_first_leaf(struct gfs2_inode *dip, u32 index,
 704                          struct buffer_head **bh_out)
 705{
 706        u64 leaf_no;
 707        int error;
 708
 709        error = get_leaf_nr(dip, index, &leaf_no);
 710        if (!error)
 711                error = get_leaf(dip, leaf_no, bh_out);
 712
 713        return error;
 714}
 715
 716static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
 717                                              const struct qstr *name,
 718                                              gfs2_dscan_t scan,
 719                                              struct buffer_head **pbh)
 720{
 721        struct buffer_head *bh;
 722        struct gfs2_dirent *dent;
 723        struct gfs2_inode *ip = GFS2_I(inode);
 724        int error;
 725
 726        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 727                struct gfs2_leaf *leaf;
 728                unsigned hsize = 1 << ip->i_depth;
 729                unsigned index;
 730                u64 ln;
 731                if (hsize * sizeof(u64) != i_size_read(inode)) {
 732                        gfs2_consist_inode(ip);
 733                        return ERR_PTR(-EIO);
 734                }
 735
 736                index = name->hash >> (32 - ip->i_depth);
 737                error = get_first_leaf(ip, index, &bh);
 738                if (error)
 739                        return ERR_PTR(error);
 740                do {
 741                        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
 742                                                scan, name, NULL);
 743                        if (dent)
 744                                goto got_dent;
 745                        leaf = (struct gfs2_leaf *)bh->b_data;
 746                        ln = be64_to_cpu(leaf->lf_next);
 747                        brelse(bh);
 748                        if (!ln)
 749                                break;
 750
 751                        error = get_leaf(ip, ln, &bh);
 752                } while(!error);
 753
 754                return error ? ERR_PTR(error) : NULL;
 755        }
 756
 757
 758        error = gfs2_meta_inode_buffer(ip, &bh);
 759        if (error)
 760                return ERR_PTR(error);
 761        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
 762got_dent:
 763        if (unlikely(dent == NULL || IS_ERR(dent))) {
 764                brelse(bh);
 765                bh = NULL;
 766        }
 767        *pbh = bh;
 768        return dent;
 769}
 770
 771static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
 772{
 773        struct gfs2_inode *ip = GFS2_I(inode);
 774        unsigned int n = 1;
 775        u64 bn;
 776        int error;
 777        struct buffer_head *bh;
 778        struct gfs2_leaf *leaf;
 779        struct gfs2_dirent *dent;
 780        struct qstr name = { .name = "", .len = 0, .hash = 0 };
 781
 782        error = gfs2_alloc_block(ip, &bn, &n);
 783        if (error)
 784                return NULL;
 785        bh = gfs2_meta_new(ip->i_gl, bn);
 786        if (!bh)
 787                return NULL;
 788
 789        gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
 790        gfs2_trans_add_bh(ip->i_gl, bh, 1);
 791        gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
 792        leaf = (struct gfs2_leaf *)bh->b_data;
 793        leaf->lf_depth = cpu_to_be16(depth);
 794        leaf->lf_entries = 0;
 795        leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
 796        leaf->lf_next = 0;
 797        memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
 798        dent = (struct gfs2_dirent *)(leaf+1);
 799        gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
 800        *pbh = bh;
 801        return leaf;
 802}
 803
 804/**
 805 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
 806 * @dip: The GFS2 inode
 807 *
 808 * Returns: 0 on success, error code otherwise
 809 */
 810
 811static int dir_make_exhash(struct inode *inode)
 812{
 813        struct gfs2_inode *dip = GFS2_I(inode);
 814        struct gfs2_sbd *sdp = GFS2_SB(inode);
 815        struct gfs2_dirent *dent;
 816        struct qstr args;
 817        struct buffer_head *bh, *dibh;
 818        struct gfs2_leaf *leaf;
 819        int y;
 820        u32 x;
 821        __be64 *lp;
 822        u64 bn;
 823        int error;
 824
 825        error = gfs2_meta_inode_buffer(dip, &dibh);
 826        if (error)
 827                return error;
 828
 829        /*  Turn over a new leaf  */
 830
 831        leaf = new_leaf(inode, &bh, 0);
 832        if (!leaf)
 833                return -ENOSPC;
 834        bn = bh->b_blocknr;
 835
 836        gfs2_assert(sdp, dip->i_entries < (1 << 16));
 837        leaf->lf_entries = cpu_to_be16(dip->i_entries);
 838
 839        /*  Copy dirents  */
 840
 841        gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
 842                             sizeof(struct gfs2_dinode));
 843
 844        /*  Find last entry  */
 845
 846        x = 0;
 847        args.len = bh->b_size - sizeof(struct gfs2_dinode) +
 848                   sizeof(struct gfs2_leaf);
 849        args.name = bh->b_data;
 850        dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
 851                                gfs2_dirent_last, &args, NULL);
 852        if (!dent) {
 853                brelse(bh);
 854                brelse(dibh);
 855                return -EIO;
 856        }
 857        if (IS_ERR(dent)) {
 858                brelse(bh);
 859                brelse(dibh);
 860                return PTR_ERR(dent);
 861        }
 862
 863        /*  Adjust the last dirent's record length
 864           (Remember that dent still points to the last entry.)  */
 865
 866        dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
 867                sizeof(struct gfs2_dinode) -
 868                sizeof(struct gfs2_leaf));
 869
 870        brelse(bh);
 871
 872        /*  We're done with the new leaf block, now setup the new
 873            hash table.  */
 874
 875        gfs2_trans_add_bh(dip->i_gl, dibh, 1);
 876        gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 877
 878        lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
 879
 880        for (x = sdp->sd_hash_ptrs; x--; lp++)
 881                *lp = cpu_to_be64(bn);
 882
 883        i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
 884        gfs2_add_inode_blocks(&dip->i_inode, 1);
 885        dip->i_diskflags |= GFS2_DIF_EXHASH;
 886
 887        for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 888        dip->i_depth = y;
 889
 890        gfs2_dinode_out(dip, dibh->b_data);
 891
 892        brelse(dibh);
 893
 894        return 0;
 895}
 896
 897/**
 898 * dir_split_leaf - Split a leaf block into two
 899 * @dip: The GFS2 inode
 900 * @index:
 901 * @leaf_no:
 902 *
 903 * Returns: 0 on success, error code on failure
 904 */
 905
 906static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 907{
 908        struct gfs2_inode *dip = GFS2_I(inode);
 909        struct buffer_head *nbh, *obh, *dibh;
 910        struct gfs2_leaf *nleaf, *oleaf;
 911        struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
 912        u32 start, len, half_len, divider;
 913        u64 bn, leaf_no;
 914        __be64 *lp;
 915        u32 index;
 916        int x, moved = 0;
 917        int error;
 918
 919        index = name->hash >> (32 - dip->i_depth);
 920        error = get_leaf_nr(dip, index, &leaf_no);
 921        if (error)
 922                return error;
 923
 924        /*  Get the old leaf block  */
 925        error = get_leaf(dip, leaf_no, &obh);
 926        if (error)
 927                return error;
 928
 929        oleaf = (struct gfs2_leaf *)obh->b_data;
 930        if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) {
 931                brelse(obh);
 932                return 1; /* can't split */
 933        }
 934
 935        gfs2_trans_add_bh(dip->i_gl, obh, 1);
 936
 937        nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
 938        if (!nleaf) {
 939                brelse(obh);
 940                return -ENOSPC;
 941        }
 942        bn = nbh->b_blocknr;
 943
 944        /*  Compute the start and len of leaf pointers in the hash table.  */
 945        len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
 946        half_len = len >> 1;
 947        if (!half_len) {
 948                printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
 949                gfs2_consist_inode(dip);
 950                error = -EIO;
 951                goto fail_brelse;
 952        }
 953
 954        start = (index & ~(len - 1));
 955
 956        /* Change the pointers.
 957           Don't bother distinguishing stuffed from non-stuffed.
 958           This code is complicated enough already. */
 959        lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
 960        if (!lp) {
 961                error = -ENOMEM;
 962                goto fail_brelse;
 963        }
 964
 965        /*  Change the pointers  */
 966        for (x = 0; x < half_len; x++)
 967                lp[x] = cpu_to_be64(bn);
 968
 969        error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
 970                                    half_len * sizeof(u64));
 971        if (error != half_len * sizeof(u64)) {
 972                if (error >= 0)
 973                        error = -EIO;
 974                goto fail_lpfree;
 975        }
 976
 977        kfree(lp);
 978
 979        /*  Compute the divider  */
 980        divider = (start + half_len) << (32 - dip->i_depth);
 981
 982        /*  Copy the entries  */
 983        dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));
 984
 985        do {
 986                next = dent;
 987                if (dirent_next(dip, obh, &next))
 988                        next = NULL;
 989
 990                if (!gfs2_dirent_sentinel(dent) &&
 991                    be32_to_cpu(dent->de_hash) < divider) {
 992                        struct qstr str;
 993                        str.name = (char*)(dent+1);
 994                        str.len = be16_to_cpu(dent->de_name_len);
 995                        str.hash = be32_to_cpu(dent->de_hash);
 996                        new = gfs2_dirent_alloc(inode, nbh, &str);
 997                        if (IS_ERR(new)) {
 998                                error = PTR_ERR(new);
 999                                break;
1000                        }
1001
1002                        new->de_inum = dent->de_inum; /* No endian worries */
1003                        new->de_type = dent->de_type; /* No endian worries */
1004                        be16_add_cpu(&nleaf->lf_entries, 1);
1005
1006                        dirent_del(dip, obh, prev, dent);
1007
1008                        if (!oleaf->lf_entries)
1009                                gfs2_consist_inode(dip);
1010                        be16_add_cpu(&oleaf->lf_entries, -1);
1011
1012                        if (!prev)
1013                                prev = dent;
1014
1015                        moved = 1;
1016                } else {
1017                        prev = dent;
1018                }
1019                dent = next;
1020        } while (dent);
1021
1022        oleaf->lf_depth = nleaf->lf_depth;
1023
1024        error = gfs2_meta_inode_buffer(dip, &dibh);
1025        if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1026                gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1027                gfs2_add_inode_blocks(&dip->i_inode, 1);
1028                gfs2_dinode_out(dip, dibh->b_data);
1029                brelse(dibh);
1030        }
1031
1032        brelse(obh);
1033        brelse(nbh);
1034
1035        return error;
1036
1037fail_lpfree:
1038        kfree(lp);
1039
1040fail_brelse:
1041        brelse(obh);
1042        brelse(nbh);
1043        return error;
1044}
1045
1046/**
1047 * dir_double_exhash - Double size of ExHash table
1048 * @dip: The GFS2 dinode
1049 *
1050 * Returns: 0 on success, error code on failure
1051 */
1052
1053static int dir_double_exhash(struct gfs2_inode *dip)
1054{
1055        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1056        struct buffer_head *dibh;
1057        u32 hsize;
1058        u64 *buf;
1059        u64 *from, *to;
1060        u64 block;
1061        u64 disksize = i_size_read(&dip->i_inode);
1062        int x;
1063        int error = 0;
1064
1065        hsize = 1 << dip->i_depth;
1066        if (hsize * sizeof(u64) != disksize) {
1067                gfs2_consist_inode(dip);
1068                return -EIO;
1069        }
1070
1071        /*  Allocate both the "from" and "to" buffers in one big chunk  */
1072
1073        buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
1074        if (!buf)
1075                return -ENOMEM;
1076
1077        for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) {
1078                error = gfs2_dir_read_data(dip, (char *)buf,
1079                                            block * sdp->sd_hash_bsize,
1080                                            sdp->sd_hash_bsize, 1);
1081                if (error != sdp->sd_hash_bsize) {
1082                        if (error >= 0)
1083                                error = -EIO;
1084                        goto fail;
1085                }
1086
1087                from = buf;
1088                to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
1089
1090                for (x = sdp->sd_hash_ptrs; x--; from++) {
1091                        *to++ = *from;  /*  No endianess worries  */
1092                        *to++ = *from;
1093                }
1094
1095                error = gfs2_dir_write_data(dip,
1096                                             (char *)buf + sdp->sd_hash_bsize,
1097                                             block * sdp->sd_sb.sb_bsize,
1098                                             sdp->sd_sb.sb_bsize);
1099                if (error != sdp->sd_sb.sb_bsize) {
1100                        if (error >= 0)
1101                                error = -EIO;
1102                        goto fail;
1103                }
1104        }
1105
1106        kfree(buf);
1107
1108        error = gfs2_meta_inode_buffer(dip, &dibh);
1109        if (!gfs2_assert_withdraw(sdp, !error)) {
1110                dip->i_depth++;
1111                gfs2_dinode_out(dip, dibh->b_data);
1112                brelse(dibh);
1113        }
1114
1115        return error;
1116
1117fail:
1118        kfree(buf);
1119        return error;
1120}
1121
1122/**
1123 * compare_dents - compare directory entries by hash value
1124 * @a: first dent
1125 * @b: second dent
1126 *
1127 * When comparing the hash entries of @a to @b:
1128 *   gt: returns 1
1129 *   lt: returns -1
1130 *   eq: returns 0
1131 */
1132
1133static int compare_dents(const void *a, const void *b)
1134{
1135        const struct gfs2_dirent *dent_a, *dent_b;
1136        u32 hash_a, hash_b;
1137        int ret = 0;
1138
1139        dent_a = *(const struct gfs2_dirent **)a;
1140        hash_a = be32_to_cpu(dent_a->de_hash);
1141
1142        dent_b = *(const struct gfs2_dirent **)b;
1143        hash_b = be32_to_cpu(dent_b->de_hash);
1144
1145        if (hash_a > hash_b)
1146                ret = 1;
1147        else if (hash_a < hash_b)
1148                ret = -1;
1149        else {
1150                unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1151                unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1152
1153                if (len_a > len_b)
1154                        ret = 1;
1155                else if (len_a < len_b)
1156                        ret = -1;
1157                else
1158                        ret = memcmp(dent_a + 1, dent_b + 1, len_a);
1159        }
1160
1161        return ret;
1162}
1163
1164/**
1165 * do_filldir_main - read out directory entries
1166 * @dip: The GFS2 inode
1167 * @offset: The offset in the file to read from
1168 * @opaque: opaque data to pass to filldir
1169 * @filldir: The function to pass entries to
1170 * @darr: an array of struct gfs2_dirent pointers to read
1171 * @entries: the number of entries in darr
1172 * @copied: pointer to int that's non-zero if a entry has been copied out
1173 *
1174 * Jump through some hoops to make sure that if there are hash collsions,
1175 * they are read out at the beginning of a buffer.  We want to minimize
1176 * the possibility that they will fall into different readdir buffers or
1177 * that someone will want to seek to that location.
1178 *
1179 * Returns: errno, >0 on exception from filldir
1180 */
1181
1182static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1183                           void *opaque, filldir_t filldir,
1184                           const struct gfs2_dirent **darr, u32 entries,
1185                           int *copied)
1186{
1187        const struct gfs2_dirent *dent, *dent_next;
1188        u64 off, off_next;
1189        unsigned int x, y;
1190        int run = 0;
1191        int error = 0;
1192
1193        sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1194
1195        dent_next = darr[0];
1196        off_next = be32_to_cpu(dent_next->de_hash);
1197        off_next = gfs2_disk_hash2offset(off_next);
1198
1199        for (x = 0, y = 1; x < entries; x++, y++) {
1200                dent = dent_next;
1201                off = off_next;
1202
1203                if (y < entries) {
1204                        dent_next = darr[y];
1205                        off_next = be32_to_cpu(dent_next->de_hash);
1206                        off_next = gfs2_disk_hash2offset(off_next);
1207
1208                        if (off < *offset)
1209                                continue;
1210                        *offset = off;
1211
1212                        if (off_next == off) {
1213                                if (*copied && !run)
1214                                        return 1;
1215                                run = 1;
1216                        } else
1217                                run = 0;
1218                } else {
1219                        if (off < *offset)
1220                                continue;
1221                        *offset = off;
1222                }
1223
1224                error = filldir(opaque, (const char *)(dent + 1),
1225                                be16_to_cpu(dent->de_name_len),
1226                                off, be64_to_cpu(dent->de_inum.no_addr),
1227                                be16_to_cpu(dent->de_type));
1228                if (error)
1229                        return 1;
1230
1231                *copied = 1;
1232        }
1233
1234        /* Increment the *offset by one, so the next time we come into the
1235           do_filldir fxn, we get the next entry instead of the last one in the
1236           current leaf */
1237
1238        (*offset)++;
1239
1240        return 0;
1241}
1242
1243static void *gfs2_alloc_sort_buffer(unsigned size)
1244{
1245        void *ptr = NULL;
1246
1247        if (size < KMALLOC_MAX_SIZE)
1248                ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
1249        if (!ptr)
1250                ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
1251        return ptr;
1252}
1253
1254static void gfs2_free_sort_buffer(void *ptr)
1255{
1256        if (is_vmalloc_addr(ptr))
1257                vfree(ptr);
1258        else
1259                kfree(ptr);
1260}
1261
1262static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1263                              filldir_t filldir, int *copied, unsigned *depth,
1264                              u64 leaf_no)
1265{
1266        struct gfs2_inode *ip = GFS2_I(inode);
1267        struct gfs2_sbd *sdp = GFS2_SB(inode);
1268        struct buffer_head *bh;
1269        struct gfs2_leaf *lf;
1270        unsigned entries = 0, entries2 = 0;
1271        unsigned leaves = 0;
1272        const struct gfs2_dirent **darr, *dent;
1273        struct dirent_gather g;
1274        struct buffer_head **larr;
1275        int leaf = 0;
1276        int error, i;
1277        u64 lfn = leaf_no;
1278
1279        do {
1280                error = get_leaf(ip, lfn, &bh);
1281                if (error)
1282                        goto out;
1283                lf = (struct gfs2_leaf *)bh->b_data;
1284                if (leaves == 0)
1285                        *depth = be16_to_cpu(lf->lf_depth);
1286                entries += be16_to_cpu(lf->lf_entries);
1287                leaves++;
1288                lfn = be64_to_cpu(lf->lf_next);
1289                brelse(bh);
1290        } while(lfn);
1291
1292        if (!entries)
1293                return 0;
1294
1295        error = -ENOMEM;
1296        /*
1297         * The extra 99 entries are not normally used, but are a buffer
1298         * zone in case the number of entries in the leaf is corrupt.
1299         * 99 is the maximum number of entries that can fit in a single
1300         * leaf block.
1301         */
1302        larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
1303        if (!larr)
1304                goto out;
1305        darr = (const struct gfs2_dirent **)(larr + leaves);
1306        g.pdent = darr;
1307        g.offset = 0;
1308        lfn = leaf_no;
1309
1310        do {
1311                error = get_leaf(ip, lfn, &bh);
1312                if (error)
1313                        goto out_free;
1314                lf = (struct gfs2_leaf *)bh->b_data;
1315                lfn = be64_to_cpu(lf->lf_next);
1316                if (lf->lf_entries) {
1317                        entries2 += be16_to_cpu(lf->lf_entries);
1318                        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1319                                                gfs2_dirent_gather, NULL, &g);
1320                        error = PTR_ERR(dent);
1321                        if (IS_ERR(dent))
1322                                goto out_free;
1323                        if (entries2 != g.offset) {
1324                                fs_warn(sdp, "Number of entries corrupt in dir "
1325                                                "leaf %llu, entries2 (%u) != "
1326                                                "g.offset (%u)\n",
1327                                        (unsigned long long)bh->b_blocknr,
1328                                        entries2, g.offset);
1329                                        
1330                                error = -EIO;
1331                                goto out_free;
1332                        }
1333                        error = 0;
1334                        larr[leaf++] = bh;
1335                } else {
1336                        brelse(bh);
1337                }
1338        } while(lfn);
1339
1340        BUG_ON(entries2 != entries);
1341        error = do_filldir_main(ip, offset, opaque, filldir, darr,
1342                                entries, copied);
1343out_free:
1344        for(i = 0; i < leaf; i++)
1345                brelse(larr[i]);
1346        gfs2_free_sort_buffer(larr);
1347out:
1348        return error;
1349}
1350
1351/**
1352 * dir_e_read - Reads the entries from a directory into a filldir buffer
1353 * @dip: dinode pointer
1354 * @offset: the hash of the last entry read shifted to the right once
1355 * @opaque: buffer for the filldir function to fill
1356 * @filldir: points to the filldir function to use
1357 *
1358 * Returns: errno
1359 */
1360
1361static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1362                      filldir_t filldir)
1363{
1364        struct gfs2_inode *dip = GFS2_I(inode);
1365        struct gfs2_sbd *sdp = GFS2_SB(inode);
1366        u32 hsize, len = 0;
1367        u32 ht_offset, lp_offset, ht_offset_cur = -1;
1368        u32 hash, index;
1369        __be64 *lp;
1370        int copied = 0;
1371        int error = 0;
1372        unsigned depth = 0;
1373
1374        hsize = 1 << dip->i_depth;
1375        if (hsize * sizeof(u64) != i_size_read(inode)) {
1376                gfs2_consist_inode(dip);
1377                return -EIO;
1378        }
1379
1380        hash = gfs2_dir_offset2hash(*offset);
1381        index = hash >> (32 - dip->i_depth);
1382
1383        lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1384        if (!lp)
1385                return -ENOMEM;
1386
1387        while (index < hsize) {
1388                lp_offset = index & (sdp->sd_hash_ptrs - 1);
1389                ht_offset = index - lp_offset;
1390
1391                if (ht_offset_cur != ht_offset) {
1392                        error = gfs2_dir_read_data(dip, (char *)lp,
1393                                                ht_offset * sizeof(__be64),
1394                                                sdp->sd_hash_bsize, 1);
1395                        if (error != sdp->sd_hash_bsize) {
1396                                if (error >= 0)
1397                                        error = -EIO;
1398                                goto out;
1399                        }
1400                        ht_offset_cur = ht_offset;
1401                }
1402
1403                error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1404                                           &copied, &depth,
1405                                           be64_to_cpu(lp[lp_offset]));
1406                if (error)
1407                        break;
1408
1409                len = 1 << (dip->i_depth - depth);
1410                index = (index & ~(len - 1)) + len;
1411        }
1412
1413out:
1414        kfree(lp);
1415        if (error > 0)
1416                error = 0;
1417        return error;
1418}
1419
1420int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1421                  filldir_t filldir)
1422{
1423        struct gfs2_inode *dip = GFS2_I(inode);
1424        struct gfs2_sbd *sdp = GFS2_SB(inode);
1425        struct dirent_gather g;
1426        const struct gfs2_dirent **darr, *dent;
1427        struct buffer_head *dibh;
1428        int copied = 0;
1429        int error;
1430
1431        if (!dip->i_entries)
1432                return 0;
1433
1434        if (dip->i_diskflags & GFS2_DIF_EXHASH)
1435                return dir_e_read(inode, offset, opaque, filldir);
1436
1437        if (!gfs2_is_stuffed(dip)) {
1438                gfs2_consist_inode(dip);
1439                return -EIO;
1440        }
1441
1442        error = gfs2_meta_inode_buffer(dip, &dibh);
1443        if (error)
1444                return error;
1445
1446        error = -ENOMEM;
1447        /* 96 is max number of dirents which can be stuffed into an inode */
1448        darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS);
1449        if (darr) {
1450                g.pdent = darr;
1451                g.offset = 0;
1452                dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1453                                        gfs2_dirent_gather, NULL, &g);
1454                if (IS_ERR(dent)) {
1455                        error = PTR_ERR(dent);
1456                        goto out;
1457                }
1458                if (dip->i_entries != g.offset) {
1459                        fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1460                                "ip->i_entries (%u) != g.offset (%u)\n",
1461                                (unsigned long long)dip->i_no_addr,
1462                                dip->i_entries,
1463                                g.offset);
1464                        error = -EIO;
1465                        goto out;
1466                }
1467                error = do_filldir_main(dip, offset, opaque, filldir, darr,
1468                                        dip->i_entries, &copied);
1469out:
1470                kfree(darr);
1471        }
1472
1473        if (error > 0)
1474                error = 0;
1475
1476        brelse(dibh);
1477
1478        return error;
1479}
1480
1481/**
1482 * gfs2_dir_search - Search a directory
1483 * @dip: The GFS2 inode
1484 * @filename:
1485 * @inode:
1486 *
1487 * This routine searches a directory for a file or another directory.
1488 * Assumes a glock is held on dip.
1489 *
1490 * Returns: errno
1491 */
1492
1493struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1494{
1495        struct buffer_head *bh;
1496        struct gfs2_dirent *dent;
1497        struct inode *inode;
1498
1499        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1500        if (dent) {
1501                if (IS_ERR(dent))
1502                        return ERR_CAST(dent);
1503                inode = gfs2_inode_lookup(dir->i_sb, 
1504                                be16_to_cpu(dent->de_type),
1505                                be64_to_cpu(dent->de_inum.no_addr),
1506                                be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1507                brelse(bh);
1508                return inode;
1509        }
1510        return ERR_PTR(-ENOENT);
1511}
1512
1513int gfs2_dir_check(struct inode *dir, const struct qstr *name,
1514                   const struct gfs2_inode *ip)
1515{
1516        struct buffer_head *bh;
1517        struct gfs2_dirent *dent;
1518        int ret = -ENOENT;
1519
1520        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1521        if (dent) {
1522                if (IS_ERR(dent))
1523                        return PTR_ERR(dent);
1524                if (ip) {
1525                        if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
1526                                goto out;
1527                        if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
1528                            ip->i_no_formal_ino)
1529                                goto out;
1530                        if (unlikely(IF2DT(ip->i_inode.i_mode) !=
1531                            be16_to_cpu(dent->de_type))) {
1532                                gfs2_consist_inode(GFS2_I(dir));
1533                                ret = -EIO;
1534                                goto out;
1535                        }
1536                }
1537                ret = 0;
1538out:
1539                brelse(bh);
1540        }
1541        return ret;
1542}
1543
1544static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1545{
1546        struct buffer_head *bh, *obh;
1547        struct gfs2_inode *ip = GFS2_I(inode);
1548        struct gfs2_leaf *leaf, *oleaf;
1549        int error;
1550        u32 index;
1551        u64 bn;
1552
1553        index = name->hash >> (32 - ip->i_depth);
1554        error = get_first_leaf(ip, index, &obh);
1555        if (error)
1556                return error;
1557        do {
1558                oleaf = (struct gfs2_leaf *)obh->b_data;
1559                bn = be64_to_cpu(oleaf->lf_next);
1560                if (!bn)
1561                        break;
1562                brelse(obh);
1563                error = get_leaf(ip, bn, &obh);
1564                if (error)
1565                        return error;
1566        } while(1);
1567
1568        gfs2_trans_add_bh(ip->i_gl, obh, 1);
1569
1570        leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1571        if (!leaf) {
1572                brelse(obh);
1573                return -ENOSPC;
1574        }
1575        oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1576        brelse(bh);
1577        brelse(obh);
1578
1579        error = gfs2_meta_inode_buffer(ip, &bh);
1580        if (error)
1581                return error;
1582        gfs2_trans_add_bh(ip->i_gl, bh, 1);
1583        gfs2_add_inode_blocks(&ip->i_inode, 1);
1584        gfs2_dinode_out(ip, bh->b_data);
1585        brelse(bh);
1586        return 0;
1587}
1588
1589/**
1590 * gfs2_dir_add - Add new filename into directory
1591 * @dip: The GFS2 inode
1592 * @filename: The new name
1593 * @inode: The inode number of the entry
1594 * @type: The type of the entry
1595 *
1596 * Returns: 0 on success, error code on failure
1597 */
1598
1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1600                 const struct gfs2_inode *nip)
1601{
1602        struct gfs2_inode *ip = GFS2_I(inode);
1603        struct buffer_head *bh;
1604        struct gfs2_dirent *dent;
1605        struct gfs2_leaf *leaf;
1606        int error;
1607
1608        while(1) {
1609                dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1610                                          &bh);
1611                if (dent) {
1612                        if (IS_ERR(dent))
1613                                return PTR_ERR(dent);
1614                        dent = gfs2_init_dirent(inode, dent, name, bh);
1615                        gfs2_inum_out(nip, dent);
1616                        dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1617                        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1618                                leaf = (struct gfs2_leaf *)bh->b_data;
1619                                be16_add_cpu(&leaf->lf_entries, 1);
1620                        }
1621                        brelse(bh);
1622                        error = gfs2_meta_inode_buffer(ip, &bh);
1623                        if (error)
1624                                break;
1625                        gfs2_trans_add_bh(ip->i_gl, bh, 1);
1626                        ip->i_entries++;
1627                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628                        if (S_ISDIR(nip->i_inode.i_mode))
1629                                inc_nlink(&ip->i_inode);
1630                        gfs2_dinode_out(ip, bh->b_data);
1631                        brelse(bh);
1632                        error = 0;
1633                        break;
1634                }
1635                if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
1636                        error = dir_make_exhash(inode);
1637                        if (error)
1638                                break;
1639                        continue;
1640                }
1641                error = dir_split_leaf(inode, name);
1642                if (error == 0)
1643                        continue;
1644                if (error < 0)
1645                        break;
1646                if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
1647                        error = dir_double_exhash(ip);
1648                        if (error)
1649                                break;
1650                        error = dir_split_leaf(inode, name);
1651                        if (error < 0)
1652                                break;
1653                        if (error == 0)
1654                                continue;
1655                }
1656                error = dir_new_leaf(inode, name);
1657                if (!error)
1658                        continue;
1659                error = -ENOSPC;
1660                break;
1661        }
1662        return error;
1663}
1664
1665
1666/**
1667 * gfs2_dir_del - Delete a directory entry
1668 * @dip: The GFS2 inode
1669 * @filename: The filename
1670 *
1671 * Returns: 0 on success, error code on failure
1672 */
1673
1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1675{
1676        const struct qstr *name = &dentry->d_name;
1677        struct gfs2_dirent *dent, *prev = NULL;
1678        struct buffer_head *bh;
1679        int error;
1680
1681        /* Returns _either_ the entry (if its first in block) or the
1682           previous entry otherwise */
1683        dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
1684        if (!dent) {
1685                gfs2_consist_inode(dip);
1686                return -EIO;
1687        }
1688        if (IS_ERR(dent)) {
1689                gfs2_consist_inode(dip);
1690                return PTR_ERR(dent);
1691        }
1692        /* If not first in block, adjust pointers accordingly */
1693        if (gfs2_dirent_find(dent, name, NULL) == 0) {
1694                prev = dent;
1695                dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1696        }
1697
1698        dirent_del(dip, bh, prev, dent);
1699        if (dip->i_diskflags & GFS2_DIF_EXHASH) {
1700                struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1701                u16 entries = be16_to_cpu(leaf->lf_entries);
1702                if (!entries)
1703                        gfs2_consist_inode(dip);
1704                leaf->lf_entries = cpu_to_be16(--entries);
1705        }
1706        brelse(bh);
1707
1708        error = gfs2_meta_inode_buffer(dip, &bh);
1709        if (error)
1710                return error;
1711
1712        if (!dip->i_entries)
1713                gfs2_consist_inode(dip);
1714        gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715        dip->i_entries--;
1716        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717        if (S_ISDIR(dentry->d_inode->i_mode))
1718                drop_nlink(&dip->i_inode);
1719        gfs2_dinode_out(dip, bh->b_data);
1720        brelse(bh);
1721        mark_inode_dirty(&dip->i_inode);
1722
1723        return error;
1724}
1725
1726/**
1727 * gfs2_dir_mvino - Change inode number of directory entry
1728 * @dip: The GFS2 inode
1729 * @filename:
1730 * @new_inode:
1731 *
1732 * This routine changes the inode number of a directory entry.  It's used
1733 * by rename to change ".." when a directory is moved.
1734 * Assumes a glock is held on dvp.
1735 *
1736 * Returns: errno
1737 */
1738
1739int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1740                   const struct gfs2_inode *nip, unsigned int new_type)
1741{
1742        struct buffer_head *bh;
1743        struct gfs2_dirent *dent;
1744        int error;
1745
1746        dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
1747        if (!dent) {
1748                gfs2_consist_inode(dip);
1749                return -EIO;
1750        }
1751        if (IS_ERR(dent))
1752                return PTR_ERR(dent);
1753
1754        gfs2_trans_add_bh(dip->i_gl, bh, 1);
1755        gfs2_inum_out(nip, dent);
1756        dent->de_type = cpu_to_be16(new_type);
1757
1758        if (dip->i_diskflags & GFS2_DIF_EXHASH) {
1759                brelse(bh);
1760                error = gfs2_meta_inode_buffer(dip, &bh);
1761                if (error)
1762                        return error;
1763                gfs2_trans_add_bh(dip->i_gl, bh, 1);
1764        }
1765
1766        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1767        gfs2_dinode_out(dip, bh->b_data);
1768        brelse(bh);
1769        return 0;
1770}
1771
1772/**
1773 * leaf_dealloc - Deallocate a directory leaf
1774 * @dip: the directory
1775 * @index: the hash table offset in the directory
1776 * @len: the number of pointers to this leaf
1777 * @leaf_no: the leaf number
1778 * @leaf_bh: buffer_head for the starting leaf
1779 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1780 *
1781 * Returns: errno
1782 */
1783
1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1785                        u64 leaf_no, struct buffer_head *leaf_bh,
1786                        int last_dealloc)
1787{
1788        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1789        struct gfs2_leaf *tmp_leaf;
1790        struct gfs2_rgrp_list rlist;
1791        struct buffer_head *bh, *dibh;
1792        u64 blk, nblk;
1793        unsigned int rg_blocks = 0, l_blocks = 0;
1794        char *ht;
1795        unsigned int x, size = len * sizeof(u64);
1796        int error;
1797
1798        memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1799
1800        ht = kzalloc(size, GFP_NOFS);
1801        if (!ht)
1802                return -ENOMEM;
1803
1804        if (!gfs2_alloc_get(dip)) {
1805                error = -ENOMEM;
1806                goto out;
1807        }
1808
1809        error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1810        if (error)
1811                goto out_put;
1812
1813        error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
1814        if (error)
1815                goto out_qs;
1816
1817        /*  Count the number of leaves  */
1818        bh = leaf_bh;
1819
1820        for (blk = leaf_no; blk; blk = nblk) {
1821                if (blk != leaf_no) {
1822                        error = get_leaf(dip, blk, &bh);
1823                        if (error)
1824                                goto out_rlist;
1825                }
1826                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1827                nblk = be64_to_cpu(tmp_leaf->lf_next);
1828                if (blk != leaf_no)
1829                        brelse(bh);
1830
1831                gfs2_rlist_add(sdp, &rlist, blk);
1832                l_blocks++;
1833        }
1834
1835        gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
1836
1837        for (x = 0; x < rlist.rl_rgrps; x++) {
1838                struct gfs2_rgrpd *rgd;
1839                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1840                rg_blocks += rgd->rd_length;
1841        }
1842
1843        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1844        if (error)
1845                goto out_rlist;
1846
1847        error = gfs2_trans_begin(sdp,
1848                        rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1849                        RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1850        if (error)
1851                goto out_rg_gunlock;
1852
1853        bh = leaf_bh;
1854
1855        for (blk = leaf_no; blk; blk = nblk) {
1856                if (blk != leaf_no) {
1857                        error = get_leaf(dip, blk, &bh);
1858                        if (error)
1859                                goto out_end_trans;
1860                }
1861                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1862                nblk = be64_to_cpu(tmp_leaf->lf_next);
1863                if (blk != leaf_no)
1864                        brelse(bh);
1865
1866                gfs2_free_meta(dip, blk, 1);
1867                gfs2_add_inode_blocks(&dip->i_inode, -1);
1868        }
1869
1870        error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
1871        if (error != size) {
1872                if (error >= 0)
1873                        error = -EIO;
1874                goto out_end_trans;
1875        }
1876
1877        error = gfs2_meta_inode_buffer(dip, &dibh);
1878        if (error)
1879                goto out_end_trans;
1880
1881        gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882        /* On the last dealloc, make this a regular file in case we crash.
1883           (We don't want to free these blocks a second time.)  */
1884        if (last_dealloc)
1885                dip->i_inode.i_mode = S_IFREG;
1886        gfs2_dinode_out(dip, dibh->b_data);
1887        brelse(dibh);
1888
1889out_end_trans:
1890        gfs2_trans_end(sdp);
1891out_rg_gunlock:
1892        gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1893out_rlist:
1894        gfs2_rlist_free(&rlist);
1895        gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
1896out_qs:
1897        gfs2_quota_unhold(dip);
1898out_put:
1899        gfs2_alloc_put(dip);
1900out:
1901        kfree(ht);
1902        return error;
1903}
1904
1905/**
1906 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
1907 * @dip: the directory
1908 *
1909 * Dealloc all on-disk directory leaves to FREEMETA state
1910 * Change on-disk inode type to "regular file"
1911 *
1912 * Returns: errno
1913 */
1914
1915int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1916{
1917        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1918        struct buffer_head *bh;
1919        struct gfs2_leaf *leaf;
1920        u32 hsize, len;
1921        u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922        u32 index = 0, next_index;
1923        __be64 *lp;
1924        u64 leaf_no;
1925        int error = 0, last;
1926
1927        hsize = 1 << dip->i_depth;
1928        if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1929                gfs2_consist_inode(dip);
1930                return -EIO;
1931        }
1932
1933        lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1934        if (!lp)
1935                return -ENOMEM;
1936
1937        while (index < hsize) {
1938                lp_offset = index & (sdp->sd_hash_ptrs - 1);
1939                ht_offset = index - lp_offset;
1940
1941                if (ht_offset_cur != ht_offset) {
1942                        error = gfs2_dir_read_data(dip, (char *)lp,
1943                                                ht_offset * sizeof(__be64),
1944                                                sdp->sd_hash_bsize, 1);
1945                        if (error != sdp->sd_hash_bsize) {
1946                                if (error >= 0)
1947                                        error = -EIO;
1948                                goto out;
1949                        }
1950                        ht_offset_cur = ht_offset;
1951                }
1952
1953                leaf_no = be64_to_cpu(lp[lp_offset]);
1954                if (leaf_no) {
1955                        error = get_leaf(dip, leaf_no, &bh);
1956                        if (error)
1957                                goto out;
1958                        leaf = (struct gfs2_leaf *)bh->b_data;
1959                        len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961                        next_index = (index & ~(len - 1)) + len;
1962                        last = ((next_index >= hsize) ? 1 : 0);
1963                        error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964                                             last);
1965                        brelse(bh);
1966                        if (error)
1967                                goto out;
1968                        index = next_index;
1969                } else
1970                        index++;
1971        }
1972
1973        if (index != hsize) {
1974                gfs2_consist_inode(dip);
1975                error = -EIO;
1976        }
1977
1978out:
1979        kfree(lp);
1980
1981        return error;
1982}
1983
1984/**
1985 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
1986 * @ip: the file being written to
1987 * @filname: the filename that's going to be added
1988 *
1989 * Returns: 1 if alloc required, 0 if not, -ve on error
1990 */
1991
1992int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
1993{
1994        struct gfs2_dirent *dent;
1995        struct buffer_head *bh;
1996
1997        dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
1998        if (!dent) {
1999                return 1;
2000        }
2001        if (IS_ERR(dent))
2002                return PTR_ERR(dent);
2003        brelse(bh);
2004        return 0;
2005}
2006
2007