linux/fs/gfs2/dir.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5 */
   6
   7/*
   8 * Implements Extendible Hashing as described in:
   9 *   "Extendible Hashing" by Fagin, et al in
  10 *     __ACM Trans. on Database Systems__, Sept 1979.
  11 *
  12 *
  13 * Here's the layout of dirents which is essentially the same as that of ext2
  14 * within a single block. The field de_name_len is the number of bytes
  15 * actually required for the name (no null terminator). The field de_rec_len
  16 * is the number of bytes allocated to the dirent. The offset of the next
  17 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
  18 * deleted, the preceding dirent inherits its allocated space, ie
  19 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
  20 * by adding de_rec_len to the current dirent, this essentially causes the
  21 * deleted dirent to get jumped over when iterating through all the dirents.
  22 *
  23 * When deleting the first dirent in a block, there is no previous dirent so
  24 * the field de_ino is set to zero to designate it as deleted. When allocating
  25 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
  26 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
  27 * dirent is allocated. Otherwise it must go through all the 'used' dirents
  28 * searching for one in which the amount of total space minus the amount of
  29 * used space will provide enough space for the new dirent.
  30 *
  31 * There are two types of blocks in which dirents reside. In a stuffed dinode,
  32 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
  33 * the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
  34 * beginning of the leaf block. The dirents reside in leaves when
  35 *
  36 * dip->i_diskflags & GFS2_DIF_EXHASH is true
  37 *
  38 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
  39 *
  40 * When the dirents are in leaves, the actual contents of the directory file are
  41 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
  42 * dirents are NOT in the directory file itself. There can be more than one
  43 * block pointer in the array that points to the same leaf. In fact, when a
  44 * directory is first converted from linear to exhash, all of the pointers
  45 * point to the same leaf.
  46 *
  47 * When a leaf is completely full, the size of the hash table can be
  48 * doubled unless it is already at the maximum size which is hard coded into
  49 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
  50 * but never before the maximum hash table size has been reached.
  51 */
  52
  53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  54
  55#include <linux/slab.h>
  56#include <linux/spinlock.h>
  57#include <linux/buffer_head.h>
  58#include <linux/sort.h>
  59#include <linux/gfs2_ondisk.h>
  60#include <linux/crc32.h>
  61#include <linux/vmalloc.h>
  62#include <linux/bio.h>
  63
  64#include "gfs2.h"
  65#include "incore.h"
  66#include "dir.h"
  67#include "glock.h"
  68#include "inode.h"
  69#include "meta_io.h"
  70#include "quota.h"
  71#include "rgrp.h"
  72#include "trans.h"
  73#include "bmap.h"
  74#include "util.h"
  75
  76#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
  77
  78#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
  79#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
  80#define GFS2_HASH_INDEX_MASK 0xffffc000
  81#define GFS2_USE_HASH_FLAG 0x2000
  82
  83struct qstr gfs2_qdot __read_mostly;
  84struct qstr gfs2_qdotdot __read_mostly;
  85
  86typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
  87                            const struct qstr *name, void *opaque);
  88
  89int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
  90                            struct buffer_head **bhp)
  91{
  92        struct buffer_head *bh;
  93
  94        bh = gfs2_meta_new(ip->i_gl, block);
  95        gfs2_trans_add_meta(ip->i_gl, bh);
  96        gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
  97        gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
  98        *bhp = bh;
  99        return 0;
 100}
 101
 102static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
 103                                        struct buffer_head **bhp)
 104{
 105        struct buffer_head *bh;
 106        int error;
 107
 108        error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, 0, &bh);
 109        if (error)
 110                return error;
 111        if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
 112                brelse(bh);
 113                return -EIO;
 114        }
 115        *bhp = bh;
 116        return 0;
 117}
 118
 119static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 120                                  unsigned int offset, unsigned int size)
 121{
 122        struct buffer_head *dibh;
 123        int error;
 124
 125        error = gfs2_meta_inode_buffer(ip, &dibh);
 126        if (error)
 127                return error;
 128
 129        gfs2_trans_add_meta(ip->i_gl, dibh);
 130        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 131        if (ip->i_inode.i_size < offset + size)
 132                i_size_write(&ip->i_inode, offset + size);
 133        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
 134        gfs2_dinode_out(ip, dibh->b_data);
 135
 136        brelse(dibh);
 137
 138        return size;
 139}
 140
 141
 142
 143/**
 144 * gfs2_dir_write_data - Write directory information to the inode
 145 * @ip: The GFS2 inode
 146 * @buf: The buffer containing information to be written
 147 * @offset: The file offset to start writing at
 148 * @size: The amount of data to write
 149 *
 150 * Returns: The number of bytes correctly written or error code
 151 */
 152static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 153                               u64 offset, unsigned int size)
 154{
 155        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 156        struct buffer_head *dibh;
 157        u64 lblock, dblock;
 158        u32 extlen = 0;
 159        unsigned int o;
 160        int copied = 0;
 161        int error = 0;
 162        int new = 0;
 163
 164        if (!size)
 165                return 0;
 166
 167        if (gfs2_is_stuffed(ip) && offset + size <= gfs2_max_stuffed_size(ip))
 168                return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
 169                                              size);
 170
 171        if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 172                return -EINVAL;
 173
 174        if (gfs2_is_stuffed(ip)) {
 175                error = gfs2_unstuff_dinode(ip, NULL);
 176                if (error)
 177                        return error;
 178        }
 179
 180        lblock = offset;
 181        o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 182
 183        while (copied < size) {
 184                unsigned int amount;
 185                struct buffer_head *bh;
 186
 187                amount = size - copied;
 188                if (amount > sdp->sd_sb.sb_bsize - o)
 189                        amount = sdp->sd_sb.sb_bsize - o;
 190
 191                if (!extlen) {
 192                        new = 1;
 193                        error = gfs2_extent_map(&ip->i_inode, lblock, &new,
 194                                                &dblock, &extlen);
 195                        if (error)
 196                                goto fail;
 197                        error = -EIO;
 198                        if (gfs2_assert_withdraw(sdp, dblock))
 199                                goto fail;
 200                }
 201
 202                if (amount == sdp->sd_jbsize || new)
 203                        error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
 204                else
 205                        error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
 206
 207                if (error)
 208                        goto fail;
 209
 210                gfs2_trans_add_meta(ip->i_gl, bh);
 211                memcpy(bh->b_data + o, buf, amount);
 212                brelse(bh);
 213
 214                buf += amount;
 215                copied += amount;
 216                lblock++;
 217                dblock++;
 218                extlen--;
 219
 220                o = sizeof(struct gfs2_meta_header);
 221        }
 222
 223out:
 224        error = gfs2_meta_inode_buffer(ip, &dibh);
 225        if (error)
 226                return error;
 227
 228        if (ip->i_inode.i_size < offset + copied)
 229                i_size_write(&ip->i_inode, offset + copied);
 230        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
 231
 232        gfs2_trans_add_meta(ip->i_gl, dibh);
 233        gfs2_dinode_out(ip, dibh->b_data);
 234        brelse(dibh);
 235
 236        return copied;
 237fail:
 238        if (copied)
 239                goto out;
 240        return error;
 241}
 242
 243static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
 244                                 unsigned int size)
 245{
 246        struct buffer_head *dibh;
 247        int error;
 248
 249        error = gfs2_meta_inode_buffer(ip, &dibh);
 250        if (!error) {
 251                memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
 252                brelse(dibh);
 253        }
 254
 255        return (error) ? error : size;
 256}
 257
 258
 259/**
 260 * gfs2_dir_read_data - Read a data from a directory inode
 261 * @ip: The GFS2 Inode
 262 * @buf: The buffer to place result into
 263 * @size: Amount of data to transfer
 264 *
 265 * Returns: The amount of data actually copied or the error
 266 */
 267static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
 268                              unsigned int size)
 269{
 270        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 271        u64 lblock, dblock;
 272        u32 extlen = 0;
 273        unsigned int o;
 274        int copied = 0;
 275        int error = 0;
 276
 277        if (gfs2_is_stuffed(ip))
 278                return gfs2_dir_read_stuffed(ip, buf, size);
 279
 280        if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
 281                return -EINVAL;
 282
 283        lblock = 0;
 284        o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
 285
 286        while (copied < size) {
 287                unsigned int amount;
 288                struct buffer_head *bh;
 289                int new;
 290
 291                amount = size - copied;
 292                if (amount > sdp->sd_sb.sb_bsize - o)
 293                        amount = sdp->sd_sb.sb_bsize - o;
 294
 295                if (!extlen) {
 296                        new = 0;
 297                        error = gfs2_extent_map(&ip->i_inode, lblock, &new,
 298                                                &dblock, &extlen);
 299                        if (error || !dblock)
 300                                goto fail;
 301                        BUG_ON(extlen < 1);
 302                        bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
 303                } else {
 304                        error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, 0, &bh);
 305                        if (error)
 306                                goto fail;
 307                }
 308                error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
 309                if (error) {
 310                        brelse(bh);
 311                        goto fail;
 312                }
 313                dblock++;
 314                extlen--;
 315                memcpy(buf, bh->b_data + o, amount);
 316                brelse(bh);
 317                buf += (amount/sizeof(__be64));
 318                copied += amount;
 319                lblock++;
 320                o = sizeof(struct gfs2_meta_header);
 321        }
 322
 323        return copied;
 324fail:
 325        return (copied) ? copied : error;
 326}
 327
 328/**
 329 * gfs2_dir_get_hash_table - Get pointer to the dir hash table
 330 * @ip: The inode in question
 331 *
 332 * Returns: The hash table or an error
 333 */
 334
 335static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 336{
 337        struct inode *inode = &ip->i_inode;
 338        int ret;
 339        u32 hsize;
 340        __be64 *hc;
 341
 342        BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
 343
 344        hc = ip->i_hash_cache;
 345        if (hc)
 346                return hc;
 347
 348        hsize = BIT(ip->i_depth);
 349        hsize *= sizeof(__be64);
 350        if (hsize != i_size_read(&ip->i_inode)) {
 351                gfs2_consist_inode(ip);
 352                return ERR_PTR(-EIO);
 353        }
 354
 355        hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
 356        if (hc == NULL)
 357                hc = __vmalloc(hsize, GFP_NOFS);
 358
 359        if (hc == NULL)
 360                return ERR_PTR(-ENOMEM);
 361
 362        ret = gfs2_dir_read_data(ip, hc, hsize);
 363        if (ret < 0) {
 364                kvfree(hc);
 365                return ERR_PTR(ret);
 366        }
 367
 368        spin_lock(&inode->i_lock);
 369        if (likely(!ip->i_hash_cache)) {
 370                ip->i_hash_cache = hc;
 371                hc = NULL;
 372        }
 373        spin_unlock(&inode->i_lock);
 374        kvfree(hc);
 375
 376        return ip->i_hash_cache;
 377}
 378
 379/**
 380 * gfs2_dir_hash_inval - Invalidate dir hash
 381 * @ip: The directory inode
 382 *
 383 * Must be called with an exclusive glock, or during glock invalidation.
 384 */
 385void gfs2_dir_hash_inval(struct gfs2_inode *ip)
 386{
 387        __be64 *hc;
 388
 389        spin_lock(&ip->i_inode.i_lock);
 390        hc = ip->i_hash_cache;
 391        ip->i_hash_cache = NULL;
 392        spin_unlock(&ip->i_inode.i_lock);
 393
 394        kvfree(hc);
 395}
 396
 397static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
 398{
 399        return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
 400}
 401
 402static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
 403                                     const struct qstr *name, int ret)
 404{
 405        if (!gfs2_dirent_sentinel(dent) &&
 406            be32_to_cpu(dent->de_hash) == name->hash &&
 407            be16_to_cpu(dent->de_name_len) == name->len &&
 408            memcmp(dent+1, name->name, name->len) == 0)
 409                return ret;
 410        return 0;
 411}
 412
 413static int gfs2_dirent_find(const struct gfs2_dirent *dent,
 414                            const struct qstr *name,
 415                            void *opaque)
 416{
 417        return __gfs2_dirent_find(dent, name, 1);
 418}
 419
 420static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
 421                            const struct qstr *name,
 422                            void *opaque)
 423{
 424        return __gfs2_dirent_find(dent, name, 2);
 425}
 426
 427/*
 428 * name->name holds ptr to start of block.
 429 * name->len holds size of block.
 430 */
 431static int gfs2_dirent_last(const struct gfs2_dirent *dent,
 432                            const struct qstr *name,
 433                            void *opaque)
 434{
 435        const char *start = name->name;
 436        const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
 437        if (name->len == (end - start))
 438                return 1;
 439        return 0;
 440}
 441
 442/* Look for the dirent that contains the offset specified in data. Once we
 443 * find that dirent, there must be space available there for the new dirent */
 444static int gfs2_dirent_find_offset(const struct gfs2_dirent *dent,
 445                                  const struct qstr *name,
 446                                  void *ptr)
 447{
 448        unsigned required = GFS2_DIRENT_SIZE(name->len);
 449        unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 450        unsigned totlen = be16_to_cpu(dent->de_rec_len);
 451
 452        if (ptr < (void *)dent || ptr >= (void *)dent + totlen)
 453                return 0;
 454        if (gfs2_dirent_sentinel(dent))
 455                actual = 0;
 456        if (ptr < (void *)dent + actual)
 457                return -1;
 458        if ((void *)dent + totlen >= ptr + required)
 459                return 1;
 460        return -1;
 461}
 462
 463static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
 464                                  const struct qstr *name,
 465                                  void *opaque)
 466{
 467        unsigned required = GFS2_DIRENT_SIZE(name->len);
 468        unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 469        unsigned totlen = be16_to_cpu(dent->de_rec_len);
 470
 471        if (gfs2_dirent_sentinel(dent))
 472                actual = 0;
 473        if (totlen - actual >= required)
 474                return 1;
 475        return 0;
 476}
 477
 478struct dirent_gather {
 479        const struct gfs2_dirent **pdent;
 480        unsigned offset;
 481};
 482
 483static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
 484                              const struct qstr *name,
 485                              void *opaque)
 486{
 487        struct dirent_gather *g = opaque;
 488        if (!gfs2_dirent_sentinel(dent)) {
 489                g->pdent[g->offset++] = dent;
 490        }
 491        return 0;
 492}
 493
 494/*
 495 * Other possible things to check:
 496 * - Inode located within filesystem size (and on valid block)
 497 * - Valid directory entry type
 498 * Not sure how heavy-weight we want to make this... could also check
 499 * hash is correct for example, but that would take a lot of extra time.
 500 * For now the most important thing is to check that the various sizes
 501 * are correct.
 502 */
 503static int gfs2_check_dirent(struct gfs2_sbd *sdp,
 504                             struct gfs2_dirent *dent, unsigned int offset,
 505                             unsigned int size, unsigned int len, int first)
 506{
 507        const char *msg = "gfs2_dirent too small";
 508        if (unlikely(size < sizeof(struct gfs2_dirent)))
 509                goto error;
 510        msg = "gfs2_dirent misaligned";
 511        if (unlikely(offset & 0x7))
 512                goto error;
 513        msg = "gfs2_dirent points beyond end of block";
 514        if (unlikely(offset + size > len))
 515                goto error;
 516        msg = "zero inode number";
 517        if (unlikely(!first && gfs2_dirent_sentinel(dent)))
 518                goto error;
 519        msg = "name length is greater than space in dirent";
 520        if (!gfs2_dirent_sentinel(dent) &&
 521            unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
 522                     size))
 523                goto error;
 524        return 0;
 525error:
 526        fs_warn(sdp, "%s: %s (%s)\n",
 527                __func__, msg, first ? "first in block" : "not first in block");
 528        return -EIO;
 529}
 530
 531static int gfs2_dirent_offset(struct gfs2_sbd *sdp, const void *buf)
 532{
 533        const struct gfs2_meta_header *h = buf;
 534        int offset;
 535
 536        BUG_ON(buf == NULL);
 537
 538        switch(be32_to_cpu(h->mh_type)) {
 539        case GFS2_METATYPE_LF:
 540                offset = sizeof(struct gfs2_leaf);
 541                break;
 542        case GFS2_METATYPE_DI:
 543                offset = sizeof(struct gfs2_dinode);
 544                break;
 545        default:
 546                goto wrong_type;
 547        }
 548        return offset;
 549wrong_type:
 550        fs_warn(sdp, "%s: wrong block type %u\n", __func__,
 551                be32_to_cpu(h->mh_type));
 552        return -1;
 553}
 554
 555static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
 556                                            unsigned int len, gfs2_dscan_t scan,
 557                                            const struct qstr *name,
 558                                            void *opaque)
 559{
 560        struct gfs2_dirent *dent, *prev;
 561        unsigned offset;
 562        unsigned size;
 563        int ret = 0;
 564
 565        ret = gfs2_dirent_offset(GFS2_SB(inode), buf);
 566        if (ret < 0)
 567                goto consist_inode;
 568
 569        offset = ret;
 570        prev = NULL;
 571        dent = buf + offset;
 572        size = be16_to_cpu(dent->de_rec_len);
 573        if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1))
 574                goto consist_inode;
 575        do {
 576                ret = scan(dent, name, opaque);
 577                if (ret)
 578                        break;
 579                offset += size;
 580                if (offset == len)
 581                        break;
 582                prev = dent;
 583                dent = buf + offset;
 584                size = be16_to_cpu(dent->de_rec_len);
 585                if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size,
 586                                      len, 0))
 587                        goto consist_inode;
 588        } while(1);
 589
 590        switch(ret) {
 591        case 0:
 592                return NULL;
 593        case 1:
 594                return dent;
 595        case 2:
 596                return prev ? prev : dent;
 597        default:
 598                BUG_ON(ret > 0);
 599                return ERR_PTR(ret);
 600        }
 601
 602consist_inode:
 603        gfs2_consist_inode(GFS2_I(inode));
 604        return ERR_PTR(-EIO);
 605}
 606
 607static int dirent_check_reclen(struct gfs2_inode *dip,
 608                               const struct gfs2_dirent *d, const void *end_p)
 609{
 610        const void *ptr = d;
 611        u16 rec_len = be16_to_cpu(d->de_rec_len);
 612
 613        if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
 614                goto broken;
 615        ptr += rec_len;
 616        if (ptr < end_p)
 617                return rec_len;
 618        if (ptr == end_p)
 619                return -ENOENT;
 620broken:
 621        gfs2_consist_inode(dip);
 622        return -EIO;
 623}
 624
 625/**
 626 * dirent_next - Next dirent
 627 * @dip: the directory
 628 * @bh: The buffer
 629 * @dent: Pointer to list of dirents
 630 *
 631 * Returns: 0 on success, error code otherwise
 632 */
 633
 634static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
 635                       struct gfs2_dirent **dent)
 636{
 637        struct gfs2_dirent *cur = *dent, *tmp;
 638        char *bh_end = bh->b_data + bh->b_size;
 639        int ret;
 640
 641        ret = dirent_check_reclen(dip, cur, bh_end);
 642        if (ret < 0)
 643                return ret;
 644
 645        tmp = (void *)cur + ret;
 646        ret = dirent_check_reclen(dip, tmp, bh_end);
 647        if (ret == -EIO)
 648                return ret;
 649
 650        /* Only the first dent could ever have de_inum.no_addr == 0 */
 651        if (gfs2_dirent_sentinel(tmp)) {
 652                gfs2_consist_inode(dip);
 653                return -EIO;
 654        }
 655
 656        *dent = tmp;
 657        return 0;
 658}
 659
 660/**
 661 * dirent_del - Delete a dirent
 662 * @dip: The GFS2 inode
 663 * @bh: The buffer
 664 * @prev: The previous dirent
 665 * @cur: The current dirent
 666 *
 667 */
 668
 669static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 670                       struct gfs2_dirent *prev, struct gfs2_dirent *cur)
 671{
 672        u16 cur_rec_len, prev_rec_len;
 673
 674        if (gfs2_dirent_sentinel(cur)) {
 675                gfs2_consist_inode(dip);
 676                return;
 677        }
 678
 679        gfs2_trans_add_meta(dip->i_gl, bh);
 680
 681        /* If there is no prev entry, this is the first entry in the block.
 682           The de_rec_len is already as big as it needs to be.  Just zero
 683           out the inode number and return.  */
 684
 685        if (!prev) {
 686                cur->de_inum.no_addr = 0;
 687                cur->de_inum.no_formal_ino = 0;
 688                return;
 689        }
 690
 691        /*  Combine this dentry with the previous one.  */
 692
 693        prev_rec_len = be16_to_cpu(prev->de_rec_len);
 694        cur_rec_len = be16_to_cpu(cur->de_rec_len);
 695
 696        if ((char *)prev + prev_rec_len != (char *)cur)
 697                gfs2_consist_inode(dip);
 698        if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
 699                gfs2_consist_inode(dip);
 700
 701        prev_rec_len += cur_rec_len;
 702        prev->de_rec_len = cpu_to_be16(prev_rec_len);
 703}
 704
 705
 706static struct gfs2_dirent *do_init_dirent(struct inode *inode,
 707                                          struct gfs2_dirent *dent,
 708                                          const struct qstr *name,
 709                                          struct buffer_head *bh,
 710                                          unsigned offset)
 711{
 712        struct gfs2_inode *ip = GFS2_I(inode);
 713        struct gfs2_dirent *ndent;
 714        unsigned totlen;
 715
 716        totlen = be16_to_cpu(dent->de_rec_len);
 717        BUG_ON(offset + name->len > totlen);
 718        gfs2_trans_add_meta(ip->i_gl, bh);
 719        ndent = (struct gfs2_dirent *)((char *)dent + offset);
 720        dent->de_rec_len = cpu_to_be16(offset);
 721        gfs2_qstr2dirent(name, totlen - offset, ndent);
 722        return ndent;
 723}
 724
 725
 726/*
 727 * Takes a dent from which to grab space as an argument. Returns the
 728 * newly created dent.
 729 */
 730static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
 731                                            struct gfs2_dirent *dent,
 732                                            const struct qstr *name,
 733                                            struct buffer_head *bh)
 734{
 735        unsigned offset = 0;
 736
 737        if (!gfs2_dirent_sentinel(dent))
 738                offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 739        return do_init_dirent(inode, dent, name, bh, offset);
 740}
 741
 742static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode,
 743                                                   struct buffer_head *bh,
 744                                                   const struct qstr *name,
 745                                                   void *ptr)
 746{
 747        struct gfs2_dirent *dent;
 748        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
 749                                gfs2_dirent_find_offset, name, ptr);
 750        if (IS_ERR_OR_NULL(dent))
 751                return dent;
 752        return do_init_dirent(inode, dent, name, bh,
 753                              (unsigned)(ptr - (void *)dent));
 754}
 755
 756static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
 757                    struct buffer_head **bhp)
 758{
 759        int error;
 760
 761        error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, 0, bhp);
 762        if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
 763                /* pr_info("block num=%llu\n", leaf_no); */
 764                error = -EIO;
 765        }
 766
 767        return error;
 768}
 769
 770/**
 771 * get_leaf_nr - Get a leaf number associated with the index
 772 * @dip: The GFS2 inode
 773 * @index:
 774 * @leaf_out:
 775 *
 776 * Returns: 0 on success, error code otherwise
 777 */
 778
 779static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
 780                       u64 *leaf_out)
 781{
 782        __be64 *hash;
 783        int error;
 784
 785        hash = gfs2_dir_get_hash_table(dip);
 786        error = PTR_ERR_OR_ZERO(hash);
 787
 788        if (!error)
 789                *leaf_out = be64_to_cpu(*(hash + index));
 790
 791        return error;
 792}
 793
 794static int get_first_leaf(struct gfs2_inode *dip, u32 index,
 795                          struct buffer_head **bh_out)
 796{
 797        u64 leaf_no;
 798        int error;
 799
 800        error = get_leaf_nr(dip, index, &leaf_no);
 801        if (!error)
 802                error = get_leaf(dip, leaf_no, bh_out);
 803
 804        return error;
 805}
 806
 807static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
 808                                              const struct qstr *name,
 809                                              gfs2_dscan_t scan,
 810                                              struct buffer_head **pbh)
 811{
 812        struct buffer_head *bh;
 813        struct gfs2_dirent *dent;
 814        struct gfs2_inode *ip = GFS2_I(inode);
 815        int error;
 816
 817        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 818                struct gfs2_leaf *leaf;
 819                unsigned int hsize = BIT(ip->i_depth);
 820                unsigned int index;
 821                u64 ln;
 822                if (hsize * sizeof(u64) != i_size_read(inode)) {
 823                        gfs2_consist_inode(ip);
 824                        return ERR_PTR(-EIO);
 825                }
 826
 827                index = name->hash >> (32 - ip->i_depth);
 828                error = get_first_leaf(ip, index, &bh);
 829                if (error)
 830                        return ERR_PTR(error);
 831                do {
 832                        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
 833                                                scan, name, NULL);
 834                        if (dent)
 835                                goto got_dent;
 836                        leaf = (struct gfs2_leaf *)bh->b_data;
 837                        ln = be64_to_cpu(leaf->lf_next);
 838                        brelse(bh);
 839                        if (!ln)
 840                                break;
 841
 842                        error = get_leaf(ip, ln, &bh);
 843                } while(!error);
 844
 845                return error ? ERR_PTR(error) : NULL;
 846        }
 847
 848
 849        error = gfs2_meta_inode_buffer(ip, &bh);
 850        if (error)
 851                return ERR_PTR(error);
 852        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
 853got_dent:
 854        if (IS_ERR_OR_NULL(dent)) {
 855                brelse(bh);
 856                bh = NULL;
 857        }
 858        *pbh = bh;
 859        return dent;
 860}
 861
 862static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
 863{
 864        struct gfs2_inode *ip = GFS2_I(inode);
 865        unsigned int n = 1;
 866        u64 bn;
 867        int error;
 868        struct buffer_head *bh;
 869        struct gfs2_leaf *leaf;
 870        struct gfs2_dirent *dent;
 871        struct timespec64 tv = current_time(inode);
 872
 873        error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 874        if (error)
 875                return NULL;
 876        bh = gfs2_meta_new(ip->i_gl, bn);
 877        if (!bh)
 878                return NULL;
 879
 880        gfs2_trans_remove_revoke(GFS2_SB(inode), bn, 1);
 881        gfs2_trans_add_meta(ip->i_gl, bh);
 882        gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
 883        leaf = (struct gfs2_leaf *)bh->b_data;
 884        leaf->lf_depth = cpu_to_be16(depth);
 885        leaf->lf_entries = 0;
 886        leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
 887        leaf->lf_next = 0;
 888        leaf->lf_inode = cpu_to_be64(ip->i_no_addr);
 889        leaf->lf_dist = cpu_to_be32(1);
 890        leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
 891        leaf->lf_sec = cpu_to_be64(tv.tv_sec);
 892        memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2));
 893        dent = (struct gfs2_dirent *)(leaf+1);
 894        gfs2_qstr2dirent(&empty_name, bh->b_size - sizeof(struct gfs2_leaf), dent);
 895        *pbh = bh;
 896        return leaf;
 897}
 898
 899/**
 900 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
 901 * @dip: The GFS2 inode
 902 *
 903 * Returns: 0 on success, error code otherwise
 904 */
 905
 906static int dir_make_exhash(struct inode *inode)
 907{
 908        struct gfs2_inode *dip = GFS2_I(inode);
 909        struct gfs2_sbd *sdp = GFS2_SB(inode);
 910        struct gfs2_dirent *dent;
 911        struct qstr args;
 912        struct buffer_head *bh, *dibh;
 913        struct gfs2_leaf *leaf;
 914        int y;
 915        u32 x;
 916        __be64 *lp;
 917        u64 bn;
 918        int error;
 919
 920        error = gfs2_meta_inode_buffer(dip, &dibh);
 921        if (error)
 922                return error;
 923
 924        /*  Turn over a new leaf  */
 925
 926        leaf = new_leaf(inode, &bh, 0);
 927        if (!leaf)
 928                return -ENOSPC;
 929        bn = bh->b_blocknr;
 930
 931        gfs2_assert(sdp, dip->i_entries < BIT(16));
 932        leaf->lf_entries = cpu_to_be16(dip->i_entries);
 933
 934        /*  Copy dirents  */
 935
 936        gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
 937                             sizeof(struct gfs2_dinode));
 938
 939        /*  Find last entry  */
 940
 941        x = 0;
 942        args.len = bh->b_size - sizeof(struct gfs2_dinode) +
 943                   sizeof(struct gfs2_leaf);
 944        args.name = bh->b_data;
 945        dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
 946                                gfs2_dirent_last, &args, NULL);
 947        if (!dent) {
 948                brelse(bh);
 949                brelse(dibh);
 950                return -EIO;
 951        }
 952        if (IS_ERR(dent)) {
 953                brelse(bh);
 954                brelse(dibh);
 955                return PTR_ERR(dent);
 956        }
 957
 958        /*  Adjust the last dirent's record length
 959           (Remember that dent still points to the last entry.)  */
 960
 961        dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
 962                sizeof(struct gfs2_dinode) -
 963                sizeof(struct gfs2_leaf));
 964
 965        brelse(bh);
 966
 967        /*  We're done with the new leaf block, now setup the new
 968            hash table.  */
 969
 970        gfs2_trans_add_meta(dip->i_gl, dibh);
 971        gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 972
 973        lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
 974
 975        for (x = sdp->sd_hash_ptrs; x--; lp++)
 976                *lp = cpu_to_be64(bn);
 977
 978        i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
 979        gfs2_add_inode_blocks(&dip->i_inode, 1);
 980        dip->i_diskflags |= GFS2_DIF_EXHASH;
 981
 982        for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 983        dip->i_depth = y;
 984
 985        gfs2_dinode_out(dip, dibh->b_data);
 986
 987        brelse(dibh);
 988
 989        return 0;
 990}
 991
 992/**
 993 * dir_split_leaf - Split a leaf block into two
 994 * @dip: The GFS2 inode
 995 * @index:
 996 * @leaf_no:
 997 *
 998 * Returns: 0 on success, error code on failure
 999 */
1000
1001static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1002{
1003        struct gfs2_inode *dip = GFS2_I(inode);
1004        struct buffer_head *nbh, *obh, *dibh;
1005        struct gfs2_leaf *nleaf, *oleaf;
1006        struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
1007        u32 start, len, half_len, divider;
1008        u64 bn, leaf_no;
1009        __be64 *lp;
1010        u32 index;
1011        int x;
1012        int error;
1013
1014        index = name->hash >> (32 - dip->i_depth);
1015        error = get_leaf_nr(dip, index, &leaf_no);
1016        if (error)
1017                return error;
1018
1019        /*  Get the old leaf block  */
1020        error = get_leaf(dip, leaf_no, &obh);
1021        if (error)
1022                return error;
1023
1024        oleaf = (struct gfs2_leaf *)obh->b_data;
1025        if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) {
1026                brelse(obh);
1027                return 1; /* can't split */
1028        }
1029
1030        gfs2_trans_add_meta(dip->i_gl, obh);
1031
1032        nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
1033        if (!nleaf) {
1034                brelse(obh);
1035                return -ENOSPC;
1036        }
1037        bn = nbh->b_blocknr;
1038
1039        /*  Compute the start and len of leaf pointers in the hash table.  */
1040        len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
1041        half_len = len >> 1;
1042        if (!half_len) {
1043                fs_warn(GFS2_SB(inode), "i_depth %u lf_depth %u index %u\n",
1044                        dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
1045                gfs2_consist_inode(dip);
1046                error = -EIO;
1047                goto fail_brelse;
1048        }
1049
1050        start = (index & ~(len - 1));
1051
1052        /* Change the pointers.
1053           Don't bother distinguishing stuffed from non-stuffed.
1054           This code is complicated enough already. */
1055        lp = kmalloc_array(half_len, sizeof(__be64), GFP_NOFS);
1056        if (!lp) {
1057                error = -ENOMEM;
1058                goto fail_brelse;
1059        }
1060
1061        /*  Change the pointers  */
1062        for (x = 0; x < half_len; x++)
1063                lp[x] = cpu_to_be64(bn);
1064
1065        gfs2_dir_hash_inval(dip);
1066
1067        error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
1068                                    half_len * sizeof(u64));
1069        if (error != half_len * sizeof(u64)) {
1070                if (error >= 0)
1071                        error = -EIO;
1072                goto fail_lpfree;
1073        }
1074
1075        kfree(lp);
1076
1077        /*  Compute the divider  */
1078        divider = (start + half_len) << (32 - dip->i_depth);
1079
1080        /*  Copy the entries  */
1081        dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));
1082
1083        do {
1084                next = dent;
1085                if (dirent_next(dip, obh, &next))
1086                        next = NULL;
1087
1088                if (!gfs2_dirent_sentinel(dent) &&
1089                    be32_to_cpu(dent->de_hash) < divider) {
1090                        struct qstr str;
1091                        void *ptr = ((char *)dent - obh->b_data) + nbh->b_data;
1092                        str.name = (char*)(dent+1);
1093                        str.len = be16_to_cpu(dent->de_name_len);
1094                        str.hash = be32_to_cpu(dent->de_hash);
1095                        new = gfs2_dirent_split_alloc(inode, nbh, &str, ptr);
1096                        if (IS_ERR(new)) {
1097                                error = PTR_ERR(new);
1098                                break;
1099                        }
1100
1101                        new->de_inum = dent->de_inum; /* No endian worries */
1102                        new->de_type = dent->de_type; /* No endian worries */
1103                        be16_add_cpu(&nleaf->lf_entries, 1);
1104
1105                        dirent_del(dip, obh, prev, dent);
1106
1107                        if (!oleaf->lf_entries)
1108                                gfs2_consist_inode(dip);
1109                        be16_add_cpu(&oleaf->lf_entries, -1);
1110
1111                        if (!prev)
1112                                prev = dent;
1113                } else {
1114                        prev = dent;
1115                }
1116                dent = next;
1117        } while (dent);
1118
1119        oleaf->lf_depth = nleaf->lf_depth;
1120
1121        error = gfs2_meta_inode_buffer(dip, &dibh);
1122        if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1123                gfs2_trans_add_meta(dip->i_gl, dibh);
1124                gfs2_add_inode_blocks(&dip->i_inode, 1);
1125                gfs2_dinode_out(dip, dibh->b_data);
1126                brelse(dibh);
1127        }
1128
1129        brelse(obh);
1130        brelse(nbh);
1131
1132        return error;
1133
1134fail_lpfree:
1135        kfree(lp);
1136
1137fail_brelse:
1138        brelse(obh);
1139        brelse(nbh);
1140        return error;
1141}
1142
1143/**
1144 * dir_double_exhash - Double size of ExHash table
1145 * @dip: The GFS2 dinode
1146 *
1147 * Returns: 0 on success, error code on failure
1148 */
1149
1150static int dir_double_exhash(struct gfs2_inode *dip)
1151{
1152        struct buffer_head *dibh;
1153        u32 hsize;
1154        u32 hsize_bytes;
1155        __be64 *hc;
1156        __be64 *hc2, *h;
1157        int x;
1158        int error = 0;
1159
1160        hsize = BIT(dip->i_depth);
1161        hsize_bytes = hsize * sizeof(__be64);
1162
1163        hc = gfs2_dir_get_hash_table(dip);
1164        if (IS_ERR(hc))
1165                return PTR_ERR(hc);
1166
1167        hc2 = kmalloc_array(hsize_bytes, 2, GFP_NOFS | __GFP_NOWARN);
1168        if (hc2 == NULL)
1169                hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS);
1170
1171        if (!hc2)
1172                return -ENOMEM;
1173
1174        h = hc2;
1175        error = gfs2_meta_inode_buffer(dip, &dibh);
1176        if (error)
1177                goto out_kfree;
1178
1179        for (x = 0; x < hsize; x++) {
1180                *h++ = *hc;
1181                *h++ = *hc;
1182                hc++;
1183        }
1184
1185        error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
1186        if (error != (hsize_bytes * 2))
1187                goto fail;
1188
1189        gfs2_dir_hash_inval(dip);
1190        dip->i_hash_cache = hc2;
1191        dip->i_depth++;
1192        gfs2_dinode_out(dip, dibh->b_data);
1193        brelse(dibh);
1194        return 0;
1195
1196fail:
1197        /* Replace original hash table & size */
1198        gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
1199        i_size_write(&dip->i_inode, hsize_bytes);
1200        gfs2_dinode_out(dip, dibh->b_data);
1201        brelse(dibh);
1202out_kfree:
1203        kvfree(hc2);
1204        return error;
1205}
1206
1207/**
1208 * compare_dents - compare directory entries by hash value
1209 * @a: first dent
1210 * @b: second dent
1211 *
1212 * When comparing the hash entries of @a to @b:
1213 *   gt: returns 1
1214 *   lt: returns -1
1215 *   eq: returns 0
1216 */
1217
1218static int compare_dents(const void *a, const void *b)
1219{
1220        const struct gfs2_dirent *dent_a, *dent_b;
1221        u32 hash_a, hash_b;
1222        int ret = 0;
1223
1224        dent_a = *(const struct gfs2_dirent **)a;
1225        hash_a = dent_a->de_cookie;
1226
1227        dent_b = *(const struct gfs2_dirent **)b;
1228        hash_b = dent_b->de_cookie;
1229
1230        if (hash_a > hash_b)
1231                ret = 1;
1232        else if (hash_a < hash_b)
1233                ret = -1;
1234        else {
1235                unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1236                unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1237
1238                if (len_a > len_b)
1239                        ret = 1;
1240                else if (len_a < len_b)
1241                        ret = -1;
1242                else
1243                        ret = memcmp(dent_a + 1, dent_b + 1, len_a);
1244        }
1245
1246        return ret;
1247}
1248
1249/**
1250 * do_filldir_main - read out directory entries
1251 * @dip: The GFS2 inode
1252 * @ctx: what to feed the entries to
1253 * @darr: an array of struct gfs2_dirent pointers to read
1254 * @entries: the number of entries in darr
1255 * @copied: pointer to int that's non-zero if a entry has been copied out
1256 *
1257 * Jump through some hoops to make sure that if there are hash collsions,
1258 * they are read out at the beginning of a buffer.  We want to minimize
1259 * the possibility that they will fall into different readdir buffers or
1260 * that someone will want to seek to that location.
1261 *
1262 * Returns: errno, >0 if the actor tells you to stop
1263 */
1264
1265static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
1266                           struct gfs2_dirent **darr, u32 entries,
1267                           u32 sort_start, int *copied)
1268{
1269        const struct gfs2_dirent *dent, *dent_next;
1270        u64 off, off_next;
1271        unsigned int x, y;
1272        int run = 0;
1273
1274        if (sort_start < entries)
1275                sort(&darr[sort_start], entries - sort_start,
1276                     sizeof(struct gfs2_dirent *), compare_dents, NULL);
1277
1278        dent_next = darr[0];
1279        off_next = dent_next->de_cookie;
1280
1281        for (x = 0, y = 1; x < entries; x++, y++) {
1282                dent = dent_next;
1283                off = off_next;
1284
1285                if (y < entries) {
1286                        dent_next = darr[y];
1287                        off_next = dent_next->de_cookie;
1288
1289                        if (off < ctx->pos)
1290                                continue;
1291                        ctx->pos = off;
1292
1293                        if (off_next == off) {
1294                                if (*copied && !run)
1295                                        return 1;
1296                                run = 1;
1297                        } else
1298                                run = 0;
1299                } else {
1300                        if (off < ctx->pos)
1301                                continue;
1302                        ctx->pos = off;
1303                }
1304
1305                if (!dir_emit(ctx, (const char *)(dent + 1),
1306                                be16_to_cpu(dent->de_name_len),
1307                                be64_to_cpu(dent->de_inum.no_addr),
1308                                be16_to_cpu(dent->de_type)))
1309                        return 1;
1310
1311                *copied = 1;
1312        }
1313
1314        /* Increment the ctx->pos by one, so the next time we come into the
1315           do_filldir fxn, we get the next entry instead of the last one in the
1316           current leaf */
1317
1318        ctx->pos++;
1319
1320        return 0;
1321}
1322
1323static void *gfs2_alloc_sort_buffer(unsigned size)
1324{
1325        void *ptr = NULL;
1326
1327        if (size < KMALLOC_MAX_SIZE)
1328                ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
1329        if (!ptr)
1330                ptr = __vmalloc(size, GFP_NOFS);
1331        return ptr;
1332}
1333
1334
1335static int gfs2_set_cookies(struct gfs2_sbd *sdp, struct buffer_head *bh,
1336                            unsigned leaf_nr, struct gfs2_dirent **darr,
1337                            unsigned entries)
1338{
1339        int sort_id = -1;
1340        int i;
1341        
1342        for (i = 0; i < entries; i++) {
1343                unsigned offset;
1344
1345                darr[i]->de_cookie = be32_to_cpu(darr[i]->de_hash);
1346                darr[i]->de_cookie = gfs2_disk_hash2offset(darr[i]->de_cookie);
1347
1348                if (!sdp->sd_args.ar_loccookie)
1349                        continue;
1350                offset = (char *)(darr[i]) -
1351                        (bh->b_data + gfs2_dirent_offset(sdp, bh->b_data));
1352                offset /= GFS2_MIN_DIRENT_SIZE;
1353                offset += leaf_nr * sdp->sd_max_dents_per_leaf;
1354                if (offset >= GFS2_USE_HASH_FLAG ||
1355                    leaf_nr >= GFS2_USE_HASH_FLAG) {
1356                        darr[i]->de_cookie |= GFS2_USE_HASH_FLAG;
1357                        if (sort_id < 0)
1358                                sort_id = i;
1359                        continue;
1360                }
1361                darr[i]->de_cookie &= GFS2_HASH_INDEX_MASK;
1362                darr[i]->de_cookie |= offset;
1363        }
1364        return sort_id;
1365}       
1366
1367
1368static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
1369                              int *copied, unsigned *depth,
1370                              u64 leaf_no)
1371{
1372        struct gfs2_inode *ip = GFS2_I(inode);
1373        struct gfs2_sbd *sdp = GFS2_SB(inode);
1374        struct buffer_head *bh;
1375        struct gfs2_leaf *lf;
1376        unsigned entries = 0, entries2 = 0;
1377        unsigned leaves = 0, leaf = 0, offset, sort_offset;
1378        struct gfs2_dirent **darr, *dent;
1379        struct dirent_gather g;
1380        struct buffer_head **larr;
1381        int error, i, need_sort = 0, sort_id;
1382        u64 lfn = leaf_no;
1383
1384        do {
1385                error = get_leaf(ip, lfn, &bh);
1386                if (error)
1387                        goto out;
1388                lf = (struct gfs2_leaf *)bh->b_data;
1389                if (leaves == 0)
1390                        *depth = be16_to_cpu(lf->lf_depth);
1391                entries += be16_to_cpu(lf->lf_entries);
1392                leaves++;
1393                lfn = be64_to_cpu(lf->lf_next);
1394                brelse(bh);
1395        } while(lfn);
1396
1397        if (*depth < GFS2_DIR_MAX_DEPTH || !sdp->sd_args.ar_loccookie) {
1398                need_sort = 1;
1399                sort_offset = 0;
1400        }
1401
1402        if (!entries)
1403                return 0;
1404
1405        error = -ENOMEM;
1406        /*
1407         * The extra 99 entries are not normally used, but are a buffer
1408         * zone in case the number of entries in the leaf is corrupt.
1409         * 99 is the maximum number of entries that can fit in a single
1410         * leaf block.
1411         */
1412        larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
1413        if (!larr)
1414                goto out;
1415        darr = (struct gfs2_dirent **)(larr + leaves);
1416        g.pdent = (const struct gfs2_dirent **)darr;
1417        g.offset = 0;
1418        lfn = leaf_no;
1419
1420        do {
1421                error = get_leaf(ip, lfn, &bh);
1422                if (error)
1423                        goto out_free;
1424                lf = (struct gfs2_leaf *)bh->b_data;
1425                lfn = be64_to_cpu(lf->lf_next);
1426                if (lf->lf_entries) {
1427                        offset = g.offset;
1428                        entries2 += be16_to_cpu(lf->lf_entries);
1429                        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1430                                                gfs2_dirent_gather, NULL, &g);
1431                        error = PTR_ERR(dent);
1432                        if (IS_ERR(dent))
1433                                goto out_free;
1434                        if (entries2 != g.offset) {
1435                                fs_warn(sdp, "Number of entries corrupt in dir "
1436                                                "leaf %llu, entries2 (%u) != "
1437                                                "g.offset (%u)\n",
1438                                        (unsigned long long)bh->b_blocknr,
1439                                        entries2, g.offset);
1440                                gfs2_consist_inode(ip);
1441                                error = -EIO;
1442                                goto out_free;
1443                        }
1444                        error = 0;
1445                        sort_id = gfs2_set_cookies(sdp, bh, leaf, &darr[offset],
1446                                                   be16_to_cpu(lf->lf_entries));
1447                        if (!need_sort && sort_id >= 0) {
1448                                need_sort = 1;
1449                                sort_offset = offset + sort_id;
1450                        }
1451                        larr[leaf++] = bh;
1452                } else {
1453                        larr[leaf++] = NULL;
1454                        brelse(bh);
1455                }
1456        } while(lfn);
1457
1458        BUG_ON(entries2 != entries);
1459        error = do_filldir_main(ip, ctx, darr, entries, need_sort ?
1460                                sort_offset : entries, copied);
1461out_free:
1462        for(i = 0; i < leaf; i++)
1463                brelse(larr[i]);
1464        kvfree(larr);
1465out:
1466        return error;
1467}
1468
1469/**
1470 * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
1471 *
1472 * Note: we can't calculate each index like dir_e_read can because we don't
1473 * have the leaf, and therefore we don't have the depth, and therefore we
1474 * don't have the length. So we have to just read enough ahead to make up
1475 * for the loss of information.
1476 */
1477static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
1478                               struct file_ra_state *f_ra)
1479{
1480        struct gfs2_inode *ip = GFS2_I(inode);
1481        struct gfs2_glock *gl = ip->i_gl;
1482        struct buffer_head *bh;
1483        u64 blocknr = 0, last;
1484        unsigned count;
1485
1486        /* First check if we've already read-ahead for the whole range. */
1487        if (index + MAX_RA_BLOCKS < f_ra->start)
1488                return;
1489
1490        f_ra->start = max((pgoff_t)index, f_ra->start);
1491        for (count = 0; count < MAX_RA_BLOCKS; count++) {
1492                if (f_ra->start >= hsize) /* if exceeded the hash table */
1493                        break;
1494
1495                last = blocknr;
1496                blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
1497                f_ra->start++;
1498                if (blocknr == last)
1499                        continue;
1500
1501                bh = gfs2_getbuf(gl, blocknr, 1);
1502                if (trylock_buffer(bh)) {
1503                        if (buffer_uptodate(bh)) {
1504                                unlock_buffer(bh);
1505                                brelse(bh);
1506                                continue;
1507                        }
1508                        bh->b_end_io = end_buffer_read_sync;
1509                        submit_bh(REQ_OP_READ,
1510                                  REQ_RAHEAD | REQ_META | REQ_PRIO,
1511                                  bh);
1512                        continue;
1513                }
1514                brelse(bh);
1515        }
1516}
1517
1518/**
1519 * dir_e_read - Reads the entries from a directory into a filldir buffer
1520 * @dip: dinode pointer
1521 * @ctx: actor to feed the entries to
1522 *
1523 * Returns: errno
1524 */
1525
1526static int dir_e_read(struct inode *inode, struct dir_context *ctx,
1527                      struct file_ra_state *f_ra)
1528{
1529        struct gfs2_inode *dip = GFS2_I(inode);
1530        u32 hsize, len = 0;
1531        u32 hash, index;
1532        __be64 *lp;
1533        int copied = 0;
1534        int error = 0;
1535        unsigned depth = 0;
1536
1537        hsize = BIT(dip->i_depth);
1538        hash = gfs2_dir_offset2hash(ctx->pos);
1539        index = hash >> (32 - dip->i_depth);
1540
1541        if (dip->i_hash_cache == NULL)
1542                f_ra->start = 0;
1543        lp = gfs2_dir_get_hash_table(dip);
1544        if (IS_ERR(lp))
1545                return PTR_ERR(lp);
1546
1547        gfs2_dir_readahead(inode, hsize, index, f_ra);
1548
1549        while (index < hsize) {
1550                error = gfs2_dir_read_leaf(inode, ctx,
1551                                           &copied, &depth,
1552                                           be64_to_cpu(lp[index]));
1553                if (error)
1554                        break;
1555
1556                len = BIT(dip->i_depth - depth);
1557                index = (index & ~(len - 1)) + len;
1558        }
1559
1560        if (error > 0)
1561                error = 0;
1562        return error;
1563}
1564
1565int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
1566                  struct file_ra_state *f_ra)
1567{
1568        struct gfs2_inode *dip = GFS2_I(inode);
1569        struct gfs2_sbd *sdp = GFS2_SB(inode);
1570        struct dirent_gather g;
1571        struct gfs2_dirent **darr, *dent;
1572        struct buffer_head *dibh;
1573        int copied = 0;
1574        int error;
1575
1576        if (!dip->i_entries)
1577                return 0;
1578
1579        if (dip->i_diskflags & GFS2_DIF_EXHASH)
1580                return dir_e_read(inode, ctx, f_ra);
1581
1582        if (!gfs2_is_stuffed(dip)) {
1583                gfs2_consist_inode(dip);
1584                return -EIO;
1585        }
1586
1587        error = gfs2_meta_inode_buffer(dip, &dibh);
1588        if (error)
1589                return error;
1590
1591        error = -ENOMEM;
1592        /* 96 is max number of dirents which can be stuffed into an inode */
1593        darr = kmalloc_array(96, sizeof(struct gfs2_dirent *), GFP_NOFS);
1594        if (darr) {
1595                g.pdent = (const struct gfs2_dirent **)darr;
1596                g.offset = 0;
1597                dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1598                                        gfs2_dirent_gather, NULL, &g);
1599                if (IS_ERR(dent)) {
1600                        error = PTR_ERR(dent);
1601                        goto out;
1602                }
1603                if (dip->i_entries != g.offset) {
1604                        fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1605                                "ip->i_entries (%u) != g.offset (%u)\n",
1606                                (unsigned long long)dip->i_no_addr,
1607                                dip->i_entries,
1608                                g.offset);
1609                        gfs2_consist_inode(dip);
1610                        error = -EIO;
1611                        goto out;
1612                }
1613                gfs2_set_cookies(sdp, dibh, 0, darr, dip->i_entries);
1614                error = do_filldir_main(dip, ctx, darr,
1615                                        dip->i_entries, 0, &copied);
1616out:
1617                kfree(darr);
1618        }
1619
1620        if (error > 0)
1621                error = 0;
1622
1623        brelse(dibh);
1624
1625        return error;
1626}
1627
1628/**
1629 * gfs2_dir_search - Search a directory
1630 * @dip: The GFS2 dir inode
1631 * @name: The name we are looking up
1632 * @fail_on_exist: Fail if the name exists rather than looking it up
1633 *
1634 * This routine searches a directory for a file or another directory.
1635 * Assumes a glock is held on dip.
1636 *
1637 * Returns: errno
1638 */
1639
1640struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
1641                              bool fail_on_exist)
1642{
1643        struct buffer_head *bh;
1644        struct gfs2_dirent *dent;
1645        u64 addr, formal_ino;
1646        u16 dtype;
1647
1648        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1649        if (dent) {
1650                struct inode *inode;
1651                u16 rahead;
1652
1653                if (IS_ERR(dent))
1654                        return ERR_CAST(dent);
1655                dtype = be16_to_cpu(dent->de_type);
1656                rahead = be16_to_cpu(dent->de_rahead);
1657                addr = be64_to_cpu(dent->de_inum.no_addr);
1658                formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino);
1659                brelse(bh);
1660                if (fail_on_exist)
1661                        return ERR_PTR(-EEXIST);
1662                inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino,
1663                                          GFS2_BLKST_FREE /* ignore */);
1664                if (!IS_ERR(inode))
1665                        GFS2_I(inode)->i_rahead = rahead;
1666                return inode;
1667        }
1668        return ERR_PTR(-ENOENT);
1669}
1670
1671int gfs2_dir_check(struct inode *dir, const struct qstr *name,
1672                   const struct gfs2_inode *ip)
1673{
1674        struct buffer_head *bh;
1675        struct gfs2_dirent *dent;
1676        int ret = -ENOENT;
1677
1678        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1679        if (dent) {
1680                if (IS_ERR(dent))
1681                        return PTR_ERR(dent);
1682                if (ip) {
1683                        if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
1684                                goto out;
1685                        if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
1686                            ip->i_no_formal_ino)
1687                                goto out;
1688                        if (unlikely(IF2DT(ip->i_inode.i_mode) !=
1689                            be16_to_cpu(dent->de_type))) {
1690                                gfs2_consist_inode(GFS2_I(dir));
1691                                ret = -EIO;
1692                                goto out;
1693                        }
1694                }
1695                ret = 0;
1696out:
1697                brelse(bh);
1698        }
1699        return ret;
1700}
1701
1702/**
1703 * dir_new_leaf - Add a new leaf onto hash chain
1704 * @inode: The directory
1705 * @name: The name we are adding
1706 *
1707 * This adds a new dir leaf onto an existing leaf when there is not
1708 * enough space to add a new dir entry. This is a last resort after
1709 * we've expanded the hash table to max size and also split existing
1710 * leaf blocks, so it will only occur for very large directories.
1711 *
1712 * The dist parameter is set to 1 for leaf blocks directly attached
1713 * to the hash table, 2 for one layer of indirection, 3 for two layers
1714 * etc. We are thus able to tell the difference between an old leaf
1715 * with dist set to zero (i.e. "don't know") and a new one where we
1716 * set this information for debug/fsck purposes.
1717 *
1718 * Returns: 0 on success, or -ve on error
1719 */
1720
1721static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1722{
1723        struct buffer_head *bh, *obh;
1724        struct gfs2_inode *ip = GFS2_I(inode);
1725        struct gfs2_leaf *leaf, *oleaf;
1726        u32 dist = 1;
1727        int error;
1728        u32 index;
1729        u64 bn;
1730
1731        index = name->hash >> (32 - ip->i_depth);
1732        error = get_first_leaf(ip, index, &obh);
1733        if (error)
1734                return error;
1735        do {
1736                dist++;
1737                oleaf = (struct gfs2_leaf *)obh->b_data;
1738                bn = be64_to_cpu(oleaf->lf_next);
1739                if (!bn)
1740                        break;
1741                brelse(obh);
1742                error = get_leaf(ip, bn, &obh);
1743                if (error)
1744                        return error;
1745        } while(1);
1746
1747        gfs2_trans_add_meta(ip->i_gl, obh);
1748
1749        leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1750        if (!leaf) {
1751                brelse(obh);
1752                return -ENOSPC;
1753        }
1754        leaf->lf_dist = cpu_to_be32(dist);
1755        oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1756        brelse(bh);
1757        brelse(obh);
1758
1759        error = gfs2_meta_inode_buffer(ip, &bh);
1760        if (error)
1761                return error;
1762        gfs2_trans_add_meta(ip->i_gl, bh);
1763        gfs2_add_inode_blocks(&ip->i_inode, 1);
1764        gfs2_dinode_out(ip, bh->b_data);
1765        brelse(bh);
1766        return 0;
1767}
1768
1769static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip)
1770{
1771        u64 where = ip->i_no_addr + 1;
1772        if (ip->i_eattr == where)
1773                return 1;
1774        return 0;
1775}
1776
1777/**
1778 * gfs2_dir_add - Add new filename into directory
1779 * @inode: The directory inode
1780 * @name: The new name
1781 * @nip: The GFS2 inode to be linked in to the directory
1782 * @da: The directory addition info
1783 *
1784 * If the call to gfs2_diradd_alloc_required resulted in there being
1785 * no need to allocate any new directory blocks, then it will contain
1786 * a pointer to the directory entry and the bh in which it resides. We
1787 * can use that without having to repeat the search. If there was no
1788 * free space, then we must now create more space.
1789 *
1790 * Returns: 0 on success, error code on failure
1791 */
1792
1793int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1794                 const struct gfs2_inode *nip, struct gfs2_diradd *da)
1795{
1796        struct gfs2_inode *ip = GFS2_I(inode);
1797        struct buffer_head *bh = da->bh;
1798        struct gfs2_dirent *dent = da->dent;
1799        struct timespec64 tv;
1800        struct gfs2_leaf *leaf;
1801        int error;
1802
1803        while(1) {
1804                if (da->bh == NULL) {
1805                        dent = gfs2_dirent_search(inode, name,
1806                                                  gfs2_dirent_find_space, &bh);
1807                }
1808                if (dent) {
1809                        if (IS_ERR(dent))
1810                                return PTR_ERR(dent);
1811                        dent = gfs2_init_dirent(inode, dent, name, bh);
1812                        gfs2_inum_out(nip, dent);
1813                        dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1814                        dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
1815                        tv = current_time(&ip->i_inode);
1816                        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1817                                leaf = (struct gfs2_leaf *)bh->b_data;
1818                                be16_add_cpu(&leaf->lf_entries, 1);
1819                                leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
1820                                leaf->lf_sec = cpu_to_be64(tv.tv_sec);
1821                        }
1822                        da->dent = NULL;
1823                        da->bh = NULL;
1824                        brelse(bh);
1825                        ip->i_entries++;
1826                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
1827                        if (S_ISDIR(nip->i_inode.i_mode))
1828                                inc_nlink(&ip->i_inode);
1829                        mark_inode_dirty(inode);
1830                        error = 0;
1831                        break;
1832                }
1833                if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
1834                        error = dir_make_exhash(inode);
1835                        if (error)
1836                                break;
1837                        continue;
1838                }
1839                error = dir_split_leaf(inode, name);
1840                if (error == 0)
1841                        continue;
1842                if (error < 0)
1843                        break;
1844                if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
1845                        error = dir_double_exhash(ip);
1846                        if (error)
1847                                break;
1848                        error = dir_split_leaf(inode, name);
1849                        if (error < 0)
1850                                break;
1851                        if (error == 0)
1852                                continue;
1853                }
1854                error = dir_new_leaf(inode, name);
1855                if (!error)
1856                        continue;
1857                error = -ENOSPC;
1858                break;
1859        }
1860        return error;
1861}
1862
1863
1864/**
1865 * gfs2_dir_del - Delete a directory entry
1866 * @dip: The GFS2 inode
1867 * @filename: The filename
1868 *
1869 * Returns: 0 on success, error code on failure
1870 */
1871
1872int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1873{
1874        const struct qstr *name = &dentry->d_name;
1875        struct gfs2_dirent *dent, *prev = NULL;
1876        struct buffer_head *bh;
1877        struct timespec64 tv = current_time(&dip->i_inode);
1878
1879        /* Returns _either_ the entry (if its first in block) or the
1880           previous entry otherwise */
1881        dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
1882        if (!dent) {
1883                gfs2_consist_inode(dip);
1884                return -EIO;
1885        }
1886        if (IS_ERR(dent)) {
1887                gfs2_consist_inode(dip);
1888                return PTR_ERR(dent);
1889        }
1890        /* If not first in block, adjust pointers accordingly */
1891        if (gfs2_dirent_find(dent, name, NULL) == 0) {
1892                prev = dent;
1893                dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1894        }
1895
1896        dirent_del(dip, bh, prev, dent);
1897        if (dip->i_diskflags & GFS2_DIF_EXHASH) {
1898                struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1899                u16 entries = be16_to_cpu(leaf->lf_entries);
1900                if (!entries)
1901                        gfs2_consist_inode(dip);
1902                leaf->lf_entries = cpu_to_be16(--entries);
1903                leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
1904                leaf->lf_sec = cpu_to_be64(tv.tv_sec);
1905        }
1906        brelse(bh);
1907
1908        if (!dip->i_entries)
1909                gfs2_consist_inode(dip);
1910        dip->i_entries--;
1911        dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
1912        if (d_is_dir(dentry))
1913                drop_nlink(&dip->i_inode);
1914        mark_inode_dirty(&dip->i_inode);
1915
1916        return 0;
1917}
1918
1919/**
1920 * gfs2_dir_mvino - Change inode number of directory entry
1921 * @dip: The GFS2 inode
1922 * @filename:
1923 * @new_inode:
1924 *
1925 * This routine changes the inode number of a directory entry.  It's used
1926 * by rename to change ".." when a directory is moved.
1927 * Assumes a glock is held on dvp.
1928 *
1929 * Returns: errno
1930 */
1931
1932int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1933                   const struct gfs2_inode *nip, unsigned int new_type)
1934{
1935        struct buffer_head *bh;
1936        struct gfs2_dirent *dent;
1937
1938        dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
1939        if (!dent) {
1940                gfs2_consist_inode(dip);
1941                return -EIO;
1942        }
1943        if (IS_ERR(dent))
1944                return PTR_ERR(dent);
1945
1946        gfs2_trans_add_meta(dip->i_gl, bh);
1947        gfs2_inum_out(nip, dent);
1948        dent->de_type = cpu_to_be16(new_type);
1949        brelse(bh);
1950
1951        dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
1952        mark_inode_dirty_sync(&dip->i_inode);
1953        return 0;
1954}
1955
1956/**
1957 * leaf_dealloc - Deallocate a directory leaf
1958 * @dip: the directory
1959 * @index: the hash table offset in the directory
1960 * @len: the number of pointers to this leaf
1961 * @leaf_no: the leaf number
1962 * @leaf_bh: buffer_head for the starting leaf
1963 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1964 *
1965 * Returns: errno
1966 */
1967
1968static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1969                        u64 leaf_no, struct buffer_head *leaf_bh,
1970                        int last_dealloc)
1971{
1972        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1973        struct gfs2_leaf *tmp_leaf;
1974        struct gfs2_rgrp_list rlist;
1975        struct buffer_head *bh, *dibh;
1976        u64 blk, nblk;
1977        unsigned int rg_blocks = 0, l_blocks = 0;
1978        char *ht;
1979        unsigned int x, size = len * sizeof(u64);
1980        int error;
1981
1982        error = gfs2_rindex_update(sdp);
1983        if (error)
1984                return error;
1985
1986        memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1987
1988        ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
1989        if (ht == NULL)
1990                ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO);
1991        if (!ht)
1992                return -ENOMEM;
1993
1994        error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1995        if (error)
1996                goto out;
1997
1998        /*  Count the number of leaves  */
1999        bh = leaf_bh;
2000
2001        for (blk = leaf_no; blk; blk = nblk) {
2002                if (blk != leaf_no) {
2003                        error = get_leaf(dip, blk, &bh);
2004                        if (error)
2005                                goto out_rlist;
2006                }
2007                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
2008                nblk = be64_to_cpu(tmp_leaf->lf_next);
2009                if (blk != leaf_no)
2010                        brelse(bh);
2011
2012                gfs2_rlist_add(dip, &rlist, blk);
2013                l_blocks++;
2014        }
2015
2016        gfs2_rlist_alloc(&rlist);
2017
2018        for (x = 0; x < rlist.rl_rgrps; x++) {
2019                struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl);
2020
2021                rg_blocks += rgd->rd_length;
2022        }
2023
2024        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
2025        if (error)
2026                goto out_rlist;
2027
2028        error = gfs2_trans_begin(sdp,
2029                        rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
2030                        RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE +
2031                                 l_blocks);
2032        if (error)
2033                goto out_rg_gunlock;
2034
2035        bh = leaf_bh;
2036
2037        for (blk = leaf_no; blk; blk = nblk) {
2038                struct gfs2_rgrpd *rgd;
2039
2040                if (blk != leaf_no) {
2041                        error = get_leaf(dip, blk, &bh);
2042                        if (error)
2043                                goto out_end_trans;
2044                }
2045                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
2046                nblk = be64_to_cpu(tmp_leaf->lf_next);
2047                if (blk != leaf_no)
2048                        brelse(bh);
2049
2050                rgd = gfs2_blk2rgrpd(sdp, blk, true);
2051                gfs2_free_meta(dip, rgd, blk, 1);
2052                gfs2_add_inode_blocks(&dip->i_inode, -1);
2053        }
2054
2055        error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
2056        if (error != size) {
2057                if (error >= 0)
2058                        error = -EIO;
2059                goto out_end_trans;
2060        }
2061
2062        error = gfs2_meta_inode_buffer(dip, &dibh);
2063        if (error)
2064                goto out_end_trans;
2065
2066        gfs2_trans_add_meta(dip->i_gl, dibh);
2067        /* On the last dealloc, make this a regular file in case we crash.
2068           (We don't want to free these blocks a second time.)  */
2069        if (last_dealloc)
2070                dip->i_inode.i_mode = S_IFREG;
2071        gfs2_dinode_out(dip, dibh->b_data);
2072        brelse(dibh);
2073
2074out_end_trans:
2075        gfs2_trans_end(sdp);
2076out_rg_gunlock:
2077        gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2078out_rlist:
2079        gfs2_rlist_free(&rlist);
2080        gfs2_quota_unhold(dip);
2081out:
2082        kvfree(ht);
2083        return error;
2084}
2085
2086/**
2087 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
2088 * @dip: the directory
2089 *
2090 * Dealloc all on-disk directory leaves to FREEMETA state
2091 * Change on-disk inode type to "regular file"
2092 *
2093 * Returns: errno
2094 */
2095
2096int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
2097{
2098        struct buffer_head *bh;
2099        struct gfs2_leaf *leaf;
2100        u32 hsize, len;
2101        u32 index = 0, next_index;
2102        __be64 *lp;
2103        u64 leaf_no;
2104        int error = 0, last;
2105
2106        hsize = BIT(dip->i_depth);
2107
2108        lp = gfs2_dir_get_hash_table(dip);
2109        if (IS_ERR(lp))
2110                return PTR_ERR(lp);
2111
2112        while (index < hsize) {
2113                leaf_no = be64_to_cpu(lp[index]);
2114                if (leaf_no) {
2115                        error = get_leaf(dip, leaf_no, &bh);
2116                        if (error)
2117                                goto out;
2118                        leaf = (struct gfs2_leaf *)bh->b_data;
2119                        len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth));
2120
2121                        next_index = (index & ~(len - 1)) + len;
2122                        last = ((next_index >= hsize) ? 1 : 0);
2123                        error = leaf_dealloc(dip, index, len, leaf_no, bh,
2124                                             last);
2125                        brelse(bh);
2126                        if (error)
2127                                goto out;
2128                        index = next_index;
2129                } else
2130                        index++;
2131        }
2132
2133        if (index != hsize) {
2134                gfs2_consist_inode(dip);
2135                error = -EIO;
2136        }
2137
2138out:
2139
2140        return error;
2141}
2142
2143/**
2144 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
2145 * @ip: the file being written to
2146 * @filname: the filename that's going to be added
2147 * @da: The structure to return dir alloc info
2148 *
2149 * Returns: 0 if ok, -ve on error
2150 */
2151
2152int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name,
2153                               struct gfs2_diradd *da)
2154{
2155        struct gfs2_inode *ip = GFS2_I(inode);
2156        struct gfs2_sbd *sdp = GFS2_SB(inode);
2157        const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf);
2158        struct gfs2_dirent *dent;
2159        struct buffer_head *bh;
2160
2161        da->nr_blocks = 0;
2162        da->bh = NULL;
2163        da->dent = NULL;
2164
2165        dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
2166        if (!dent) {
2167                da->nr_blocks = sdp->sd_max_dirres;
2168                if (!(ip->i_diskflags & GFS2_DIF_EXHASH) &&
2169                    (GFS2_DIRENT_SIZE(name->len) < extra))
2170                        da->nr_blocks = 1;
2171                return 0;
2172        }
2173        if (IS_ERR(dent))
2174                return PTR_ERR(dent);
2175
2176        if (da->save_loc) {
2177                da->bh = bh;
2178                da->dent = dent;
2179        } else {
2180                brelse(bh);
2181        }
2182        return 0;
2183}
2184
2185