/* linux/fs/gfs2/rgrp.c */
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/slab.h>
  13#include <linux/spinlock.h>
  14#include <linux/completion.h>
  15#include <linux/buffer_head.h>
  16#include <linux/fs.h>
  17#include <linux/gfs2_ondisk.h>
  18#include <linux/prefetch.h>
  19#include <linux/blkdev.h>
  20#include <linux/rbtree.h>
  21#include <linux/random.h>
  22
  23#include "gfs2.h"
  24#include "incore.h"
  25#include "glock.h"
  26#include "glops.h"
  27#include "lops.h"
  28#include "meta_io.h"
  29#include "quota.h"
  30#include "rgrp.h"
  31#include "super.h"
  32#include "trans.h"
  33#include "util.h"
  34#include "log.h"
  35#include "inode.h"
  36#include "trace_gfs2.h"
  37
  38#define BFITNOENT ((u32)~0)
  39#define NO_BLOCK ((u64)~0)
  40
  41#if BITS_PER_LONG == 32
  42#define LBITMASK   (0x55555555UL)
  43#define LBITSKIP55 (0x55555555UL)
  44#define LBITSKIP00 (0x00000000UL)
  45#else
  46#define LBITMASK   (0x5555555555555555UL)
  47#define LBITSKIP55 (0x5555555555555555UL)
  48#define LBITSKIP00 (0x0000000000000000UL)
  49#endif
  50
  51/*
  52 * These routines are used by the resource group routines (rgrp.c)
  53 * to keep track of block allocation.  Each block is represented by two
  54 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  55 *
  56 * 0 = Free
  57 * 1 = Used (not metadata)
  58 * 2 = Unlinked (still in use) inode
  59 * 3 = Used (metadata)
  60 */
  61
/*
 * gfs2_extent - a run of blocks within a resource group
 * @rbm: position of the first block of the extent
 * @len: number of blocks in the extent
 */
struct gfs2_extent {
	struct gfs2_rbm rbm;
	u32 len;
};
  66
/*
 * Table of permitted bitmap state transitions, indexed as
 * new_state * 4 + current_state (see gfs2_setbit()).  Rows are the new
 * state, columns the current state.  A value of 1 means the transition
 * is legal; 0 means it would indicate corruption (e.g. freeing a block
 * that is already free).
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};
  74
  75static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  76                         const struct gfs2_inode *ip, bool nowrap);
  77
  78
/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 * On an invalid state transition (per the valid_change table) the rgrp
 * is marked inconsistent and the bitmap is left unchanged.
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	unsigned int buflen = bi->bi_len;
	/* Bit position of this block's two-bit entry within its byte */
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = bi->bi_bh->b_data + bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	/* Reject transitions that indicate corruption, e.g. freeing an
	   already-free block */
	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
			rbm->offset, cur_state, new_state);
		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
			bi->bi_offset, bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	/* XOR with (old ^ new) flips exactly the bits that differ */
	*byte1 ^= (cur_state ^ new_state) << bit;

	if (do_clone && bi->bi_clone) {
		/* Mirror the change into the clone bitmap */
		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
 121
 122/**
 123 * gfs2_testbit - test a bit in the bitmaps
 124 * @rbm: The bit to test
 125 *
 126 * Returns: The two bit block state of the requested bit
 127 */
 128
 129static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 130{
 131        struct gfs2_bitmap *bi = rbm_bi(rbm);
 132        const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
 133        const u8 *byte;
 134        unsigned int bit;
 135
 136        byte = buffer + (rbm->offset / GFS2_NBBY);
 137        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 138
 139        return (*byte >> bit) & GFS2_BIT_MASK;
 140}
 141
 142/**
 143 * gfs2_bit_search
 144 * @ptr: Pointer to bitmap data
 145 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 146 * @state: The state we are searching for
 147 *
 148 * We xor the bitmap data with a patter which is the bitwise opposite
 149 * of what we are looking for, this gives rise to a pattern of ones
 150 * wherever there is a match. Since we have two bits per entry, we
 151 * take this pattern, shift it down by one place and then and it with
 152 * the original. All the even bit positions (0,2,4, etc) then represent
 153 * successful matches, so we mask with 0x55555..... to remove the unwanted
 154 * odd bit positions.
 155 *
 156 * This allows searching of a whole u64 at once (32 blocks) with a
 157 * single test (on 64 bit arches).
 158 */
 159
 160static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 161{
 162        u64 tmp;
 163        static const u64 search[] = {
 164                [0] = 0xffffffffffffffffULL,
 165                [1] = 0xaaaaaaaaaaaaaaaaULL,
 166                [2] = 0x5555555555555555ULL,
 167                [3] = 0x0000000000000000ULL,
 168        };
 169        tmp = le64_to_cpu(*ptr) ^ search[state];
 170        tmp &= (tmp >> 1);
 171        tmp &= mask;
 172        return tmp;
 173}
 174
 175/**
 176 * rs_cmp - multi-block reservation range compare
 177 * @blk: absolute file system block number of the new reservation
 178 * @len: number of blocks in the new reservation
 179 * @rs: existing reservation to compare against
 180 *
 181 * returns: 1 if the block range is beyond the reach of the reservation
 182 *         -1 if the block range is before the start of the reservation
 183 *          0 if the block range overlaps with the reservation
 184 */
 185static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 186{
 187        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 188
 189        if (blk >= startblk + rs->rs_free)
 190                return 1;
 191        if (blk + len - 1 < startblk)
 192                return -1;
 193        return 0;
 194}
 195
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found, or
 *         BFITNOENT if no block in the wanted state exists at/after @goal
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* Bit position of the goal entry within its containing u64 */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	/* 32 two-bit entries per u64, hence goal >> 5 */
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	/* ptr was post-incremented; step back to the word containing the hit */
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
 246
/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	u64 rblock = block - rbm->rgd->rd_data0;

	/* A block below rd_data0 wraps to a huge unsigned value here and
	   is caught by the UINT_MAX check */
	if (WARN_ON_ONCE(rblock > UINT_MAX))
		return -EINVAL;
	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
		return -E2BIG;

	rbm->bii = 0;
	rbm->offset = (u32)(rblock);
	/* Check if the block is within the first block */
	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	return 0;
}
 282
 283/**
 284 * gfs2_rbm_incr - increment an rbm structure
 285 * @rbm: The rbm with rgd already set correctly
 286 *
 287 * This function takes an existing rbm structure and increments it to the next
 288 * viable block offset.
 289 *
 290 * Returns: If incrementing the offset would cause the rbm to go past the
 291 *          end of the rgrp, true is returned, otherwise false.
 292 *
 293 */
 294
 295static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
 296{
 297        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
 298                rbm->offset++;
 299                return false;
 300        }
 301        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
 302                return true;
 303
 304        rbm->offset = 0;
 305        rbm->bii++;
 306        return false;
 307}
 308
 309/**
 310 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 311 * @rbm: Position to search (value/result)
 312 * @n_unaligned: Number of unaligned blocks to check
 313 * @len: Decremented for each block found (terminate on zero)
 314 *
 315 * Returns: true if a non-free block is encountered
 316 */
 317
 318static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 319{
 320        u32 n;
 321        u8 res;
 322
 323        for (n = 0; n < n_unaligned; n++) {
 324                res = gfs2_testbit(rbm);
 325                if (res != GFS2_BLKST_FREE)
 326                        return true;
 327                (*len)--;
 328                if (*len == 0)
 329                        return true;
 330                if (gfs2_rbm_incr(rbm))
 331                        return true;
 332        }
 333
 334        return false;
 335}
 336
/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;
	u32 n_unaligned = rbm.offset & 3;
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;
	struct gfs2_bitmap *bi;

	/* Step block-by-block until the position is byte (4-block) aligned */
	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		bi = rbm_bi(&rbm);
		/* Prefer the clone bitmap when present */
		start = bi->bi_bh->b_data;
		if (bi->bi_clone)
			start = bi->bi_clone;
		end = start + bi->bi_bh->b_size;
		start += bi->bi_offset;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		/* First byte that is not all-free (zero), if any */
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		/* Advance; failure means we ran off the end of the rgrp */
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			/* Hit a non-free byte: examine its blocks singly */
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
 402
 403/**
 404 * gfs2_bitcount - count the number of bits in a certain state
 405 * @rgd: the resource group descriptor
 406 * @buffer: the buffer that holds the bitmaps
 407 * @buflen: the length (in bytes) of the buffer
 408 * @state: the state of the block we're looking for
 409 *
 410 * Returns: The number of bits
 411 */
 412
 413static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 414                         unsigned int buflen, u8 state)
 415{
 416        const u8 *byte = buffer;
 417        const u8 *end = buffer + buflen;
 418        const u8 state1 = state << 2;
 419        const u8 state2 = state << 4;
 420        const u8 state3 = state << 6;
 421        u32 count = 0;
 422
 423        for (; byte < end; byte++) {
 424                if (((*byte) & 0x03) == state)
 425                        count++;
 426                if (((*byte) & 0x0C) == state1)
 427                        count++;
 428                if (((*byte) & 0x30) == state2)
 429                        count++;
 430                if (((*byte) & 0xC0) == state3)
 431                        count++;
 432        }
 433
 434        return count;
 435}
 436
 437/**
 438 * gfs2_rgrp_verify - Verify that a resource group is consistent
 439 * @rgd: the rgrp
 440 *
 441 */
 442
 443void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 444{
 445        struct gfs2_sbd *sdp = rgd->rd_sbd;
 446        struct gfs2_bitmap *bi = NULL;
 447        u32 length = rgd->rd_length;
 448        u32 count[4], tmp;
 449        int buf, x;
 450
 451        memset(count, 0, 4 * sizeof(u32));
 452
 453        /* Count # blocks in each of 4 possible allocation states */
 454        for (buf = 0; buf < length; buf++) {
 455                bi = rgd->rd_bits + buf;
 456                for (x = 0; x < 4; x++)
 457                        count[x] += gfs2_bitcount(rgd,
 458                                                  bi->bi_bh->b_data +
 459                                                  bi->bi_offset,
 460                                                  bi->bi_len, x);
 461        }
 462
 463        if (count[0] != rgd->rd_free) {
 464                if (gfs2_consist_rgrpd(rgd))
 465                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 466                               count[0], rgd->rd_free);
 467                return;
 468        }
 469
 470        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 471        if (count[1] != tmp) {
 472                if (gfs2_consist_rgrpd(rgd))
 473                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 474                               count[1], tmp);
 475                return;
 476        }
 477
 478        if (count[2] + count[3] != rgd->rd_dinodes) {
 479                if (gfs2_consist_rgrpd(rgd))
 480                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 481                               count[2] + count[3], rgd->rd_dinodes);
 482                return;
 483        }
 484}
 485
 486/**
 487 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 488 * @sdp: The GFS2 superblock
 489 * @blk: The data block number
 490 * @exact: True if this needs to be an exact match
 491 *
 492 * Returns: The resource group, or NULL if not found
 493 */
 494
 495struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 496{
 497        struct rb_node *n, *next;
 498        struct gfs2_rgrpd *cur;
 499
 500        spin_lock(&sdp->sd_rindex_spin);
 501        n = sdp->sd_rindex_tree.rb_node;
 502        while (n) {
 503                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 504                next = NULL;
 505                if (blk < cur->rd_addr)
 506                        next = n->rb_left;
 507                else if (blk >= cur->rd_data0 + cur->rd_data)
 508                        next = n->rb_right;
 509                if (next == NULL) {
 510                        spin_unlock(&sdp->sd_rindex_spin);
 511                        if (exact) {
 512                                if (blk < cur->rd_addr)
 513                                        return NULL;
 514                                if (blk >= cur->rd_data0 + cur->rd_data)
 515                                        return NULL;
 516                        }
 517                        return cur;
 518                }
 519                n = next;
 520        }
 521        spin_unlock(&sdp->sd_rindex_spin);
 522
 523        return NULL;
 524}
 525
 526/**
 527 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 528 * @sdp: The GFS2 superblock
 529 *
 530 * Returns: The first rgrp in the filesystem
 531 */
 532
 533struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 534{
 535        const struct rb_node *n;
 536        struct gfs2_rgrpd *rgd;
 537
 538        spin_lock(&sdp->sd_rindex_spin);
 539        n = rb_first(&sdp->sd_rindex_tree);
 540        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 541        spin_unlock(&sdp->sd_rindex_spin);
 542
 543        return rgd;
 544}
 545
 546/**
 547 * gfs2_rgrpd_get_next - get the next RG
 548 * @rgd: the resource group descriptor
 549 *
 550 * Returns: The next rgrp
 551 */
 552
 553struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 554{
 555        struct gfs2_sbd *sdp = rgd->rd_sbd;
 556        const struct rb_node *n;
 557
 558        spin_lock(&sdp->sd_rindex_spin);
 559        n = rb_next(&rgd->rd_node);
 560        if (n == NULL)
 561                n = rb_first(&sdp->sd_rindex_tree);
 562
 563        if (unlikely(&rgd->rd_node == n)) {
 564                spin_unlock(&sdp->sd_rindex_spin);
 565                return NULL;
 566        }
 567        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 568        spin_unlock(&sdp->sd_rindex_spin);
 569        return rgd;
 570}
 571
 572void check_and_update_goal(struct gfs2_inode *ip)
 573{
 574        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 575        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 576                ip->i_goal = ip->i_no_addr;
 577}
 578
 579void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 580{
 581        int x;
 582
 583        for (x = 0; x < rgd->rd_length; x++) {
 584                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 585                kfree(bi->bi_clone);
 586                bi->bi_clone = NULL;
 587        }
 588}
 589
/**
 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 *                 plus a quota allocations data structure, if necessary
 * @ip: the inode for this reservation
 *
 * The block reservation itself (ip->i_res) is embedded in the inode, so
 * only the quota allocation structure may need allocating.
 */
int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
	return gfs2_qa_alloc(ip);
}
 599
/**
 * dump_rs - print debug info about a single multi-block reservation
 * @seq: The seq_file to print to
 * @rs: The reservation to dump
 */
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_inum,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}
 607
/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 * Caller must hold rgd->rd_rsspin (see gfs2_rs_deltree() and
 * return_all_reservations()).  Any still-reserved blocks are returned
 * to the rgrp's accounting.
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);

		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows. Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		/* The bitmap may have free space again */
		clear_bit(GBF_FULL, &bi->bi_flags);
	}
}
 640
 641/**
 642 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 643 * @rs: The reservation to remove
 644 *
 645 */
 646void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 647{
 648        struct gfs2_rgrpd *rgd;
 649
 650        rgd = rs->rs_rbm.rgd;
 651        if (rgd) {
 652                spin_lock(&rgd->rd_rsspin);
 653                __rs_deltree(rs);
 654                BUG_ON(rs->rs_free);
 655                spin_unlock(&rgd->rd_rsspin);
 656        }
 657}
 658
 659/**
 660 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 661 * @ip: The inode for this reservation
 662 * @wcount: The inode's write count, or NULL
 663 *
 664 */
 665void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
 666{
 667        down_write(&ip->i_rw_mutex);
 668        if ((wcount == NULL) || (atomic_read(wcount) <= 1))
 669                gfs2_rs_deltree(&ip->i_res);
 670        up_write(&ip->i_rw_mutex);
 671        gfs2_qa_delete(ip, wcount);
 672}
 673
 674/**
 675 * return_all_reservations - return all reserved blocks back to the rgrp.
 676 * @rgd: the rgrp that needs its space back
 677 *
 678 * We previously reserved a bunch of blocks for allocation. Now we need to
 679 * give them back. This leave the reservation structures in tact, but removes
 680 * all of their corresponding "no-fly zones".
 681 */
 682static void return_all_reservations(struct gfs2_rgrpd *rgd)
 683{
 684        struct rb_node *n;
 685        struct gfs2_blkreserv *rs;
 686
 687        spin_lock(&rgd->rd_rsspin);
 688        while ((n = rb_first(&rgd->rd_rstree))) {
 689                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 690                __rs_deltree(rs);
 691        }
 692        spin_unlock(&rgd->rd_rsspin);
 693}
 694
 695void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 696{
 697        struct rb_node *n;
 698        struct gfs2_rgrpd *rgd;
 699        struct gfs2_glock *gl;
 700
 701        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 702                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 703                gl = rgd->rd_gl;
 704
 705                rb_erase(n, &sdp->sd_rindex_tree);
 706
 707                if (gl) {
 708                        glock_clear_object(gl, rgd);
 709                        gfs2_glock_put(gl);
 710                }
 711
 712                gfs2_free_clones(rgd);
 713                kfree(rgd->rd_bits);
 714                rgd->rd_bits = NULL;
 715                return_all_reservations(rgd);
 716                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 717        }
 718}
 719
/**
 * gfs2_rindex_print - print an rgrp's on-disk rindex fields
 * @rgd: the resource group descriptor to dump
 *
 * Used when reporting resource group inconsistencies (see
 * compute_bitstructs()).
 */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	pr_info("ri_length = %u\n", rgd->rd_length);
	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	pr_info("ri_data = %u\n", rgd->rd_data);
	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
 728
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		}

		bytes_left -= bytes;
	}

	/* Every bitmap byte must have been assigned to some descriptor */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	bi = rgd->rd_bits + (length - 1);
	/* The last descriptor must end exactly at rd_data blocks */
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
 809
 810/**
 811 * gfs2_ri_total - Total up the file system space, according to the rindex.
 812 * @sdp: the filesystem
 813 *
 814 */
 815u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 816{
 817        u64 total_data = 0;     
 818        struct inode *inode = sdp->sd_rindex;
 819        struct gfs2_inode *ip = GFS2_I(inode);
 820        char buf[sizeof(struct gfs2_rindex)];
 821        int error, rgrps;
 822
 823        for (rgrps = 0;; rgrps++) {
 824                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 825
 826                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 827                        break;
 828                error = gfs2_internal_read(ip, buf, &pos,
 829                                           sizeof(struct gfs2_rindex));
 830                if (error != sizeof(struct gfs2_rindex))
 831                        break;
 832                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 833        }
 834        return total_data;
 835}
 836
 837static int rgd_insert(struct gfs2_rgrpd *rgd)
 838{
 839        struct gfs2_sbd *sdp = rgd->rd_sbd;
 840        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 841
 842        /* Figure out where to put new node */
 843        while (*newn) {
 844                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 845                                                  rd_node);
 846
 847                parent = *newn;
 848                if (rgd->rd_addr < cur->rd_addr)
 849                        newn = &((*newn)->rb_left);
 850                else if (rgd->rd_addr > cur->rd_addr)
 851                        newn = &((*newn)->rb_right);
 852                else
 853                        return -EEXIST;
 854        }
 855
 856        rb_link_node(&rgd->rd_node, parent, newn);
 857        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 858        sdp->sd_rgrps++;
 859        return 0;
 860}
 861
 862/**
 863 * read_rindex_entry - Pull in a new resource index entry from the disk
 864 * @ip: Pointer to the rindex inode
 865 *
 866 * Returns: 0 on success, > 0 on EOF, error code otherwise
 867 */
 868
static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	/* sd_rgrps counts entries already read, so this is the file offset
	   of the next unread rindex entry */
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;	/* EOF: no more entries */

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	/* A zero-byte read also means EOF; a short read is an error */
	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	/* Convert the on-disk (big-endian) entry into the in-core rgrpd */
	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	/* The rgrp's lock value block lives inside the glock's LKSB */
	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error) {
		glock_set_object(rgd->rd_gl, rgd);
		/* Record the page-aligned byte range spanned by this rgrp's
		   metadata blocks (rd_addr .. rd_addr + rd_length) */
		rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
		rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
						    rgd->rd_length) * bsize) - 1;
		return 0;
	}

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	rgd->rd_bits = NULL;
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}
 933
 934/**
 935 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 936 * @sdp: the GFS2 superblock
 937 *
 938 * The purpose of this function is to select a subset of the resource groups
 939 * and mark them as PREFERRED. We do it in such a way that each node prefers
 940 * to use a unique set of rgrps to minimize glock contention.
 941 */
 942static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 943{
 944        struct gfs2_rgrpd *rgd, *first;
 945        int i;
 946
 947        /* Skip an initial number of rgrps, based on this node's journal ID.
 948           That should start each node out on its own set. */
 949        rgd = gfs2_rgrpd_get_first(sdp);
 950        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 951                rgd = gfs2_rgrpd_get_next(rgd);
 952        first = rgd;
 953
 954        do {
 955                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 956                for (i = 0; i < sdp->sd_journals; i++) {
 957                        rgd = gfs2_rgrpd_get_next(rgd);
 958                        if (!rgd || rgd == first)
 959                                break;
 960                }
 961        } while (rgd && rgd != first);
 962}
 963
 964/**
 965 * gfs2_ri_update - Pull in a new resource index from the disk
 966 * @ip: pointer to the rindex inode
 967 *
 968 * Returns: 0 on successful update, error code otherwise
 969 */
 970
 971static int gfs2_ri_update(struct gfs2_inode *ip)
 972{
 973        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 974        int error;
 975
 976        do {
 977                error = read_rindex_entry(ip);
 978        } while (error == 0);
 979
 980        if (error < 0)
 981                return error;
 982
 983        set_rgrp_preferences(sdp);
 984
 985        sdp->sd_rindex_uptodate = 1;
 986        return 0;
 987}
 988
 989/**
 990 * gfs2_rindex_update - Update the rindex if required
 991 * @sdp: The GFS2 superblock
 992 *
 993 * We grab a lock on the rindex inode to make sure that it doesn't
 994 * change whilst we are performing an operation. We keep this lock
 995 * for quite long periods of time compared to other locks. This
 996 * doesn't matter, since it is shared and it is very, very rarely
 997 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 998 *
 999 * This makes sure that we're using the latest copy of the resource index
1000 * special file, which might have been updated if someone expanded the
1001 * filesystem (via gfs2_grow utility), which adds new resource groups.
1002 *
 * Returns: 0 on success, error code otherwise
1004 */
1005
1006int gfs2_rindex_update(struct gfs2_sbd *sdp)
1007{
1008        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1009        struct gfs2_glock *gl = ip->i_gl;
1010        struct gfs2_holder ri_gh;
1011        int error = 0;
1012        int unlock_required = 0;
1013
1014        /* Read new copy from disk if we don't have the latest */
1015        if (!sdp->sd_rindex_uptodate) {
1016                if (!gfs2_glock_is_locked_by_me(gl)) {
1017                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1018                        if (error)
1019                                return error;
1020                        unlock_required = 1;
1021                }
1022                if (!sdp->sd_rindex_uptodate)
1023                        error = gfs2_ri_update(ip);
1024                if (unlock_required)
1025                        gfs2_glock_dq_uninit(&ri_gh);
1026        }
1027
1028        return error;
1029}
1030
1031static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1032{
1033        const struct gfs2_rgrp *str = buf;
1034        u32 rg_flags;
1035
1036        rg_flags = be32_to_cpu(str->rg_flags);
1037        rg_flags &= ~GFS2_RDF_MASK;
1038        rgd->rd_flags &= GFS2_RDF_MASK;
1039        rgd->rd_flags |= rg_flags;
1040        rgd->rd_free = be32_to_cpu(str->rg_free);
1041        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1042        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1043}
1044
1045static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1046{
1047        struct gfs2_rgrp *str = buf;
1048
1049        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1050        str->rg_free = cpu_to_be32(rgd->rd_free);
1051        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1052        str->__pad = cpu_to_be32(0);
1053        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1054        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1055}
1056
1057static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1058{
1059        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1060        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1061
1062        if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1063            rgl->rl_dinodes != str->rg_dinodes ||
1064            rgl->rl_igeneration != str->rg_igeneration)
1065                return 0;
1066        return 1;
1067}
1068
1069static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1070{
1071        const struct gfs2_rgrp *str = buf;
1072
1073        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1074        rgl->rl_flags = str->rg_flags;
1075        rgl->rl_free = str->rg_free;
1076        rgl->rl_dinodes = str->rg_dinodes;
1077        rgl->rl_igeneration = str->rg_igeneration;
1078        rgl->__pad = 0UL;
1079}
1080
1081static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1082{
1083        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1084        u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1085        rgl->rl_unlinked = cpu_to_be32(unlinked);
1086}
1087
1088static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1089{
1090        struct gfs2_bitmap *bi;
1091        const u32 length = rgd->rd_length;
1092        const u8 *buffer = NULL;
1093        u32 i, goal, count = 0;
1094
1095        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1096                goal = 0;
1097                buffer = bi->bi_bh->b_data + bi->bi_offset;
1098                WARN_ON(!buffer_uptodate(bi->bi_bh));
1099                while (goal < bi->bi_len * GFS2_NBBY) {
1100                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1101                                           GFS2_BLKST_UNLINKED);
1102                        if (goal == BFITNOENT)
1103                                break;
1104                        count++;
1105                        goal++;
1106                }
1107        }
1108
1109        return count;
1110}
1111
1112
1113/**
1114 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1115 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1116 *
1117 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
1119 *
1120 * Returns: errno
1121 */
1122
static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* Bitmaps already read in; nothing to do */
	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	/* Submit reads for all bitmap blocks up front... */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* ...then wait for completion and verify each block's metadata
	   type: the first block is the RG header, the rest are RB
	   (bitmap) blocks */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* First read since the glock was granted: refresh the in-core
	   rgrp state from the on-disk header */
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	/* If the lock value block carries no valid magic, (re)initialize
	   it from the header we just read */
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	}
	else if (sdp->sd_args.ar_rgrplvb) {
		/* LVBs are in use: the LVB must agree with the on-disk
		   header, otherwise the filesystem is inconsistent */
		if (!gfs2_rgrp_lvb_valid(rgd)){
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	/* Release every buffer read so far (x == length if the first
	   loop completed) */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}
1189
static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
	u32 rl_flags;

	/* Nothing to do if the in-core copy is already current */
	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
		return 0;

	/* No valid magic in the LVB: fall back to reading the header
	   and bitmaps from disk */
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
		return gfs2_rgrp_bh_get(rgd);

	/* Populate the in-core rgrp from the lock value block alone,
	   keeping the in-core-only flag bits */
	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
	rl_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
	if (rgd->rd_rgl->rl_unlinked == 0)
		rgd->rd_flags &= ~GFS2_RDF_CHECK;	/* no unlinked inodes to scan */
	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
	return 0;
}
1212
1213int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1214{
1215        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1216        struct gfs2_sbd *sdp = rgd->rd_sbd;
1217
1218        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1219                return 0;
1220        return gfs2_rgrp_bh_get(rgd);
1221}
1222
1223/**
1224 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1225 * @rgd: The resource group
1226 *
1227 */
1228
1229void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1230{
1231        int x, length = rgd->rd_length;
1232
1233        for (x = 0; x < length; x++) {
1234                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1235                if (bi->bi_bh) {
1236                        brelse(bi->bi_bh);
1237                        bi->bi_bh = NULL;
1238                }
1239        }
1240
1241}
1242
1243/**
1244 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1245 * @gh: The glock holder for the resource group
1246 *
1247 */
1248
1249void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1250{
1251        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1252        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1253                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1254
1255        if (rgd && demote_requested)
1256                gfs2_rgrp_brelse(rgd);
1257}
1258
/**
 * gfs2_rgrp_send_discards - Issue discards for free blocks in a bitmap
 * @sdp: the filesystem
 * @offset: filesystem block address corresponding to the start of @bi
 * @bh: if set, an older copy of the bitmap; only blocks freed since
 *      @bh are discarded.  If NULL, all currently-free blocks are.
 * @bi: the bitmap to scan
 * @minlen: minimum extent length (in blocks) worth discarding
 * @ptrimmed: if set, receives the number of blocks discarded
 *
 * Returns: 0 on success; -EIO on discard failure (and discards are
 *          disabled for the filesystem)
 */
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			     struct buffer_head *bh,
			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	/* Walk the bitmap one byte (GFS2_NBBY = 4 block states) at a time */
	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			/* Bits for blocks free now but not free in @bh */
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* Bits for all currently-free blocks */
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;	/* keep the low bit of each 2-bit state */
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		/* Accumulate runs of free blocks into extents */
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					/* Current extent ended: discard it if
					   long enough, then begin a new one */
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;
			blk++;
		}
	}
	/* Discard the final extent, if any */
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	/* Discard failed: warn once and stop issuing discards entirely */
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
1325
1326/**
1327 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1328 * @filp: Any file on the filesystem
1329 * @argp: Pointer to the arguments (also used to pass result)
1330 *
1331 * Returns: 0 on success, otherwise error code
1332 */
1333
1334int gfs2_fitrim(struct file *filp, void __user *argp)
1335{
1336        struct inode *inode = file_inode(filp);
1337        struct gfs2_sbd *sdp = GFS2_SB(inode);
1338        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1339        struct buffer_head *bh;
1340        struct gfs2_rgrpd *rgd;
1341        struct gfs2_rgrpd *rgd_end;
1342        struct gfs2_holder gh;
1343        struct fstrim_range r;
1344        int ret = 0;
1345        u64 amt;
1346        u64 trimmed = 0;
1347        u64 start, end, minlen;
1348        unsigned int x;
1349        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1350
1351        if (!capable(CAP_SYS_ADMIN))
1352                return -EPERM;
1353
1354        if (!blk_queue_discard(q))
1355                return -EOPNOTSUPP;
1356
1357        if (copy_from_user(&r, argp, sizeof(r)))
1358                return -EFAULT;
1359
1360        ret = gfs2_rindex_update(sdp);
1361        if (ret)
1362                return ret;
1363
1364        start = r.start >> bs_shift;
1365        end = start + (r.len >> bs_shift);
1366        minlen = max_t(u64, r.minlen,
1367                       q->limits.discard_granularity) >> bs_shift;
1368
1369        if (end <= start || minlen > sdp->sd_max_rg_data)
1370                return -EINVAL;
1371
1372        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1373        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1374
1375        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1376            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1377                return -EINVAL; /* start is beyond the end of the fs */
1378
1379        while (1) {
1380
1381                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1382                if (ret)
1383                        goto out;
1384
1385                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1386                        /* Trim each bitmap in the rgrp */
1387                        for (x = 0; x < rgd->rd_length; x++) {
1388                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1389                                ret = gfs2_rgrp_send_discards(sdp,
1390                                                rgd->rd_data0, NULL, bi, minlen,
1391                                                &amt);
1392                                if (ret) {
1393                                        gfs2_glock_dq_uninit(&gh);
1394                                        goto out;
1395                                }
1396                                trimmed += amt;
1397                        }
1398
1399                        /* Mark rgrp as having been trimmed */
1400                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1401                        if (ret == 0) {
1402                                bh = rgd->rd_bits[0].bi_bh;
1403                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1404                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1405                                gfs2_rgrp_out(rgd, bh->b_data);
1406                                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1407                                gfs2_trans_end(sdp);
1408                        }
1409                }
1410                gfs2_glock_dq_uninit(&gh);
1411
1412                if (rgd == rgd_end)
1413                        break;
1414
1415                rgd = gfs2_rgrpd_get_next(rgd);
1416        }
1417
1418out:
1419        r.len = trimmed << bs_shift;
1420        if (copy_to_user(argp, &r, sizeof(r)))
1421                return -EFAULT;
1422
1423        return ret;
1424}
1425
1426/**
1427 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1428 * @ip: the inode structure
1429 *
1430 */
static void rs_insert(struct gfs2_inode *ip)
{
	struct rb_node **newn, *parent = NULL;
	int rc;
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

	/* The reservation must not already be linked into any tree */
	BUG_ON(gfs2_rs_active(rs));

	spin_lock(&rgd->rd_rsspin);
	newn = &rgd->rd_rstree.rb_node;
	/* Find the insertion point, ordered by rs_cmp() */
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);

		parent = *newn;
		rc = rs_cmp(fsblock, rs->rs_free, cur);
		if (rc > 0)
			newn = &((*newn)->rb_right);
		else if (rc < 0)
			newn = &((*newn)->rb_left);
		else {
			/* rc == 0: rs_cmp found a conflicting entry
			   (presumably an overlapping range); give up */
			spin_unlock(&rgd->rd_rsspin);
			WARN_ON(1);
			return;
		}
	}

	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
	spin_unlock(&rgd->rd_rsspin);
	trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
1468
1469/**
1470 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1471 * @rgd: the resource group descriptor
1472 * @ip: pointer to the inode for which we're reserving blocks
1473 * @ap: the allocation parameters
1474 *
1475 */
1476
static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = &ip->i_res;
	u32 extlen;
	/* NOTE(review): this subtraction wraps if rd_reserved exceeds
	   rd_free_clone; the huge wrapped value is harmless because the
	   explicit comparison below returns early in that case */
	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
	int ret;
	struct inode *inode = &ip->i_inode;

	if (S_ISDIR(inode->i_mode))
		extlen = 1;	/* directories reserve a single block */
	else {
		/* Size the reservation from the inode's write-size hint,
		   bounded below and by the blocks actually available */
		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
	if (ret == 0) {
		/* Found room: record the reservation and insert it into
		   the rgrp's reservation tree */
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs->rs_inum = ip->i_no_addr;
		rs_insert(ip);
	} else {
		/* Search from rd_last_alloc came up empty; reset it so the
		   next attempt starts at the beginning of the rgrp */
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}
1517
1518/**
1519 * gfs2_next_unreserved_block - Return next block that is not reserved
1520 * @rgd: The resource group
1521 * @block: The starting block
1522 * @length: The required length
1523 * @ip: Ignore any reservations for this inode
1524 *
1525 * If the block does not appear in any reservation, then return the
1526 * block number unchanged. If it does appear in the reservation, then
1527 * keep looking through the tree of reservations in order to find the
1528 * first block number which is not reserved.
1529 */
1530
static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	n = rgd->rd_rstree.rb_node;
	/* Binary search for a reservation matching [block, block+length)
	   according to rs_cmp() */
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		/* Skip past this and any immediately following matching
		   reservations — except reservations owned by @ip itself,
		   which the caller is allowed to use */
		while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}
1565
1566/**
1567 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1568 * @rbm: The current position in the resource group
1569 * @ip: The inode for which we are searching for blocks
1570 * @minext: The minimum extent length
1571 * @maxext: A pointer to the maximum extent structure
1572 *
1573 * This checks the current position in the rgrp to see whether there is
1574 * a reservation covering this block. If not then this function is a
1575 * no-op. If there is, then the position is moved to the end of the
1576 * contiguous reservation(s) so that we are pointing at the first
1577 * non-reserved block.
1578 *
1579 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1580 */
1581
static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;
	int ret;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		if (extlen <= maxext->len)
			goto fail;	/* no better than the best seen so far */
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		/* Extent is unreserved; accept it when long enough */
		if (!minext || extlen >= minext)
			return 0;

		/* Too short: remember the largest extent found so far so
		   the caller can fall back to it */
		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
fail:
		/* Move the search position past the rejected extent */
		nblock = block + extlen;
	}
	ret = gfs2_rbm_from_block(rbm, nblock);
	if (ret < 0)
		return ret;
	return 1;	/* @rbm has moved; the caller should retry */
}
1623
1624/**
1625 * gfs2_rbm_find - Look for blocks of a particular state
1626 * @rbm: Value/result starting position and final position
1627 * @state: The state which we want to find
1628 * @minext: Pointer to the requested extent length (NULL for a single block)
1629 *          This is updated to be the actual reservation size.
1630 * @ip: If set, check for reservations
1631 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1632 *          around until we've reached the starting point.
1633 *
1634 * Side effects:
1635 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1636 *   has no free blocks in it.
1637 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1638 *   has come up short on a free block search.
1639 *
1640 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1641 */
1642
static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap)
{
	struct buffer_head *bh;
	int initial_bii;
	u32 initial_offset;
	int first_bii = rbm->bii;		/* starting position, for rd_extfail_pt */
	u32 first_offset = rbm->offset;
	u32 offset;
	u8 *buffer;
	int n = 0;				/* bitmaps searched so far */
	int iters = rbm->rgd->rd_length;
	int ret;
	struct gfs2_bitmap *bi;
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/* If we are not starting at the beginning of a bitmap, then we
	 * need to add one to the bitmap count to ensure that we search
	 * the starting bitmap twice.
	 */
	if (rbm->offset != 0)
		iters++;

	while(1) {
		bi = rbm_bi(rbm);
		/* A bitmap flagged full cannot contain a free block */
		if (test_bit(GBF_FULL, &bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = bi->bi_bh;
		buffer = bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		/* For all states except UNLINKED, search the clone bitmap
		   instead of the live one when a clone exists */
		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;
		initial_offset = rbm->offset;
		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
		if (offset == BFITNOENT)
			goto bitmap_full;
		rbm->offset = offset;
		/* Without an inode there are no reservations to respect */
		if (ip == NULL)
			return 0;

		initial_bii = rbm->bii;
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0) {
			/* Position was moved past a reservation; account
			   for any bitmaps skipped and keep searching */
			n += (rbm->bii - initial_bii);
			goto next_iter;
		}
		if (ret == -E2BIG) {
			/* Reservation extends past the end of the rgrp:
			   wrap back to the first bitmap */
			rbm->bii = 0;
			rbm->offset = 0;
			n += (rbm->bii - initial_bii);
			goto res_covered_end_of_rgrp;
		}
		return ret;

bitmap_full:	/* Mark bitmap as full and fall through */
		if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
			set_bit(GBF_FULL, &bi->bi_flags);

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		rbm->bii++;
		if (rbm->bii == rbm->rgd->rd_length)
			rbm->bii = 0;
res_covered_end_of_rgrp:
		if ((rbm->bii == 0) && nowrap)
			break;
		n++;
next_iter:
		if (n >= iters)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if ((first_offset == 0) && (first_bii == 0) &&
	    (*minext < rbm->rgd->rd_extfail_pt))
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}
1741
/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * Scans @rgd for blocks in the GFS2_BLKST_UNLINKED state and, for each
 * such inode that is not already cached, queues delete work on its
 * iopen glock so the space can eventually be reclaimed.  Returns
 * nothing; @last_unlinked records how far the scan got, so repeated
 * calls make forward progress.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
	u64 block;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl;
	struct gfs2_inode *ip;
	int error;
	int found = 0;
	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

	while (1) {
		/* Search for the next unlinked block while holding the log
		 * flush lock so the bitmaps cannot change underneath us. */
		down_write(&sdp->sd_log_flush_lock);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
				      true);
		up_write(&sdp->sd_log_flush_lock);
		if (error == -ENOSPC)
			break;
		if (WARN_ON_ONCE(error))
			break;

		block = gfs2_rbm_to_block(&rbm);
		/* Resume the next search one block further on */
		if (gfs2_rbm_from_block(&rbm, block + 1))
			break;
		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
			continue;
		if (block == skip)
			continue;
		*last_unlinked = block;

		error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
		if (error)
			continue;

		/* If the inode is already in cache, we can ignore it here
		 * because the existing inode disposal code will deal with
		 * it when all refs have gone away. Accessing gl_object like
		 * this is not safe in general. Here it is ok because we do
		 * not dereference the pointer, and we only need an approx
		 * answer to whether it is NULL or not.
		 */
		ip = gl->gl_object;

		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
			gfs2_glock_put(gl);
		else
			found++;

		/* Limit reclaim to sensible number of tasks */
		if (found > NR_CPUS)
			return;
	}

	/* The scan ran to completion, so clear the flag that triggers it
	 * (see gfs2_inplace_reserve).  Not reached on the early return
	 * above, so the flag stays set and a later call resumes the work. */
	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return;
}
1807
/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not land up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple, we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
 *
 * Returns: A boolean verdict on the congestion status
 */

static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
	const struct gfs2_glock *gl = rgd->rd_gl;
	const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_lkstats *st;
	u64 r_dcount, l_dcount;
	u64 l_srttb, a_srttb = 0;
	s64 srttb_diff;
	u64 sqr_diff;
	u64 var;
	int cpu, nonzero = 0;

	preempt_disable();
	/* Sum the per-cpu rgrp SRTTB samples, skipping CPUs with no data */
	for_each_present_cpu(cpu) {
		st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
		if (st->stats[GFS2_LKS_SRTTB]) {
			a_srttb += st->stats[GFS2_LKS_SRTTB];
			nonzero++;
		}
	}
	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
	if (nonzero)
		do_div(a_srttb, nonzero);	/* average over CPUs with data */
	r_dcount = st->stats[GFS2_LKS_DCOUNT];
	var = st->stats[GFS2_LKS_SRTTVARB] +
	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
	preempt_enable();

	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];

	/* Not enough data to make a statistically meaningful decision */
	if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
		return false;

	srttb_diff = a_srttb - l_srttb;
	sqr_diff = srttb_diff * srttb_diff;

	/* Widen the margin when samples are sparse or on a later loop */
	var *= 2;
	if (l_dcount < 8 || r_dcount < 8)
		var *= 2;
	if (loops == 1)
		var *= 2;

	return ((srttb_diff < 0) && (sqr_diff > var));
}
1880
1881/**
1882 * gfs2_rgrp_used_recently
1883 * @rs: The block reservation with the rgrp to test
1884 * @msecs: The time limit in milliseconds
1885 *
1886 * Returns: True if the rgrp glock has been used within the time limit
1887 */
1888static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1889                                    u64 msecs)
1890{
1891        u64 tdiff;
1892
1893        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1894                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1895
1896        return tdiff > (msecs * 1000 * 1000);
1897}
1898
1899static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1900{
1901        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1902        u32 skip;
1903
1904        get_random_bytes(&skip, sizeof(skip));
1905        return skip % sdp->sd_rgrps;
1906}
1907
1908static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1909{
1910        struct gfs2_rgrpd *rgd = *pos;
1911        struct gfs2_sbd *sdp = rgd->rd_sbd;
1912
1913        rgd = gfs2_rgrpd_get_next(rgd);
1914        if (rgd == NULL)
1915                rgd = gfs2_rgrpd_get_first(sdp);
1916        *pos = rgd;
1917        if (rgd != begin) /* If we didn't wrap */
1918                return true;
1919        return false;
1920}
1921
1922/**
1923 * fast_to_acquire - determine if a resource group will be fast to acquire
1924 *
1925 * If this is one of our preferred rgrps, it should be quicker to acquire,
1926 * because we tried to set ourselves up as dlm lock master.
1927 */
1928static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1929{
1930        struct gfs2_glock *gl = rgd->rd_gl;
1931
1932        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1933            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1934            !test_bit(GLF_DEMOTE, &gl->gl_flags))
1935                return 1;
1936        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1937                return 1;
1938        return 0;
1939}
1940
/**
 * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @ap: the allocation parameters
 *
 * We try our best to find an rgrp that has at least ap->target blocks
 * available. After a couple of passes (loops == 2), the prospects of finding
 * such an rgrp diminish. At this stage, we return the first rgrp that has
 * at least ap->min_target blocks available. Either way, we set ap->allowed to
 * the number of blocks available in the chosen rgrp.
 *
 * Returns: 0 on success,
 *          -ENOSPC if no suitable rgrp was found after all passes
 *          errno otherwise
 */

int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = &ip->i_res;
	int error = 0, rg_locked, flags = 0;
	u64 last_unlinked = NO_BLOCK;
	int loops = 0;
	u32 skip = 0;

	if (sdp->sd_args.ar_rgrplvb)
		flags |= GL_SKIP;
	if (gfs2_assert_warn(sdp, ap->target))
		return -EINVAL;
	/* Choose the starting rgrp: an active reservation, the cached rgrp
	 * containing the inode's goal block, or look the goal block up. */
	if (gfs2_rs_active(rs)) {
		begin = rs->rs_rbm.rgd;
	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
		rs->rs_rbm.rgd = begin = ip->i_rgd;
	} else {
		check_and_update_goal(ip);
		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
	}
	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
		skip = gfs2_orlov_skip(ip);
	if (rs->rs_rbm.rgd == NULL)
		return -EBADSLT;

	while (loops < 3) {
		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
			rg_locked = 0;
			if (skip && skip--)
				goto next_rgrp;
			if (!gfs2_rs_active(rs)) {
				if (loops == 0 &&
				    !fast_to_acquire(rs->rs_rbm.rgd))
					goto next_rgrp;
				if ((loops < 2) &&
				    gfs2_rgrp_used_recently(rs, 1000) &&
				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
					goto next_rgrp;
			}
			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
						   LM_ST_EXCLUSIVE, flags,
						   &rs->rs_rgd_gh);
			if (unlikely(error))
				return error;
			/* Re-check congestion now that we hold the glock */
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rbm.rgd);
				if (unlikely(error)) {
					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
					return error;
				}
			}
		}

		/* Skip unusable resource groups */
		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
						 GFS2_RDF_ERROR)) ||
		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
			goto skip_rgrp;

		if (sdp->sd_args.ar_rgrplvb)
			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

		/* Get a reservation if we don't already have one */
		if (!gfs2_rs_active(rs))
			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

		/* Skip rgrps when we can't get a reservation on first pass */
		if (!gfs2_rs_active(rs) && (loops < 1))
			goto check_rgrp;

		/* If rgrp has enough free space, use it */
		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
		    (loops == 2 && ap->min_target &&
		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
			ip->i_rgd = rs->rs_rbm.rgd;
			ap->allowed = ip->i_rgd->rd_free_clone;
			return 0;
		}
check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);

		/* Unlock rgrp if required */
		if (!rg_locked)
			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
next_rgrp:
		/* Find the next rgrp, and continue looking */
		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
			continue;
		if (skip)
			continue;

		/* If we've scanned all the rgrps, but found no free blocks
		 * then this checks for some less likely conditions before
		 * trying again.
		 */
		loops++;
		/* Check that fs hasn't grown if writing to rindex */
		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				return error;
		}
		/* Flushing the log may release space */
		if (loops == 2)
			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
	}

	return -ENOSPC;
}
2080
2081/**
2082 * gfs2_inplace_release - release an inplace reservation
2083 * @ip: the inode the reservation was taken out on
2084 *
2085 * Release a reservation made by gfs2_inplace_reserve().
2086 */
2087
2088void gfs2_inplace_release(struct gfs2_inode *ip)
2089{
2090        struct gfs2_blkreserv *rs = &ip->i_res;
2091
2092        if (gfs2_holder_initialized(&rs->rs_rgd_gh))
2093                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2094}
2095
2096/**
2097 * gfs2_get_block_type - Check a block in a RG is of given type
2098 * @rgd: the resource group holding the block
2099 * @block: the block number
2100 *
2101 * Returns: The block type (GFS2_BLKST_*)
2102 */
2103
2104static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2105{
2106        struct gfs2_rbm rbm = { .rgd = rgd, };
2107        int ret;
2108
2109        ret = gfs2_rbm_from_block(&rbm, block);
2110        WARN_ON_ONCE(ret != 0);
2111
2112        return gfs2_testbit(&rbm);
2113}
2114
2115
/**
 * gfs2_alloc_extent - allocate an extent from a given bitmap
 * @rbm: the resource group information
 * @dinode: TRUE if the first block we allocate is for a dinode
 * @n: The extent length (value/result): requested length on entry,
 *     the number of blocks actually allocated on exit
 *
 * Add the bitmap buffer to the transaction.
 * Set the found bits to @new_state to change block's allocation state.
 * The extent stops short at the first non-free block, or where the next
 * block no longer maps into the rgrp.
 */
static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
			     unsigned int *n)
{
	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
	const unsigned int elen = *n;
	u64 block;
	int ret;

	*n = 1;
	block = gfs2_rbm_to_block(rbm);
	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
	/* Only the first block can be a dinode; the rest are plain data */
	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	block++;
	/* Extend block-by-block while the following blocks are still free */
	while (*n < elen) {
		ret = gfs2_rbm_from_block(&pos, block);
		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
			break;
		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
		(*n)++;
		block++;
	}
}
2148
/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Returns:  Resource group containing the block(s), or NULL if no rgrp
 *           contains @bstart (a filesystem inconsistency)
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
				     u32 blen, unsigned char new_state)
{
	struct gfs2_rbm rbm;
	struct gfs2_bitmap *bi, *bi_prev = NULL;

	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
	if (!rbm.rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
		return NULL;
	}

	gfs2_rbm_from_block(&rbm, bstart);
	while (blen--) {
		bi = rbm_bi(&rbm);
		/* First touch of each bitmap: snapshot its current contents
		 * into bi_clone before any bits change, and add the buffer
		 * to the transaction.  (The clone presumably preserves the
		 * pre-free state for concurrent readers — confirm against
		 * the bi_clone users in gfs2_rbm_find.) */
		if (bi != bi_prev) {
			if (!bi->bi_clone) {
				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
						      GFP_NOFS | __GFP_NOFAIL);
				memcpy(bi->bi_clone + bi->bi_offset,
				       bi->bi_bh->b_data + bi->bi_offset,
				       bi->bi_len);
			}
			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
			bi_prev = bi;
		}
		gfs2_setbit(&rbm, false, new_state);
		gfs2_rbm_incr(&rbm);
	}

	return rbm.rgd;
}
2192
2193/**
2194 * gfs2_rgrp_dump - print out an rgrp
2195 * @seq: The iterator
2196 * @gl: The glock in question
2197 *
2198 */
2199
2200void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2201{
2202        struct gfs2_rgrpd *rgd = gl->gl_object;
2203        struct gfs2_blkreserv *trs;
2204        const struct rb_node *n;
2205
2206        if (rgd == NULL)
2207                return;
2208        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2209                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2210                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2211                       rgd->rd_reserved, rgd->rd_extfail_pt);
2212        spin_lock(&rgd->rd_rsspin);
2213        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2214                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2215                dump_rs(seq, trs);
2216        }
2217        spin_unlock(&rgd->rd_rsspin);
2218}
2219
/**
 * gfs2_rgrp_error - report an inconsistent rgrp and take it out of service
 * @rgd: the resource group with the error
 *
 * Logs warnings, dumps the rgrp state, and sets GFS2_RDF_ERROR so the
 * allocator skips this rgrp (see gfs2_inplace_reserve) until the
 * filesystem is unmounted and repaired with fsck.gfs2.
 */
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
		(unsigned long long)rgd->rd_addr);
	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
	gfs2_rgrp_dump(NULL, rgd->rd_gl);
	rgd->rd_flags |= GFS2_RDF_ERROR;
}
2229
/**
 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
 * @ip: The inode we have just allocated blocks for
 * @rbm: The start of the allocated blocks
 * @len: The extent length
 *
 * Adjusts a reservation after an allocation has taken place. If the
 * reservation does not match the allocation, or if it is now empty
 * then it is removed.
 */

static void gfs2_adjust_reservation(struct gfs2_inode *ip,
				    const struct gfs2_rbm *rbm, unsigned len)
{
	struct gfs2_blkreserv *rs = &ip->i_res;
	struct gfs2_rgrpd *rgd = rbm->rgd;
	unsigned rlen;
	u64 block;
	int ret;

	spin_lock(&rgd->rd_rsspin);
	if (gfs2_rs_active(rs)) {
		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
			/* Allocation started exactly at the reservation:
			 * advance its start past the allocated extent and
			 * shrink the remaining lengths accordingly. */
			block = gfs2_rbm_to_block(rbm);
			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
			rlen = min(rs->rs_free, len);
			rs->rs_free -= rlen;
			rgd->rd_reserved -= rlen;
			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
			/* Still blocks left and the new start is valid */
			if (rs->rs_free && !ret)
				goto out;
			/* We used up our block reservation, so we should
			   reserve more blocks next time. */
			atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
		}
		/* Reservation exhausted or mismatched: remove it */
		__rs_deltree(rs);
	}
out:
	spin_unlock(&rgd->rd_rsspin);
}
2270
2271/**
2272 * gfs2_set_alloc_start - Set starting point for block allocation
2273 * @rbm: The rbm which will be set to the required location
2274 * @ip: The gfs2 inode
2275 * @dinode: Flag to say if allocation includes a new inode
2276 *
2277 * This sets the starting point from the reservation if one is active
2278 * otherwise it falls back to guessing a start point based on the
2279 * inode's goal block or the last allocation point in the rgrp.
2280 */
2281
2282static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2283                                 const struct gfs2_inode *ip, bool dinode)
2284{
2285        u64 goal;
2286
2287        if (gfs2_rs_active(&ip->i_res)) {
2288                *rbm = ip->i_res.rs_rbm;
2289                return;
2290        }
2291
2292        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2293                goal = ip->i_goal;
2294        else
2295                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2296
2297        gfs2_rbm_from_block(rbm, goal);
2298}
2299
/**
 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
 * @ip: the inode to allocate the block for
 * @bn: Used to return the starting block number
 * @nblocks: requested number of blocks/extent length (value/result)
 * @dinode: 1 if we're allocating a dinode block, else 0
 * @generation: the generation number of the inode (set only if @dinode)
 *
 * Returns: 0 or error
 */

int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
		      bool dinode, u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
	unsigned int ndata;
	u64 block; /* block, within the file system scope */
	int error;

	gfs2_set_alloc_start(&rbm, ip, dinode);
	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);

	/* No luck honouring this inode's reservation: retry the search
	 * with ip == NULL so reservations are ignored. */
	if (error == -ENOSPC) {
		gfs2_set_alloc_start(&rbm, ip, dinode);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
	}

	/* Since all blocks are reserved in advance, this shouldn't happen */
	if (error) {
		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
			(unsigned long long)ip->i_no_addr, error, *nblocks,
			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
			rbm.rgd->rd_extfail_pt);
		goto rgrp_error;
	}

	gfs2_alloc_extent(&rbm, dinode, nblocks);
	block = gfs2_rbm_to_block(&rbm);
	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
	if (gfs2_rs_active(&ip->i_res))
		gfs2_adjust_reservation(ip, &rbm, *nblocks);
	/* ndata = data blocks only (the first block may be the dinode) */
	ndata = *nblocks;
	if (dinode)
		ndata--;

	if (!dinode) {
		/* Record the new goal block in the on-disk dinode */
		ip->i_goal = block + ndata - 1;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error == 0) {
			struct gfs2_dinode *di =
				(struct gfs2_dinode *)dibh->b_data;
			gfs2_trans_add_meta(ip->i_gl, dibh);
			di->di_goal_meta = di->di_goal_data =
				cpu_to_be64(ip->i_goal);
			brelse(dibh);
		}
	}
	/* Consistency check: the rgrp must have enough free blocks left */
	if (rbm.rgd->rd_free < *nblocks) {
		pr_warn("nblocks=%u\n", *nblocks);
		goto rgrp_error;
	}

	rbm.rgd->rd_free -= *nblocks;
	if (dinode) {
		rbm.rgd->rd_dinodes++;
		*generation = rbm.rgd->rd_igeneration++;
		/* 0 is never handed out as a generation number */
		if (*generation == 0)
			*generation = rbm.rgd->rd_igeneration++;
	}

	/* Write the updated rgrp header into the transaction and the LVB */
	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);

	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
	if (dinode)
		gfs2_trans_add_unrevoke(sdp, block, *nblocks);

	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);

	rbm.rgd->rd_free_clone -= *nblocks;
	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	*bn = block;
	return 0;

rgrp_error:
	gfs2_rgrp_error(rbm.rgd);
	return -EIO;
}
2392
/**
 * __gfs2_free_blocks - free a contiguous run of block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 * @meta: 1 if the blocks represent metadata
 *
 * Marks the blocks free in the bitmap, updates the rgrp counters and
 * on-disk header/LVB, and wipes cached metadata for the range when the
 * blocks are metadata (or directory data).  Does nothing if the
 * containing rgrp cannot be found.
 */

void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
	rgd->rd_free += blen;
	/* Newly freed blocks have not been discarded: clear TRIMMED */
	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);

	/* Directories keep their data in the metadata address space */
	if (meta || ip->i_depth)
		gfs2_meta_wipe(ip, bstart, blen);
}
2421
/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 * Frees the blocks, returns them to the statfs free-block count, and
 * credits them back against the inode's quota.
 */

void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	__gfs2_free_blocks(ip, bstart, blen, 1);
	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
2438
/**
 * gfs2_unlink_di - Mark an inode's dinode block as unlinked
 * @inode: The inode whose dinode block should be marked
 *
 * Sets the block to GFS2_BLKST_UNLINKED in the bitmap, updates the rgrp
 * header and lock value block, and bumps the LVB unlinked count.  Does
 * nothing if the containing rgrp cannot be found.
 */
void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, 1);
}
2455
/**
 * gfs2_free_uninit_di - free a dinode block without quota/metadata cleanup
 * @rgd: the resource group expected to contain the dinode
 * @blkno: the dinode's block number
 *
 * Marks the block free, adjusts the rgrp's dinode and free-block
 * counters, writes out the rgrp header/LVB and updates statfs.
 * Withdraws if the block turns out to live in a different rgrp.
 */
static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	/* The caller's rgd must match the rgrp the block actually lies in */
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_dinodes--;
	rgd->rd_free++;

	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
	update_rgrp_lvb_unlinked(rgd, -1);

	/* One more free block, one fewer dinode */
	gfs2_statfs_change(sdp, 0, +1, -1);
}
2478
2479
/**
 * gfs2_free_di - free a dinode block, including quota and metadata cleanup
 * @rgd: the resource group that contains the dinode
 * @ip: the inode whose dinode block is being freed
 */
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	gfs2_free_uninit_di(rgd, ip->i_no_addr);
	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
2487
2488/**
2489 * gfs2_check_blk_type - Check the type of a block
2490 * @sdp: The superblock
2491 * @no_addr: The block number to check
2492 * @type: The block type we are looking for
2493 *
2494 * Returns: 0 if the block type matches the expected type
2495 *          -ESTALE if it doesn't match
2496 *          or -ve errno if something went wrong while checking
2497 */
2498
2499int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2500{
2501        struct gfs2_rgrpd *rgd;
2502        struct gfs2_holder rgd_gh;
2503        int error = -EINVAL;
2504
2505        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2506        if (!rgd)
2507                goto fail;
2508
2509        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2510        if (error)
2511                goto fail;
2512
2513        if (gfs2_get_block_type(rgd, no_addr) != type)
2514                error = -ESTALE;
2515
2516        gfs2_glock_dq_uninit(&rgd_gh);
2517fail:
2518        return error;
2519}
2520
2521/**
2522 * gfs2_rlist_add - add a RG to a list of RGs
2523 * @ip: the inode
2524 * @rlist: the list of resource groups
2525 * @block: the block
2526 *
2527 * Figure out what RG a block belongs to and add that RG to the list
2528 *
2529 * FIXME: Don't use NOFAIL
2530 *
2531 */
2532
2533void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2534                    u64 block)
2535{
2536        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2537        struct gfs2_rgrpd *rgd;
2538        struct gfs2_rgrpd **tmp;
2539        unsigned int new_space;
2540        unsigned int x;
2541
2542        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2543                return;
2544
2545        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2546                rgd = ip->i_rgd;
2547        else
2548                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2549        if (!rgd) {
2550                fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2551                return;
2552        }
2553        ip->i_rgd = rgd;
2554
2555        for (x = 0; x < rlist->rl_rgrps; x++)
2556                if (rlist->rl_rgd[x] == rgd)
2557                        return;
2558
2559        if (rlist->rl_rgrps == rlist->rl_space) {
2560                new_space = rlist->rl_space + 10;
2561
2562                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2563                              GFP_NOFS | __GFP_NOFAIL);
2564
2565                if (rlist->rl_rgd) {
2566                        memcpy(tmp, rlist->rl_rgd,
2567                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2568                        kfree(rlist->rl_rgd);
2569                }
2570
2571                rlist->rl_space = new_space;
2572                rlist->rl_rgd = tmp;
2573        }
2574
2575        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2576}
2577
2578/**
2579 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2580 *      and initialize an array of glock holders for them
2581 * @rlist: the list of resource groups
2582 * @state: the lock state to acquire the RG lock in
2583 *
2584 * FIXME: Don't use NOFAIL
2585 *
2586 */
2587
2588void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2589{
2590        unsigned int x;
2591
2592        rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder),
2593                                GFP_NOFS | __GFP_NOFAIL);
2594        for (x = 0; x < rlist->rl_rgrps; x++)
2595                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2596                                state, 0,
2597                                &rlist->rl_ghs[x]);
2598}
2599
2600/**
2601 * gfs2_rlist_free - free a resource group list
2602 * @rlist: the list of resource groups
2603 *
2604 */
2605
2606void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2607{
2608        unsigned int x;
2609
2610        kfree(rlist->rl_rgd);
2611
2612        if (rlist->rl_ghs) {
2613                for (x = 0; x < rlist->rl_rgrps; x++)
2614                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2615                kfree(rlist->rl_ghs);
2616                rlist->rl_ghs = NULL;
2617        }
2618}
2619
2620