/* fs/gfs2/rgrp.c - resource group and allocation bitmap handling */
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/spinlock.h>
  12#include <linux/completion.h>
  13#include <linux/buffer_head.h>
  14#include <linux/fs.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/prefetch.h>
  17#include <linux/blkdev.h>
  18#include <linux/rbtree.h>
  19#include <linux/random.h>
  20
  21#include "gfs2.h"
  22#include "incore.h"
  23#include "glock.h"
  24#include "glops.h"
  25#include "lops.h"
  26#include "meta_io.h"
  27#include "quota.h"
  28#include "rgrp.h"
  29#include "super.h"
  30#include "trans.h"
  31#include "util.h"
  32#include "log.h"
  33#include "inode.h"
  34#include "trace_gfs2.h"
  35
  36#define BFITNOENT ((u32)~0)
  37#define NO_BLOCK ((u64)~0)
  38
  39#if BITS_PER_LONG == 32
  40#define LBITMASK   (0x55555555UL)
  41#define LBITSKIP55 (0x55555555UL)
  42#define LBITSKIP00 (0x00000000UL)
  43#else
  44#define LBITMASK   (0x5555555555555555UL)
  45#define LBITSKIP55 (0x5555555555555555UL)
  46#define LBITSKIP00 (0x0000000000000000UL)
  47#endif
  48
  49/*
  50 * These routines are used by the resource group routines (rgrp.c)
  51 * to keep track of block allocation.  Each block is represented by two
  52 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  53 *
  54 * 0 = Free
  55 * 1 = Used (not metadata)
  56 * 2 = Unlinked (still in use) inode
  57 * 3 = Used (metadata)
  58 */
  59
/* A run of blocks within a resource group, as found by gfs2_rbm_find(). */
struct gfs2_extent {
	struct gfs2_rbm rbm;	/* position of the first block of the extent */
	u32 len;		/* number of blocks in the extent */
};
  64
/*
 * Table of permitted bitmap state transitions, indexed as
 * valid_change[new_state * 4 + cur_state]; a 1 means the transition
 * is legal.  Consulted by gfs2_setbit() before modifying the bitmap.
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};
  72
  73static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  74                         const struct gfs2_inode *ip, bool nowrap,
  75                         const struct gfs2_alloc_parms *ap);
  76
  77
/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block (GFS2_BLKST_...)
 *
 * If the requested transition is not in valid_change, the rgrp is marked
 * inconsistent and the bitmap is left untouched.
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	unsigned int buflen = rbm->bi->bi_len;
	/* Bit position of this block's 2-bit state within its byte
	   (GFS2_NBBY blocks per byte, GFS2_BIT_SIZE bits each). */
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	/* Illegal transition: log diagnostics and flag the rgrp as
	   inconsistent instead of corrupting the on-disk bitmap. */
	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
		       "new_state=%d\n", rbm->offset, cur_state, new_state);
		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
		       (unsigned long long)rbm->rgd->rd_addr,
		       rbm->bi->bi_start);
		printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
		       rbm->bi->bi_offset, rbm->bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	/* XOR with (cur ^ new) flips only the bits that differ, leaving
	   the other three block states stored in this byte untouched. */
	*byte1 ^= (cur_state ^ new_state) << bit;

	/* Mirror the change into the clone bitmap, if one is attached. */
	if (do_clone && rbm->bi->bi_clone) {
		byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
 120
 121/**
 122 * gfs2_testbit - test a bit in the bitmaps
 123 * @rbm: The bit to test
 124 *
 125 * Returns: The two bit block state of the requested bit
 126 */
 127
 128static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 129{
 130        const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
 131        const u8 *byte;
 132        unsigned int bit;
 133
 134        byte = buffer + (rbm->offset / GFS2_NBBY);
 135        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 136
 137        return (*byte >> bit) & GFS2_BIT_MASK;
 138}
 139
 140/**
 141 * gfs2_bit_search
 142 * @ptr: Pointer to bitmap data
 143 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 144 * @state: The state we are searching for
 145 *
 146 * We xor the bitmap data with a patter which is the bitwise opposite
 147 * of what we are looking for, this gives rise to a pattern of ones
 148 * wherever there is a match. Since we have two bits per entry, we
 149 * take this pattern, shift it down by one place and then and it with
 150 * the original. All the even bit positions (0,2,4, etc) then represent
 151 * successful matches, so we mask with 0x55555..... to remove the unwanted
 152 * odd bit positions.
 153 *
 154 * This allows searching of a whole u64 at once (32 blocks) with a
 155 * single test (on 64 bit arches).
 156 */
 157
 158static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 159{
 160        u64 tmp;
 161        static const u64 search[] = {
 162                [0] = 0xffffffffffffffffULL,
 163                [1] = 0xaaaaaaaaaaaaaaaaULL,
 164                [2] = 0x5555555555555555ULL,
 165                [3] = 0x0000000000000000ULL,
 166        };
 167        tmp = le64_to_cpu(*ptr) ^ search[state];
 168        tmp &= (tmp >> 1);
 169        tmp &= mask;
 170        return tmp;
 171}
 172
 173/**
 174 * rs_cmp - multi-block reservation range compare
 175 * @blk: absolute file system block number of the new reservation
 176 * @len: number of blocks in the new reservation
 177 * @rs: existing reservation to compare against
 178 *
 179 * returns: 1 if the block range is beyond the reach of the reservation
 180 *         -1 if the block range is before the start of the reservation
 181 *          0 if the block range overlaps with the reservation
 182 */
 183static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 184{
 185        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 186
 187        if (blk >= startblk + rs->rs_free)
 188                return 1;
 189        if (blk + len - 1 < startblk)
 190                return -1;
 191        return 0;
 192}
 193
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* Starting bit within the first u64 word (2 bits per block,
	   32 blocks per word). */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	/* Scan one u64 (32 blocks) at a time until a match word is found. */
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	/* ptr was advanced past the matching word above; step back to it. */
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
 244
/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, -EINVAL if @block precedes the rgrp's data
 *          area, or -E2BIG if it lies beyond it
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	u64 rblock = block - rbm->rgd->rd_data0;
	u32 x;

	/* If block < rd_data0 the unsigned subtraction wraps and rblock
	   exceeds UINT_MAX, so this check also catches underflow. */
	if (WARN_ON_ONCE(rblock > UINT_MAX))
		return -EINVAL;
	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
		return -E2BIG;

	rbm->bi = rbm->rgd->rd_bits;
	rbm->offset = (u32)(rblock);
	/* Check if the block is within the first block */
	if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	/* Divide out which bitmap block the offset lands in, leaving the
	   remainder as the offset within that bitmap. */
	x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->bi += x;
	return 0;
}
 282
 283/**
 284 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 285 * @rbm: Position to search (value/result)
 286 * @n_unaligned: Number of unaligned blocks to check
 287 * @len: Decremented for each block found (terminate on zero)
 288 *
 289 * Returns: true if a non-free block is encountered
 290 */
 291
 292static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 293{
 294        u64 block;
 295        u32 n;
 296        u8 res;
 297
 298        for (n = 0; n < n_unaligned; n++) {
 299                res = gfs2_testbit(rbm);
 300                if (res != GFS2_BLKST_FREE)
 301                        return true;
 302                (*len)--;
 303                if (*len == 0)
 304                        return true;
 305                block = gfs2_rbm_to_block(rbm);
 306                if (gfs2_rbm_from_block(rbm, block + 1))
 307                        return true;
 308        }
 309
 310        return false;
 311}
 312
/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;	/* local copy; caller's rbm untouched */
	u32 n_unaligned = rbm.offset & 3;	/* blocks before byte alignment */
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;

	/* Walk any leading blocks individually until byte aligned;
	   a non-free block or exhausted len ends the extent here. */
	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		/* Prefer the clone bitmap if present: it reflects
		   in-flight allocation state. */
		start = rbm.bi->bi_bh->b_data;
		if (rbm.bi->bi_clone)
			start = rbm.bi->bi_clone;
		end = start + rbm.bi->bi_bh->b_size;
		start += rbm.bi->bi_offset;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		/* A zero byte is 4 free blocks; first non-zero byte ends
		   the all-free run. */
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			/* Hit the resource group boundary: stop. */
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			/* Non-zero byte found: check its (up to 3) leading
			   blocks individually below. */
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
 376
 377/**
 378 * gfs2_bitcount - count the number of bits in a certain state
 379 * @rgd: the resource group descriptor
 380 * @buffer: the buffer that holds the bitmaps
 381 * @buflen: the length (in bytes) of the buffer
 382 * @state: the state of the block we're looking for
 383 *
 384 * Returns: The number of bits
 385 */
 386
 387static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 388                         unsigned int buflen, u8 state)
 389{
 390        const u8 *byte = buffer;
 391        const u8 *end = buffer + buflen;
 392        const u8 state1 = state << 2;
 393        const u8 state2 = state << 4;
 394        const u8 state3 = state << 6;
 395        u32 count = 0;
 396
 397        for (; byte < end; byte++) {
 398                if (((*byte) & 0x03) == state)
 399                        count++;
 400                if (((*byte) & 0x0C) == state1)
 401                        count++;
 402                if (((*byte) & 0x30) == state2)
 403                        count++;
 404                if (((*byte) & 0xC0) == state3)
 405                        count++;
 406        }
 407
 408        return count;
 409}
 410
 411/**
 412 * gfs2_rgrp_verify - Verify that a resource group is consistent
 413 * @rgd: the rgrp
 414 *
 415 */
 416
 417void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 418{
 419        struct gfs2_sbd *sdp = rgd->rd_sbd;
 420        struct gfs2_bitmap *bi = NULL;
 421        u32 length = rgd->rd_length;
 422        u32 count[4], tmp;
 423        int buf, x;
 424
 425        memset(count, 0, 4 * sizeof(u32));
 426
 427        /* Count # blocks in each of 4 possible allocation states */
 428        for (buf = 0; buf < length; buf++) {
 429                bi = rgd->rd_bits + buf;
 430                for (x = 0; x < 4; x++)
 431                        count[x] += gfs2_bitcount(rgd,
 432                                                  bi->bi_bh->b_data +
 433                                                  bi->bi_offset,
 434                                                  bi->bi_len, x);
 435        }
 436
 437        if (count[0] != rgd->rd_free) {
 438                if (gfs2_consist_rgrpd(rgd))
 439                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 440                               count[0], rgd->rd_free);
 441                return;
 442        }
 443
 444        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 445        if (count[1] != tmp) {
 446                if (gfs2_consist_rgrpd(rgd))
 447                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 448                               count[1], tmp);
 449                return;
 450        }
 451
 452        if (count[2] + count[3] != rgd->rd_dinodes) {
 453                if (gfs2_consist_rgrpd(rgd))
 454                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 455                               count[2] + count[3], rgd->rd_dinodes);
 456                return;
 457        }
 458}
 459
 460/**
 461 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 462 * @sdp: The GFS2 superblock
 463 * @blk: The data block number
 464 * @exact: True if this needs to be an exact match
 465 *
 466 * The @exact argument should be set to true by most callers. The exception
 467 * is when we need to match blocks which are not represented by the rgrp
 468 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 469 * there for alignment purposes. Another way of looking at it is that @exact
 470 * matches only valid data/metadata blocks, but with @exact false, it will
 471 * match any block within the extent of the rgrp.
 472 *
 473 * Returns: The resource group, or NULL if not found
 474 */
 475
 476struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 477{
 478        struct rb_node *n, *next;
 479        struct gfs2_rgrpd *cur;
 480
 481        spin_lock(&sdp->sd_rindex_spin);
 482        n = sdp->sd_rindex_tree.rb_node;
 483        while (n) {
 484                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 485                next = NULL;
 486                if (blk < cur->rd_addr)
 487                        next = n->rb_left;
 488                else if (blk >= cur->rd_data0 + cur->rd_data)
 489                        next = n->rb_right;
 490                if (next == NULL) {
 491                        spin_unlock(&sdp->sd_rindex_spin);
 492                        if (exact) {
 493                                if (blk < cur->rd_addr)
 494                                        return NULL;
 495                                if (blk >= cur->rd_data0 + cur->rd_data)
 496                                        return NULL;
 497                        }
 498                        return cur;
 499                }
 500                n = next;
 501        }
 502        spin_unlock(&sdp->sd_rindex_spin);
 503
 504        return NULL;
 505}
 506
 507/**
 508 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 509 * @sdp: The GFS2 superblock
 510 *
 511 * Returns: The first rgrp in the filesystem
 512 */
 513
 514struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 515{
 516        const struct rb_node *n;
 517        struct gfs2_rgrpd *rgd;
 518
 519        spin_lock(&sdp->sd_rindex_spin);
 520        n = rb_first(&sdp->sd_rindex_tree);
 521        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 522        spin_unlock(&sdp->sd_rindex_spin);
 523
 524        return rgd;
 525}
 526
 527/**
 528 * gfs2_rgrpd_get_next - get the next RG
 529 * @rgd: the resource group descriptor
 530 *
 531 * Returns: The next rgrp
 532 */
 533
 534struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 535{
 536        struct gfs2_sbd *sdp = rgd->rd_sbd;
 537        const struct rb_node *n;
 538
 539        spin_lock(&sdp->sd_rindex_spin);
 540        n = rb_next(&rgd->rd_node);
 541        if (n == NULL)
 542                n = rb_first(&sdp->sd_rindex_tree);
 543
 544        if (unlikely(&rgd->rd_node == n)) {
 545                spin_unlock(&sdp->sd_rindex_spin);
 546                return NULL;
 547        }
 548        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 549        spin_unlock(&sdp->sd_rindex_spin);
 550        return rgd;
 551}
 552
 553void check_and_update_goal(struct gfs2_inode *ip)
 554{
 555        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 556        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 557                ip->i_goal = ip->i_no_addr;
 558}
 559
 560void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 561{
 562        int x;
 563
 564        for (x = 0; x < rgd->rd_length; x++) {
 565                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 566                kfree(bi->bi_clone);
 567                bi->bi_clone = NULL;
 568        }
 569}
 570
/**
 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 *                 plus a quota allocations data structure, if necessary
 * @ip: the inode for this reservation
 *
 * Returns: 0 on success or a negative errno from gfs2_qa_alloc()
 */
int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
	return gfs2_qa_alloc(ip);
}
 580
/* Print one block reservation (inode number, start block, bitmap offset
 * and free-block count) to the debug seq_file. */
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_inum,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}
 588
/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 * Caller must hold rgd->rd_rsspin.  Unlinks the reservation from the
 * rgrp's rb-tree and returns its unused blocks to the rgrp.
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows. Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		/* Allow searches of this bitmap block again; the barrier
		   orders the flag clear after the updates above. */
		clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
		smp_mb__after_clear_bit();
	}
}
 620
 621/**
 622 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 623 * @rs: The reservation to remove
 624 *
 625 */
 626void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 627{
 628        struct gfs2_rgrpd *rgd;
 629
 630        rgd = rs->rs_rbm.rgd;
 631        if (rgd) {
 632                spin_lock(&rgd->rd_rsspin);
 633                __rs_deltree(rs);
 634                BUG_ON(rs->rs_free);
 635                spin_unlock(&rgd->rd_rsspin);
 636        }
 637}
 638
/**
 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 * The reservation is only torn down when no other writer holds the
 * inode (wcount absent or <= 1); the quota allocation is always passed
 * on to gfs2_qa_delete() for the same wcount check.
 */
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
	down_write(&ip->i_rw_mutex);
	if ((wcount == NULL) || (atomic_read(wcount) <= 1))
		gfs2_rs_deltree(&ip->i_res);
	up_write(&ip->i_rw_mutex);
	gfs2_qa_delete(ip, wcount);
}
 653
 654/**
 655 * return_all_reservations - return all reserved blocks back to the rgrp.
 656 * @rgd: the rgrp that needs its space back
 657 *
 658 * We previously reserved a bunch of blocks for allocation. Now we need to
 659 * give them back. This leave the reservation structures in tact, but removes
 660 * all of their corresponding "no-fly zones".
 661 */
 662static void return_all_reservations(struct gfs2_rgrpd *rgd)
 663{
 664        struct rb_node *n;
 665        struct gfs2_blkreserv *rs;
 666
 667        spin_lock(&rgd->rd_rsspin);
 668        while ((n = rb_first(&rgd->rd_rstree))) {
 669                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 670                __rs_deltree(rs);
 671        }
 672        spin_unlock(&rgd->rd_rsspin);
 673}
 674
 675void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 676{
 677        struct rb_node *n;
 678        struct gfs2_rgrpd *rgd;
 679        struct gfs2_glock *gl;
 680
 681        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 682                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 683                gl = rgd->rd_gl;
 684
 685                rb_erase(n, &sdp->sd_rindex_tree);
 686
 687                if (gl) {
 688                        glock_clear_object(gl, rgd);
 689                        gfs2_rgrp_brelse(rgd);
 690                        gfs2_glock_add_to_lru(gl);
 691                        gfs2_glock_put(gl);
 692                }
 693
 694                gfs2_free_clones(rgd);
 695                kfree(rgd->rd_bits);
 696                rgd->rd_bits = NULL;
 697                return_all_reservations(rgd);
 698                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 699        }
 700}
 701
/* Dump the on-disk rindex fields of an rgrp to the kernel log; used by
 * compute_bitstructs() when an inconsistency is detected. */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
 710
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	/* Carve rd_bitbytes of bitmap data across the rgrp's blocks.
	 * The first block carries a gfs2_rgrp header, subsequent ones
	 * only a gfs2_meta_header, so their payload sizes differ. */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* All of rd_bitbytes must have been distributed exactly. */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* The last bitmap must end exactly at rd_data blocks
	 * (GFS2_NBBY blocks per bitmap byte). */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
 787
 788/**
 789 * gfs2_ri_total - Total up the file system space, according to the rindex.
 790 * @sdp: the filesystem
 791 *
 792 */
 793u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 794{
 795        u64 total_data = 0;     
 796        struct inode *inode = sdp->sd_rindex;
 797        struct gfs2_inode *ip = GFS2_I(inode);
 798        char buf[sizeof(struct gfs2_rindex)];
 799        int error, rgrps;
 800
 801        for (rgrps = 0;; rgrps++) {
 802                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 803
 804                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 805                        break;
 806                error = gfs2_internal_read(ip, buf, &pos,
 807                                           sizeof(struct gfs2_rindex));
 808                if (error != sizeof(struct gfs2_rindex))
 809                        break;
 810                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 811        }
 812        return total_data;
 813}
 814
 815static int rgd_insert(struct gfs2_rgrpd *rgd)
 816{
 817        struct gfs2_sbd *sdp = rgd->rd_sbd;
 818        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 819
 820        /* Figure out where to put new node */
 821        while (*newn) {
 822                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 823                                                  rd_node);
 824
 825                parent = *newn;
 826                if (rgd->rd_addr < cur->rd_addr)
 827                        newn = &((*newn)->rb_left);
 828                else if (rgd->rd_addr > cur->rd_addr)
 829                        newn = &((*newn)->rb_right);
 830                else
 831                        return -EEXIST;
 832        }
 833
 834        rb_link_node(&rgd->rd_node, parent, newn);
 835        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 836        sdp->sd_rgrps++;
 837        return 0;
 838}
 839
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	/* Next unread entry: sd_rgrps entries have been consumed so far. */
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	/* A short read with no error means EOF (> 0 return). */
	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	/* Copy the on-disk (big-endian) rindex fields into the descriptor. */
	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error) {
		glock_set_object(rgd->rd_gl, rgd);
		return 0;
	}

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	rgd->rd_bits = NULL;
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}
 907
 908/**
 909 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 910 * @sdp: the GFS2 superblock
 911 *
 912 * The purpose of this function is to select a subset of the resource groups
 913 * and mark them as PREFERRED. We do it in such a way that each node prefers
 914 * to use a unique set of rgrps to minimize glock contention.
 915 */
 916static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 917{
 918        struct gfs2_rgrpd *rgd, *first;
 919        int i;
 920
 921        /* Skip an initial number of rgrps, based on this node's journal ID.
 922           That should start each node out on its own set. */
 923        rgd = gfs2_rgrpd_get_first(sdp);
 924        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 925                rgd = gfs2_rgrpd_get_next(rgd);
 926        first = rgd;
 927
 928        do {
 929                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 930                for (i = 0; i < sdp->sd_journals; i++) {
 931                        rgd = gfs2_rgrpd_get_next(rgd);
 932                        if (!rgd || rgd == first)
 933                                break;
 934                }
 935        } while (rgd && rgd != first);
 936}
 937
 938/**
 939 * gfs2_ri_update - Pull in a new resource index from the disk
 940 * @ip: pointer to the rindex inode
 941 *
 942 * Returns: 0 on successful update, error code otherwise
 943 */
 944
 945static int gfs2_ri_update(struct gfs2_inode *ip)
 946{
 947        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 948        int error;
 949
 950        do {
 951                error = read_rindex_entry(ip);
 952        } while (error == 0);
 953
 954        if (error < 0)
 955                return error;
 956
 957        set_rgrp_preferences(sdp);
 958
 959        sdp->sd_rindex_uptodate = 1;
 960        return 0;
 961}
 962
 963/**
 964 * gfs2_rindex_update - Update the rindex if required
 965 * @sdp: The GFS2 superblock
 966 *
 967 * We grab a lock on the rindex inode to make sure that it doesn't
 968 * change whilst we are performing an operation. We keep this lock
 969 * for quite long periods of time compared to other locks. This
 970 * doesn't matter, since it is shared and it is very, very rarely
 971 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 972 *
 973 * This makes sure that we're using the latest copy of the resource index
 974 * special file, which might have been updated if someone expanded the
 975 * filesystem (via gfs2_grow utility), which adds new resource groups.
 976 *
 977 * Returns: 0 on succeess, error code otherwise
 978 */
 979
 980int gfs2_rindex_update(struct gfs2_sbd *sdp)
 981{
 982        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
 983        struct gfs2_glock *gl = ip->i_gl;
 984        struct gfs2_holder ri_gh;
 985        int error = 0;
 986        int unlock_required = 0;
 987
 988        /* Read new copy from disk if we don't have the latest */
 989        if (!sdp->sd_rindex_uptodate) {
 990                if (!gfs2_glock_is_locked_by_me(gl)) {
 991                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
 992                        if (error)
 993                                return error;
 994                        unlock_required = 1;
 995                }
 996                if (!sdp->sd_rindex_uptodate)
 997                        error = gfs2_ri_update(ip);
 998                if (unlock_required)
 999                        gfs2_glock_dq_uninit(&ri_gh);
1000        }
1001
1002        return error;
1003}
1004
1005static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1006{
1007        const struct gfs2_rgrp *str = buf;
1008        u32 rg_flags;
1009
1010        rg_flags = be32_to_cpu(str->rg_flags);
1011        rg_flags &= ~GFS2_RDF_MASK;
1012        rgd->rd_flags &= GFS2_RDF_MASK;
1013        rgd->rd_flags |= rg_flags;
1014        rgd->rd_free = be32_to_cpu(str->rg_free);
1015        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1016        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1017}
1018
1019static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1020{
1021        const struct gfs2_rgrp *str = buf;
1022
1023        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1024        rgl->rl_flags = str->rg_flags;
1025        rgl->rl_free = str->rg_free;
1026        rgl->rl_dinodes = str->rg_dinodes;
1027        rgl->rl_igeneration = str->rg_igeneration;
1028        rgl->__pad = 0UL;
1029}
1030
1031static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1032{
1033        struct gfs2_rgrp *str = buf;
1034
1035        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1036        str->rg_free = cpu_to_be32(rgd->rd_free);
1037        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1038        str->__pad = cpu_to_be32(0);
1039        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1040        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1041        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
1042}
1043
1044static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1045{
1046        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1047        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1048        int valid = 1;
1049
1050        if (rgl->rl_flags != str->rg_flags) {
1051                printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
1052                       (unsigned long long)rgd->rd_addr,
1053                       be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
1054                valid = 0;
1055        }
1056        if (rgl->rl_free != str->rg_free) {
1057                printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
1058                       (unsigned long long)rgd->rd_addr,
1059                       be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
1060                valid = 0;
1061        }
1062        if (rgl->rl_dinodes != str->rg_dinodes) {
1063                printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
1064                       (unsigned long long)rgd->rd_addr,
1065                       be32_to_cpu(rgl->rl_dinodes),
1066                       be32_to_cpu(str->rg_dinodes));
1067                valid = 0;
1068        }
1069        if (rgl->rl_igeneration != str->rg_igeneration) {
1070                printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
1071                       "%llu/%llu", (unsigned long long)rgd->rd_addr,
1072                       (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
1073                       (unsigned long long)be64_to_cpu(str->rg_igeneration));
1074                valid = 0;
1075        }
1076        return valid;
1077}
1078
1079static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1080{
1081        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1082        u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1083        rgl->rl_unlinked = cpu_to_be32(unlinked);
1084}
1085
1086static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1087{
1088        struct gfs2_bitmap *bi;
1089        const u32 length = rgd->rd_length;
1090        const u8 *buffer = NULL;
1091        u32 i, goal, count = 0;
1092
1093        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1094                goal = 0;
1095                buffer = bi->bi_bh->b_data + bi->bi_offset;
1096                WARN_ON(!buffer_uptodate(bi->bi_bh));
1097                while (goal < bi->bi_len * GFS2_NBBY) {
1098                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1099                                           GFS2_BLKST_UNLINKED);
1100                        if (goal == BFITNOENT)
1101                                break;
1102                        count++;
1103                        goal++;
1104                }
1105        }
1106
1107        return count;
1108}
1109
1110
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* Already read in; buffers stay held until gfs2_rgrp_brelse() */
	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	/* Submit reads for all bitmap blocks first ... */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* ... then wait for completion and verify metadata types:
	   block 0 is the RG header, the rest are RB (bitmap) blocks */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	/* An LVB without the magic has never been initialized; populate it
	   from the header we just read */
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	}
	else if (sdp->sd_args.ar_rgrplvb) {
		/* LVB is in use: it must agree with the on-disk header */
		if (!gfs2_rgrp_lvb_valid(rgd)){
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	/* Release whatever buffers were read before the failure; x is
	   either the failed index (first loop) or length (second loop) */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}
1187
1188int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1189{
1190        u32 rl_flags;
1191
1192        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1193                return 0;
1194
1195        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1196                return gfs2_rgrp_bh_get(rgd);
1197
1198        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1199        rl_flags &= ~GFS2_RDF_MASK;
1200        rgd->rd_flags &= GFS2_RDF_MASK;
1201        rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
1202        if (rgd->rd_rgl->rl_unlinked == 0)
1203                rgd->rd_flags &= ~GFS2_RDF_CHECK;
1204        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1205        rgd->rd_free_clone = rgd->rd_free;
1206        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1207        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1208        return 0;
1209}
1210
1211int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1212{
1213        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1214        struct gfs2_sbd *sdp = rgd->rd_sbd;
1215
1216        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1217                return 0;
1218        return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
1219}
1220
1221/**
1222 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1223 * @rgd: The resource group
1224 *
1225 */
1226
1227void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1228{
1229        int x, length = rgd->rd_length;
1230
1231        for (x = 0; x < length; x++) {
1232                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1233                if (bi->bi_bh) {
1234                        brelse(bi->bi_bh);
1235                        bi->bi_bh = NULL;
1236                }
1237        }
1238
1239}
1240
1241/**
1242 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1243 * @gh: The glock holder for the resource group
1244 *
1245 */
1246
1247void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1248{
1249        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1250        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1251                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1252
1253        if (rgd && demote_requested)
1254                gfs2_rgrp_brelse(rgd);
1255}
1256
/**
 * gfs2_rgrp_send_discards - Issue discard requests for free blocks in a bitmap
 * @sdp: the filesystem
 * @offset: the block address the bitmap's entries are relative to
 * @bh: if set, a second bitmap copy; only entries free in @bh but still
 *      in use in the working copy are discarded. If NULL, every free
 *      entry in the working copy is a candidate (used by FITRIM).
 * @bi: the bitmap to scan
 * @minlen: minimum extent length (in blocks) worth discarding
 * @ptrimmed: if non-NULL, receives the total number of blocks discarded
 *
 * Returns: 0 on success; -EIO if a discard request fails (in which case
 *          discards are switched off for this filesystem)
 */
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			     struct buffer_head *bh,
			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	/* Each byte holds GFS2_NBBY (4) two-bit block states */
	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			/* Low bit of each 2-bit entry set where the entry is
			   free (00) in @bh but non-free in the clone */
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* Low bit set for every free (00) entry */
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;	/* keep only the low bit of each entry */
		if (diff == 0)
			continue;
		/* Absolute block number of this byte's first entry */
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		/* Coalesce consecutive candidate blocks into extents and
		   issue a discard whenever an extent >= minlen ends */
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;
			blk++;
		}
	}
	/* Flush the final pending extent, if large enough */
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
1323
1324/**
1325 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1326 * @filp: Any file on the filesystem
1327 * @argp: Pointer to the arguments (also used to pass result)
1328 *
1329 * Returns: 0 on success, otherwise error code
1330 */
1331
1332int gfs2_fitrim(struct file *filp, void __user *argp)
1333{
1334        struct inode *inode = file_inode(filp);
1335        struct gfs2_sbd *sdp = GFS2_SB(inode);
1336        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1337        struct buffer_head *bh;
1338        struct gfs2_rgrpd *rgd;
1339        struct gfs2_rgrpd *rgd_end;
1340        struct gfs2_holder gh;
1341        struct fstrim_range r;
1342        int ret = 0;
1343        u64 amt;
1344        u64 trimmed = 0;
1345        u64 start, end, minlen;
1346        unsigned int x;
1347        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1348
1349        if (!capable(CAP_SYS_ADMIN))
1350                return -EPERM;
1351
1352        if (!blk_queue_discard(q))
1353                return -EOPNOTSUPP;
1354
1355        if (copy_from_user(&r, argp, sizeof(r)))
1356                return -EFAULT;
1357
1358        ret = gfs2_rindex_update(sdp);
1359        if (ret)
1360                return ret;
1361
1362        start = r.start >> bs_shift;
1363        end = start + (r.len >> bs_shift);
1364        minlen = max_t(u64, r.minlen,
1365                       q->limits.discard_granularity) >> bs_shift;
1366
1367        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1368        rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
1369
1370        if (end <= start ||
1371            minlen > sdp->sd_max_rg_data ||
1372            start > rgd_end->rd_data0 + rgd_end->rd_data)
1373                return -EINVAL;
1374
1375        while (1) {
1376
1377                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1378                if (ret)
1379                        goto out;
1380
1381                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1382                        /* Trim each bitmap in the rgrp */
1383                        for (x = 0; x < rgd->rd_length; x++) {
1384                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1385                                ret = gfs2_rgrp_send_discards(sdp,
1386                                                rgd->rd_data0, NULL, bi, minlen,
1387                                                &amt);
1388                                if (ret) {
1389                                        gfs2_glock_dq_uninit(&gh);
1390                                        goto out;
1391                                }
1392                                trimmed += amt;
1393                        }
1394
1395                        /* Mark rgrp as having been trimmed */
1396                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1397                        if (ret == 0) {
1398                                bh = rgd->rd_bits[0].bi_bh;
1399                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1400                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1401                                gfs2_rgrp_out(rgd, bh->b_data);
1402                                gfs2_trans_end(sdp);
1403                        }
1404                }
1405                gfs2_glock_dq_uninit(&gh);
1406
1407                if (rgd == rgd_end)
1408                        break;
1409
1410                rgd = gfs2_rgrpd_get_next(rgd);
1411        }
1412
1413out:
1414        r.len = trimmed << 9;
1415        if (copy_to_user(argp, &r, sizeof(r)))
1416                return -EFAULT;
1417
1418        return ret;
1419}
1420
1421/**
1422 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1423 * @ip: the inode structure
1424 *
1425 */
1426static void rs_insert(struct gfs2_inode *ip)
1427{
1428        struct rb_node **newn, *parent = NULL;
1429        int rc;
1430        struct gfs2_blkreserv *rs = &ip->i_res;
1431        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1432        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1433
1434        BUG_ON(gfs2_rs_active(rs));
1435
1436        spin_lock(&rgd->rd_rsspin);
1437        newn = &rgd->rd_rstree.rb_node;
1438        while (*newn) {
1439                struct gfs2_blkreserv *cur =
1440                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1441
1442                parent = *newn;
1443                rc = rs_cmp(fsblock, rs->rs_free, cur);
1444                if (rc > 0)
1445                        newn = &((*newn)->rb_right);
1446                else if (rc < 0)
1447                        newn = &((*newn)->rb_left);
1448                else {
1449                        spin_unlock(&rgd->rd_rsspin);
1450                        WARN_ON(1);
1451                        return;
1452                }
1453        }
1454
1455        rb_link_node(&rs->rs_node, parent, newn);
1456        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1457
1458        /* Do our rgrp accounting for the reservation */
1459        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1460        spin_unlock(&rgd->rd_rsspin);
1461        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1462}
1463
/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 * If a suitable free extent is found, the inode's reservation is filled
 * in and inserted into the rgrp's reservation tree; otherwise nothing
 * changes.
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = &ip->i_res;
	u32 extlen;
	/* may wrap if rd_reserved > rd_free_clone; checked below */
	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
	int ret;
	struct inode *inode = &ip->i_inode;

	if (S_ISDIR(inode->i_mode))
		extlen = 1;	/* directories reserve a single block */
	else {
		/* Reserve the larger of the size hint and the requested
		   target, clamped to what this rgrp can actually provide */
		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
	if (ret == 0) {
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs->rs_inum = ip->i_no_addr;
		rs_insert(ip);
	} else {
		/* Search from the last-alloc goal failed: reset it so the
		   next attempt starts from the beginning of the rgrp */
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}
1512
/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	/* Binary-search the tree for a reservation overlapping
	   [block, block + length) — rs_cmp() == 0 means overlap */
	n = rgd->rd_rstree.rb_node;
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		/* Hop over each overlapping reservation in turn (skipping
		   @ip's own), advancing block to just past its end, until
		   the range no longer overlaps anything */
		while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}
1560
/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;
	int ret;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		if (extlen <= maxext->len)
			goto fail;
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		/* Unreserved and long enough: the caller can use it */
		if (!minext || extlen >= minext)
			return 0;

		/* Too short, but track the best extent seen so far so the
		   caller can fall back to it if nothing better turns up */
		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
fail:
		/* Resume scanning just past this unusable extent */
		nblock = block + extlen;
	}
	/* Advance the rbm to nblock; may fail if past the end of the rgrp */
	ret = gfs2_rbm_from_block(rbm, nblock);
	if (ret < 0)
		return ret;
	return 1;
}
1618
/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 * @ap: the allocation parameters
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap,
			 const struct gfs2_alloc_parms *ap)
{
	struct buffer_head *bh;
	struct gfs2_bitmap *initial_bi;
	u32 initial_offset;
	/* Remember where the search started, for the extfail_pt update */
	struct gfs2_bitmap *first_bi = rbm->bi;
	u32 first_offset = rbm->offset;
	u32 offset;
	u8 *buffer;
	int index;
	int n = 0;	/* number of bitmaps visited so far */
	int iters = rbm->rgd->rd_length;
	int ret;
	/* Best (longest) too-short extent seen, used as a fallback */
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/* If we are not starting at the beginning of a bitmap, then we
	 * need to add one to the bitmap count to ensure that we search
	 * the starting bitmap twice.
	 */
	if (rbm->offset != 0)
		iters++;

	while(1) {
		/* GBF_FULL: this bitmap is known to have no free blocks */
		if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = rbm->bi->bi_bh;
		buffer = bh->b_data + rbm->bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		/* Use the clone bitmap (if any) except for unlinked scans */
		if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
			buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
		initial_offset = rbm->offset;
		offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
		if (offset == BFITNOENT)
			goto bitmap_full;
		rbm->offset = offset;
		/* Without an inode there are no reservations to avoid */
		if (ip == NULL)
			return 0;

		initial_bi = rbm->bi;
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0) {
			/* Position moved past a reservation; keep going,
			   accounting for any bitmaps skipped over */
			n += (rbm->bi - initial_bi);
			goto next_iter;
		}
		if (ret == -E2BIG) {
			/* Reservation ran past the end of the rgrp */
			index = 0;
			rbm->offset = 0;
			n += (rbm->bi - initial_bi);
			goto res_covered_end_of_rgrp;
		}
		return ret;

bitmap_full:	/* Mark bitmap as full and fall through */
		if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
			set_bit(GBF_FULL, &rbm->bi->bi_flags);

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		index = rbm->bi - rbm->rgd->rd_bits;
		index++;
		if (index == rbm->rgd->rd_length)
			index = 0;
res_covered_end_of_rgrp:
		rbm->bi = &rbm->rgd->rd_bits[index];
		if ((index == 0) && nowrap)
			break;
		n++;
next_iter:
		if (n >= iters)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if ((first_offset == 0) && (first_bi == rbm->rgd->rd_bits) &&
	    (*minext < rbm->rgd->rd_extfail_pt))
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}
1739
1740/**
1741 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1742 * @rgd: The rgrp
1743 * @last_unlinked: block address of the last dinode we unlinked
1744 * @skip: block address we should explicitly not unlink
1745 *
 * Returns: nothing. Any unlinked, unused inodes found are queued to the
 *          delete workqueue for disposal.
1748 */
1749
static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
        u64 block;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl;
        struct gfs2_inode *ip;
        int error;
        int found = 0;
        /* Start the search at the first bitmap of the rgrp */
        struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };

        while (1) {
                /* Hold off log flushes while we scan the bitmaps */
                down_write(&sdp->sd_log_flush_lock);
                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
                                      true, NULL);
                up_write(&sdp->sd_log_flush_lock);
                if (error == -ENOSPC) /* no more unlinked blocks in this rgrp */
                        break;
                if (WARN_ON_ONCE(error))
                        break;

                block = gfs2_rbm_to_block(&rbm);
                /* Resume the next search one block further on; stop if that
                 * would run past the end of the rgrp */
                if (gfs2_rbm_from_block(&rbm, block + 1))
                        break;
                /* Skip blocks we have already dealt with on earlier calls */
                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
                        continue;
                if (block == skip)
                        continue;
                *last_unlinked = block;

                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
                if (error)
                        continue;

                /* If the inode is already in cache, we can ignore it here
                 * because the existing inode disposal code will deal with
                 * it when all refs have gone away. Accessing gl_object like
                 * this is not safe in general. Here it is ok because we do
                 * not dereference the pointer, and we only need an approx
                 * answer to whether it is NULL or not.
                 */
                ip = gl->gl_object;

                /* Either the glock ref passes to the queued work, or we
                 * drop it here */
                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
                        gfs2_glock_put(gl);
                else
                        found++;

                /* Limit reclaim to sensible number of tasks */
                if (found > NR_CPUS)
                        return;
        }

        /* Full scan completed: no need to re-check this rgrp until it changes */
        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        return;
}
1805
1806/**
1807 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1808 * @rgd: The rgrp in question
1809 * @loops: An indication of how picky we can be (0=very, 1=less so)
1810 *
1811 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
1813 * contention from multiple nodes. This is done purely on the basis
1814 * of timings, since this is the only data we have to work with and
1815 * our aim here is to reject a resource group which is highly contended
1816 * but (very important) not to do this too often in order to ensure that
1817 * we do not land up introducing fragmentation by changing resource
1818 * groups when not actually required.
1819 *
1820 * The calculation is fairly simple, we want to know whether the SRTTB
1821 * (i.e. smoothed round trip time for blocking operations) to acquire
1822 * the lock for this rgrp's glock is significantly greater than the
1823 * time taken for resource groups on average. We introduce a margin in
1824 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
1826 * and whether we have a lot of data to base the decision on. This is
1827 * then tested against the square difference of the means in order to
1828 * decide whether the result is statistically significant or not.
1829 *
1830 * Returns: A boolean verdict on the congestion status
1831 */
1832
static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
        const struct gfs2_glock *gl = rgd->rd_gl;
        const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct gfs2_lkstats *st;
        s64 r_dcount, l_dcount;       /* rgrp-wide / this-glock dlm op counts */
        s64 l_srttb, a_srttb = 0;     /* this-glock / averaged blocking SRTT */
        s64 srttb_diff;
        s64 sqr_diff;
        s64 var;
        int cpu, nonzero = 0;

        /* Per-cpu stats must be sampled without migrating between cpus */
        preempt_disable();
        /* Average the non-zero per-cpu SRTTB samples for rgrp glocks */
        for_each_present_cpu(cpu) {
                st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
                if (st->stats[GFS2_LKS_SRTTB]) {
                        a_srttb += st->stats[GFS2_LKS_SRTTB];
                        nonzero++;
                }
        }
        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
        if (nonzero)
                do_div(a_srttb, nonzero);
        r_dcount = st->stats[GFS2_LKS_DCOUNT];
        /* Margin: sum of the two variance estimates (scaled below) */
        var = st->stats[GFS2_LKS_SRTTVARB] +
              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
        preempt_enable();

        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];

        /* Not enough data to make a statistically meaningful decision */
        if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
                return false;

        srttb_diff = a_srttb - l_srttb;
        sqr_diff = srttb_diff * srttb_diff;

        /* Widen the margin when we have few samples or are on a later loop */
        var *= 2;
        if (l_dcount < 8 || r_dcount < 8)
                var *= 2;
        if (loops == 1)
                var *= 2;

        /* Congested iff this glock's SRTTB exceeds the average by a
         * statistically significant amount */
        return ((srttb_diff < 0) && (sqr_diff > var));
}
1878
1879/**
1880 * gfs2_rgrp_used_recently
1881 * @rs: The block reservation with the rgrp to test
1882 * @msecs: The time limit in milliseconds
1883 *
 * Returns: true if more than @msecs have elapsed since the rgrp glock was
 *          last used (NOTE(review): the function name suggests the opposite
 *          sense — confirm intent against the caller in gfs2_inplace_reserve)
1885 */
1886static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1887                                    u64 msecs)
1888{
1889        u64 tdiff;
1890
1891        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1892                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1893
1894        return tdiff > (msecs * 1000 * 1000);
1895}
1896
1897static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1898{
1899        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1900        u32 skip;
1901
1902        get_random_bytes(&skip, sizeof(skip));
1903        return skip % sdp->sd_rgrps;
1904}
1905
1906static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1907{
1908        struct gfs2_rgrpd *rgd = *pos;
1909        struct gfs2_sbd *sdp = rgd->rd_sbd;
1910
1911        rgd = gfs2_rgrpd_get_next(rgd);
1912        if (rgd == NULL)
1913                rgd = gfs2_rgrpd_get_first(sdp);
1914        *pos = rgd;
1915        if (rgd != begin) /* If we didn't wrap */
1916                return true;
1917        return false;
1918}
1919
1920/**
1921 * fast_to_acquire - determine if a resource group will be fast to acquire
1922 *
1923 * If this is one of our preferred rgrps, it should be quicker to acquire,
1924 * because we tried to set ourselves up as dlm lock master.
1925 */
1926static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1927{
1928        struct gfs2_glock *gl = rgd->rd_gl;
1929
1930        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1931            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1932            !test_bit(GLF_DEMOTE, &gl->gl_flags))
1933                return 1;
1934        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1935                return 1;
1936        return 0;
1937}
1938
1939/**
1940 * gfs2_inplace_reserve - Reserve space in the filesystem
1941 * @ip: the inode to reserve space for
1942 * @ap: the allocation parameters
1943 *
1944 * We try our best to find an rgrp that has at least ap->target blocks
1945 * available. After a couple of passes (loops == 2), the prospects of finding
1946 * such an rgrp diminish. At this stage, we return the first rgrp that has
 * at least ap->min_target blocks available. Either way, we set ap->allowed to
1948 * the number of blocks available in the chosen rgrp.
1949 *
1950 * Returns: 0 on success,
1951 *          -ENOMEM if a suitable rgrp can't be found
1952 *          errno otherwise
1953 */
1954
int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = &ip->i_res;
        int error = 0, rg_locked, flags = 0;
        u64 last_unlinked = NO_BLOCK;
        int loops = 0;
        u32 skip = 0;

        if (sdp->sd_args.ar_rgrplvb)
                flags |= GL_SKIP;
        if (gfs2_assert_warn(sdp, ap->target))
                return -EINVAL;
        /* Choose a starting rgrp: the active reservation's rgrp, the cached
         * rgrp if it contains the allocation goal, or the rgrp looked up
         * from the (possibly updated) goal block. */
        if (gfs2_rs_active(rs)) {
                begin = rs->rs_rbm.rgd;
        } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
                rs->rs_rbm.rgd = begin = ip->i_rgd;
        } else {
                check_and_update_goal(ip);
                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
        }
        /* For directories, optionally spread allocations across rgrps */
        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
                skip = gfs2_orlov_skip(ip);
        if (rs->rs_rbm.rgd == NULL)
                return -EBADSLT;

        /* Up to three passes over the rgrps, relaxing pickiness each time */
        while (loops < 3) {
                rg_locked = 1;

                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
                        rg_locked = 0;
                        if (skip && skip--)
                                goto next_rgrp;
                        if (!gfs2_rs_active(rs)) {
                                /* On the first pass, only consider rgrps we
                                 * expect to lock cheaply */
                                if (loops == 0 &&
                                    !fast_to_acquire(rs->rs_rbm.rgd))
                                        goto next_rgrp;
                                if ((loops < 2) &&
                                    gfs2_rgrp_used_recently(rs, 1000) &&
                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
                                        goto next_rgrp;
                        }
                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
                                                   LM_ST_EXCLUSIVE, flags,
                                                   &rs->rs_rgd_gh);
                        if (unlikely(error))
                                return error;
                        /* Re-check congestion now that we hold the glock */
                        if (!gfs2_rs_active(rs) && (loops < 2) &&
                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
                                goto skip_rgrp;
                        if (sdp->sd_args.ar_rgrplvb) {
                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
                                if (unlikely(error)) {
                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                                        return error;
                                }
                        }
                }

                /* Skip unusable resource groups */
                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
                                                 GFS2_RDF_ERROR)) ||
                    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
                        goto skip_rgrp;

                if (sdp->sd_args.ar_rgrplvb)
                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

                /* Get a reservation if we don't already have one */
                if (!gfs2_rs_active(rs))
                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

                /* Skip rgrps when we can't get a reservation on first pass */
                if (!gfs2_rs_active(rs) && (loops < 1))
                        goto check_rgrp;

                /* If rgrp has enough free space, use it */
                if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
                        (loops == 2 && ap->min_target &&
                         rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
                        ip->i_rgd = rs->rs_rbm.rgd;
                        ap->allowed = ip->i_rgd->rd_free_clone;
                        return 0;
                }

check_rgrp:
                /* Check for unlinked inodes which can be reclaimed */
                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
                                        ip->i_no_addr);
skip_rgrp:
                /* Drop reservation, if we couldn't use reserved rgrp */
                if (gfs2_rs_active(rs))
                        gfs2_rs_deltree(rs);

                /* Unlock rgrp if required */
                if (!rg_locked)
                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
next_rgrp:
                /* Find the next rgrp, and continue looking */
                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
                        continue;
                if (skip)
                        continue;

                /* If we've scanned all the rgrps, but found no free blocks
                 * then this checks for some less likely conditions before
                 * trying again.
                 */
                loops++;
                /* Check that fs hasn't grown if writing to rindex */
                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
                        error = gfs2_ri_update(ip);
                        if (error)
                                return error;
                }
                /* Flushing the log may release space */
                if (loops == 2)
                        gfs2_log_flush(sdp, NULL);
        }

        return -ENOSPC;
}
2079
2080/**
2081 * gfs2_inplace_release - release an inplace reservation
2082 * @ip: the inode the reservation was taken out on
2083 *
2084 * Release a reservation made by gfs2_inplace_reserve().
2085 */
2086
2087void gfs2_inplace_release(struct gfs2_inode *ip)
2088{
2089        struct gfs2_blkreserv *rs = &ip->i_res;
2090
2091        if (gfs2_holder_initialized(&rs->rs_rgd_gh))
2092                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2093}
2094
2095/**
2096 * gfs2_get_block_type - Check a block in a RG is of given type
2097 * @rgd: the resource group holding the block
2098 * @block: the block number
2099 *
2100 * Returns: The block type (GFS2_BLKST_*)
2101 */
2102
2103static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2104{
2105        struct gfs2_rbm rbm = { .rgd = rgd, };
2106        int ret;
2107
2108        ret = gfs2_rbm_from_block(&rbm, block);
2109        WARN_ON_ONCE(ret != 0);
2110
2111        return gfs2_testbit(&rbm);
2112}
2113
2114
2115/**
2116 * gfs2_alloc_extent - allocate an extent from a given bitmap
2117 * @rbm: the resource group information
2118 * @dinode: TRUE if the first block we allocate is for a dinode
2119 * @n: The extent length (value/result)
2120 *
2121 * Add the bitmap buffer to the transaction.
2122 * Set the found bits to @new_state to change block's allocation state.
2123 */
2124static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2125                             unsigned int *n)
2126{
2127        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2128        const unsigned int elen = *n;
2129        u64 block;
2130        int ret;
2131
2132        *n = 1;
2133        block = gfs2_rbm_to_block(rbm);
2134        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
2135        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2136        block++;
2137        while (*n < elen) {
2138                ret = gfs2_rbm_from_block(&pos, block);
2139                if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2140                        break;
2141                gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
2142                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2143                (*n)++;
2144                block++;
2145        }
2146}
2147
2148/**
2149 * rgblk_free - Change alloc state of given block(s)
2150 * @sdp: the filesystem
2151 * @bstart: the start of a run of blocks to free
2152 * @blen: the length of the block run (all must lie within ONE RG!)
2153 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2154 *
2155 * Returns:  Resource group containing the block(s)
2156 */
2157
static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
                                     u32 blen, unsigned char new_state)
{
        struct gfs2_rbm rbm;

        rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
        if (!rbm.rgd) {
                /* No rgrp covers this block: filesystem inconsistency */
                if (gfs2_consist(sdp))
                        fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
                return NULL;
        }

        while (blen--) {
                gfs2_rbm_from_block(&rbm, bstart);
                bstart++;
                /* Lazily make a "clone" copy of this bitmap the first time a
                 * block in it changes state; gfs2_rbm_find() searches bi_clone
                 * (when present) instead of the live bitmap, so freed blocks
                 * are not handed out again too early. */
                if (!rbm.bi->bi_clone) {
                        rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
                                                   GFP_NOFS | __GFP_NOFAIL);
                        memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
                               rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
                               rbm.bi->bi_len);
                }
                gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
                gfs2_setbit(&rbm, false, new_state);
        }

        return rbm.rgd;
}
2186
2187/**
2188 * gfs2_rgrp_dump - print out an rgrp
2189 * @seq: The iterator
2190 * @gl: The glock in question
2191 *
2192 */
2193
int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
        struct gfs2_rgrpd *rgd = gl->gl_object;
        struct gfs2_blkreserv *trs;
        const struct rb_node *n;

        if (rgd == NULL)
                return 0;
        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
                       rgd->rd_reserved, rgd->rd_extfail_pt);
        /* When lock value blocks are in use, also show the LVB copy */
        if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
                struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;

                gfs2_print_dbg(seq, "  L: f:%02x b:%u i:%u\n",
                               be32_to_cpu(rgl->rl_flags),
                               be32_to_cpu(rgl->rl_free),
                               be32_to_cpu(rgl->rl_dinodes));
        }
        /* Dump every block reservation attached to this rgrp */
        spin_lock(&rgd->rd_rsspin);
        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                dump_rs(seq, trs);
        }
        spin_unlock(&rgd->rd_rsspin);
        return 0;
}
2222
/* Dump the rgrp's state and fence it off (GFS2_RDF_ERROR) so no further
 * allocations are attempted from it until the filesystem is unmounted. */
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
                (unsigned long long)rgd->rd_addr);
        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
        gfs2_rgrp_dump(NULL, rgd->rd_gl);
        rgd->rd_flags |= GFS2_RDF_ERROR;
}
2232
2233/**
2234 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2235 * @ip: The inode we have just allocated blocks for
2236 * @rbm: The start of the allocated blocks
2237 * @len: The extent length
2238 *
2239 * Adjusts a reservation after an allocation has taken place. If the
2240 * reservation does not match the allocation, or if it is now empty
2241 * then it is removed.
2242 */
2243
static void gfs2_adjust_reservation(struct gfs2_inode *ip,
                                    const struct gfs2_rbm *rbm, unsigned len)
{
        struct gfs2_blkreserv *rs = &ip->i_res;
        struct gfs2_rgrpd *rgd = rbm->rgd;
        unsigned rlen;
        u64 block;
        int ret;

        spin_lock(&rgd->rd_rsspin);
        if (gfs2_rs_active(rs)) {
                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
                        /* Allocation started exactly at the reservation:
                         * consume the allocated blocks and advance the
                         * reservation past them. */
                        block = gfs2_rbm_to_block(rbm);
                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
                        rlen = min(rs->rs_free, len);
                        rs->rs_free -= rlen;
                        rgd->rd_reserved -= rlen;
                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
                        /* Keep the reservation if blocks remain and the new
                         * start still lies within the rgrp */
                        if (rs->rs_free && !ret)
                                goto out;
                        /* We used up our block reservation, so we should
                           reserve more blocks next time. */
                        atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
                }
                /* Reservation mismatched or exhausted: remove it */
                __rs_deltree(rs);
        }
out:
        spin_unlock(&rgd->rd_rsspin);
}
2273
2274/**
2275 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2276 * @ip: the inode to allocate the block for
2277 * @bn: Used to return the starting block number
2278 * @nblocks: requested number of blocks/extent length (value/result)
2279 * @dinode: 1 if we're allocating a dinode block, else 0
2280 * @generation: the generation number of the inode
2281 *
2282 * Returns: 0 or error
2283 */
2284
2285int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2286                      bool dinode, u64 *generation)
2287{
2288        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2289        struct buffer_head *dibh;
2290        struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2291        unsigned int ndata;
2292        u64 goal;
2293        u64 block; /* block, within the file system scope */
2294        int error;
2295
2296        if (gfs2_rs_active(&ip->i_res))
2297                goal = gfs2_rbm_to_block(&ip->i_res.rs_rbm);
2298        else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
2299                goal = ip->i_goal;
2300        else
2301                goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
2302
2303        gfs2_rbm_from_block(&rbm, goal);
2304        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2305
2306        if (error == -ENOSPC) {
2307                gfs2_rbm_from_block(&rbm, goal);
2308                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2309                                      NULL);
2310        }
2311
2312        /* Since all blocks are reserved in advance, this shouldn't happen */
2313        if (error) {
2314                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d, fail_pt=%d\n",
2315                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2316                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2317                        rbm.rgd->rd_extfail_pt);
2318                goto rgrp_error;
2319        }
2320
2321        gfs2_alloc_extent(&rbm, dinode, nblocks);
2322        block = gfs2_rbm_to_block(&rbm);
2323        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2324        if (gfs2_rs_active(&ip->i_res))
2325                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2326        ndata = *nblocks;
2327        if (dinode)
2328                ndata--;
2329
2330        if (!dinode) {
2331                ip->i_goal = block + ndata - 1;
2332                error = gfs2_meta_inode_buffer(ip, &dibh);
2333                if (error == 0) {
2334                        struct gfs2_dinode *di =
2335                                (struct gfs2_dinode *)dibh->b_data;
2336                        gfs2_trans_add_meta(ip->i_gl, dibh);
2337                        di->di_goal_meta = di->di_goal_data =
2338                                cpu_to_be64(ip->i_goal);
2339                        brelse(dibh);
2340                }
2341        }
2342        if (rbm.rgd->rd_free < *nblocks) {
2343                printk(KERN_WARNING "nblocks=%u\n", *nblocks);
2344                goto rgrp_error;
2345        }
2346
2347        rbm.rgd->rd_free -= *nblocks;
2348        if (dinode) {
2349                rbm.rgd->rd_dinodes++;
2350                *generation = rbm.rgd->rd_igeneration++;
2351                if (*generation == 0)
2352                        *generation = rbm.rgd->rd_igeneration++;
2353        }
2354
2355        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2356        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2357
2358        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2359        if (dinode)
2360                gfs2_trans_add_unrevoke(sdp, block, 1);
2361
2362        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2363
2364        rbm.rgd->rd_free_clone -= *nblocks;
2365        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2366                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2367        *bn = block;
2368        return 0;
2369
2370rgrp_error:
2371        gfs2_rgrp_error(rbm.rgd);
2372        return -EIO;
2373}
2374
2375/**
2376 * __gfs2_free_blocks - free a contiguous run of block(s)
2377 * @ip: the inode these blocks are being freed from
2378 * @bstart: first block of a run of contiguous blocks
2379 * @blen: the length of the block run
2380 * @meta: 1 if the blocks represent metadata
2381 *
2382 */
2383
2384void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2385{
2386        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2387        struct gfs2_rgrpd *rgd;
2388
2389        rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2390        if (!rgd)
2391                return;
2392        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2393        rgd->rd_free += blen;
2394        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2395        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2396        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2397
2398        /* Directories keep their data in the metadata address space */
2399        if (meta || ip->i_depth)
2400                gfs2_meta_wipe(ip, bstart, blen);
2401}
2402
2403/**
2404 * gfs2_free_meta - free a contiguous run of data block(s)
2405 * @ip: the inode these blocks are being freed from
2406 * @bstart: first block of a run of contiguous blocks
2407 * @blen: the length of the block run
2408 *
2409 */
2410
2411void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2412{
2413        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2414
2415        __gfs2_free_blocks(ip, bstart, blen, 1);
2416        gfs2_statfs_change(sdp, 0, +blen, 0);
2417        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2418}
2419
2420void gfs2_unlink_di(struct inode *inode)
2421{
2422        struct gfs2_inode *ip = GFS2_I(inode);
2423        struct gfs2_sbd *sdp = GFS2_SB(inode);
2424        struct gfs2_rgrpd *rgd;
2425        u64 blkno = ip->i_no_addr;
2426
2427        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2428        if (!rgd)
2429                return;
2430        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2431        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2432        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2433        update_rgrp_lvb_unlinked(rgd, 1);
2434}
2435
2436static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2437{
2438        struct gfs2_sbd *sdp = rgd->rd_sbd;
2439        struct gfs2_rgrpd *tmp_rgd;
2440
2441        tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2442        if (!tmp_rgd)
2443                return;
2444        gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2445
2446        if (!rgd->rd_dinodes)
2447                gfs2_consist_rgrpd(rgd);
2448        rgd->rd_dinodes--;
2449        rgd->rd_free++;
2450
2451        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2452        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2453        update_rgrp_lvb_unlinked(rgd, -1);
2454
2455        gfs2_statfs_change(sdp, 0, +1, -1);
2456}
2457
2458
2459void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2460{
2461        gfs2_free_uninit_di(rgd, ip->i_no_addr);
2462        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2463        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2464        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2465}
2466
2467/**
2468 * gfs2_check_blk_type - Check the type of a block
2469 * @sdp: The superblock
2470 * @no_addr: The block number to check
2471 * @type: The block type we are looking for
2472 *
2473 * Returns: 0 if the block type matches the expected type
2474 *          -ESTALE if it doesn't match
2475 *          or -ve errno if something went wrong while checking
2476 */
2477
2478int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2479{
2480        struct gfs2_rgrpd *rgd;
2481        struct gfs2_holder rgd_gh;
2482        int error = -EINVAL;
2483
2484        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2485        if (!rgd)
2486                goto fail;
2487
2488        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2489        if (error)
2490                goto fail;
2491
2492        if (gfs2_get_block_type(rgd, no_addr) != type)
2493                error = -ESTALE;
2494
2495        gfs2_glock_dq_uninit(&rgd_gh);
2496fail:
2497        return error;
2498}
2499
2500/**
2501 * gfs2_rlist_add - add a RG to a list of RGs
2502 * @ip: the inode
2503 * @rlist: the list of resource groups
2504 * @block: the block
2505 *
2506 * Figure out what RG a block belongs to and add that RG to the list
2507 *
2508 * FIXME: Don't use NOFAIL
2509 *
2510 */
2511
2512void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2513                    u64 block)
2514{
2515        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2516        struct gfs2_rgrpd *rgd;
2517        struct gfs2_rgrpd **tmp;
2518        unsigned int new_space;
2519        unsigned int x;
2520
2521        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2522                return;
2523
2524        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2525                rgd = ip->i_rgd;
2526        else
2527                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2528        if (!rgd) {
2529                fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2530                return;
2531        }
2532        ip->i_rgd = rgd;
2533
2534        for (x = 0; x < rlist->rl_rgrps; x++)
2535                if (rlist->rl_rgd[x] == rgd)
2536                        return;
2537
2538        if (rlist->rl_rgrps == rlist->rl_space) {
2539                new_space = rlist->rl_space + 10;
2540
2541                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2542                              GFP_NOFS | __GFP_NOFAIL);
2543
2544                if (rlist->rl_rgd) {
2545                        memcpy(tmp, rlist->rl_rgd,
2546                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2547                        kfree(rlist->rl_rgd);
2548                }
2549
2550                rlist->rl_space = new_space;
2551                rlist->rl_rgd = tmp;
2552        }
2553
2554        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2555}
2556
2557/**
2558 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2559 *      and initialize an array of glock holders for them
2560 * @rlist: the list of resource groups
2561 * @state: the lock state to acquire the RG lock in
2562 *
2563 * FIXME: Don't use NOFAIL
2564 *
2565 */
2566
2567void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2568{
2569        unsigned int x;
2570
2571        rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder),
2572                                GFP_NOFS | __GFP_NOFAIL);
2573        for (x = 0; x < rlist->rl_rgrps; x++)
2574                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2575                                state, 0,
2576                                &rlist->rl_ghs[x]);
2577}
2578
2579/**
2580 * gfs2_rlist_free - free a resource group list
2581 * @list: the list of resource groups
2582 *
2583 */
2584
2585void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2586{
2587        unsigned int x;
2588
2589        kfree(rlist->rl_rgd);
2590
2591        if (rlist->rl_ghs) {
2592                for (x = 0; x < rlist->rl_rgrps; x++)
2593                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2594                kfree(rlist->rl_ghs);
2595                rlist->rl_ghs = NULL;
2596        }
2597}
2598
2599