linux/fs/gfs2/rgrp.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/slab.h>
  13#include <linux/spinlock.h>
  14#include <linux/completion.h>
  15#include <linux/buffer_head.h>
  16#include <linux/fs.h>
  17#include <linux/gfs2_ondisk.h>
  18#include <linux/prefetch.h>
  19#include <linux/blkdev.h>
  20#include <linux/rbtree.h>
  21#include <linux/random.h>
  22
  23#include "gfs2.h"
  24#include "incore.h"
  25#include "glock.h"
  26#include "glops.h"
  27#include "lops.h"
  28#include "meta_io.h"
  29#include "quota.h"
  30#include "rgrp.h"
  31#include "super.h"
  32#include "trans.h"
  33#include "util.h"
  34#include "log.h"
  35#include "inode.h"
  36#include "trace_gfs2.h"
  37
  38#define BFITNOENT ((u32)~0)
  39#define NO_BLOCK ((u64)~0)
  40
  41#if BITS_PER_LONG == 32
  42#define LBITMASK   (0x55555555UL)
  43#define LBITSKIP55 (0x55555555UL)
  44#define LBITSKIP00 (0x00000000UL)
  45#else
  46#define LBITMASK   (0x5555555555555555UL)
  47#define LBITSKIP55 (0x5555555555555555UL)
  48#define LBITSKIP00 (0x0000000000000000UL)
  49#endif
  50
  51/*
  52 * These routines are used by the resource group routines (rgrp.c)
  53 * to keep track of block allocation.  Each block is represented by two
  54 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  55 *
  56 * 0 = Free
  57 * 1 = Used (not metadata)
  58 * 2 = Unlinked (still in use) inode
  59 * 3 = Used (metadata)
  60 */
  61
  62struct gfs2_extent {
  63        struct gfs2_rbm rbm;
  64        u32 len;
  65};
  66
  67static const char valid_change[16] = {
  68                /* current */
  69        /* n */ 0, 1, 1, 1,
  70        /* e */ 1, 0, 0, 0,
  71        /* w */ 0, 0, 0, 1,
  72                1, 0, 0, 0
  73};
  74
  75static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  76                         const struct gfs2_inode *ip, bool nowrap,
  77                         const struct gfs2_alloc_parms *ap);
  78
  79
  80/**
  81 * gfs2_setbit - Set a bit in the bitmaps
  82 * @rbm: The position of the bit to set
  83 * @do_clone: Also set the clone bitmap, if it exists
  84 * @new_state: the new state of the block
  85 *
  86 */
  87
  88static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
  89                               unsigned char new_state)
  90{
  91        unsigned char *byte1, *byte2, *end, cur_state;
  92        struct gfs2_bitmap *bi = rbm_bi(rbm);
  93        unsigned int buflen = bi->bi_len;
  94        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
  95
  96        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
  97        end = bi->bi_bh->b_data + bi->bi_offset + buflen;
  98
  99        BUG_ON(byte1 >= end);
 100
 101        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 102
 103        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
 104                pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
 105                        rbm->offset, cur_state, new_state);
 106                pr_warn("rgrp=0x%llx bi_start=0x%x\n",
 107                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
 108                pr_warn("bi_offset=0x%x bi_len=0x%x\n",
 109                        bi->bi_offset, bi->bi_len);
 110                dump_stack();
 111                gfs2_consist_rgrpd(rbm->rgd);
 112                return;
 113        }
 114        *byte1 ^= (cur_state ^ new_state) << bit;
 115
 116        if (do_clone && bi->bi_clone) {
 117                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
 118                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 119                *byte2 ^= (cur_state ^ new_state) << bit;
 120        }
 121}
 122
 123/**
 124 * gfs2_testbit - test a bit in the bitmaps
 125 * @rbm: The bit to test
 126 *
 127 * Returns: The two bit block state of the requested bit
 128 */
 129
 130static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 131{
 132        struct gfs2_bitmap *bi = rbm_bi(rbm);
 133        const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
 134        const u8 *byte;
 135        unsigned int bit;
 136
 137        byte = buffer + (rbm->offset / GFS2_NBBY);
 138        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 139
 140        return (*byte >> bit) & GFS2_BIT_MASK;
 141}
 142
 143/**
 144 * gfs2_bit_search
 145 * @ptr: Pointer to bitmap data
 146 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 147 * @state: The state we are searching for
 148 *
 149 * We xor the bitmap data with a patter which is the bitwise opposite
 150 * of what we are looking for, this gives rise to a pattern of ones
 151 * wherever there is a match. Since we have two bits per entry, we
 152 * take this pattern, shift it down by one place and then and it with
 153 * the original. All the even bit positions (0,2,4, etc) then represent
 154 * successful matches, so we mask with 0x55555..... to remove the unwanted
 155 * odd bit positions.
 156 *
 157 * This allows searching of a whole u64 at once (32 blocks) with a
 158 * single test (on 64 bit arches).
 159 */
 160
 161static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 162{
 163        u64 tmp;
 164        static const u64 search[] = {
 165                [0] = 0xffffffffffffffffULL,
 166                [1] = 0xaaaaaaaaaaaaaaaaULL,
 167                [2] = 0x5555555555555555ULL,
 168                [3] = 0x0000000000000000ULL,
 169        };
 170        tmp = le64_to_cpu(*ptr) ^ search[state];
 171        tmp &= (tmp >> 1);
 172        tmp &= mask;
 173        return tmp;
 174}
 175
 176/**
 177 * rs_cmp - multi-block reservation range compare
 178 * @blk: absolute file system block number of the new reservation
 179 * @len: number of blocks in the new reservation
 180 * @rs: existing reservation to compare against
 181 *
 182 * returns: 1 if the block range is beyond the reach of the reservation
 183 *         -1 if the block range is before the start of the reservation
 184 *          0 if the block range overlaps with the reservation
 185 */
 186static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 187{
 188        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 189
 190        if (blk >= startblk + rs->rs_free)
 191                return 1;
 192        if (blk + len - 1 < startblk)
 193                return -1;
 194        return 0;
 195}
 196
 197/**
 198 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 199 *       a block in a given allocation state.
 200 * @buf: the buffer that holds the bitmaps
 201 * @len: the length (in bytes) of the buffer
 202 * @goal: start search at this block's bit-pair (within @buffer)
 203 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 204 *
 205 * Scope of @goal and returned block number is only within this bitmap buffer,
 206 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 207 * beginning of a bitmap block buffer, skipping any header structures, but
 208 * headers are always a multiple of 64 bits long so that the buffer is
 209 * always aligned to a 64 bit boundary.
 210 *
 211 * The size of the buffer is in bytes, but is it assumed that it is
 212 * always ok to read a complete multiple of 64 bits at the end
 213 * of the block in case the end is no aligned to a natural boundary.
 214 *
 215 * Return: the block number (bitmap buffer scope) that was found
 216 */
 217
 218static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 219                       u32 goal, u8 state)
 220{
 221        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 222        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 223        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 224        u64 tmp;
 225        u64 mask = 0x5555555555555555ULL;
 226        u32 bit;
 227
 228        /* Mask off bits we don't care about at the start of the search */
 229        mask <<= spoint;
 230        tmp = gfs2_bit_search(ptr, mask, state);
 231        ptr++;
 232        while(tmp == 0 && ptr < end) {
 233                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 234                ptr++;
 235        }
 236        /* Mask off any bits which are more than len bytes from the start */
 237        if (ptr == end && (len & (sizeof(u64) - 1)))
 238                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 239        /* Didn't find anything, so return */
 240        if (tmp == 0)
 241                return BFITNOENT;
 242        ptr--;
 243        bit = __ffs64(tmp);
 244        bit /= 2;       /* two bits per entry in the bitmap */
 245        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 246}
 247
 248/**
 249 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 250 * @rbm: The rbm with rgd already set correctly
 251 * @block: The block number (filesystem relative)
 252 *
 253 * This sets the bi and offset members of an rbm based on a
 254 * resource group and a filesystem relative block number. The
 255 * resource group must be set in the rbm on entry, the bi and
 256 * offset members will be set by this function.
 257 *
 258 * Returns: 0 on success, or an error code
 259 */
 260
 261static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 262{
 263        u64 rblock = block - rbm->rgd->rd_data0;
 264
 265        if (WARN_ON_ONCE(rblock > UINT_MAX))
 266                return -EINVAL;
 267        if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
 268                return -E2BIG;
 269
 270        rbm->bii = 0;
 271        rbm->offset = (u32)(rblock);
 272        /* Check if the block is within the first block */
 273        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
 274                return 0;
 275
 276        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
 277        rbm->offset += (sizeof(struct gfs2_rgrp) -
 278                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
 279        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 280        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 281        return 0;
 282}
 283
 284/**
 285 * gfs2_rbm_incr - increment an rbm structure
 286 * @rbm: The rbm with rgd already set correctly
 287 *
 288 * This function takes an existing rbm structure and increments it to the next
 289 * viable block offset.
 290 *
 291 * Returns: If incrementing the offset would cause the rbm to go past the
 292 *          end of the rgrp, true is returned, otherwise false.
 293 *
 294 */
 295
 296static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
 297{
 298        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
 299                rbm->offset++;
 300                return false;
 301        }
 302        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
 303                return true;
 304
 305        rbm->offset = 0;
 306        rbm->bii++;
 307        return false;
 308}
 309
 310/**
 311 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 312 * @rbm: Position to search (value/result)
 313 * @n_unaligned: Number of unaligned blocks to check
 314 * @len: Decremented for each block found (terminate on zero)
 315 *
 316 * Returns: true if a non-free block is encountered
 317 */
 318
 319static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 320{
 321        u32 n;
 322        u8 res;
 323
 324        for (n = 0; n < n_unaligned; n++) {
 325                res = gfs2_testbit(rbm);
 326                if (res != GFS2_BLKST_FREE)
 327                        return true;
 328                (*len)--;
 329                if (*len == 0)
 330                        return true;
 331                if (gfs2_rbm_incr(rbm))
 332                        return true;
 333        }
 334
 335        return false;
 336}
 337
 338/**
 339 * gfs2_free_extlen - Return extent length of free blocks
 340 * @rrbm: Starting position
 341 * @len: Max length to check
 342 *
 343 * Starting at the block specified by the rbm, see how many free blocks
 344 * there are, not reading more than len blocks ahead. This can be done
 345 * using memchr_inv when the blocks are byte aligned, but has to be done
 346 * on a block by block basis in case of unaligned blocks. Also this
 347 * function can cope with bitmap boundaries (although it must stop on
 348 * a resource group boundary)
 349 *
 350 * Returns: Number of free blocks in the extent
 351 */
 352
 353static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 354{
 355        struct gfs2_rbm rbm = *rrbm;
 356        u32 n_unaligned = rbm.offset & 3;
 357        u32 size = len;
 358        u32 bytes;
 359        u32 chunk_size;
 360        u8 *ptr, *start, *end;
 361        u64 block;
 362        struct gfs2_bitmap *bi;
 363
 364        if (n_unaligned &&
 365            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
 366                goto out;
 367
 368        n_unaligned = len & 3;
 369        /* Start is now byte aligned */
 370        while (len > 3) {
 371                bi = rbm_bi(&rbm);
 372                start = bi->bi_bh->b_data;
 373                if (bi->bi_clone)
 374                        start = bi->bi_clone;
 375                end = start + bi->bi_bh->b_size;
 376                start += bi->bi_offset;
 377                BUG_ON(rbm.offset & 3);
 378                start += (rbm.offset / GFS2_NBBY);
 379                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
 380                ptr = memchr_inv(start, 0, bytes);
 381                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
 382                chunk_size *= GFS2_NBBY;
 383                BUG_ON(len < chunk_size);
 384                len -= chunk_size;
 385                block = gfs2_rbm_to_block(&rbm);
 386                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
 387                        n_unaligned = 0;
 388                        break;
 389                }
 390                if (ptr) {
 391                        n_unaligned = 3;
 392                        break;
 393                }
 394                n_unaligned = len & 3;
 395        }
 396
 397        /* Deal with any bits left over at the end */
 398        if (n_unaligned)
 399                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
 400out:
 401        return size - len;
 402}
 403
 404/**
 405 * gfs2_bitcount - count the number of bits in a certain state
 406 * @rgd: the resource group descriptor
 407 * @buffer: the buffer that holds the bitmaps
 408 * @buflen: the length (in bytes) of the buffer
 409 * @state: the state of the block we're looking for
 410 *
 411 * Returns: The number of bits
 412 */
 413
 414static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 415                         unsigned int buflen, u8 state)
 416{
 417        const u8 *byte = buffer;
 418        const u8 *end = buffer + buflen;
 419        const u8 state1 = state << 2;
 420        const u8 state2 = state << 4;
 421        const u8 state3 = state << 6;
 422        u32 count = 0;
 423
 424        for (; byte < end; byte++) {
 425                if (((*byte) & 0x03) == state)
 426                        count++;
 427                if (((*byte) & 0x0C) == state1)
 428                        count++;
 429                if (((*byte) & 0x30) == state2)
 430                        count++;
 431                if (((*byte) & 0xC0) == state3)
 432                        count++;
 433        }
 434
 435        return count;
 436}
 437
 438/**
 439 * gfs2_rgrp_verify - Verify that a resource group is consistent
 440 * @rgd: the rgrp
 441 *
 442 */
 443
 444void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 445{
 446        struct gfs2_sbd *sdp = rgd->rd_sbd;
 447        struct gfs2_bitmap *bi = NULL;
 448        u32 length = rgd->rd_length;
 449        u32 count[4], tmp;
 450        int buf, x;
 451
 452        memset(count, 0, 4 * sizeof(u32));
 453
 454        /* Count # blocks in each of 4 possible allocation states */
 455        for (buf = 0; buf < length; buf++) {
 456                bi = rgd->rd_bits + buf;
 457                for (x = 0; x < 4; x++)
 458                        count[x] += gfs2_bitcount(rgd,
 459                                                  bi->bi_bh->b_data +
 460                                                  bi->bi_offset,
 461                                                  bi->bi_len, x);
 462        }
 463
 464        if (count[0] != rgd->rd_free) {
 465                if (gfs2_consist_rgrpd(rgd))
 466                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 467                               count[0], rgd->rd_free);
 468                return;
 469        }
 470
 471        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 472        if (count[1] != tmp) {
 473                if (gfs2_consist_rgrpd(rgd))
 474                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 475                               count[1], tmp);
 476                return;
 477        }
 478
 479        if (count[2] + count[3] != rgd->rd_dinodes) {
 480                if (gfs2_consist_rgrpd(rgd))
 481                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 482                               count[2] + count[3], rgd->rd_dinodes);
 483                return;
 484        }
 485}
 486
 487static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 488{
 489        u64 first = rgd->rd_data0;
 490        u64 last = first + rgd->rd_data;
 491        return first <= block && block < last;
 492}
 493
 494/**
 495 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 496 * @sdp: The GFS2 superblock
 497 * @blk: The data block number
 498 * @exact: True if this needs to be an exact match
 499 *
 500 * Returns: The resource group, or NULL if not found
 501 */
 502
 503struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 504{
 505        struct rb_node *n, *next;
 506        struct gfs2_rgrpd *cur;
 507
 508        spin_lock(&sdp->sd_rindex_spin);
 509        n = sdp->sd_rindex_tree.rb_node;
 510        while (n) {
 511                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 512                next = NULL;
 513                if (blk < cur->rd_addr)
 514                        next = n->rb_left;
 515                else if (blk >= cur->rd_data0 + cur->rd_data)
 516                        next = n->rb_right;
 517                if (next == NULL) {
 518                        spin_unlock(&sdp->sd_rindex_spin);
 519                        if (exact) {
 520                                if (blk < cur->rd_addr)
 521                                        return NULL;
 522                                if (blk >= cur->rd_data0 + cur->rd_data)
 523                                        return NULL;
 524                        }
 525                        return cur;
 526                }
 527                n = next;
 528        }
 529        spin_unlock(&sdp->sd_rindex_spin);
 530
 531        return NULL;
 532}
 533
 534/**
 535 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 536 * @sdp: The GFS2 superblock
 537 *
 538 * Returns: The first rgrp in the filesystem
 539 */
 540
 541struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 542{
 543        const struct rb_node *n;
 544        struct gfs2_rgrpd *rgd;
 545
 546        spin_lock(&sdp->sd_rindex_spin);
 547        n = rb_first(&sdp->sd_rindex_tree);
 548        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 549        spin_unlock(&sdp->sd_rindex_spin);
 550
 551        return rgd;
 552}
 553
 554/**
 555 * gfs2_rgrpd_get_next - get the next RG
 556 * @rgd: the resource group descriptor
 557 *
 558 * Returns: The next rgrp
 559 */
 560
 561struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 562{
 563        struct gfs2_sbd *sdp = rgd->rd_sbd;
 564        const struct rb_node *n;
 565
 566        spin_lock(&sdp->sd_rindex_spin);
 567        n = rb_next(&rgd->rd_node);
 568        if (n == NULL)
 569                n = rb_first(&sdp->sd_rindex_tree);
 570
 571        if (unlikely(&rgd->rd_node == n)) {
 572                spin_unlock(&sdp->sd_rindex_spin);
 573                return NULL;
 574        }
 575        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 576        spin_unlock(&sdp->sd_rindex_spin);
 577        return rgd;
 578}
 579
 580void check_and_update_goal(struct gfs2_inode *ip)
 581{
 582        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 583        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 584                ip->i_goal = ip->i_no_addr;
 585}
 586
 587void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 588{
 589        int x;
 590
 591        for (x = 0; x < rgd->rd_length; x++) {
 592                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 593                kfree(bi->bi_clone);
 594                bi->bi_clone = NULL;
 595        }
 596}
 597
 598/**
 599 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 600 *                 plus a quota allocations data structure, if necessary
 601 * @ip: the inode for this reservation
 602 */
 603int gfs2_rsqa_alloc(struct gfs2_inode *ip)
 604{
 605        return gfs2_qa_alloc(ip);
 606}
 607
 608static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 609{
 610        gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
 611                       (unsigned long long)rs->rs_inum,
 612                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 613                       rs->rs_rbm.offset, rs->rs_free);
 614}
 615
 616/**
 617 * __rs_deltree - remove a multi-block reservation from the rgd tree
 618 * @rs: The reservation to remove
 619 *
 620 */
 621static void __rs_deltree(struct gfs2_blkreserv *rs)
 622{
 623        struct gfs2_rgrpd *rgd;
 624
 625        if (!gfs2_rs_active(rs))
 626                return;
 627
 628        rgd = rs->rs_rbm.rgd;
 629        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 630        rb_erase(&rs->rs_node, &rgd->rd_rstree);
 631        RB_CLEAR_NODE(&rs->rs_node);
 632
 633        if (rs->rs_free) {
 634                struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
 635
 636                /* return reserved blocks to the rgrp */
 637                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 638                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
 639                /* The rgrp extent failure point is likely not to increase;
 640                   it will only do so if the freed blocks are somehow
 641                   contiguous with a span of free blocks that follows. Still,
 642                   it will force the number to be recalculated later. */
 643                rgd->rd_extfail_pt += rs->rs_free;
 644                rs->rs_free = 0;
 645                clear_bit(GBF_FULL, &bi->bi_flags);
 646        }
 647}
 648
 649/**
 650 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 651 * @rs: The reservation to remove
 652 *
 653 */
 654void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 655{
 656        struct gfs2_rgrpd *rgd;
 657
 658        rgd = rs->rs_rbm.rgd;
 659        if (rgd) {
 660                spin_lock(&rgd->rd_rsspin);
 661                __rs_deltree(rs);
 662                spin_unlock(&rgd->rd_rsspin);
 663        }
 664}
 665
 666/**
 667 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 668 * @ip: The inode for this reservation
 669 * @wcount: The inode's write count, or NULL
 670 *
 671 */
 672void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
 673{
 674        down_write(&ip->i_rw_mutex);
 675        if ((wcount == NULL) || (atomic_read(wcount) <= 1)) {
 676                gfs2_rs_deltree(&ip->i_res);
 677                BUG_ON(ip->i_res.rs_free);
 678        }
 679        up_write(&ip->i_rw_mutex);
 680        gfs2_qa_delete(ip, wcount);
 681}
 682
 683/**
 684 * return_all_reservations - return all reserved blocks back to the rgrp.
 685 * @rgd: the rgrp that needs its space back
 686 *
 687 * We previously reserved a bunch of blocks for allocation. Now we need to
 688 * give them back. This leave the reservation structures in tact, but removes
 689 * all of their corresponding "no-fly zones".
 690 */
 691static void return_all_reservations(struct gfs2_rgrpd *rgd)
 692{
 693        struct rb_node *n;
 694        struct gfs2_blkreserv *rs;
 695
 696        spin_lock(&rgd->rd_rsspin);
 697        while ((n = rb_first(&rgd->rd_rstree))) {
 698                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 699                __rs_deltree(rs);
 700        }
 701        spin_unlock(&rgd->rd_rsspin);
 702}
 703
 704void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 705{
 706        struct rb_node *n;
 707        struct gfs2_rgrpd *rgd;
 708        struct gfs2_glock *gl;
 709
 710        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 711                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 712                gl = rgd->rd_gl;
 713
 714                rb_erase(n, &sdp->sd_rindex_tree);
 715
 716                if (gl) {
 717                        spin_lock(&gl->gl_lockref.lock);
 718                        gl->gl_object = NULL;
 719                        spin_unlock(&gl->gl_lockref.lock);
 720                        gfs2_glock_add_to_lru(gl);
 721                        gfs2_glock_put(gl);
 722                }
 723
 724                gfs2_free_clones(rgd);
 725                kfree(rgd->rd_bits);
 726                return_all_reservations(rgd);
 727                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 728        }
 729}
 730
 731static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 732{
 733        pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 734        pr_info("ri_length = %u\n", rgd->rd_length);
 735        pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 736        pr_info("ri_data = %u\n", rgd->rd_data);
 737        pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
 738}
 739
 740/**
 741 * gfs2_compute_bitstructs - Compute the bitmap sizes
 742 * @rgd: The resource group descriptor
 743 *
 744 * Calculates bitmap descriptors, one for each block that contains bitmap data
 745 *
 746 * Returns: errno
 747 */
 748
 749static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 750{
 751        struct gfs2_sbd *sdp = rgd->rd_sbd;
 752        struct gfs2_bitmap *bi;
 753        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 754        u32 bytes_left, bytes;
 755        int x;
 756
 757        if (!length)
 758                return -EINVAL;
 759
 760        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 761        if (!rgd->rd_bits)
 762                return -ENOMEM;
 763
 764        bytes_left = rgd->rd_bitbytes;
 765
 766        for (x = 0; x < length; x++) {
 767                bi = rgd->rd_bits + x;
 768
 769                bi->bi_flags = 0;
 770                /* small rgrp; bitmap stored completely in header block */
 771                if (length == 1) {
 772                        bytes = bytes_left;
 773                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 774                        bi->bi_start = 0;
 775                        bi->bi_len = bytes;
 776                        bi->bi_blocks = bytes * GFS2_NBBY;
 777                /* header block */
 778                } else if (x == 0) {
 779                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 780                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 781                        bi->bi_start = 0;
 782                        bi->bi_len = bytes;
 783                        bi->bi_blocks = bytes * GFS2_NBBY;
 784                /* last block */
 785                } else if (x + 1 == length) {
 786                        bytes = bytes_left;
 787                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 788                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 789                        bi->bi_len = bytes;
 790                        bi->bi_blocks = bytes * GFS2_NBBY;
 791                /* other blocks */
 792                } else {
 793                        bytes = sdp->sd_sb.sb_bsize -
 794                                sizeof(struct gfs2_meta_header);
 795                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 796                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 797                        bi->bi_len = bytes;
 798                        bi->bi_blocks = bytes * GFS2_NBBY;
 799                }
 800
 801                bytes_left -= bytes;
 802        }
 803
 804        if (bytes_left) {
 805                gfs2_consist_rgrpd(rgd);
 806                return -EIO;
 807        }
 808        bi = rgd->rd_bits + (length - 1);
 809        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 810                if (gfs2_consist_rgrpd(rgd)) {
 811                        gfs2_rindex_print(rgd);
 812                        fs_err(sdp, "start=%u len=%u offset=%u\n",
 813                               bi->bi_start, bi->bi_len, bi->bi_offset);
 814                }
 815                return -EIO;
 816        }
 817
 818        return 0;
 819}
 820
 821/**
 822 * gfs2_ri_total - Total up the file system space, according to the rindex.
 823 * @sdp: the filesystem
 824 *
 825 */
 826u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 827{
 828        u64 total_data = 0;     
 829        struct inode *inode = sdp->sd_rindex;
 830        struct gfs2_inode *ip = GFS2_I(inode);
 831        char buf[sizeof(struct gfs2_rindex)];
 832        int error, rgrps;
 833
 834        for (rgrps = 0;; rgrps++) {
 835                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 836
 837                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 838                        break;
 839                error = gfs2_internal_read(ip, buf, &pos,
 840                                           sizeof(struct gfs2_rindex));
 841                if (error != sizeof(struct gfs2_rindex))
 842                        break;
 843                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 844        }
 845        return total_data;
 846}
 847
 848static int rgd_insert(struct gfs2_rgrpd *rgd)
 849{
 850        struct gfs2_sbd *sdp = rgd->rd_sbd;
 851        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 852
 853        /* Figure out where to put new node */
 854        while (*newn) {
 855                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 856                                                  rd_node);
 857
 858                parent = *newn;
 859                if (rgd->rd_addr < cur->rd_addr)
 860                        newn = &((*newn)->rb_left);
 861                else if (rgd->rd_addr > cur->rd_addr)
 862                        newn = &((*newn)->rb_right);
 863                else
 864                        return -EEXIST;
 865        }
 866
 867        rb_link_node(&rgd->rd_node, parent, newn);
 868        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 869        sdp->sd_rgrps++;
 870        return 0;
 871}
 872
 873/**
 874 * read_rindex_entry - Pull in a new resource index entry from the disk
 875 * @ip: Pointer to the rindex inode
 876 *
 877 * Returns: 0 on success, > 0 on EOF, error code otherwise
 878 */
 879
 880static int read_rindex_entry(struct gfs2_inode *ip)
 881{
 882        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 883        const unsigned bsize = sdp->sd_sb.sb_bsize;
 884        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 885        struct gfs2_rindex buf;
 886        int error;
 887        struct gfs2_rgrpd *rgd;
 888
 889        if (pos >= i_size_read(&ip->i_inode))
 890                return 1;
 891
 892        error = gfs2_internal_read(ip, (char *)&buf, &pos,
 893                                   sizeof(struct gfs2_rindex));
 894
 895        if (error != sizeof(struct gfs2_rindex))
 896                return (error == 0) ? 1 : error;
 897
 898        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 899        error = -ENOMEM;
 900        if (!rgd)
 901                return error;
 902
 903        rgd->rd_sbd = sdp;
 904        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
 905        rgd->rd_length = be32_to_cpu(buf.ri_length);
 906        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 907        rgd->rd_data = be32_to_cpu(buf.ri_data);
 908        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 909        spin_lock_init(&rgd->rd_rsspin);
 910
 911        error = compute_bitstructs(rgd);
 912        if (error)
 913                goto fail;
 914
 915        error = gfs2_glock_get(sdp, rgd->rd_addr,
 916                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 917        if (error)
 918                goto fail;
 919
 920        rgd->rd_gl->gl_object = rgd;
 921        rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
 922        rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
 923        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 924        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 925        if (rgd->rd_data > sdp->sd_max_rg_data)
 926                sdp->sd_max_rg_data = rgd->rd_data;
 927        spin_lock(&sdp->sd_rindex_spin);
 928        error = rgd_insert(rgd);
 929        spin_unlock(&sdp->sd_rindex_spin);
 930        if (!error)
 931                return 0;
 932
 933        error = 0; /* someone else read in the rgrp; free it and ignore it */
 934        gfs2_glock_put(rgd->rd_gl);
 935
 936fail:
 937        kfree(rgd->rd_bits);
 938        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 939        return error;
 940}
 941
 942/**
 943 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 944 * @sdp: the GFS2 superblock
 945 *
 946 * The purpose of this function is to select a subset of the resource groups
 947 * and mark them as PREFERRED. We do it in such a way that each node prefers
 948 * to use a unique set of rgrps to minimize glock contention.
 949 */
 950static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 951{
 952        struct gfs2_rgrpd *rgd, *first;
 953        int i;
 954
 955        /* Skip an initial number of rgrps, based on this node's journal ID.
 956           That should start each node out on its own set. */
 957        rgd = gfs2_rgrpd_get_first(sdp);
 958        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 959                rgd = gfs2_rgrpd_get_next(rgd);
 960        first = rgd;
 961
 962        do {
 963                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 964                for (i = 0; i < sdp->sd_journals; i++) {
 965                        rgd = gfs2_rgrpd_get_next(rgd);
 966                        if (!rgd || rgd == first)
 967                                break;
 968                }
 969        } while (rgd && rgd != first);
 970}
 971
 972/**
 973 * gfs2_ri_update - Pull in a new resource index from the disk
 974 * @ip: pointer to the rindex inode
 975 *
 976 * Returns: 0 on successful update, error code otherwise
 977 */
 978
 979static int gfs2_ri_update(struct gfs2_inode *ip)
 980{
 981        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 982        int error;
 983
 984        do {
 985                error = read_rindex_entry(ip);
 986        } while (error == 0);
 987
 988        if (error < 0)
 989                return error;
 990
 991        set_rgrp_preferences(sdp);
 992
 993        sdp->sd_rindex_uptodate = 1;
 994        return 0;
 995}
 996
 997/**
 998 * gfs2_rindex_update - Update the rindex if required
 999 * @sdp: The GFS2 superblock
1000 *
1001 * We grab a lock on the rindex inode to make sure that it doesn't
1002 * change whilst we are performing an operation. We keep this lock
1003 * for quite long periods of time compared to other locks. This
1004 * doesn't matter, since it is shared and it is very, very rarely
1005 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1006 *
1007 * This makes sure that we're using the latest copy of the resource index
1008 * special file, which might have been updated if someone expanded the
1009 * filesystem (via gfs2_grow utility), which adds new resource groups.
1010 *
1011 * Returns: 0 on succeess, error code otherwise
1012 */
1013
1014int gfs2_rindex_update(struct gfs2_sbd *sdp)
1015{
1016        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1017        struct gfs2_glock *gl = ip->i_gl;
1018        struct gfs2_holder ri_gh;
1019        int error = 0;
1020        int unlock_required = 0;
1021
1022        /* Read new copy from disk if we don't have the latest */
1023        if (!sdp->sd_rindex_uptodate) {
1024                if (!gfs2_glock_is_locked_by_me(gl)) {
1025                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1026                        if (error)
1027                                return error;
1028                        unlock_required = 1;
1029                }
1030                if (!sdp->sd_rindex_uptodate)
1031                        error = gfs2_ri_update(ip);
1032                if (unlock_required)
1033                        gfs2_glock_dq_uninit(&ri_gh);
1034        }
1035
1036        return error;
1037}
1038
1039static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1040{
1041        const struct gfs2_rgrp *str = buf;
1042        u32 rg_flags;
1043
1044        rg_flags = be32_to_cpu(str->rg_flags);
1045        rg_flags &= ~GFS2_RDF_MASK;
1046        rgd->rd_flags &= GFS2_RDF_MASK;
1047        rgd->rd_flags |= rg_flags;
1048        rgd->rd_free = be32_to_cpu(str->rg_free);
1049        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1050        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1051}
1052
1053static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1054{
1055        struct gfs2_rgrp *str = buf;
1056
1057        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1058        str->rg_free = cpu_to_be32(rgd->rd_free);
1059        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1060        str->__pad = cpu_to_be32(0);
1061        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1062        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1063}
1064
1065static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1066{
1067        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1068        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1069
1070        if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1071            rgl->rl_dinodes != str->rg_dinodes ||
1072            rgl->rl_igeneration != str->rg_igeneration)
1073                return 0;
1074        return 1;
1075}
1076
1077static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1078{
1079        const struct gfs2_rgrp *str = buf;
1080
1081        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1082        rgl->rl_flags = str->rg_flags;
1083        rgl->rl_free = str->rg_free;
1084        rgl->rl_dinodes = str->rg_dinodes;
1085        rgl->rl_igeneration = str->rg_igeneration;
1086        rgl->__pad = 0UL;
1087}
1088
1089static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1090{
1091        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1092        u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1093        rgl->rl_unlinked = cpu_to_be32(unlinked);
1094}
1095
1096static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1097{
1098        struct gfs2_bitmap *bi;
1099        const u32 length = rgd->rd_length;
1100        const u8 *buffer = NULL;
1101        u32 i, goal, count = 0;
1102
1103        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1104                goal = 0;
1105                buffer = bi->bi_bh->b_data + bi->bi_offset;
1106                WARN_ON(!buffer_uptodate(bi->bi_bh));
1107                while (goal < bi->bi_len * GFS2_NBBY) {
1108                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1109                                           GFS2_BLKST_UNLINKED);
1110                        if (goal == BFITNOENT)
1111                                break;
1112                        count++;
1113                        goal++;
1114                }
1115        }
1116
1117        return count;
1118}
1119
1120
1121/**
1122 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1123 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1124 *
1125 * Read in all of a Resource Group's header and bitmap blocks.
1126 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
1127 *
1128 * Returns: errno
1129 */
1130
1131static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1132{
1133        struct gfs2_sbd *sdp = rgd->rd_sbd;
1134        struct gfs2_glock *gl = rgd->rd_gl;
1135        unsigned int length = rgd->rd_length;
1136        struct gfs2_bitmap *bi;
1137        unsigned int x, y;
1138        int error;
1139
1140        if (rgd->rd_bits[0].bi_bh != NULL)
1141                return 0;
1142
1143        for (x = 0; x < length; x++) {
1144                bi = rgd->rd_bits + x;
1145                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
1146                if (error)
1147                        goto fail;
1148        }
1149
1150        for (y = length; y--;) {
1151                bi = rgd->rd_bits + y;
1152                error = gfs2_meta_wait(sdp, bi->bi_bh);
1153                if (error)
1154                        goto fail;
1155                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1156                                              GFS2_METATYPE_RG)) {
1157                        error = -EIO;
1158                        goto fail;
1159                }
1160        }
1161
1162        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1163                for (x = 0; x < length; x++)
1164                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1165                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1166                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1167                rgd->rd_free_clone = rgd->rd_free;
1168                /* max out the rgrp allocation failure point */
1169                rgd->rd_extfail_pt = rgd->rd_free;
1170        }
1171        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1172                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1173                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1174                                     rgd->rd_bits[0].bi_bh->b_data);
1175        }
1176        else if (sdp->sd_args.ar_rgrplvb) {
1177                if (!gfs2_rgrp_lvb_valid(rgd)){
1178                        gfs2_consist_rgrpd(rgd);
1179                        error = -EIO;
1180                        goto fail;
1181                }
1182                if (rgd->rd_rgl->rl_unlinked == 0)
1183                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1184        }
1185        return 0;
1186
1187fail:
1188        while (x--) {
1189                bi = rgd->rd_bits + x;
1190                brelse(bi->bi_bh);
1191                bi->bi_bh = NULL;
1192                gfs2_assert_warn(sdp, !bi->bi_clone);
1193        }
1194
1195        return error;
1196}
1197
1198static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1199{
1200        u32 rl_flags;
1201
1202        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1203                return 0;
1204
1205        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1206                return gfs2_rgrp_bh_get(rgd);
1207
1208        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1209        rl_flags &= ~GFS2_RDF_MASK;
1210        rgd->rd_flags &= GFS2_RDF_MASK;
1211        rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1212        if (rgd->rd_rgl->rl_unlinked == 0)
1213                rgd->rd_flags &= ~GFS2_RDF_CHECK;
1214        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1215        rgd->rd_free_clone = rgd->rd_free;
1216        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1217        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1218        return 0;
1219}
1220
1221int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1222{
1223        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1224        struct gfs2_sbd *sdp = rgd->rd_sbd;
1225
1226        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1227                return 0;
1228        return gfs2_rgrp_bh_get(rgd);
1229}
1230
1231/**
1232 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1233 * @rgd: The resource group
1234 *
1235 */
1236
1237void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1238{
1239        int x, length = rgd->rd_length;
1240
1241        for (x = 0; x < length; x++) {
1242                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1243                if (bi->bi_bh) {
1244                        brelse(bi->bi_bh);
1245                        bi->bi_bh = NULL;
1246                }
1247        }
1248
1249}
1250
1251/**
1252 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1253 * @gh: The glock holder for the resource group
1254 *
1255 */
1256
1257void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1258{
1259        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1260        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1261                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1262
1263        if (rgd && demote_requested)
1264                gfs2_rgrp_brelse(rgd);
1265}
1266
1267int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1268                             struct buffer_head *bh,
1269                             const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1270{
1271        struct super_block *sb = sdp->sd_vfs;
1272        u64 blk;
1273        sector_t start = 0;
1274        sector_t nr_blks = 0;
1275        int rv;
1276        unsigned int x;
1277        u32 trimmed = 0;
1278        u8 diff;
1279
1280        for (x = 0; x < bi->bi_len; x++) {
1281                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1282                clone += bi->bi_offset;
1283                clone += x;
1284                if (bh) {
1285                        const u8 *orig = bh->b_data + bi->bi_offset + x;
1286                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1287                } else {
1288                        diff = ~(*clone | (*clone >> 1));
1289                }
1290                diff &= 0x55;
1291                if (diff == 0)
1292                        continue;
1293                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1294                while(diff) {
1295                        if (diff & 1) {
1296                                if (nr_blks == 0)
1297                                        goto start_new_extent;
1298                                if ((start + nr_blks) != blk) {
1299                                        if (nr_blks >= minlen) {
1300                                                rv = sb_issue_discard(sb,
1301                                                        start, nr_blks,
1302                                                        GFP_NOFS, 0);
1303                                                if (rv)
1304                                                        goto fail;
1305                                                trimmed += nr_blks;
1306                                        }
1307                                        nr_blks = 0;
1308start_new_extent:
1309                                        start = blk;
1310                                }
1311                                nr_blks++;
1312                        }
1313                        diff >>= 2;
1314                        blk++;
1315                }
1316        }
1317        if (nr_blks >= minlen) {
1318                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1319                if (rv)
1320                        goto fail;
1321                trimmed += nr_blks;
1322        }
1323        if (ptrimmed)
1324                *ptrimmed = trimmed;
1325        return 0;
1326
1327fail:
1328        if (sdp->sd_args.ar_discard)
1329                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1330        sdp->sd_args.ar_discard = 0;
1331        return -EIO;
1332}
1333
1334/**
1335 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1336 * @filp: Any file on the filesystem
1337 * @argp: Pointer to the arguments (also used to pass result)
1338 *
1339 * Returns: 0 on success, otherwise error code
1340 */
1341
1342int gfs2_fitrim(struct file *filp, void __user *argp)
1343{
1344        struct inode *inode = file_inode(filp);
1345        struct gfs2_sbd *sdp = GFS2_SB(inode);
1346        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1347        struct buffer_head *bh;
1348        struct gfs2_rgrpd *rgd;
1349        struct gfs2_rgrpd *rgd_end;
1350        struct gfs2_holder gh;
1351        struct fstrim_range r;
1352        int ret = 0;
1353        u64 amt;
1354        u64 trimmed = 0;
1355        u64 start, end, minlen;
1356        unsigned int x;
1357        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1358
1359        if (!capable(CAP_SYS_ADMIN))
1360                return -EPERM;
1361
1362        if (!blk_queue_discard(q))
1363                return -EOPNOTSUPP;
1364
1365        if (copy_from_user(&r, argp, sizeof(r)))
1366                return -EFAULT;
1367
1368        ret = gfs2_rindex_update(sdp);
1369        if (ret)
1370                return ret;
1371
1372        start = r.start >> bs_shift;
1373        end = start + (r.len >> bs_shift);
1374        minlen = max_t(u64, r.minlen,
1375                       q->limits.discard_granularity) >> bs_shift;
1376
1377        if (end <= start || minlen > sdp->sd_max_rg_data)
1378                return -EINVAL;
1379
1380        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1381        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1382
1383        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1384            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1385                return -EINVAL; /* start is beyond the end of the fs */
1386
1387        while (1) {
1388
1389                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1390                if (ret)
1391                        goto out;
1392
1393                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1394                        /* Trim each bitmap in the rgrp */
1395                        for (x = 0; x < rgd->rd_length; x++) {
1396                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1397                                ret = gfs2_rgrp_send_discards(sdp,
1398                                                rgd->rd_data0, NULL, bi, minlen,
1399                                                &amt);
1400                                if (ret) {
1401                                        gfs2_glock_dq_uninit(&gh);
1402                                        goto out;
1403                                }
1404                                trimmed += amt;
1405                        }
1406
1407                        /* Mark rgrp as having been trimmed */
1408                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1409                        if (ret == 0) {
1410                                bh = rgd->rd_bits[0].bi_bh;
1411                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1412                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1413                                gfs2_rgrp_out(rgd, bh->b_data);
1414                                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1415                                gfs2_trans_end(sdp);
1416                        }
1417                }
1418                gfs2_glock_dq_uninit(&gh);
1419
1420                if (rgd == rgd_end)
1421                        break;
1422
1423                rgd = gfs2_rgrpd_get_next(rgd);
1424        }
1425
1426out:
1427        r.len = trimmed << bs_shift;
1428        if (copy_to_user(argp, &r, sizeof(r)))
1429                return -EFAULT;
1430
1431        return ret;
1432}
1433
1434/**
1435 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1436 * @ip: the inode structure
1437 *
1438 */
1439static void rs_insert(struct gfs2_inode *ip)
1440{
1441        struct rb_node **newn, *parent = NULL;
1442        int rc;
1443        struct gfs2_blkreserv *rs = &ip->i_res;
1444        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1445        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1446
1447        BUG_ON(gfs2_rs_active(rs));
1448
1449        spin_lock(&rgd->rd_rsspin);
1450        newn = &rgd->rd_rstree.rb_node;
1451        while (*newn) {
1452                struct gfs2_blkreserv *cur =
1453                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1454
1455                parent = *newn;
1456                rc = rs_cmp(fsblock, rs->rs_free, cur);
1457                if (rc > 0)
1458                        newn = &((*newn)->rb_right);
1459                else if (rc < 0)
1460                        newn = &((*newn)->rb_left);
1461                else {
1462                        spin_unlock(&rgd->rd_rsspin);
1463                        WARN_ON(1);
1464                        return;
1465                }
1466        }
1467
1468        rb_link_node(&rs->rs_node, parent, newn);
1469        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1470
1471        /* Do our rgrp accounting for the reservation */
1472        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1473        spin_unlock(&rgd->rd_rsspin);
1474        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1475}
1476
1477/**
1478 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1479 * @rgd: the resource group descriptor
1480 * @ip: pointer to the inode for which we're reserving blocks
1481 * @ap: the allocation parameters
1482 *
1483 */
1484
1485static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1486                           const struct gfs2_alloc_parms *ap)
1487{
1488        struct gfs2_rbm rbm = { .rgd = rgd, };
1489        u64 goal;
1490        struct gfs2_blkreserv *rs = &ip->i_res;
1491        u32 extlen;
1492        u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1493        int ret;
1494        struct inode *inode = &ip->i_inode;
1495
1496        if (S_ISDIR(inode->i_mode))
1497                extlen = 1;
1498        else {
1499                extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1500                extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1501        }
1502        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1503                return;
1504
1505        /* Find bitmap block that contains bits for goal block */
1506        if (rgrp_contains_block(rgd, ip->i_goal))
1507                goal = ip->i_goal;
1508        else
1509                goal = rgd->rd_last_alloc + rgd->rd_data0;
1510
1511        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1512                return;
1513
1514        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1515        if (ret == 0) {
1516                rs->rs_rbm = rbm;
1517                rs->rs_free = extlen;
1518                rs->rs_inum = ip->i_no_addr;
1519                rs_insert(ip);
1520        } else {
1521                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1522                        rgd->rd_last_alloc = 0;
1523        }
1524}
1525
1526/**
1527 * gfs2_next_unreserved_block - Return next block that is not reserved
1528 * @rgd: The resource group
1529 * @block: The starting block
1530 * @length: The required length
1531 * @ip: Ignore any reservations for this inode
1532 *
1533 * If the block does not appear in any reservation, then return the
1534 * block number unchanged. If it does appear in the reservation, then
1535 * keep looking through the tree of reservations in order to find the
1536 * first block number which is not reserved.
1537 */
1538
1539static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1540                                      u32 length,
1541                                      const struct gfs2_inode *ip)
1542{
1543        struct gfs2_blkreserv *rs;
1544        struct rb_node *n;
1545        int rc;
1546
1547        spin_lock(&rgd->rd_rsspin);
1548        n = rgd->rd_rstree.rb_node;
1549        while (n) {
1550                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1551                rc = rs_cmp(block, length, rs);
1552                if (rc < 0)
1553                        n = n->rb_left;
1554                else if (rc > 0)
1555                        n = n->rb_right;
1556                else
1557                        break;
1558        }
1559
1560        if (n) {
1561                while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
1562                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1563                        n = n->rb_right;
1564                        if (n == NULL)
1565                                break;
1566                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1567                }
1568        }
1569
1570        spin_unlock(&rgd->rd_rsspin);
1571        return block;
1572}
1573
1574/**
1575 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1576 * @rbm: The current position in the resource group
1577 * @ip: The inode for which we are searching for blocks
1578 * @minext: The minimum extent length
1579 * @maxext: A pointer to the maximum extent structure
1580 *
1581 * This checks the current position in the rgrp to see whether there is
1582 * a reservation covering this block. If not then this function is a
1583 * no-op. If there is, then the position is moved to the end of the
1584 * contiguous reservation(s) so that we are pointing at the first
1585 * non-reserved block.
1586 *
1587 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1588 */
1589
1590static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1591                                             const struct gfs2_inode *ip,
1592                                             u32 minext,
1593                                             struct gfs2_extent *maxext)
1594{
1595        u64 block = gfs2_rbm_to_block(rbm);
1596        u32 extlen = 1;
1597        u64 nblock;
1598        int ret;
1599
1600        /*
1601         * If we have a minimum extent length, then skip over any extent
1602         * which is less than the min extent length in size.
1603         */
1604        if (minext) {
1605                extlen = gfs2_free_extlen(rbm, minext);
1606                if (extlen <= maxext->len)
1607                        goto fail;
1608        }
1609
1610        /*
1611         * Check the extent which has been found against the reservations
1612         * and skip if parts of it are already reserved
1613         */
1614        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1615        if (nblock == block) {
1616                if (!minext || extlen >= minext)
1617                        return 0;
1618
1619                if (extlen > maxext->len) {
1620                        maxext->len = extlen;
1621                        maxext->rbm = *rbm;
1622                }
1623fail:
1624                nblock = block + extlen;
1625        }
1626        ret = gfs2_rbm_from_block(rbm, nblock);
1627        if (ret < 0)
1628                return ret;
1629        return 1;
1630}
1631
1632/**
1633 * gfs2_rbm_find - Look for blocks of a particular state
1634 * @rbm: Value/result starting position and final position
1635 * @state: The state which we want to find
1636 * @minext: Pointer to the requested extent length (NULL for a single block)
1637 *          This is updated to be the actual reservation size.
1638 * @ip: If set, check for reservations
1639 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1640 *          around until we've reached the starting point.
1641 * @ap: the allocation parameters
1642 *
1643 * Side effects:
1644 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1645 *   has no free blocks in it.
1646 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1647 *   has come up short on a free block search.
1648 *
1649 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1650 */
1651
1652static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1653                         const struct gfs2_inode *ip, bool nowrap,
1654                         const struct gfs2_alloc_parms *ap)
1655{
1656        struct buffer_head *bh;
1657        int initial_bii;
1658        u32 initial_offset;
1659        int first_bii = rbm->bii;
1660        u32 first_offset = rbm->offset;
1661        u32 offset;
1662        u8 *buffer;
1663        int n = 0;
1664        int iters = rbm->rgd->rd_length;
1665        int ret;
1666        struct gfs2_bitmap *bi;
1667        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1668
1669        /* If we are not starting at the beginning of a bitmap, then we
1670         * need to add one to the bitmap count to ensure that we search
1671         * the starting bitmap twice.
1672         */
1673        if (rbm->offset != 0)
1674                iters++;
1675
1676        while(1) {
1677                bi = rbm_bi(rbm);
1678                if (test_bit(GBF_FULL, &bi->bi_flags) &&
1679                    (state == GFS2_BLKST_FREE))
1680                        goto next_bitmap;
1681
1682                bh = bi->bi_bh;
1683                buffer = bh->b_data + bi->bi_offset;
1684                WARN_ON(!buffer_uptodate(bh));
1685                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1686                        buffer = bi->bi_clone + bi->bi_offset;
1687                initial_offset = rbm->offset;
1688                offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1689                if (offset == BFITNOENT)
1690                        goto bitmap_full;
1691                rbm->offset = offset;
1692                if (ip == NULL)
1693                        return 0;
1694
1695                initial_bii = rbm->bii;
1696                ret = gfs2_reservation_check_and_update(rbm, ip,
1697                                                        minext ? *minext : 0,
1698                                                        &maxext);
1699                if (ret == 0)
1700                        return 0;
1701                if (ret > 0) {
1702                        n += (rbm->bii - initial_bii);
1703                        goto next_iter;
1704                }
1705                if (ret == -E2BIG) {
1706                        rbm->bii = 0;
1707                        rbm->offset = 0;
1708                        n += (rbm->bii - initial_bii);
1709                        goto res_covered_end_of_rgrp;
1710                }
1711                return ret;
1712
1713bitmap_full:    /* Mark bitmap as full and fall through */
1714                if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1715                        set_bit(GBF_FULL, &bi->bi_flags);
1716
1717next_bitmap:    /* Find next bitmap in the rgrp */
1718                rbm->offset = 0;
1719                rbm->bii++;
1720                if (rbm->bii == rbm->rgd->rd_length)
1721                        rbm->bii = 0;
1722res_covered_end_of_rgrp:
1723                if ((rbm->bii == 0) && nowrap)
1724                        break;
1725                n++;
1726next_iter:
1727                if (n >= iters)
1728                        break;
1729        }
1730
1731        if (minext == NULL || state != GFS2_BLKST_FREE)
1732                return -ENOSPC;
1733
1734        /* If the extent was too small, and it's smaller than the smallest
1735           to have failed before, remember for future reference that it's
1736           useless to search this rgrp again for this amount or more. */
1737        if ((first_offset == 0) && (first_bii == 0) &&
1738            (*minext < rbm->rgd->rd_extfail_pt))
1739                rbm->rgd->rd_extfail_pt = *minext;
1740
1741        /* If the maximum extent we found is big enough to fulfill the
1742           minimum requirements, use it anyway. */
1743        if (maxext.len) {
1744                *rbm = maxext.rbm;
1745                *minext = maxext.len;
1746                return 0;
1747        }
1748
1749        return -ENOSPC;
1750}
1751
1752/**
1753 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1754 * @rgd: The rgrp
1755 * @last_unlinked: block address of the last dinode we unlinked
1756 * @skip: block address we should explicitly not unlink
1757 *
1758 * Returns: 0 if no error
1759 *          The inode, if one has been found, in inode.
1760 */
1761
1762static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1763{
1764        u64 block;
1765        struct gfs2_sbd *sdp = rgd->rd_sbd;
1766        struct gfs2_glock *gl;
1767        struct gfs2_inode *ip;
1768        int error;
1769        int found = 0;
1770        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1771
1772        while (1) {
1773                down_write(&sdp->sd_log_flush_lock);
1774                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1775                                      true, NULL);
1776                up_write(&sdp->sd_log_flush_lock);
1777                if (error == -ENOSPC)
1778                        break;
1779                if (WARN_ON_ONCE(error))
1780                        break;
1781
1782                block = gfs2_rbm_to_block(&rbm);
1783                if (gfs2_rbm_from_block(&rbm, block + 1))
1784                        break;
1785                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1786                        continue;
1787                if (block == skip)
1788                        continue;
1789                *last_unlinked = block;
1790
1791                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
1792                if (error)
1793                        continue;
1794
1795                /* If the inode is already in cache, we can ignore it here
1796                 * because the existing inode disposal code will deal with
1797                 * it when all refs have gone away. Accessing gl_object like
1798                 * this is not safe in general. Here it is ok because we do
1799                 * not dereference the pointer, and we only need an approx
1800                 * answer to whether it is NULL or not.
1801                 */
1802                ip = gl->gl_object;
1803
1804                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1805                        gfs2_glock_put(gl);
1806                else
1807                        found++;
1808
1809                /* Limit reclaim to sensible number of tasks */
1810                if (found > NR_CPUS)
1811                        return;
1812        }
1813
1814        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1815        return;
1816}
1817
1818/**
1819 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1820 * @rgd: The rgrp in question
1821 * @loops: An indication of how picky we can be (0=very, 1=less so)
1822 *
1823 * This function uses the recently added glock statistics in order to
1824 * figure out whether a parciular resource group is suffering from
1825 * contention from multiple nodes. This is done purely on the basis
1826 * of timings, since this is the only data we have to work with and
1827 * our aim here is to reject a resource group which is highly contended
1828 * but (very important) not to do this too often in order to ensure that
1829 * we do not land up introducing fragmentation by changing resource
1830 * groups when not actually required.
1831 *
1832 * The calculation is fairly simple, we want to know whether the SRTTB
1833 * (i.e. smoothed round trip time for blocking operations) to acquire
1834 * the lock for this rgrp's glock is significantly greater than the
1835 * time taken for resource groups on average. We introduce a margin in
1836 * the form of the variable @var which is computed as the sum of the two
1837 * respective variences, and multiplied by a factor depending on @loops
1838 * and whether we have a lot of data to base the decision on. This is
1839 * then tested against the square difference of the means in order to
1840 * decide whether the result is statistically significant or not.
1841 *
1842 * Returns: A boolean verdict on the congestion status
1843 */
1844
1845static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1846{
1847        const struct gfs2_glock *gl = rgd->rd_gl;
1848        const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1849        struct gfs2_lkstats *st;
1850        u64 r_dcount, l_dcount;
1851        u64 l_srttb, a_srttb = 0;
1852        s64 srttb_diff;
1853        u64 sqr_diff;
1854        u64 var;
1855        int cpu, nonzero = 0;
1856
1857        preempt_disable();
1858        for_each_present_cpu(cpu) {
1859                st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1860                if (st->stats[GFS2_LKS_SRTTB]) {
1861                        a_srttb += st->stats[GFS2_LKS_SRTTB];
1862                        nonzero++;
1863                }
1864        }
1865        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1866        if (nonzero)
1867                do_div(a_srttb, nonzero);
1868        r_dcount = st->stats[GFS2_LKS_DCOUNT];
1869        var = st->stats[GFS2_LKS_SRTTVARB] +
1870              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1871        preempt_enable();
1872
1873        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1874        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1875
1876        if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1877                return false;
1878
1879        srttb_diff = a_srttb - l_srttb;
1880        sqr_diff = srttb_diff * srttb_diff;
1881
1882        var *= 2;
1883        if (l_dcount < 8 || r_dcount < 8)
1884                var *= 2;
1885        if (loops == 1)
1886                var *= 2;
1887
1888        return ((srttb_diff < 0) && (sqr_diff > var));
1889}
1890
1891/**
1892 * gfs2_rgrp_used_recently
1893 * @rs: The block reservation with the rgrp to test
1894 * @msecs: The time limit in milliseconds
1895 *
1896 * Returns: True if the rgrp glock has been used within the time limit
1897 */
1898static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1899                                    u64 msecs)
1900{
1901        u64 tdiff;
1902
1903        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1904                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1905
1906        return tdiff > (msecs * 1000 * 1000);
1907}
1908
1909static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1910{
1911        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1912        u32 skip;
1913
1914        get_random_bytes(&skip, sizeof(skip));
1915        return skip % sdp->sd_rgrps;
1916}
1917
1918static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1919{
1920        struct gfs2_rgrpd *rgd = *pos;
1921        struct gfs2_sbd *sdp = rgd->rd_sbd;
1922
1923        rgd = gfs2_rgrpd_get_next(rgd);
1924        if (rgd == NULL)
1925                rgd = gfs2_rgrpd_get_first(sdp);
1926        *pos = rgd;
1927        if (rgd != begin) /* If we didn't wrap */
1928                return true;
1929        return false;
1930}
1931
1932/**
1933 * fast_to_acquire - determine if a resource group will be fast to acquire
1934 *
1935 * If this is one of our preferred rgrps, it should be quicker to acquire,
1936 * because we tried to set ourselves up as dlm lock master.
1937 */
1938static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1939{
1940        struct gfs2_glock *gl = rgd->rd_gl;
1941
1942        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1943            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1944            !test_bit(GLF_DEMOTE, &gl->gl_flags))
1945                return 1;
1946        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1947                return 1;
1948        return 0;
1949}
1950
1951/**
1952 * gfs2_inplace_reserve - Reserve space in the filesystem
1953 * @ip: the inode to reserve space for
1954 * @ap: the allocation parameters
1955 *
1956 * We try our best to find an rgrp that has at least ap->target blocks
1957 * available. After a couple of passes (loops == 2), the prospects of finding
1958 * such an rgrp diminish. At this stage, we return the first rgrp that has
1959 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1960 * the number of blocks available in the chosen rgrp.
1961 *
1962 * Returns: 0 on success,
1963 *          -ENOMEM if a suitable rgrp can't be found
1964 *          errno otherwise
1965 */
1966
1967int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1968{
1969        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1970        struct gfs2_rgrpd *begin = NULL;
1971        struct gfs2_blkreserv *rs = &ip->i_res;
1972        int error = 0, rg_locked, flags = 0;
1973        u64 last_unlinked = NO_BLOCK;
1974        int loops = 0;
1975        u32 skip = 0;
1976
1977        if (sdp->sd_args.ar_rgrplvb)
1978                flags |= GL_SKIP;
1979        if (gfs2_assert_warn(sdp, ap->target))
1980                return -EINVAL;
1981        if (gfs2_rs_active(rs)) {
1982                begin = rs->rs_rbm.rgd;
1983        } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1984                rs->rs_rbm.rgd = begin = ip->i_rgd;
1985        } else {
1986                check_and_update_goal(ip);
1987                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1988        }
1989        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1990                skip = gfs2_orlov_skip(ip);
1991        if (rs->rs_rbm.rgd == NULL)
1992                return -EBADSLT;
1993
1994        while (loops < 3) {
1995                rg_locked = 1;
1996
1997                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1998                        rg_locked = 0;
1999                        if (skip && skip--)
2000                                goto next_rgrp;
2001                        if (!gfs2_rs_active(rs)) {
2002                                if (loops == 0 &&
2003                                    !fast_to_acquire(rs->rs_rbm.rgd))
2004                                        goto next_rgrp;
2005                                if ((loops < 2) &&
2006                                    gfs2_rgrp_used_recently(rs, 1000) &&
2007                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2008                                        goto next_rgrp;
2009                        }
2010                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2011                                                   LM_ST_EXCLUSIVE, flags,
2012                                                   &rs->rs_rgd_gh);
2013                        if (unlikely(error))
2014                                return error;
2015                        if (!gfs2_rs_active(rs) && (loops < 2) &&
2016                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2017                                goto skip_rgrp;
2018                        if (sdp->sd_args.ar_rgrplvb) {
2019                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
2020                                if (unlikely(error)) {
2021                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2022                                        return error;
2023                                }
2024                        }
2025                }
2026
2027                /* Skip unuseable resource groups */
2028                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2029                                                 GFS2_RDF_ERROR)) ||
2030                    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2031                        goto skip_rgrp;
2032
2033                if (sdp->sd_args.ar_rgrplvb)
2034                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2035
2036                /* Get a reservation if we don't already have one */
2037                if (!gfs2_rs_active(rs))
2038                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2039
2040                /* Skip rgrps when we can't get a reservation on first pass */
2041                if (!gfs2_rs_active(rs) && (loops < 1))
2042                        goto check_rgrp;
2043
2044                /* If rgrp has enough free space, use it */
2045                if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2046                    (loops == 2 && ap->min_target &&
2047                     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2048                        ip->i_rgd = rs->rs_rbm.rgd;
2049                        ap->allowed = ip->i_rgd->rd_free_clone;
2050                        return 0;
2051                }
2052check_rgrp:
2053                /* Check for unlinked inodes which can be reclaimed */
2054                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2055                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2056                                        ip->i_no_addr);
2057skip_rgrp:
2058                /* Drop reservation, if we couldn't use reserved rgrp */
2059                if (gfs2_rs_active(rs))
2060                        gfs2_rs_deltree(rs);
2061
2062                /* Unlock rgrp if required */
2063                if (!rg_locked)
2064                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2065next_rgrp:
2066                /* Find the next rgrp, and continue looking */
2067                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2068                        continue;
2069                if (skip)
2070                        continue;
2071
2072                /* If we've scanned all the rgrps, but found no free blocks
2073                 * then this checks for some less likely conditions before
2074                 * trying again.
2075                 */
2076                loops++;
2077                /* Check that fs hasn't grown if writing to rindex */
2078                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2079                        error = gfs2_ri_update(ip);
2080                        if (error)
2081                                return error;
2082                }
2083                /* Flushing the log may release space */
2084                if (loops == 2)
2085                        gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2086        }
2087
2088        return -ENOSPC;
2089}
2090
2091/**
2092 * gfs2_inplace_release - release an inplace reservation
2093 * @ip: the inode the reservation was taken out on
2094 *
2095 * Release a reservation made by gfs2_inplace_reserve().
2096 */
2097
2098void gfs2_inplace_release(struct gfs2_inode *ip)
2099{
2100        struct gfs2_blkreserv *rs = &ip->i_res;
2101
2102        if (rs->rs_rgd_gh.gh_gl)
2103                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2104}
2105
2106/**
2107 * gfs2_get_block_type - Check a block in a RG is of given type
2108 * @rgd: the resource group holding the block
2109 * @block: the block number
2110 *
2111 * Returns: The block type (GFS2_BLKST_*)
2112 */
2113
2114static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2115{
2116        struct gfs2_rbm rbm = { .rgd = rgd, };
2117        int ret;
2118
2119        ret = gfs2_rbm_from_block(&rbm, block);
2120        WARN_ON_ONCE(ret != 0);
2121
2122        return gfs2_testbit(&rbm);
2123}
2124
2125
2126/**
2127 * gfs2_alloc_extent - allocate an extent from a given bitmap
2128 * @rbm: the resource group information
2129 * @dinode: TRUE if the first block we allocate is for a dinode
2130 * @n: The extent length (value/result)
2131 *
2132 * Add the bitmap buffer to the transaction.
2133 * Set the found bits to @new_state to change block's allocation state.
2134 */
2135static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2136                             unsigned int *n)
2137{
2138        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2139        const unsigned int elen = *n;
2140        u64 block;
2141        int ret;
2142
2143        *n = 1;
2144        block = gfs2_rbm_to_block(rbm);
2145        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2146        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2147        block++;
2148        while (*n < elen) {
2149                ret = gfs2_rbm_from_block(&pos, block);
2150                if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2151                        break;
2152                gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2153                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2154                (*n)++;
2155                block++;
2156        }
2157}
2158
2159/**
2160 * rgblk_free - Change alloc state of given block(s)
2161 * @sdp: the filesystem
2162 * @bstart: the start of a run of blocks to free
2163 * @blen: the length of the block run (all must lie within ONE RG!)
2164 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2165 *
2166 * Returns:  Resource group containing the block(s)
2167 */
2168
2169static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2170                                     u32 blen, unsigned char new_state)
2171{
2172        struct gfs2_rbm rbm;
2173        struct gfs2_bitmap *bi, *bi_prev = NULL;
2174
2175        rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2176        if (!rbm.rgd) {
2177                if (gfs2_consist(sdp))
2178                        fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2179                return NULL;
2180        }
2181
2182        gfs2_rbm_from_block(&rbm, bstart);
2183        while (blen--) {
2184                bi = rbm_bi(&rbm);
2185                if (bi != bi_prev) {
2186                        if (!bi->bi_clone) {
2187                                bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2188                                                      GFP_NOFS | __GFP_NOFAIL);
2189                                memcpy(bi->bi_clone + bi->bi_offset,
2190                                       bi->bi_bh->b_data + bi->bi_offset,
2191                                       bi->bi_len);
2192                        }
2193                        gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2194                        bi_prev = bi;
2195                }
2196                gfs2_setbit(&rbm, false, new_state);
2197                gfs2_rbm_incr(&rbm);
2198        }
2199
2200        return rbm.rgd;
2201}
2202
2203/**
2204 * gfs2_rgrp_dump - print out an rgrp
2205 * @seq: The iterator
2206 * @gl: The glock in question
2207 *
2208 */
2209
2210void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2211{
2212        struct gfs2_rgrpd *rgd = gl->gl_object;
2213        struct gfs2_blkreserv *trs;
2214        const struct rb_node *n;
2215
2216        if (rgd == NULL)
2217                return;
2218        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2219                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2220                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2221                       rgd->rd_reserved, rgd->rd_extfail_pt);
2222        spin_lock(&rgd->rd_rsspin);
2223        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2224                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2225                dump_rs(seq, trs);
2226        }
2227        spin_unlock(&rgd->rd_rsspin);
2228}
2229
2230static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2231{
2232        struct gfs2_sbd *sdp = rgd->rd_sbd;
2233        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2234                (unsigned long long)rgd->rd_addr);
2235        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2236        gfs2_rgrp_dump(NULL, rgd->rd_gl);
2237        rgd->rd_flags |= GFS2_RDF_ERROR;
2238}
2239
2240/**
2241 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2242 * @ip: The inode we have just allocated blocks for
2243 * @rbm: The start of the allocated blocks
2244 * @len: The extent length
2245 *
2246 * Adjusts a reservation after an allocation has taken place. If the
2247 * reservation does not match the allocation, or if it is now empty
2248 * then it is removed.
2249 */
2250
2251static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2252                                    const struct gfs2_rbm *rbm, unsigned len)
2253{
2254        struct gfs2_blkreserv *rs = &ip->i_res;
2255        struct gfs2_rgrpd *rgd = rbm->rgd;
2256        unsigned rlen;
2257        u64 block;
2258        int ret;
2259
2260        spin_lock(&rgd->rd_rsspin);
2261        if (gfs2_rs_active(rs)) {
2262                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2263                        block = gfs2_rbm_to_block(rbm);
2264                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2265                        rlen = min(rs->rs_free, len);
2266                        rs->rs_free -= rlen;
2267                        rgd->rd_reserved -= rlen;
2268                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2269                        if (rs->rs_free && !ret)
2270                                goto out;
2271                        /* We used up our block reservation, so we should
2272                           reserve more blocks next time. */
2273                        atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
2274                }
2275                __rs_deltree(rs);
2276        }
2277out:
2278        spin_unlock(&rgd->rd_rsspin);
2279}
2280
2281/**
2282 * gfs2_set_alloc_start - Set starting point for block allocation
2283 * @rbm: The rbm which will be set to the required location
2284 * @ip: The gfs2 inode
2285 * @dinode: Flag to say if allocation includes a new inode
2286 *
2287 * This sets the starting point from the reservation if one is active
2288 * otherwise it falls back to guessing a start point based on the
2289 * inode's goal block or the last allocation point in the rgrp.
2290 */
2291
2292static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2293                                 const struct gfs2_inode *ip, bool dinode)
2294{
2295        u64 goal;
2296
2297        if (gfs2_rs_active(&ip->i_res)) {
2298                *rbm = ip->i_res.rs_rbm;
2299                return;
2300        }
2301
2302        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2303                goal = ip->i_goal;
2304        else
2305                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2306
2307        gfs2_rbm_from_block(rbm, goal);
2308}
2309
2310/**
2311 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2312 * @ip: the inode to allocate the block for
2313 * @bn: Used to return the starting block number
2314 * @nblocks: requested number of blocks/extent length (value/result)
2315 * @dinode: 1 if we're allocating a dinode block, else 0
2316 * @generation: the generation number of the inode
2317 *
2318 * Returns: 0 or error
2319 */
2320
2321int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2322                      bool dinode, u64 *generation)
2323{
2324        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2325        struct buffer_head *dibh;
2326        struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2327        unsigned int ndata;
2328        u64 block; /* block, within the file system scope */
2329        int error;
2330
2331        gfs2_set_alloc_start(&rbm, ip, dinode);
2332        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2333
2334        if (error == -ENOSPC) {
2335                gfs2_set_alloc_start(&rbm, ip, dinode);
2336                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2337                                      NULL);
2338        }
2339
2340        /* Since all blocks are reserved in advance, this shouldn't happen */
2341        if (error) {
2342                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2343                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2344                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2345                        rbm.rgd->rd_extfail_pt);
2346                goto rgrp_error;
2347        }
2348
2349        gfs2_alloc_extent(&rbm, dinode, nblocks);
2350        block = gfs2_rbm_to_block(&rbm);
2351        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2352        if (gfs2_rs_active(&ip->i_res))
2353                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2354        ndata = *nblocks;
2355        if (dinode)
2356                ndata--;
2357
2358        if (!dinode) {
2359                ip->i_goal = block + ndata - 1;
2360                error = gfs2_meta_inode_buffer(ip, &dibh);
2361                if (error == 0) {
2362                        struct gfs2_dinode *di =
2363                                (struct gfs2_dinode *)dibh->b_data;
2364                        gfs2_trans_add_meta(ip->i_gl, dibh);
2365                        di->di_goal_meta = di->di_goal_data =
2366                                cpu_to_be64(ip->i_goal);
2367                        brelse(dibh);
2368                }
2369        }
2370        if (rbm.rgd->rd_free < *nblocks) {
2371                pr_warn("nblocks=%u\n", *nblocks);
2372                goto rgrp_error;
2373        }
2374
2375        rbm.rgd->rd_free -= *nblocks;
2376        if (dinode) {
2377                rbm.rgd->rd_dinodes++;
2378                *generation = rbm.rgd->rd_igeneration++;
2379                if (*generation == 0)
2380                        *generation = rbm.rgd->rd_igeneration++;
2381        }
2382
2383        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2384        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2385        gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2386
2387        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2388        if (dinode)
2389                gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2390
2391        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2392
2393        rbm.rgd->rd_free_clone -= *nblocks;
2394        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2395                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2396        *bn = block;
2397        return 0;
2398
2399rgrp_error:
2400        gfs2_rgrp_error(rbm.rgd);
2401        return -EIO;
2402}
2403
2404/**
2405 * __gfs2_free_blocks - free a contiguous run of block(s)
2406 * @ip: the inode these blocks are being freed from
2407 * @bstart: first block of a run of contiguous blocks
2408 * @blen: the length of the block run
2409 * @meta: 1 if the blocks represent metadata
2410 *
2411 */
2412
2413void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2414{
2415        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2416        struct gfs2_rgrpd *rgd;
2417
2418        rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2419        if (!rgd)
2420                return;
2421        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2422        rgd->rd_free += blen;
2423        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2424        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2425        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2426        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2427
2428        /* Directories keep their data in the metadata address space */
2429        if (meta || ip->i_depth)
2430                gfs2_meta_wipe(ip, bstart, blen);
2431}
2432
2433/**
2434 * gfs2_free_meta - free a contiguous run of data block(s)
2435 * @ip: the inode these blocks are being freed from
2436 * @bstart: first block of a run of contiguous blocks
2437 * @blen: the length of the block run
2438 *
2439 */
2440
2441void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2442{
2443        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2444
2445        __gfs2_free_blocks(ip, bstart, blen, 1);
2446        gfs2_statfs_change(sdp, 0, +blen, 0);
2447        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2448}
2449
2450void gfs2_unlink_di(struct inode *inode)
2451{
2452        struct gfs2_inode *ip = GFS2_I(inode);
2453        struct gfs2_sbd *sdp = GFS2_SB(inode);
2454        struct gfs2_rgrpd *rgd;
2455        u64 blkno = ip->i_no_addr;
2456
2457        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2458        if (!rgd)
2459                return;
2460        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2461        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2462        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2463        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2464        update_rgrp_lvb_unlinked(rgd, 1);
2465}
2466
2467static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2468{
2469        struct gfs2_sbd *sdp = rgd->rd_sbd;
2470        struct gfs2_rgrpd *tmp_rgd;
2471
2472        tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2473        if (!tmp_rgd)
2474                return;
2475        gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2476
2477        if (!rgd->rd_dinodes)
2478                gfs2_consist_rgrpd(rgd);
2479        rgd->rd_dinodes--;
2480        rgd->rd_free++;
2481
2482        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2483        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2484        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2485        update_rgrp_lvb_unlinked(rgd, -1);
2486
2487        gfs2_statfs_change(sdp, 0, +1, -1);
2488}
2489
2490
2491void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2492{
2493        gfs2_free_uninit_di(rgd, ip->i_no_addr);
2494        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2495        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2496        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2497}
2498
2499/**
2500 * gfs2_check_blk_type - Check the type of a block
2501 * @sdp: The superblock
2502 * @no_addr: The block number to check
2503 * @type: The block type we are looking for
2504 *
2505 * Returns: 0 if the block type matches the expected type
2506 *          -ESTALE if it doesn't match
2507 *          or -ve errno if something went wrong while checking
2508 */
2509
2510int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2511{
2512        struct gfs2_rgrpd *rgd;
2513        struct gfs2_holder rgd_gh;
2514        int error = -EINVAL;
2515
2516        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2517        if (!rgd)
2518                goto fail;
2519
2520        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2521        if (error)
2522                goto fail;
2523
2524        if (gfs2_get_block_type(rgd, no_addr) != type)
2525                error = -ESTALE;
2526
2527        gfs2_glock_dq_uninit(&rgd_gh);
2528fail:
2529        return error;
2530}
2531
2532/**
2533 * gfs2_rlist_add - add a RG to a list of RGs
2534 * @ip: the inode
2535 * @rlist: the list of resource groups
2536 * @block: the block
2537 *
2538 * Figure out what RG a block belongs to and add that RG to the list
2539 *
2540 * FIXME: Don't use NOFAIL
2541 *
2542 */
2543
2544void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2545                    u64 block)
2546{
2547        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2548        struct gfs2_rgrpd *rgd;
2549        struct gfs2_rgrpd **tmp;
2550        unsigned int new_space;
2551        unsigned int x;
2552
2553        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2554                return;
2555
2556        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2557                rgd = ip->i_rgd;
2558        else
2559                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2560        if (!rgd) {
2561                fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2562                return;
2563        }
2564        ip->i_rgd = rgd;
2565
2566        for (x = 0; x < rlist->rl_rgrps; x++)
2567                if (rlist->rl_rgd[x] == rgd)
2568                        return;
2569
2570        if (rlist->rl_rgrps == rlist->rl_space) {
2571                new_space = rlist->rl_space + 10;
2572
2573                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2574                              GFP_NOFS | __GFP_NOFAIL);
2575
2576                if (rlist->rl_rgd) {
2577                        memcpy(tmp, rlist->rl_rgd,
2578                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2579                        kfree(rlist->rl_rgd);
2580                }
2581
2582                rlist->rl_space = new_space;
2583                rlist->rl_rgd = tmp;
2584        }
2585
2586        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2587}
2588
2589/**
2590 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2591 *      and initialize an array of glock holders for them
2592 * @rlist: the list of resource groups
2593 * @state: the lock state to acquire the RG lock in
2594 *
2595 * FIXME: Don't use NOFAIL
2596 *
2597 */
2598
2599void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2600{
2601        unsigned int x;
2602
2603        rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2604                                GFP_NOFS | __GFP_NOFAIL);
2605        for (x = 0; x < rlist->rl_rgrps; x++)
2606                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2607                                state, 0,
2608                                &rlist->rl_ghs[x]);
2609}
2610
2611/**
2612 * gfs2_rlist_free - free a resource group list
2613 * @rlist: the list of resource groups
2614 *
2615 */
2616
2617void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2618{
2619        unsigned int x;
2620
2621        kfree(rlist->rl_rgd);
2622
2623        if (rlist->rl_ghs) {
2624                for (x = 0; x < rlist->rl_rgrps; x++)
2625                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2626                kfree(rlist->rl_ghs);
2627                rlist->rl_ghs = NULL;
2628        }
2629}
2630
2631