linux/fs/gfs2/rgrp.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/slab.h>
  13#include <linux/spinlock.h>
  14#include <linux/completion.h>
  15#include <linux/buffer_head.h>
  16#include <linux/fs.h>
  17#include <linux/gfs2_ondisk.h>
  18#include <linux/prefetch.h>
  19#include <linux/blkdev.h>
  20#include <linux/rbtree.h>
  21#include <linux/random.h>
  22
  23#include "gfs2.h"
  24#include "incore.h"
  25#include "glock.h"
  26#include "glops.h"
  27#include "lops.h"
  28#include "meta_io.h"
  29#include "quota.h"
  30#include "rgrp.h"
  31#include "super.h"
  32#include "trans.h"
  33#include "util.h"
  34#include "log.h"
  35#include "inode.h"
  36#include "trace_gfs2.h"
  37
  38#define BFITNOENT ((u32)~0)
  39#define NO_BLOCK ((u64)~0)
  40
  41#if BITS_PER_LONG == 32
  42#define LBITMASK   (0x55555555UL)
  43#define LBITSKIP55 (0x55555555UL)
  44#define LBITSKIP00 (0x00000000UL)
  45#else
  46#define LBITMASK   (0x5555555555555555UL)
  47#define LBITSKIP55 (0x5555555555555555UL)
  48#define LBITSKIP00 (0x0000000000000000UL)
  49#endif
  50
  51/*
  52 * These routines are used by the resource group routines (rgrp.c)
  53 * to keep track of block allocation.  Each block is represented by two
  54 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  55 *
  56 * 0 = Free
  57 * 1 = Used (not metadata)
  58 * 2 = Unlinked (still in use) inode
  59 * 3 = Used (metadata)
  60 */
  61
  62struct gfs2_extent {
  63        struct gfs2_rbm rbm;
  64        u32 len;
  65};
  66
  67static const char valid_change[16] = {
  68                /* current */
  69        /* n */ 0, 1, 1, 1,
  70        /* e */ 1, 0, 0, 0,
  71        /* w */ 0, 0, 0, 1,
  72                1, 0, 0, 0
  73};
  74
  75static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  76                         const struct gfs2_inode *ip, bool nowrap,
  77                         const struct gfs2_alloc_parms *ap);
  78
  79
  80/**
  81 * gfs2_setbit - Set a bit in the bitmaps
  82 * @rbm: The position of the bit to set
  83 * @do_clone: Also set the clone bitmap, if it exists
  84 * @new_state: the new state of the block
  85 *
  86 */
  87
  88static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
  89                               unsigned char new_state)
  90{
  91        unsigned char *byte1, *byte2, *end, cur_state;
  92        struct gfs2_bitmap *bi = rbm_bi(rbm);
  93        unsigned int buflen = bi->bi_len;
  94        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
  95
  96        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
  97        end = bi->bi_bh->b_data + bi->bi_offset + buflen;
  98
  99        BUG_ON(byte1 >= end);
 100
 101        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 102
 103        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
 104                pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
 105                        rbm->offset, cur_state, new_state);
 106                pr_warn("rgrp=0x%llx bi_start=0x%x\n",
 107                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
 108                pr_warn("bi_offset=0x%x bi_len=0x%x\n",
 109                        bi->bi_offset, bi->bi_len);
 110                dump_stack();
 111                gfs2_consist_rgrpd(rbm->rgd);
 112                return;
 113        }
 114        *byte1 ^= (cur_state ^ new_state) << bit;
 115
 116        if (do_clone && bi->bi_clone) {
 117                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
 118                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 119                *byte2 ^= (cur_state ^ new_state) << bit;
 120        }
 121}
 122
 123/**
 124 * gfs2_testbit - test a bit in the bitmaps
 125 * @rbm: The bit to test
 126 *
 127 * Returns: The two bit block state of the requested bit
 128 */
 129
 130static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 131{
 132        struct gfs2_bitmap *bi = rbm_bi(rbm);
 133        const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
 134        const u8 *byte;
 135        unsigned int bit;
 136
 137        byte = buffer + (rbm->offset / GFS2_NBBY);
 138        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 139
 140        return (*byte >> bit) & GFS2_BIT_MASK;
 141}
 142
 143/**
 144 * gfs2_bit_search
 145 * @ptr: Pointer to bitmap data
 146 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 147 * @state: The state we are searching for
 148 *
 149 * We xor the bitmap data with a patter which is the bitwise opposite
 150 * of what we are looking for, this gives rise to a pattern of ones
 151 * wherever there is a match. Since we have two bits per entry, we
 152 * take this pattern, shift it down by one place and then and it with
 153 * the original. All the even bit positions (0,2,4, etc) then represent
 154 * successful matches, so we mask with 0x55555..... to remove the unwanted
 155 * odd bit positions.
 156 *
 157 * This allows searching of a whole u64 at once (32 blocks) with a
 158 * single test (on 64 bit arches).
 159 */
 160
 161static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 162{
 163        u64 tmp;
 164        static const u64 search[] = {
 165                [0] = 0xffffffffffffffffULL,
 166                [1] = 0xaaaaaaaaaaaaaaaaULL,
 167                [2] = 0x5555555555555555ULL,
 168                [3] = 0x0000000000000000ULL,
 169        };
 170        tmp = le64_to_cpu(*ptr) ^ search[state];
 171        tmp &= (tmp >> 1);
 172        tmp &= mask;
 173        return tmp;
 174}
 175
 176/**
 177 * rs_cmp - multi-block reservation range compare
 178 * @blk: absolute file system block number of the new reservation
 179 * @len: number of blocks in the new reservation
 180 * @rs: existing reservation to compare against
 181 *
 182 * returns: 1 if the block range is beyond the reach of the reservation
 183 *         -1 if the block range is before the start of the reservation
 184 *          0 if the block range overlaps with the reservation
 185 */
 186static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 187{
 188        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 189
 190        if (blk >= startblk + rs->rs_free)
 191                return 1;
 192        if (blk + len - 1 < startblk)
 193                return -1;
 194        return 0;
 195}
 196
 197/**
 198 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 199 *       a block in a given allocation state.
 200 * @buf: the buffer that holds the bitmaps
 201 * @len: the length (in bytes) of the buffer
 202 * @goal: start search at this block's bit-pair (within @buffer)
 203 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 204 *
 205 * Scope of @goal and returned block number is only within this bitmap buffer,
 206 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 207 * beginning of a bitmap block buffer, skipping any header structures, but
 208 * headers are always a multiple of 64 bits long so that the buffer is
 209 * always aligned to a 64 bit boundary.
 210 *
 211 * The size of the buffer is in bytes, but is it assumed that it is
 212 * always ok to read a complete multiple of 64 bits at the end
 213 * of the block in case the end is no aligned to a natural boundary.
 214 *
 215 * Return: the block number (bitmap buffer scope) that was found
 216 */
 217
 218static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 219                       u32 goal, u8 state)
 220{
 221        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 222        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 223        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 224        u64 tmp;
 225        u64 mask = 0x5555555555555555ULL;
 226        u32 bit;
 227
 228        /* Mask off bits we don't care about at the start of the search */
 229        mask <<= spoint;
 230        tmp = gfs2_bit_search(ptr, mask, state);
 231        ptr++;
 232        while(tmp == 0 && ptr < end) {
 233                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 234                ptr++;
 235        }
 236        /* Mask off any bits which are more than len bytes from the start */
 237        if (ptr == end && (len & (sizeof(u64) - 1)))
 238                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 239        /* Didn't find anything, so return */
 240        if (tmp == 0)
 241                return BFITNOENT;
 242        ptr--;
 243        bit = __ffs64(tmp);
 244        bit /= 2;       /* two bits per entry in the bitmap */
 245        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 246}
 247
 248/**
 249 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 250 * @rbm: The rbm with rgd already set correctly
 251 * @block: The block number (filesystem relative)
 252 *
 253 * This sets the bi and offset members of an rbm based on a
 254 * resource group and a filesystem relative block number. The
 255 * resource group must be set in the rbm on entry, the bi and
 256 * offset members will be set by this function.
 257 *
 258 * Returns: 0 on success, or an error code
 259 */
 260
 261static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 262{
 263        u64 rblock = block - rbm->rgd->rd_data0;
 264
 265        if (WARN_ON_ONCE(rblock > UINT_MAX))
 266                return -EINVAL;
 267        if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
 268                return -E2BIG;
 269
 270        rbm->bii = 0;
 271        rbm->offset = (u32)(rblock);
 272        /* Check if the block is within the first block */
 273        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
 274                return 0;
 275
 276        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
 277        rbm->offset += (sizeof(struct gfs2_rgrp) -
 278                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
 279        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 280        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 281        return 0;
 282}
 283
 284/**
 285 * gfs2_rbm_incr - increment an rbm structure
 286 * @rbm: The rbm with rgd already set correctly
 287 *
 288 * This function takes an existing rbm structure and increments it to the next
 289 * viable block offset.
 290 *
 291 * Returns: If incrementing the offset would cause the rbm to go past the
 292 *          end of the rgrp, true is returned, otherwise false.
 293 *
 294 */
 295
 296static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
 297{
 298        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
 299                rbm->offset++;
 300                return false;
 301        }
 302        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
 303                return true;
 304
 305        rbm->offset = 0;
 306        rbm->bii++;
 307        return false;
 308}
 309
 310/**
 311 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 312 * @rbm: Position to search (value/result)
 313 * @n_unaligned: Number of unaligned blocks to check
 314 * @len: Decremented for each block found (terminate on zero)
 315 *
 316 * Returns: true if a non-free block is encountered
 317 */
 318
 319static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 320{
 321        u32 n;
 322        u8 res;
 323
 324        for (n = 0; n < n_unaligned; n++) {
 325                res = gfs2_testbit(rbm);
 326                if (res != GFS2_BLKST_FREE)
 327                        return true;
 328                (*len)--;
 329                if (*len == 0)
 330                        return true;
 331                if (gfs2_rbm_incr(rbm))
 332                        return true;
 333        }
 334
 335        return false;
 336}
 337
 338/**
 339 * gfs2_free_extlen - Return extent length of free blocks
 340 * @rrbm: Starting position
 341 * @len: Max length to check
 342 *
 343 * Starting at the block specified by the rbm, see how many free blocks
 344 * there are, not reading more than len blocks ahead. This can be done
 345 * using memchr_inv when the blocks are byte aligned, but has to be done
 346 * on a block by block basis in case of unaligned blocks. Also this
 347 * function can cope with bitmap boundaries (although it must stop on
 348 * a resource group boundary)
 349 *
 350 * Returns: Number of free blocks in the extent
 351 */
 352
 353static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 354{
 355        struct gfs2_rbm rbm = *rrbm;
 356        u32 n_unaligned = rbm.offset & 3;
 357        u32 size = len;
 358        u32 bytes;
 359        u32 chunk_size;
 360        u8 *ptr, *start, *end;
 361        u64 block;
 362        struct gfs2_bitmap *bi;
 363
 364        if (n_unaligned &&
 365            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
 366                goto out;
 367
 368        n_unaligned = len & 3;
 369        /* Start is now byte aligned */
 370        while (len > 3) {
 371                bi = rbm_bi(&rbm);
 372                start = bi->bi_bh->b_data;
 373                if (bi->bi_clone)
 374                        start = bi->bi_clone;
 375                end = start + bi->bi_bh->b_size;
 376                start += bi->bi_offset;
 377                BUG_ON(rbm.offset & 3);
 378                start += (rbm.offset / GFS2_NBBY);
 379                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
 380                ptr = memchr_inv(start, 0, bytes);
 381                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
 382                chunk_size *= GFS2_NBBY;
 383                BUG_ON(len < chunk_size);
 384                len -= chunk_size;
 385                block = gfs2_rbm_to_block(&rbm);
 386                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
 387                        n_unaligned = 0;
 388                        break;
 389                }
 390                if (ptr) {
 391                        n_unaligned = 3;
 392                        break;
 393                }
 394                n_unaligned = len & 3;
 395        }
 396
 397        /* Deal with any bits left over at the end */
 398        if (n_unaligned)
 399                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
 400out:
 401        return size - len;
 402}
 403
 404/**
 405 * gfs2_bitcount - count the number of bits in a certain state
 406 * @rgd: the resource group descriptor
 407 * @buffer: the buffer that holds the bitmaps
 408 * @buflen: the length (in bytes) of the buffer
 409 * @state: the state of the block we're looking for
 410 *
 411 * Returns: The number of bits
 412 */
 413
 414static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 415                         unsigned int buflen, u8 state)
 416{
 417        const u8 *byte = buffer;
 418        const u8 *end = buffer + buflen;
 419        const u8 state1 = state << 2;
 420        const u8 state2 = state << 4;
 421        const u8 state3 = state << 6;
 422        u32 count = 0;
 423
 424        for (; byte < end; byte++) {
 425                if (((*byte) & 0x03) == state)
 426                        count++;
 427                if (((*byte) & 0x0C) == state1)
 428                        count++;
 429                if (((*byte) & 0x30) == state2)
 430                        count++;
 431                if (((*byte) & 0xC0) == state3)
 432                        count++;
 433        }
 434
 435        return count;
 436}
 437
 438/**
 439 * gfs2_rgrp_verify - Verify that a resource group is consistent
 440 * @rgd: the rgrp
 441 *
 442 */
 443
 444void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 445{
 446        struct gfs2_sbd *sdp = rgd->rd_sbd;
 447        struct gfs2_bitmap *bi = NULL;
 448        u32 length = rgd->rd_length;
 449        u32 count[4], tmp;
 450        int buf, x;
 451
 452        memset(count, 0, 4 * sizeof(u32));
 453
 454        /* Count # blocks in each of 4 possible allocation states */
 455        for (buf = 0; buf < length; buf++) {
 456                bi = rgd->rd_bits + buf;
 457                for (x = 0; x < 4; x++)
 458                        count[x] += gfs2_bitcount(rgd,
 459                                                  bi->bi_bh->b_data +
 460                                                  bi->bi_offset,
 461                                                  bi->bi_len, x);
 462        }
 463
 464        if (count[0] != rgd->rd_free) {
 465                if (gfs2_consist_rgrpd(rgd))
 466                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 467                               count[0], rgd->rd_free);
 468                return;
 469        }
 470
 471        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 472        if (count[1] != tmp) {
 473                if (gfs2_consist_rgrpd(rgd))
 474                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 475                               count[1], tmp);
 476                return;
 477        }
 478
 479        if (count[2] + count[3] != rgd->rd_dinodes) {
 480                if (gfs2_consist_rgrpd(rgd))
 481                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 482                               count[2] + count[3], rgd->rd_dinodes);
 483                return;
 484        }
 485}
 486
 487static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 488{
 489        u64 first = rgd->rd_data0;
 490        u64 last = first + rgd->rd_data;
 491        return first <= block && block < last;
 492}
 493
 494/**
 495 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 496 * @sdp: The GFS2 superblock
 497 * @blk: The data block number
 498 * @exact: True if this needs to be an exact match
 499 *
 500 * Returns: The resource group, or NULL if not found
 501 */
 502
 503struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 504{
 505        struct rb_node *n, *next;
 506        struct gfs2_rgrpd *cur;
 507
 508        spin_lock(&sdp->sd_rindex_spin);
 509        n = sdp->sd_rindex_tree.rb_node;
 510        while (n) {
 511                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 512                next = NULL;
 513                if (blk < cur->rd_addr)
 514                        next = n->rb_left;
 515                else if (blk >= cur->rd_data0 + cur->rd_data)
 516                        next = n->rb_right;
 517                if (next == NULL) {
 518                        spin_unlock(&sdp->sd_rindex_spin);
 519                        if (exact) {
 520                                if (blk < cur->rd_addr)
 521                                        return NULL;
 522                                if (blk >= cur->rd_data0 + cur->rd_data)
 523                                        return NULL;
 524                        }
 525                        return cur;
 526                }
 527                n = next;
 528        }
 529        spin_unlock(&sdp->sd_rindex_spin);
 530
 531        return NULL;
 532}
 533
 534/**
 535 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 536 * @sdp: The GFS2 superblock
 537 *
 538 * Returns: The first rgrp in the filesystem
 539 */
 540
 541struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 542{
 543        const struct rb_node *n;
 544        struct gfs2_rgrpd *rgd;
 545
 546        spin_lock(&sdp->sd_rindex_spin);
 547        n = rb_first(&sdp->sd_rindex_tree);
 548        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 549        spin_unlock(&sdp->sd_rindex_spin);
 550
 551        return rgd;
 552}
 553
 554/**
 555 * gfs2_rgrpd_get_next - get the next RG
 556 * @rgd: the resource group descriptor
 557 *
 558 * Returns: The next rgrp
 559 */
 560
 561struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 562{
 563        struct gfs2_sbd *sdp = rgd->rd_sbd;
 564        const struct rb_node *n;
 565
 566        spin_lock(&sdp->sd_rindex_spin);
 567        n = rb_next(&rgd->rd_node);
 568        if (n == NULL)
 569                n = rb_first(&sdp->sd_rindex_tree);
 570
 571        if (unlikely(&rgd->rd_node == n)) {
 572                spin_unlock(&sdp->sd_rindex_spin);
 573                return NULL;
 574        }
 575        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 576        spin_unlock(&sdp->sd_rindex_spin);
 577        return rgd;
 578}
 579
 580void check_and_update_goal(struct gfs2_inode *ip)
 581{
 582        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 583        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 584                ip->i_goal = ip->i_no_addr;
 585}
 586
 587void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 588{
 589        int x;
 590
 591        for (x = 0; x < rgd->rd_length; x++) {
 592                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 593                kfree(bi->bi_clone);
 594                bi->bi_clone = NULL;
 595        }
 596}
 597
 598/**
 599 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
 600 * @ip: the inode for this reservation
 601 */
 602int gfs2_rs_alloc(struct gfs2_inode *ip)
 603{
 604        int error = 0;
 605
 606        down_write(&ip->i_rw_mutex);
 607        if (ip->i_res)
 608                goto out;
 609
 610        ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
 611        if (!ip->i_res) {
 612                error = -ENOMEM;
 613                goto out;
 614        }
 615
 616        RB_CLEAR_NODE(&ip->i_res->rs_node);
 617out:
 618        up_write(&ip->i_rw_mutex);
 619        return error;
 620}
 621
 622static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 623{
 624        gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
 625                       (unsigned long long)rs->rs_inum,
 626                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 627                       rs->rs_rbm.offset, rs->rs_free);
 628}
 629
 630/**
 631 * __rs_deltree - remove a multi-block reservation from the rgd tree
 632 * @rs: The reservation to remove
 633 *
 634 */
 635static void __rs_deltree(struct gfs2_blkreserv *rs)
 636{
 637        struct gfs2_rgrpd *rgd;
 638
 639        if (!gfs2_rs_active(rs))
 640                return;
 641
 642        rgd = rs->rs_rbm.rgd;
 643        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 644        rb_erase(&rs->rs_node, &rgd->rd_rstree);
 645        RB_CLEAR_NODE(&rs->rs_node);
 646
 647        if (rs->rs_free) {
 648                struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
 649
 650                /* return reserved blocks to the rgrp */
 651                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 652                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
 653                /* The rgrp extent failure point is likely not to increase;
 654                   it will only do so if the freed blocks are somehow
 655                   contiguous with a span of free blocks that follows. Still,
 656                   it will force the number to be recalculated later. */
 657                rgd->rd_extfail_pt += rs->rs_free;
 658                rs->rs_free = 0;
 659                clear_bit(GBF_FULL, &bi->bi_flags);
 660        }
 661}
 662
 663/**
 664 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 665 * @rs: The reservation to remove
 666 *
 667 */
 668void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 669{
 670        struct gfs2_rgrpd *rgd;
 671
 672        rgd = rs->rs_rbm.rgd;
 673        if (rgd) {
 674                spin_lock(&rgd->rd_rsspin);
 675                __rs_deltree(rs);
 676                spin_unlock(&rgd->rd_rsspin);
 677        }
 678}
 679
 680/**
 681 * gfs2_rs_delete - delete a multi-block reservation
 682 * @ip: The inode for this reservation
 683 * @wcount: The inode's write count, or NULL
 684 *
 685 */
 686void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
 687{
 688        down_write(&ip->i_rw_mutex);
 689        if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
 690                gfs2_rs_deltree(ip->i_res);
 691                BUG_ON(ip->i_res->rs_free);
 692                kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
 693                ip->i_res = NULL;
 694        }
 695        up_write(&ip->i_rw_mutex);
 696}
 697
 698/**
 699 * return_all_reservations - return all reserved blocks back to the rgrp.
 700 * @rgd: the rgrp that needs its space back
 701 *
 702 * We previously reserved a bunch of blocks for allocation. Now we need to
 703 * give them back. This leave the reservation structures in tact, but removes
 704 * all of their corresponding "no-fly zones".
 705 */
 706static void return_all_reservations(struct gfs2_rgrpd *rgd)
 707{
 708        struct rb_node *n;
 709        struct gfs2_blkreserv *rs;
 710
 711        spin_lock(&rgd->rd_rsspin);
 712        while ((n = rb_first(&rgd->rd_rstree))) {
 713                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 714                __rs_deltree(rs);
 715        }
 716        spin_unlock(&rgd->rd_rsspin);
 717}
 718
 719void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 720{
 721        struct rb_node *n;
 722        struct gfs2_rgrpd *rgd;
 723        struct gfs2_glock *gl;
 724
 725        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 726                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 727                gl = rgd->rd_gl;
 728
 729                rb_erase(n, &sdp->sd_rindex_tree);
 730
 731                if (gl) {
 732                        spin_lock(&gl->gl_spin);
 733                        gl->gl_object = NULL;
 734                        spin_unlock(&gl->gl_spin);
 735                        gfs2_glock_add_to_lru(gl);
 736                        gfs2_glock_put(gl);
 737                }
 738
 739                gfs2_free_clones(rgd);
 740                kfree(rgd->rd_bits);
 741                return_all_reservations(rgd);
 742                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 743        }
 744}
 745
 746static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 747{
 748        pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 749        pr_info("ri_length = %u\n", rgd->rd_length);
 750        pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 751        pr_info("ri_data = %u\n", rgd->rd_data);
 752        pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
 753}
 754
 755/**
 756 * gfs2_compute_bitstructs - Compute the bitmap sizes
 757 * @rgd: The resource group descriptor
 758 *
 759 * Calculates bitmap descriptors, one for each block that contains bitmap data
 760 *
 761 * Returns: errno
 762 */
 763
 764static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 765{
 766        struct gfs2_sbd *sdp = rgd->rd_sbd;
 767        struct gfs2_bitmap *bi;
 768        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 769        u32 bytes_left, bytes;
 770        int x;
 771
 772        if (!length)
 773                return -EINVAL;
 774
 775        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 776        if (!rgd->rd_bits)
 777                return -ENOMEM;
 778
 779        bytes_left = rgd->rd_bitbytes;
 780
 781        for (x = 0; x < length; x++) {
 782                bi = rgd->rd_bits + x;
 783
 784                bi->bi_flags = 0;
 785                /* small rgrp; bitmap stored completely in header block */
 786                if (length == 1) {
 787                        bytes = bytes_left;
 788                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 789                        bi->bi_start = 0;
 790                        bi->bi_len = bytes;
 791                        bi->bi_blocks = bytes * GFS2_NBBY;
 792                /* header block */
 793                } else if (x == 0) {
 794                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 795                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 796                        bi->bi_start = 0;
 797                        bi->bi_len = bytes;
 798                        bi->bi_blocks = bytes * GFS2_NBBY;
 799                /* last block */
 800                } else if (x + 1 == length) {
 801                        bytes = bytes_left;
 802                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 803                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 804                        bi->bi_len = bytes;
 805                        bi->bi_blocks = bytes * GFS2_NBBY;
 806                /* other blocks */
 807                } else {
 808                        bytes = sdp->sd_sb.sb_bsize -
 809                                sizeof(struct gfs2_meta_header);
 810                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 811                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 812                        bi->bi_len = bytes;
 813                        bi->bi_blocks = bytes * GFS2_NBBY;
 814                }
 815
 816                bytes_left -= bytes;
 817        }
 818
 819        if (bytes_left) {
 820                gfs2_consist_rgrpd(rgd);
 821                return -EIO;
 822        }
 823        bi = rgd->rd_bits + (length - 1);
 824        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 825                if (gfs2_consist_rgrpd(rgd)) {
 826                        gfs2_rindex_print(rgd);
 827                        fs_err(sdp, "start=%u len=%u offset=%u\n",
 828                               bi->bi_start, bi->bi_len, bi->bi_offset);
 829                }
 830                return -EIO;
 831        }
 832
 833        return 0;
 834}
 835
 836/**
 837 * gfs2_ri_total - Total up the file system space, according to the rindex.
 838 * @sdp: the filesystem
 839 *
 840 */
 841u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 842{
 843        u64 total_data = 0;     
 844        struct inode *inode = sdp->sd_rindex;
 845        struct gfs2_inode *ip = GFS2_I(inode);
 846        char buf[sizeof(struct gfs2_rindex)];
 847        int error, rgrps;
 848
 849        for (rgrps = 0;; rgrps++) {
 850                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 851
 852                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 853                        break;
 854                error = gfs2_internal_read(ip, buf, &pos,
 855                                           sizeof(struct gfs2_rindex));
 856                if (error != sizeof(struct gfs2_rindex))
 857                        break;
 858                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 859        }
 860        return total_data;
 861}
 862
 863static int rgd_insert(struct gfs2_rgrpd *rgd)
 864{
 865        struct gfs2_sbd *sdp = rgd->rd_sbd;
 866        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 867
 868        /* Figure out where to put new node */
 869        while (*newn) {
 870                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 871                                                  rd_node);
 872
 873                parent = *newn;
 874                if (rgd->rd_addr < cur->rd_addr)
 875                        newn = &((*newn)->rb_left);
 876                else if (rgd->rd_addr > cur->rd_addr)
 877                        newn = &((*newn)->rb_right);
 878                else
 879                        return -EEXIST;
 880        }
 881
 882        rb_link_node(&rgd->rd_node, parent, newn);
 883        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 884        sdp->sd_rgrps++;
 885        return 0;
 886}
 887
 888/**
 889 * read_rindex_entry - Pull in a new resource index entry from the disk
 890 * @ip: Pointer to the rindex inode
 891 *
 892 * Returns: 0 on success, > 0 on EOF, error code otherwise
 893 */
 894
 895static int read_rindex_entry(struct gfs2_inode *ip)
 896{
 897        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 898        const unsigned bsize = sdp->sd_sb.sb_bsize;
 899        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 900        struct gfs2_rindex buf;
 901        int error;
 902        struct gfs2_rgrpd *rgd;
 903
 904        if (pos >= i_size_read(&ip->i_inode))
 905                return 1;
 906
 907        error = gfs2_internal_read(ip, (char *)&buf, &pos,
 908                                   sizeof(struct gfs2_rindex));
 909
 910        if (error != sizeof(struct gfs2_rindex))
 911                return (error == 0) ? 1 : error;
 912
 913        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 914        error = -ENOMEM;
 915        if (!rgd)
 916                return error;
 917
 918        rgd->rd_sbd = sdp;
 919        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
 920        rgd->rd_length = be32_to_cpu(buf.ri_length);
 921        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 922        rgd->rd_data = be32_to_cpu(buf.ri_data);
 923        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 924        spin_lock_init(&rgd->rd_rsspin);
 925
 926        error = compute_bitstructs(rgd);
 927        if (error)
 928                goto fail;
 929
 930        error = gfs2_glock_get(sdp, rgd->rd_addr,
 931                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 932        if (error)
 933                goto fail;
 934
 935        rgd->rd_gl->gl_object = rgd;
 936        rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
 937        rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
 938        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 939        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 940        if (rgd->rd_data > sdp->sd_max_rg_data)
 941                sdp->sd_max_rg_data = rgd->rd_data;
 942        spin_lock(&sdp->sd_rindex_spin);
 943        error = rgd_insert(rgd);
 944        spin_unlock(&sdp->sd_rindex_spin);
 945        if (!error)
 946                return 0;
 947
 948        error = 0; /* someone else read in the rgrp; free it and ignore it */
 949        gfs2_glock_put(rgd->rd_gl);
 950
 951fail:
 952        kfree(rgd->rd_bits);
 953        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 954        return error;
 955}
 956
 957/**
 958 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 959 * @sdp: the GFS2 superblock
 960 *
 961 * The purpose of this function is to select a subset of the resource groups
 962 * and mark them as PREFERRED. We do it in such a way that each node prefers
 963 * to use a unique set of rgrps to minimize glock contention.
 964 */
 965static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 966{
 967        struct gfs2_rgrpd *rgd, *first;
 968        int i;
 969
 970        /* Skip an initial number of rgrps, based on this node's journal ID.
 971           That should start each node out on its own set. */
 972        rgd = gfs2_rgrpd_get_first(sdp);
 973        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 974                rgd = gfs2_rgrpd_get_next(rgd);
 975        first = rgd;
 976
 977        do {
 978                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 979                for (i = 0; i < sdp->sd_journals; i++) {
 980                        rgd = gfs2_rgrpd_get_next(rgd);
 981                        if (rgd == first)
 982                                break;
 983                }
 984        } while (rgd != first);
 985}
 986
 987/**
 988 * gfs2_ri_update - Pull in a new resource index from the disk
 989 * @ip: pointer to the rindex inode
 990 *
 991 * Returns: 0 on successful update, error code otherwise
 992 */
 993
 994static int gfs2_ri_update(struct gfs2_inode *ip)
 995{
 996        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 997        int error;
 998
 999        do {
1000                error = read_rindex_entry(ip);
1001        } while (error == 0);
1002
1003        if (error < 0)
1004                return error;
1005
1006        set_rgrp_preferences(sdp);
1007
1008        sdp->sd_rindex_uptodate = 1;
1009        return 0;
1010}
1011
1012/**
1013 * gfs2_rindex_update - Update the rindex if required
1014 * @sdp: The GFS2 superblock
1015 *
1016 * We grab a lock on the rindex inode to make sure that it doesn't
1017 * change whilst we are performing an operation. We keep this lock
1018 * for quite long periods of time compared to other locks. This
1019 * doesn't matter, since it is shared and it is very, very rarely
1020 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1021 *
1022 * This makes sure that we're using the latest copy of the resource index
1023 * special file, which might have been updated if someone expanded the
1024 * filesystem (via gfs2_grow utility), which adds new resource groups.
1025 *
1026 * Returns: 0 on succeess, error code otherwise
1027 */
1028
1029int gfs2_rindex_update(struct gfs2_sbd *sdp)
1030{
1031        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1032        struct gfs2_glock *gl = ip->i_gl;
1033        struct gfs2_holder ri_gh;
1034        int error = 0;
1035        int unlock_required = 0;
1036
1037        /* Read new copy from disk if we don't have the latest */
1038        if (!sdp->sd_rindex_uptodate) {
1039                if (!gfs2_glock_is_locked_by_me(gl)) {
1040                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1041                        if (error)
1042                                return error;
1043                        unlock_required = 1;
1044                }
1045                if (!sdp->sd_rindex_uptodate)
1046                        error = gfs2_ri_update(ip);
1047                if (unlock_required)
1048                        gfs2_glock_dq_uninit(&ri_gh);
1049        }
1050
1051        return error;
1052}
1053
1054static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1055{
1056        const struct gfs2_rgrp *str = buf;
1057        u32 rg_flags;
1058
1059        rg_flags = be32_to_cpu(str->rg_flags);
1060        rg_flags &= ~GFS2_RDF_MASK;
1061        rgd->rd_flags &= GFS2_RDF_MASK;
1062        rgd->rd_flags |= rg_flags;
1063        rgd->rd_free = be32_to_cpu(str->rg_free);
1064        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1065        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1066}
1067
1068static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1069{
1070        struct gfs2_rgrp *str = buf;
1071
1072        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1073        str->rg_free = cpu_to_be32(rgd->rd_free);
1074        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1075        str->__pad = cpu_to_be32(0);
1076        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1077        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1078}
1079
1080static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1081{
1082        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1083        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1084
1085        if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1086            rgl->rl_dinodes != str->rg_dinodes ||
1087            rgl->rl_igeneration != str->rg_igeneration)
1088                return 0;
1089        return 1;
1090}
1091
1092static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1093{
1094        const struct gfs2_rgrp *str = buf;
1095
1096        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1097        rgl->rl_flags = str->rg_flags;
1098        rgl->rl_free = str->rg_free;
1099        rgl->rl_dinodes = str->rg_dinodes;
1100        rgl->rl_igeneration = str->rg_igeneration;
1101        rgl->__pad = 0UL;
1102}
1103
1104static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1105{
1106        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1107        u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1108        rgl->rl_unlinked = cpu_to_be32(unlinked);
1109}
1110
1111static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1112{
1113        struct gfs2_bitmap *bi;
1114        const u32 length = rgd->rd_length;
1115        const u8 *buffer = NULL;
1116        u32 i, goal, count = 0;
1117
1118        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1119                goal = 0;
1120                buffer = bi->bi_bh->b_data + bi->bi_offset;
1121                WARN_ON(!buffer_uptodate(bi->bi_bh));
1122                while (goal < bi->bi_len * GFS2_NBBY) {
1123                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1124                                           GFS2_BLKST_UNLINKED);
1125                        if (goal == BFITNOENT)
1126                                break;
1127                        count++;
1128                        goal++;
1129                }
1130        }
1131
1132        return count;
1133}
1134
1135
1136/**
1137 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1138 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1139 *
1140 * Read in all of a Resource Group's header and bitmap blocks.
1141 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
1142 *
1143 * Returns: errno
1144 */
1145
1146static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1147{
1148        struct gfs2_sbd *sdp = rgd->rd_sbd;
1149        struct gfs2_glock *gl = rgd->rd_gl;
1150        unsigned int length = rgd->rd_length;
1151        struct gfs2_bitmap *bi;
1152        unsigned int x, y;
1153        int error;
1154
1155        if (rgd->rd_bits[0].bi_bh != NULL)
1156                return 0;
1157
1158        for (x = 0; x < length; x++) {
1159                bi = rgd->rd_bits + x;
1160                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
1161                if (error)
1162                        goto fail;
1163        }
1164
1165        for (y = length; y--;) {
1166                bi = rgd->rd_bits + y;
1167                error = gfs2_meta_wait(sdp, bi->bi_bh);
1168                if (error)
1169                        goto fail;
1170                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1171                                              GFS2_METATYPE_RG)) {
1172                        error = -EIO;
1173                        goto fail;
1174                }
1175        }
1176
1177        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1178                for (x = 0; x < length; x++)
1179                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1180                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1181                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1182                rgd->rd_free_clone = rgd->rd_free;
1183                /* max out the rgrp allocation failure point */
1184                rgd->rd_extfail_pt = rgd->rd_free;
1185        }
1186        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1187                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1188                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1189                                     rgd->rd_bits[0].bi_bh->b_data);
1190        }
1191        else if (sdp->sd_args.ar_rgrplvb) {
1192                if (!gfs2_rgrp_lvb_valid(rgd)){
1193                        gfs2_consist_rgrpd(rgd);
1194                        error = -EIO;
1195                        goto fail;
1196                }
1197                if (rgd->rd_rgl->rl_unlinked == 0)
1198                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1199        }
1200        return 0;
1201
1202fail:
1203        while (x--) {
1204                bi = rgd->rd_bits + x;
1205                brelse(bi->bi_bh);
1206                bi->bi_bh = NULL;
1207                gfs2_assert_warn(sdp, !bi->bi_clone);
1208        }
1209
1210        return error;
1211}
1212
1213static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1214{
1215        u32 rl_flags;
1216
1217        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1218                return 0;
1219
1220        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1221                return gfs2_rgrp_bh_get(rgd);
1222
1223        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1224        rl_flags &= ~GFS2_RDF_MASK;
1225        rgd->rd_flags &= GFS2_RDF_MASK;
1226        rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1227        if (rgd->rd_rgl->rl_unlinked == 0)
1228                rgd->rd_flags &= ~GFS2_RDF_CHECK;
1229        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1230        rgd->rd_free_clone = rgd->rd_free;
1231        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1232        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1233        return 0;
1234}
1235
1236int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1237{
1238        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1239        struct gfs2_sbd *sdp = rgd->rd_sbd;
1240
1241        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1242                return 0;
1243        return gfs2_rgrp_bh_get(rgd);
1244}
1245
1246/**
1247 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1248 * @gh: The glock holder for the resource group
1249 *
1250 */
1251
1252void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1253{
1254        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1255        int x, length = rgd->rd_length;
1256
1257        for (x = 0; x < length; x++) {
1258                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1259                if (bi->bi_bh) {
1260                        brelse(bi->bi_bh);
1261                        bi->bi_bh = NULL;
1262                }
1263        }
1264
1265}
1266
1267int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1268                             struct buffer_head *bh,
1269                             const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1270{
1271        struct super_block *sb = sdp->sd_vfs;
1272        u64 blk;
1273        sector_t start = 0;
1274        sector_t nr_blks = 0;
1275        int rv;
1276        unsigned int x;
1277        u32 trimmed = 0;
1278        u8 diff;
1279
1280        for (x = 0; x < bi->bi_len; x++) {
1281                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1282                clone += bi->bi_offset;
1283                clone += x;
1284                if (bh) {
1285                        const u8 *orig = bh->b_data + bi->bi_offset + x;
1286                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1287                } else {
1288                        diff = ~(*clone | (*clone >> 1));
1289                }
1290                diff &= 0x55;
1291                if (diff == 0)
1292                        continue;
1293                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1294                while(diff) {
1295                        if (diff & 1) {
1296                                if (nr_blks == 0)
1297                                        goto start_new_extent;
1298                                if ((start + nr_blks) != blk) {
1299                                        if (nr_blks >= minlen) {
1300                                                rv = sb_issue_discard(sb,
1301                                                        start, nr_blks,
1302                                                        GFP_NOFS, 0);
1303                                                if (rv)
1304                                                        goto fail;
1305                                                trimmed += nr_blks;
1306                                        }
1307                                        nr_blks = 0;
1308start_new_extent:
1309                                        start = blk;
1310                                }
1311                                nr_blks++;
1312                        }
1313                        diff >>= 2;
1314                        blk++;
1315                }
1316        }
1317        if (nr_blks >= minlen) {
1318                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1319                if (rv)
1320                        goto fail;
1321                trimmed += nr_blks;
1322        }
1323        if (ptrimmed)
1324                *ptrimmed = trimmed;
1325        return 0;
1326
1327fail:
1328        if (sdp->sd_args.ar_discard)
1329                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1330        sdp->sd_args.ar_discard = 0;
1331        return -EIO;
1332}
1333
1334/**
1335 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1336 * @filp: Any file on the filesystem
1337 * @argp: Pointer to the arguments (also used to pass result)
1338 *
1339 * Returns: 0 on success, otherwise error code
1340 */
1341
1342int gfs2_fitrim(struct file *filp, void __user *argp)
1343{
1344        struct inode *inode = file_inode(filp);
1345        struct gfs2_sbd *sdp = GFS2_SB(inode);
1346        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1347        struct buffer_head *bh;
1348        struct gfs2_rgrpd *rgd;
1349        struct gfs2_rgrpd *rgd_end;
1350        struct gfs2_holder gh;
1351        struct fstrim_range r;
1352        int ret = 0;
1353        u64 amt;
1354        u64 trimmed = 0;
1355        u64 start, end, minlen;
1356        unsigned int x;
1357        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1358
1359        if (!capable(CAP_SYS_ADMIN))
1360                return -EPERM;
1361
1362        if (!blk_queue_discard(q))
1363                return -EOPNOTSUPP;
1364
1365        if (copy_from_user(&r, argp, sizeof(r)))
1366                return -EFAULT;
1367
1368        ret = gfs2_rindex_update(sdp);
1369        if (ret)
1370                return ret;
1371
1372        start = r.start >> bs_shift;
1373        end = start + (r.len >> bs_shift);
1374        minlen = max_t(u64, r.minlen,
1375                       q->limits.discard_granularity) >> bs_shift;
1376
1377        if (end <= start || minlen > sdp->sd_max_rg_data)
1378                return -EINVAL;
1379
1380        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1381        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1382
1383        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1384            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1385                return -EINVAL; /* start is beyond the end of the fs */
1386
1387        while (1) {
1388
1389                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1390                if (ret)
1391                        goto out;
1392
1393                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1394                        /* Trim each bitmap in the rgrp */
1395                        for (x = 0; x < rgd->rd_length; x++) {
1396                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1397                                ret = gfs2_rgrp_send_discards(sdp,
1398                                                rgd->rd_data0, NULL, bi, minlen,
1399                                                &amt);
1400                                if (ret) {
1401                                        gfs2_glock_dq_uninit(&gh);
1402                                        goto out;
1403                                }
1404                                trimmed += amt;
1405                        }
1406
1407                        /* Mark rgrp as having been trimmed */
1408                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1409                        if (ret == 0) {
1410                                bh = rgd->rd_bits[0].bi_bh;
1411                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1412                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1413                                gfs2_rgrp_out(rgd, bh->b_data);
1414                                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1415                                gfs2_trans_end(sdp);
1416                        }
1417                }
1418                gfs2_glock_dq_uninit(&gh);
1419
1420                if (rgd == rgd_end)
1421                        break;
1422
1423                rgd = gfs2_rgrpd_get_next(rgd);
1424        }
1425
1426out:
1427        r.len = trimmed << bs_shift;
1428        if (copy_to_user(argp, &r, sizeof(r)))
1429                return -EFAULT;
1430
1431        return ret;
1432}
1433
1434/**
1435 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1436 * @ip: the inode structure
1437 *
1438 */
1439static void rs_insert(struct gfs2_inode *ip)
1440{
1441        struct rb_node **newn, *parent = NULL;
1442        int rc;
1443        struct gfs2_blkreserv *rs = ip->i_res;
1444        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1445        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1446
1447        BUG_ON(gfs2_rs_active(rs));
1448
1449        spin_lock(&rgd->rd_rsspin);
1450        newn = &rgd->rd_rstree.rb_node;
1451        while (*newn) {
1452                struct gfs2_blkreserv *cur =
1453                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1454
1455                parent = *newn;
1456                rc = rs_cmp(fsblock, rs->rs_free, cur);
1457                if (rc > 0)
1458                        newn = &((*newn)->rb_right);
1459                else if (rc < 0)
1460                        newn = &((*newn)->rb_left);
1461                else {
1462                        spin_unlock(&rgd->rd_rsspin);
1463                        WARN_ON(1);
1464                        return;
1465                }
1466        }
1467
1468        rb_link_node(&rs->rs_node, parent, newn);
1469        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1470
1471        /* Do our rgrp accounting for the reservation */
1472        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1473        spin_unlock(&rgd->rd_rsspin);
1474        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1475}
1476
1477/**
1478 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1479 * @rgd: the resource group descriptor
1480 * @ip: pointer to the inode for which we're reserving blocks
1481 * @ap: the allocation parameters
1482 *
1483 */
1484
1485static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1486                           const struct gfs2_alloc_parms *ap)
1487{
1488        struct gfs2_rbm rbm = { .rgd = rgd, };
1489        u64 goal;
1490        struct gfs2_blkreserv *rs = ip->i_res;
1491        u32 extlen;
1492        u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1493        int ret;
1494        struct inode *inode = &ip->i_inode;
1495
1496        if (S_ISDIR(inode->i_mode))
1497                extlen = 1;
1498        else {
1499                extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1500                extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1501        }
1502        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1503                return;
1504
1505        /* Find bitmap block that contains bits for goal block */
1506        if (rgrp_contains_block(rgd, ip->i_goal))
1507                goal = ip->i_goal;
1508        else
1509                goal = rgd->rd_last_alloc + rgd->rd_data0;
1510
1511        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1512                return;
1513
1514        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1515        if (ret == 0) {
1516                rs->rs_rbm = rbm;
1517                rs->rs_free = extlen;
1518                rs->rs_inum = ip->i_no_addr;
1519                rs_insert(ip);
1520        } else {
1521                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1522                        rgd->rd_last_alloc = 0;
1523        }
1524}
1525
1526/**
1527 * gfs2_next_unreserved_block - Return next block that is not reserved
1528 * @rgd: The resource group
1529 * @block: The starting block
1530 * @length: The required length
1531 * @ip: Ignore any reservations for this inode
1532 *
1533 * If the block does not appear in any reservation, then return the
1534 * block number unchanged. If it does appear in the reservation, then
1535 * keep looking through the tree of reservations in order to find the
1536 * first block number which is not reserved.
1537 */
1538
1539static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1540                                      u32 length,
1541                                      const struct gfs2_inode *ip)
1542{
1543        struct gfs2_blkreserv *rs;
1544        struct rb_node *n;
1545        int rc;
1546
1547        spin_lock(&rgd->rd_rsspin);
1548        n = rgd->rd_rstree.rb_node;
1549        while (n) {
1550                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1551                rc = rs_cmp(block, length, rs);
1552                if (rc < 0)
1553                        n = n->rb_left;
1554                else if (rc > 0)
1555                        n = n->rb_right;
1556                else
1557                        break;
1558        }
1559
1560        if (n) {
1561                while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1562                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1563                        n = n->rb_right;
1564                        if (n == NULL)
1565                                break;
1566                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1567                }
1568        }
1569
1570        spin_unlock(&rgd->rd_rsspin);
1571        return block;
1572}
1573
1574/**
1575 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1576 * @rbm: The current position in the resource group
1577 * @ip: The inode for which we are searching for blocks
1578 * @minext: The minimum extent length
1579 * @maxext: A pointer to the maximum extent structure
1580 *
1581 * This checks the current position in the rgrp to see whether there is
1582 * a reservation covering this block. If not then this function is a
1583 * no-op. If there is, then the position is moved to the end of the
1584 * contiguous reservation(s) so that we are pointing at the first
1585 * non-reserved block.
1586 *
1587 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1588 */
1589
1590static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1591                                             const struct gfs2_inode *ip,
1592                                             u32 minext,
1593                                             struct gfs2_extent *maxext)
1594{
1595        u64 block = gfs2_rbm_to_block(rbm);
1596        u32 extlen = 1;
1597        u64 nblock;
1598        int ret;
1599
1600        /*
1601         * If we have a minimum extent length, then skip over any extent
1602         * which is less than the min extent length in size.
1603         */
1604        if (minext) {
1605                extlen = gfs2_free_extlen(rbm, minext);
1606                if (extlen <= maxext->len)
1607                        goto fail;
1608        }
1609
1610        /*
1611         * Check the extent which has been found against the reservations
1612         * and skip if parts of it are already reserved
1613         */
1614        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1615        if (nblock == block) {
1616                if (!minext || extlen >= minext)
1617                        return 0;
1618
1619                if (extlen > maxext->len) {
1620                        maxext->len = extlen;
1621                        maxext->rbm = *rbm;
1622                }
1623fail:
1624                nblock = block + extlen;
1625        }
1626        ret = gfs2_rbm_from_block(rbm, nblock);
1627        if (ret < 0)
1628                return ret;
1629        return 1;
1630}
1631
1632/**
1633 * gfs2_rbm_find - Look for blocks of a particular state
1634 * @rbm: Value/result starting position and final position
1635 * @state: The state which we want to find
1636 * @minext: Pointer to the requested extent length (NULL for a single block)
1637 *          This is updated to be the actual reservation size.
1638 * @ip: If set, check for reservations
1639 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1640 *          around until we've reached the starting point.
1641 * @ap: the allocation parameters
1642 *
1643 * Side effects:
1644 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1645 *   has no free blocks in it.
1646 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1647 *   has come up short on a free block search.
1648 *
1649 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1650 */
1651
1652static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1653                         const struct gfs2_inode *ip, bool nowrap,
1654                         const struct gfs2_alloc_parms *ap)
1655{
1656        struct buffer_head *bh;
1657        int initial_bii;
1658        u32 initial_offset;
1659        int first_bii = rbm->bii;
1660        u32 first_offset = rbm->offset;
1661        u32 offset;
1662        u8 *buffer;
1663        int n = 0;
1664        int iters = rbm->rgd->rd_length;
1665        int ret;
1666        struct gfs2_bitmap *bi;
1667        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1668
1669        /* If we are not starting at the beginning of a bitmap, then we
1670         * need to add one to the bitmap count to ensure that we search
1671         * the starting bitmap twice.
1672         */
1673        if (rbm->offset != 0)
1674                iters++;
1675
1676        while(1) {
1677                bi = rbm_bi(rbm);
1678                if (test_bit(GBF_FULL, &bi->bi_flags) &&
1679                    (state == GFS2_BLKST_FREE))
1680                        goto next_bitmap;
1681
1682                bh = bi->bi_bh;
1683                buffer = bh->b_data + bi->bi_offset;
1684                WARN_ON(!buffer_uptodate(bh));
1685                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1686                        buffer = bi->bi_clone + bi->bi_offset;
1687                initial_offset = rbm->offset;
1688                offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1689                if (offset == BFITNOENT)
1690                        goto bitmap_full;
1691                rbm->offset = offset;
1692                if (ip == NULL)
1693                        return 0;
1694
1695                initial_bii = rbm->bii;
1696                ret = gfs2_reservation_check_and_update(rbm, ip,
1697                                                        minext ? *minext : 0,
1698                                                        &maxext);
1699                if (ret == 0)
1700                        return 0;
1701                if (ret > 0) {
1702                        n += (rbm->bii - initial_bii);
1703                        goto next_iter;
1704                }
1705                if (ret == -E2BIG) {
1706                        rbm->bii = 0;
1707                        rbm->offset = 0;
1708                        n += (rbm->bii - initial_bii);
1709                        goto res_covered_end_of_rgrp;
1710                }
1711                return ret;
1712
1713bitmap_full:    /* Mark bitmap as full and fall through */
1714                if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
1715                        struct gfs2_bitmap *bi = rbm_bi(rbm);
1716                        set_bit(GBF_FULL, &bi->bi_flags);
1717                }
1718
1719next_bitmap:    /* Find next bitmap in the rgrp */
1720                rbm->offset = 0;
1721                rbm->bii++;
1722                if (rbm->bii == rbm->rgd->rd_length)
1723                        rbm->bii = 0;
1724res_covered_end_of_rgrp:
1725                if ((rbm->bii == 0) && nowrap)
1726                        break;
1727                n++;
1728next_iter:
1729                if (n >= iters)
1730                        break;
1731        }
1732
1733        if (minext == NULL || state != GFS2_BLKST_FREE)
1734                return -ENOSPC;
1735
1736        /* If the extent was too small, and it's smaller than the smallest
1737           to have failed before, remember for future reference that it's
1738           useless to search this rgrp again for this amount or more. */
1739        if ((first_offset == 0) && (first_bii == 0) &&
1740            (*minext < rbm->rgd->rd_extfail_pt))
1741                rbm->rgd->rd_extfail_pt = *minext;
1742
1743        /* If the maximum extent we found is big enough to fulfill the
1744           minimum requirements, use it anyway. */
1745        if (maxext.len) {
1746                *rbm = maxext.rbm;
1747                *minext = maxext.len;
1748                return 0;
1749        }
1750
1751        return -ENOSPC;
1752}
1753
1754/**
1755 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1756 * @rgd: The rgrp
1757 * @last_unlinked: block address of the last dinode we unlinked
1758 * @skip: block address we should explicitly not unlink
1759 *
1760 * Returns: 0 if no error
1761 *          The inode, if one has been found, in inode.
1762 */
1763
1764static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1765{
1766        u64 block;
1767        struct gfs2_sbd *sdp = rgd->rd_sbd;
1768        struct gfs2_glock *gl;
1769        struct gfs2_inode *ip;
1770        int error;
1771        int found = 0;
1772        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1773
1774        while (1) {
1775                down_write(&sdp->sd_log_flush_lock);
1776                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1777                                      true, NULL);
1778                up_write(&sdp->sd_log_flush_lock);
1779                if (error == -ENOSPC)
1780                        break;
1781                if (WARN_ON_ONCE(error))
1782                        break;
1783
1784                block = gfs2_rbm_to_block(&rbm);
1785                if (gfs2_rbm_from_block(&rbm, block + 1))
1786                        break;
1787                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1788                        continue;
1789                if (block == skip)
1790                        continue;
1791                *last_unlinked = block;
1792
1793                error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1794                if (error)
1795                        continue;
1796
1797                /* If the inode is already in cache, we can ignore it here
1798                 * because the existing inode disposal code will deal with
1799                 * it when all refs have gone away. Accessing gl_object like
1800                 * this is not safe in general. Here it is ok because we do
1801                 * not dereference the pointer, and we only need an approx
1802                 * answer to whether it is NULL or not.
1803                 */
1804                ip = gl->gl_object;
1805
1806                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1807                        gfs2_glock_put(gl);
1808                else
1809                        found++;
1810
1811                /* Limit reclaim to sensible number of tasks */
1812                if (found > NR_CPUS)
1813                        return;
1814        }
1815
1816        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1817        return;
1818}
1819
1820/**
1821 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1822 * @rgd: The rgrp in question
1823 * @loops: An indication of how picky we can be (0=very, 1=less so)
1824 *
1825 * This function uses the recently added glock statistics in order to
1826 * figure out whether a parciular resource group is suffering from
1827 * contention from multiple nodes. This is done purely on the basis
1828 * of timings, since this is the only data we have to work with and
1829 * our aim here is to reject a resource group which is highly contended
1830 * but (very important) not to do this too often in order to ensure that
1831 * we do not land up introducing fragmentation by changing resource
1832 * groups when not actually required.
1833 *
1834 * The calculation is fairly simple, we want to know whether the SRTTB
1835 * (i.e. smoothed round trip time for blocking operations) to acquire
1836 * the lock for this rgrp's glock is significantly greater than the
1837 * time taken for resource groups on average. We introduce a margin in
1838 * the form of the variable @var which is computed as the sum of the two
1839 * respective variences, and multiplied by a factor depending on @loops
1840 * and whether we have a lot of data to base the decision on. This is
1841 * then tested against the square difference of the means in order to
1842 * decide whether the result is statistically significant or not.
1843 *
1844 * Returns: A boolean verdict on the congestion status
1845 */
1846
1847static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1848{
1849        const struct gfs2_glock *gl = rgd->rd_gl;
1850        const struct gfs2_sbd *sdp = gl->gl_sbd;
1851        struct gfs2_lkstats *st;
1852        s64 r_dcount, l_dcount;
1853        s64 r_srttb, l_srttb;
1854        s64 srttb_diff;
1855        s64 sqr_diff;
1856        s64 var;
1857
1858        preempt_disable();
1859        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1860        r_srttb = st->stats[GFS2_LKS_SRTTB];
1861        r_dcount = st->stats[GFS2_LKS_DCOUNT];
1862        var = st->stats[GFS2_LKS_SRTTVARB] +
1863              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1864        preempt_enable();
1865
1866        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1867        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1868
1869        if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1870                return false;
1871
1872        srttb_diff = r_srttb - l_srttb;
1873        sqr_diff = srttb_diff * srttb_diff;
1874
1875        var *= 2;
1876        if (l_dcount < 8 || r_dcount < 8)
1877                var *= 2;
1878        if (loops == 1)
1879                var *= 2;
1880
1881        return ((srttb_diff < 0) && (sqr_diff > var));
1882}
1883
1884/**
1885 * gfs2_rgrp_used_recently
1886 * @rs: The block reservation with the rgrp to test
1887 * @msecs: The time limit in milliseconds
1888 *
1889 * Returns: True if the rgrp glock has been used within the time limit
1890 */
1891static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1892                                    u64 msecs)
1893{
1894        u64 tdiff;
1895
1896        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1897                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1898
1899        return tdiff > (msecs * 1000 * 1000);
1900}
1901
1902static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1903{
1904        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1905        u32 skip;
1906
1907        get_random_bytes(&skip, sizeof(skip));
1908        return skip % sdp->sd_rgrps;
1909}
1910
1911static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1912{
1913        struct gfs2_rgrpd *rgd = *pos;
1914        struct gfs2_sbd *sdp = rgd->rd_sbd;
1915
1916        rgd = gfs2_rgrpd_get_next(rgd);
1917        if (rgd == NULL)
1918                rgd = gfs2_rgrpd_get_first(sdp);
1919        *pos = rgd;
1920        if (rgd != begin) /* If we didn't wrap */
1921                return true;
1922        return false;
1923}
1924
1925/**
1926 * fast_to_acquire - determine if a resource group will be fast to acquire
1927 *
1928 * If this is one of our preferred rgrps, it should be quicker to acquire,
1929 * because we tried to set ourselves up as dlm lock master.
1930 */
1931static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1932{
1933        struct gfs2_glock *gl = rgd->rd_gl;
1934
1935        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1936            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1937            !test_bit(GLF_DEMOTE, &gl->gl_flags))
1938                return 1;
1939        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1940                return 1;
1941        return 0;
1942}
1943
1944/**
1945 * gfs2_inplace_reserve - Reserve space in the filesystem
1946 * @ip: the inode to reserve space for
1947 * @ap: the allocation parameters
1948 *
1949 * Returns: errno
1950 */
1951
1952int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
1953{
1954        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1955        struct gfs2_rgrpd *begin = NULL;
1956        struct gfs2_blkreserv *rs = ip->i_res;
1957        int error = 0, rg_locked, flags = 0;
1958        u64 last_unlinked = NO_BLOCK;
1959        int loops = 0;
1960        u32 skip = 0;
1961
1962        if (sdp->sd_args.ar_rgrplvb)
1963                flags |= GL_SKIP;
1964        if (gfs2_assert_warn(sdp, ap->target))
1965                return -EINVAL;
1966        if (gfs2_rs_active(rs)) {
1967                begin = rs->rs_rbm.rgd;
1968        } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1969                rs->rs_rbm.rgd = begin = ip->i_rgd;
1970        } else {
1971                check_and_update_goal(ip);
1972                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1973        }
1974        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1975                skip = gfs2_orlov_skip(ip);
1976        if (rs->rs_rbm.rgd == NULL)
1977                return -EBADSLT;
1978
1979        while (loops < 3) {
1980                rg_locked = 1;
1981
1982                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1983                        rg_locked = 0;
1984                        if (skip && skip--)
1985                                goto next_rgrp;
1986                        if (!gfs2_rs_active(rs)) {
1987                                if (loops == 0 &&
1988                                    !fast_to_acquire(rs->rs_rbm.rgd))
1989                                        goto next_rgrp;
1990                                if ((loops < 2) &&
1991                                    gfs2_rgrp_used_recently(rs, 1000) &&
1992                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1993                                        goto next_rgrp;
1994                        }
1995                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1996                                                   LM_ST_EXCLUSIVE, flags,
1997                                                   &rs->rs_rgd_gh);
1998                        if (unlikely(error))
1999                                return error;
2000                        if (!gfs2_rs_active(rs) && (loops < 2) &&
2001                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2002                                goto skip_rgrp;
2003                        if (sdp->sd_args.ar_rgrplvb) {
2004                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
2005                                if (unlikely(error)) {
2006                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2007                                        return error;
2008                                }
2009                        }
2010                }
2011
2012                /* Skip unuseable resource groups */
2013                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2014                                                 GFS2_RDF_ERROR)) ||
2015                    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2016                        goto skip_rgrp;
2017
2018                if (sdp->sd_args.ar_rgrplvb)
2019                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2020
2021                /* Get a reservation if we don't already have one */
2022                if (!gfs2_rs_active(rs))
2023                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2024
2025                /* Skip rgrps when we can't get a reservation on first pass */
2026                if (!gfs2_rs_active(rs) && (loops < 1))
2027                        goto check_rgrp;
2028
2029                /* If rgrp has enough free space, use it */
2030                if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
2031                        ip->i_rgd = rs->rs_rbm.rgd;
2032                        return 0;
2033                }
2034
2035check_rgrp:
2036                /* Check for unlinked inodes which can be reclaimed */
2037                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2038                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2039                                        ip->i_no_addr);
2040skip_rgrp:
2041                /* Drop reservation, if we couldn't use reserved rgrp */
2042                if (gfs2_rs_active(rs))
2043                        gfs2_rs_deltree(rs);
2044
2045                /* Unlock rgrp if required */
2046                if (!rg_locked)
2047                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2048next_rgrp:
2049                /* Find the next rgrp, and continue looking */
2050                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2051                        continue;
2052                if (skip)
2053                        continue;
2054
2055                /* If we've scanned all the rgrps, but found no free blocks
2056                 * then this checks for some less likely conditions before
2057                 * trying again.
2058                 */
2059                loops++;
2060                /* Check that fs hasn't grown if writing to rindex */
2061                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2062                        error = gfs2_ri_update(ip);
2063                        if (error)
2064                                return error;
2065                }
2066                /* Flushing the log may release space */
2067                if (loops == 2)
2068                        gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2069        }
2070
2071        return -ENOSPC;
2072}
2073
2074/**
2075 * gfs2_inplace_release - release an inplace reservation
2076 * @ip: the inode the reservation was taken out on
2077 *
2078 * Release a reservation made by gfs2_inplace_reserve().
2079 */
2080
2081void gfs2_inplace_release(struct gfs2_inode *ip)
2082{
2083        struct gfs2_blkreserv *rs = ip->i_res;
2084
2085        if (rs->rs_rgd_gh.gh_gl)
2086                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2087}
2088
2089/**
2090 * gfs2_get_block_type - Check a block in a RG is of given type
2091 * @rgd: the resource group holding the block
2092 * @block: the block number
2093 *
2094 * Returns: The block type (GFS2_BLKST_*)
2095 */
2096
2097static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2098{
2099        struct gfs2_rbm rbm = { .rgd = rgd, };
2100        int ret;
2101
2102        ret = gfs2_rbm_from_block(&rbm, block);
2103        WARN_ON_ONCE(ret != 0);
2104
2105        return gfs2_testbit(&rbm);
2106}
2107
2108
2109/**
2110 * gfs2_alloc_extent - allocate an extent from a given bitmap
2111 * @rbm: the resource group information
2112 * @dinode: TRUE if the first block we allocate is for a dinode
2113 * @n: The extent length (value/result)
2114 *
2115 * Add the bitmap buffer to the transaction.
2116 * Set the found bits to @new_state to change block's allocation state.
2117 */
2118static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2119                             unsigned int *n)
2120{
2121        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2122        const unsigned int elen = *n;
2123        u64 block;
2124        int ret;
2125
2126        *n = 1;
2127        block = gfs2_rbm_to_block(rbm);
2128        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2129        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2130        block++;
2131        while (*n < elen) {
2132                ret = gfs2_rbm_from_block(&pos, block);
2133                if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2134                        break;
2135                gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2136                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2137                (*n)++;
2138                block++;
2139        }
2140}
2141
2142/**
2143 * rgblk_free - Change alloc state of given block(s)
2144 * @sdp: the filesystem
2145 * @bstart: the start of a run of blocks to free
2146 * @blen: the length of the block run (all must lie within ONE RG!)
2147 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2148 *
2149 * Returns:  Resource group containing the block(s)
2150 */
2151
2152static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2153                                     u32 blen, unsigned char new_state)
2154{
2155        struct gfs2_rbm rbm;
2156        struct gfs2_bitmap *bi, *bi_prev = NULL;
2157
2158        rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2159        if (!rbm.rgd) {
2160                if (gfs2_consist(sdp))
2161                        fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2162                return NULL;
2163        }
2164
2165        gfs2_rbm_from_block(&rbm, bstart);
2166        while (blen--) {
2167                bi = rbm_bi(&rbm);
2168                if (bi != bi_prev) {
2169                        if (!bi->bi_clone) {
2170                                bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2171                                                      GFP_NOFS | __GFP_NOFAIL);
2172                                memcpy(bi->bi_clone + bi->bi_offset,
2173                                       bi->bi_bh->b_data + bi->bi_offset,
2174                                       bi->bi_len);
2175                        }
2176                        gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2177                        bi_prev = bi;
2178                }
2179                gfs2_setbit(&rbm, false, new_state);
2180                gfs2_rbm_incr(&rbm);
2181        }
2182
2183        return rbm.rgd;
2184}
2185
2186/**
2187 * gfs2_rgrp_dump - print out an rgrp
2188 * @seq: The iterator
2189 * @gl: The glock in question
2190 *
2191 */
2192
2193void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2194{
2195        struct gfs2_rgrpd *rgd = gl->gl_object;
2196        struct gfs2_blkreserv *trs;
2197        const struct rb_node *n;
2198
2199        if (rgd == NULL)
2200                return;
2201        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2202                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2203                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2204                       rgd->rd_reserved, rgd->rd_extfail_pt);
2205        spin_lock(&rgd->rd_rsspin);
2206        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2207                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2208                dump_rs(seq, trs);
2209        }
2210        spin_unlock(&rgd->rd_rsspin);
2211}
2212
2213static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2214{
2215        struct gfs2_sbd *sdp = rgd->rd_sbd;
2216        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2217                (unsigned long long)rgd->rd_addr);
2218        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2219        gfs2_rgrp_dump(NULL, rgd->rd_gl);
2220        rgd->rd_flags |= GFS2_RDF_ERROR;
2221}
2222
2223/**
2224 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2225 * @ip: The inode we have just allocated blocks for
2226 * @rbm: The start of the allocated blocks
2227 * @len: The extent length
2228 *
2229 * Adjusts a reservation after an allocation has taken place. If the
2230 * reservation does not match the allocation, or if it is now empty
2231 * then it is removed.
2232 */
2233
2234static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2235                                    const struct gfs2_rbm *rbm, unsigned len)
2236{
2237        struct gfs2_blkreserv *rs = ip->i_res;
2238        struct gfs2_rgrpd *rgd = rbm->rgd;
2239        unsigned rlen;
2240        u64 block;
2241        int ret;
2242
2243        spin_lock(&rgd->rd_rsspin);
2244        if (gfs2_rs_active(rs)) {
2245                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2246                        block = gfs2_rbm_to_block(rbm);
2247                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2248                        rlen = min(rs->rs_free, len);
2249                        rs->rs_free -= rlen;
2250                        rgd->rd_reserved -= rlen;
2251                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2252                        if (rs->rs_free && !ret)
2253                                goto out;
2254                        /* We used up our block reservation, so we should
2255                           reserve more blocks next time. */
2256                        atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
2257                }
2258                __rs_deltree(rs);
2259        }
2260out:
2261        spin_unlock(&rgd->rd_rsspin);
2262}
2263
2264/**
2265 * gfs2_set_alloc_start - Set starting point for block allocation
2266 * @rbm: The rbm which will be set to the required location
2267 * @ip: The gfs2 inode
2268 * @dinode: Flag to say if allocation includes a new inode
2269 *
2270 * This sets the starting point from the reservation if one is active
2271 * otherwise it falls back to guessing a start point based on the
2272 * inode's goal block or the last allocation point in the rgrp.
2273 */
2274
2275static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2276                                 const struct gfs2_inode *ip, bool dinode)
2277{
2278        u64 goal;
2279
2280        if (gfs2_rs_active(ip->i_res)) {
2281                *rbm = ip->i_res->rs_rbm;
2282                return;
2283        }
2284
2285        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2286                goal = ip->i_goal;
2287        else
2288                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2289
2290        gfs2_rbm_from_block(rbm, goal);
2291}
2292
2293/**
2294 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2295 * @ip: the inode to allocate the block for
2296 * @bn: Used to return the starting block number
2297 * @nblocks: requested number of blocks/extent length (value/result)
2298 * @dinode: 1 if we're allocating a dinode block, else 0
2299 * @generation: the generation number of the inode
2300 *
2301 * Returns: 0 or error
2302 */
2303
2304int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2305                      bool dinode, u64 *generation)
2306{
2307        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2308        struct buffer_head *dibh;
2309        struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2310        unsigned int ndata;
2311        u64 block; /* block, within the file system scope */
2312        int error;
2313
2314        gfs2_set_alloc_start(&rbm, ip, dinode);
2315        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2316
2317        if (error == -ENOSPC) {
2318                gfs2_set_alloc_start(&rbm, ip, dinode);
2319                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2320                                      NULL);
2321        }
2322
2323        /* Since all blocks are reserved in advance, this shouldn't happen */
2324        if (error) {
2325                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2326                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2327                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2328                        rbm.rgd->rd_extfail_pt);
2329                goto rgrp_error;
2330        }
2331
2332        gfs2_alloc_extent(&rbm, dinode, nblocks);
2333        block = gfs2_rbm_to_block(&rbm);
2334        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2335        if (gfs2_rs_active(ip->i_res))
2336                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2337        ndata = *nblocks;
2338        if (dinode)
2339                ndata--;
2340
2341        if (!dinode) {
2342                ip->i_goal = block + ndata - 1;
2343                error = gfs2_meta_inode_buffer(ip, &dibh);
2344                if (error == 0) {
2345                        struct gfs2_dinode *di =
2346                                (struct gfs2_dinode *)dibh->b_data;
2347                        gfs2_trans_add_meta(ip->i_gl, dibh);
2348                        di->di_goal_meta = di->di_goal_data =
2349                                cpu_to_be64(ip->i_goal);
2350                        brelse(dibh);
2351                }
2352        }
2353        if (rbm.rgd->rd_free < *nblocks) {
2354                pr_warn("nblocks=%u\n", *nblocks);
2355                goto rgrp_error;
2356        }
2357
2358        rbm.rgd->rd_free -= *nblocks;
2359        if (dinode) {
2360                rbm.rgd->rd_dinodes++;
2361                *generation = rbm.rgd->rd_igeneration++;
2362                if (*generation == 0)
2363                        *generation = rbm.rgd->rd_igeneration++;
2364        }
2365
2366        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2367        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2368        gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2369
2370        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2371        if (dinode)
2372                gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2373
2374        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2375
2376        rbm.rgd->rd_free_clone -= *nblocks;
2377        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2378                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2379        *bn = block;
2380        return 0;
2381
2382rgrp_error:
2383        gfs2_rgrp_error(rbm.rgd);
2384        return -EIO;
2385}
2386
2387/**
2388 * __gfs2_free_blocks - free a contiguous run of block(s)
2389 * @ip: the inode these blocks are being freed from
2390 * @bstart: first block of a run of contiguous blocks
2391 * @blen: the length of the block run
2392 * @meta: 1 if the blocks represent metadata
2393 *
2394 */
2395
2396void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2397{
2398        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2399        struct gfs2_rgrpd *rgd;
2400
2401        rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2402        if (!rgd)
2403                return;
2404        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2405        rgd->rd_free += blen;
2406        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2407        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2408        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2409        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2410
2411        /* Directories keep their data in the metadata address space */
2412        if (meta || ip->i_depth)
2413                gfs2_meta_wipe(ip, bstart, blen);
2414}
2415
2416/**
2417 * gfs2_free_meta - free a contiguous run of data block(s)
2418 * @ip: the inode these blocks are being freed from
2419 * @bstart: first block of a run of contiguous blocks
2420 * @blen: the length of the block run
2421 *
2422 */
2423
2424void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2425{
2426        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2427
2428        __gfs2_free_blocks(ip, bstart, blen, 1);
2429        gfs2_statfs_change(sdp, 0, +blen, 0);
2430        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2431}
2432
2433void gfs2_unlink_di(struct inode *inode)
2434{
2435        struct gfs2_inode *ip = GFS2_I(inode);
2436        struct gfs2_sbd *sdp = GFS2_SB(inode);
2437        struct gfs2_rgrpd *rgd;
2438        u64 blkno = ip->i_no_addr;
2439
2440        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2441        if (!rgd)
2442                return;
2443        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2444        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2445        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2446        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2447        update_rgrp_lvb_unlinked(rgd, 1);
2448}
2449
2450static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2451{
2452        struct gfs2_sbd *sdp = rgd->rd_sbd;
2453        struct gfs2_rgrpd *tmp_rgd;
2454
2455        tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2456        if (!tmp_rgd)
2457                return;
2458        gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2459
2460        if (!rgd->rd_dinodes)
2461                gfs2_consist_rgrpd(rgd);
2462        rgd->rd_dinodes--;
2463        rgd->rd_free++;
2464
2465        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2466        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2467        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2468        update_rgrp_lvb_unlinked(rgd, -1);
2469
2470        gfs2_statfs_change(sdp, 0, +1, -1);
2471}
2472
2473
2474void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2475{
2476        gfs2_free_uninit_di(rgd, ip->i_no_addr);
2477        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2478        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2479        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2480}
2481
2482/**
2483 * gfs2_check_blk_type - Check the type of a block
2484 * @sdp: The superblock
2485 * @no_addr: The block number to check
2486 * @type: The block type we are looking for
2487 *
2488 * Returns: 0 if the block type matches the expected type
2489 *          -ESTALE if it doesn't match
2490 *          or -ve errno if something went wrong while checking
2491 */
2492
2493int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2494{
2495        struct gfs2_rgrpd *rgd;
2496        struct gfs2_holder rgd_gh;
2497        int error = -EINVAL;
2498
2499        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2500        if (!rgd)
2501                goto fail;
2502
2503        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2504        if (error)
2505                goto fail;
2506
2507        if (gfs2_get_block_type(rgd, no_addr) != type)
2508                error = -ESTALE;
2509
2510        gfs2_glock_dq_uninit(&rgd_gh);
2511fail:
2512        return error;
2513}
2514
2515/**
2516 * gfs2_rlist_add - add a RG to a list of RGs
2517 * @ip: the inode
2518 * @rlist: the list of resource groups
2519 * @block: the block
2520 *
2521 * Figure out what RG a block belongs to and add that RG to the list
2522 *
2523 * FIXME: Don't use NOFAIL
2524 *
2525 */
2526
2527void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2528                    u64 block)
2529{
2530        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2531        struct gfs2_rgrpd *rgd;
2532        struct gfs2_rgrpd **tmp;
2533        unsigned int new_space;
2534        unsigned int x;
2535
2536        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2537                return;
2538
2539        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2540                rgd = ip->i_rgd;
2541        else
2542                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2543        if (!rgd) {
2544                fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2545                return;
2546        }
2547        ip->i_rgd = rgd;
2548
2549        for (x = 0; x < rlist->rl_rgrps; x++)
2550                if (rlist->rl_rgd[x] == rgd)
2551                        return;
2552
2553        if (rlist->rl_rgrps == rlist->rl_space) {
2554                new_space = rlist->rl_space + 10;
2555
2556                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2557                              GFP_NOFS | __GFP_NOFAIL);
2558
2559                if (rlist->rl_rgd) {
2560                        memcpy(tmp, rlist->rl_rgd,
2561                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2562                        kfree(rlist->rl_rgd);
2563                }
2564
2565                rlist->rl_space = new_space;
2566                rlist->rl_rgd = tmp;
2567        }
2568
2569        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2570}
2571
2572/**
2573 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2574 *      and initialize an array of glock holders for them
2575 * @rlist: the list of resource groups
2576 * @state: the lock state to acquire the RG lock in
2577 *
2578 * FIXME: Don't use NOFAIL
2579 *
2580 */
2581
2582void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2583{
2584        unsigned int x;
2585
2586        rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2587                                GFP_NOFS | __GFP_NOFAIL);
2588        for (x = 0; x < rlist->rl_rgrps; x++)
2589                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2590                                state, 0,
2591                                &rlist->rl_ghs[x]);
2592}
2593
2594/**
2595 * gfs2_rlist_free - free a resource group list
2596 * @rlist: the list of resource groups
2597 *
2598 */
2599
2600void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2601{
2602        unsigned int x;
2603
2604        kfree(rlist->rl_rgd);
2605
2606        if (rlist->rl_ghs) {
2607                for (x = 0; x < rlist->rl_rgrps; x++)
2608                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2609                kfree(rlist->rl_ghs);
2610                rlist->rl_ghs = NULL;
2611        }
2612}
2613
2614