linux/fs/gfs2/rgrp.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"
#include "dir.h"

#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)

#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/*
 * These routines are used by the resource group routines (rgrp.c)
 * to keep track of block allocation.  Each block is represented by two
 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
 *
 * 0 = Free
 * 1 = Used (not metadata)
 * 2 = Unlinked (still in use) inode
 * 3 = Used (metadata)
 */
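
/*
 * Worked example of the encoding above: a bitmap byte of 0xD4
 * (binary 11 01 01 00, reading from the high bit down) holds four
 * block states, lowest-order bit-pair first:
 *
 *   bits 1:0 = 00 -> block n+0 is free
 *   bits 3:2 = 01 -> block n+1 is used (data)
 *   bits 5:4 = 01 -> block n+2 is used (data)
 *   bits 7:6 = 11 -> block n+3 is used (metadata)
 */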

struct gfs2_extent {
        struct gfs2_rbm rbm;
        u32 len;
};

static const char valid_change[16] = {
                /* current */
        /* n */ 0, 1, 1, 1,
        /* e */ 1, 0, 0, 0,
        /* w */ 0, 0, 0, 1,
                1, 0, 0, 0
};
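
/*
 * Note: gfs2_setbit() below indexes this table as
 * valid_change[new_state * 4 + cur_state]: rows are the new state,
 * columns the current state, and a zero entry marks a transition that
 * should never happen (e.g. freeing an already-free block).
 */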

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
                         const struct gfs2_inode *ip, bool nowrap);


/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
                               unsigned char new_state)
{
        unsigned char *byte1, *byte2, *end, cur_state;
        struct gfs2_bitmap *bi = rbm_bi(rbm);
        unsigned int buflen = bi->bi_bytes;
        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
        end = bi->bi_bh->b_data + bi->bi_offset + buflen;

        BUG_ON(byte1 >= end);

        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
                struct gfs2_sbd *sdp = rbm->rgd->rd_sbd;

                fs_warn(sdp, "buf_blk = 0x%x old_state=%d, new_state=%d\n",
                        rbm->offset, cur_state, new_state);
                fs_warn(sdp, "rgrp=0x%llx bi_start=0x%x biblk: 0x%llx\n",
                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start,
                        (unsigned long long)bi->bi_bh->b_blocknr);
                fs_warn(sdp, "bi_offset=0x%x bi_bytes=0x%x block=0x%llx\n",
                        bi->bi_offset, bi->bi_bytes,
                        (unsigned long long)gfs2_rbm_to_block(rbm));
                dump_stack();
                gfs2_consist_rgrpd(rbm->rgd);
                return;
        }
        *byte1 ^= (cur_state ^ new_state) << bit;

        if (do_clone && bi->bi_clone) {
                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
                *byte2 ^= (cur_state ^ new_state) << bit;
        }
}

/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rbm: The bit to test
 * @use_clone: If true, test the clone bitmap, not the official bitmap.
 *
 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
 * not the "real" bitmaps, to avoid allocating recently freed blocks.
 *
 * Returns: The two bit block state of the requested bit
 */

static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
        struct gfs2_bitmap *bi = rbm_bi(rbm);
        const u8 *buffer;
        const u8 *byte;
        unsigned int bit;

        if (use_clone && bi->bi_clone)
                buffer = bi->bi_clone;
        else
                buffer = bi->bi_bh->b_data;
        buffer += bi->bi_offset;
        byte = buffer + (rbm->offset / GFS2_NBBY);
        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

        return (*byte >> bit) & GFS2_BIT_MASK;
}

/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for; this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then AND it with
 * the original. All the even bit positions (0,2,4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
        u64 tmp;
        static const u64 search[] = {
                [0] = 0xffffffffffffffffULL,
                [1] = 0xaaaaaaaaaaaaaaaaULL,
                [2] = 0x5555555555555555ULL,
                [3] = 0x0000000000000000ULL,
        };
        tmp = le64_to_cpu(*ptr) ^ search[state];
        tmp &= (tmp >> 1);
        tmp &= mask;
        return tmp;
}
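
/*
 * Worked example of the search above, for a single bit-pair: to find
 * state 1 (binary 01) we xor with its bitwise opposite, 10 (hence
 * search[1] = 0xaaaa...). A matching pair 01 becomes 11, and
 * tmp & (tmp >> 1) then leaves a 1 in the even bit position. A
 * non-matching pair such as 11 becomes 01, whose even bit ends up
 * clear. Masking with 0x5555... keeps only those even positions.
 */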

/**
 * rs_cmp - multi-block reservation range compare
 * @blk: absolute file system block number of the new reservation
 * @len: number of blocks in the new reservation
 * @rs: existing reservation to compare against
 *
 * returns: 1 if the block range is beyond the reach of the reservation
 *         -1 if the block range is before the start of the reservation
 *          0 if the block range overlaps with the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);

        if (blk >= startblk + rs->rs_free)
                return 1;
        if (blk + len - 1 < startblk)
                return -1;
        return 0;
}

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
                       u32 goal, u8 state)
{
        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
        u64 tmp;
        u64 mask = 0x5555555555555555ULL;
        u32 bit;

        /* Mask off bits we don't care about at the start of the search */
        mask <<= spoint;
        tmp = gfs2_bit_search(ptr, mask, state);
        ptr++;
        while (tmp == 0 && ptr < end) {
                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
                ptr++;
        }
        /* Mask off any bits which are more than len bytes from the start */
        if (ptr == end && (len & (sizeof(u64) - 1)))
                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
        /* Didn't find anything, so return */
        if (tmp == 0)
                return BFITNOENT;
        ptr--;
        bit = __ffs64(tmp);
        bit /= 2;       /* two bits per entry in the bitmap */
        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
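
/*
 * A note on the arithmetic above: each u64 word covers 32 bit-pairs,
 * so "goal >> 5" selects the word containing @goal and
 * "(goal << 1) & 63" is the bit offset of its pair within that word.
 * For example, goal = 40 starts the search in word 1 (blocks 32..63)
 * with spoint = 16, i.e. at bit-pair 8 of that word.
 */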

/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
        if (!rgrp_contains_block(rbm->rgd, block))
                return -E2BIG;
        rbm->bii = 0;
        rbm->offset = block - rbm->rgd->rd_data0;
        /* Check if the block is within the first bitmap block */
        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
                return 0;

        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
        rbm->offset += (sizeof(struct gfs2_rgrp) -
                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        return 0;
}
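
/*
 * The adjustment above works because the first bitmap block carries a
 * struct gfs2_rgrp header while the rest carry only a (smaller)
 * struct gfs2_meta_header, so the first bitmap holds fewer bit-pairs.
 * Padding the offset by the size difference (converted to blocks via
 * the GFS2_NBBY factor) makes every bitmap appear to be the same size,
 * which lets a single divide by sd_blocks_per_bitmap find bii.
 */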

/**
 * gfs2_rbm_incr - increment an rbm structure
 * @rbm: The rbm with rgd already set correctly
 *
 * This function takes an existing rbm structure and increments it to the next
 * viable block offset.
 *
 * Returns: If incrementing the offset would cause the rbm to go past the
 *          end of the rgrp, true is returned, otherwise false.
 *
 */

static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
{
        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
                rbm->offset++;
                return false;
        }
        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
                return true;

        rbm->offset = 0;
        rbm->bii++;
        return false;
}

/**
 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 * @rbm: Position to search (value/result)
 * @n_unaligned: Number of unaligned blocks to check
 * @len: Decremented for each block found (terminate on zero)
 *
 * Returns: true if a non-free block is encountered
 */

static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
        u32 n;
        u8 res;

        for (n = 0; n < n_unaligned; n++) {
                res = gfs2_testbit(rbm, true);
                if (res != GFS2_BLKST_FREE)
                        return true;
                (*len)--;
                if (*len == 0)
                        return true;
                if (gfs2_rbm_incr(rbm))
                        return true;
        }

        return false;
}

/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
        struct gfs2_rbm rbm = *rrbm;
        u32 n_unaligned = rbm.offset & 3;
        u32 size = len;
        u32 bytes;
        u32 chunk_size;
        u8 *ptr, *start, *end;
        u64 block;
        struct gfs2_bitmap *bi;

        if (n_unaligned &&
            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
                goto out;

        n_unaligned = len & 3;
        /* Start is now byte aligned */
        while (len > 3) {
                bi = rbm_bi(&rbm);
                start = bi->bi_bh->b_data;
                if (bi->bi_clone)
                        start = bi->bi_clone;
                start += bi->bi_offset;
                end = start + bi->bi_bytes;
                BUG_ON(rbm.offset & 3);
                start += (rbm.offset / GFS2_NBBY);
                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
                ptr = memchr_inv(start, 0, bytes);
                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
                chunk_size *= GFS2_NBBY;
                BUG_ON(len < chunk_size);
                len -= chunk_size;
                block = gfs2_rbm_to_block(&rbm);
                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
                        n_unaligned = 0;
                        break;
                }
                if (ptr) {
                        n_unaligned = 3;
                        break;
                }
                n_unaligned = len & 3;
        }

        /* Deal with any bits left over at the end */
        if (n_unaligned)
                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
        return size - len;
}
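
/*
 * Why the alignment dance above: four free blocks are encoded as a
 * whole zero byte (GFS2_BLKST_FREE is 0), so once the position is
 * rounded up to a 4-block boundary the extent can be scanned a byte
 * at a time with memchr_inv(), which returns the first non-zero byte.
 * Only the (at most three) blocks at either end need per-block tests
 * via gfs2_unaligned_extlen().
 */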

/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group descriptor
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of bits
 */

static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
                         unsigned int buflen, u8 state)
{
        const u8 *byte = buffer;
        const u8 *end = buffer + buflen;
        const u8 state1 = state << 2;
        const u8 state2 = state << 4;
        const u8 state3 = state << 6;
        u32 count = 0;

        for (; byte < end; byte++) {
                if (((*byte) & 0x03) == state)
                        count++;
                if (((*byte) & 0x0C) == state1)
                        count++;
                if (((*byte) & 0x30) == state2)
                        count++;
                if (((*byte) & 0xC0) == state3)
                        count++;
        }

        return count;
}
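
/*
 * Example: counting state 2 (GFS2_BLKST_UNLINKED) compares each byte
 * against the pre-shifted patterns 0x02, 0x08, 0x20 and 0x80 under the
 * masks 0x03, 0x0C, 0x30 and 0xC0, i.e. it tests all four bit-pairs of
 * the byte independently rather than decoding each block one at a time.
 */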

/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi = NULL;
        u32 length = rgd->rd_length;
        u32 count[4], tmp;
        int buf, x;

        memset(count, 0, 4 * sizeof(u32));

        /* Count # blocks in each of 4 possible allocation states */
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
                for (x = 0; x < 4; x++)
                        count[x] += gfs2_bitcount(rgd,
                                                  bi->bi_bh->b_data +
                                                  bi->bi_offset,
                                                  bi->bi_bytes, x);
        }

        if (count[0] != rgd->rd_free) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "free data mismatch:  %u != %u\n",
                               count[0], rgd->rd_free);
                return;
        }

        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
        if (count[1] != tmp) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "used data mismatch:  %u != %u\n",
                               count[1], tmp);
                return;
        }

        if (count[2] + count[3] != rgd->rd_dinodes) {
                if (gfs2_consist_rgrpd(rgd))
                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
                               count[2] + count[3], rgd->rd_dinodes);
                return;
        }
}

/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * The @exact argument should be set to true by most callers. The exception
 * is when we need to match blocks which are not represented by the rgrp
 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 * there for alignment purposes. Another way of looking at it is that @exact
 * matches only valid data/metadata blocks, but with @exact false, it will
 * match any block within the extent of the rgrp.
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
        struct rb_node *n, *next;
        struct gfs2_rgrpd *cur;

        spin_lock(&sdp->sd_rindex_spin);
        n = sdp->sd_rindex_tree.rb_node;
        while (n) {
                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
                next = NULL;
                if (blk < cur->rd_addr)
                        next = n->rb_left;
                else if (blk >= cur->rd_data0 + cur->rd_data)
                        next = n->rb_right;
                if (next == NULL) {
                        spin_unlock(&sdp->sd_rindex_spin);
                        if (exact) {
                                if (blk < cur->rd_addr)
                                        return NULL;
                                if (blk >= cur->rd_data0 + cur->rd_data)
                                        return NULL;
                        }
                        return cur;
                }
                n = next;
        }
        spin_unlock(&sdp->sd_rindex_spin);

        return NULL;
}
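
/*
 * Note that the tree walk above descends as long as blk falls outside
 * [rd_addr, rd_data0 + rd_data) of the current node; when no better
 * child exists, the @exact check re-applies that range test. Blocks in
 * the alignment gap between one rgrp's end and the next rgrp's rd_addr
 * therefore only match with @exact false.
 */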

/**
 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 * @sdp: The GFS2 superblock
 *
 * Returns: The first rgrp in the filesystem
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
        const struct rb_node *n;
        struct gfs2_rgrpd *rgd;

        spin_lock(&sdp->sd_rindex_spin);
        n = rb_first(&sdp->sd_rindex_tree);
        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
        spin_unlock(&sdp->sd_rindex_spin);

        return rgd;
}

/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: the resource group descriptor
 *
 * Returns: The next rgrp
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        const struct rb_node *n;

        spin_lock(&sdp->sd_rindex_spin);
        n = rb_next(&rgd->rd_node);
        if (n == NULL)
                n = rb_first(&sdp->sd_rindex_tree);

        if (unlikely(&rgd->rd_node == n)) {
                spin_unlock(&sdp->sd_rindex_spin);
                return NULL;
        }
        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
        spin_unlock(&sdp->sd_rindex_spin);
        return rgd;
}

void check_and_update_goal(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
                ip->i_goal = ip->i_no_addr;
}

void gfs2_free_clones(struct gfs2_rgrpd *rgd)
{
        int x;

        for (x = 0; x < rgd->rd_length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
                kfree(bi->bi_clone);
                bi->bi_clone = NULL;
        }
}

/**
 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 *                 plus a quota allocation data structure, if necessary
 * @ip: the inode for this reservation
 */
int gfs2_rsqa_alloc(struct gfs2_inode *ip)
{
        return gfs2_qa_alloc(ip);
}

static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
        struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);

        gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
                       (unsigned long long)ip->i_no_addr,
                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
                       rs->rs_rbm.offset, rs->rs_free);
}

/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
        struct gfs2_rgrpd *rgd;

        if (!gfs2_rs_active(rs))
                return;

        rgd = rs->rs_rbm.rgd;
        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
        rb_erase(&rs->rs_node, &rgd->rd_rstree);
        RB_CLEAR_NODE(&rs->rs_node);

        if (rs->rs_free) {
                u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) +
                                 rs->rs_free - 1;
                struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, };
                struct gfs2_bitmap *start, *last;

                /* return reserved blocks to the rgrp */
                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
                /* The rgrp extent failure point is likely not to increase;
                   it will only do so if the freed blocks are somehow
                   contiguous with a span of free blocks that follows. Still,
                   it will force the number to be recalculated later. */
                rgd->rd_extfail_pt += rs->rs_free;
                rs->rs_free = 0;
                if (gfs2_rbm_from_block(&last_rbm, last_block))
                        return;
                start = rbm_bi(&rs->rs_rbm);
                last = rbm_bi(&last_rbm);
                do
                        clear_bit(GBF_FULL, &start->bi_flags);
                while (start++ != last);
        }
}

/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
        struct gfs2_rgrpd *rgd;

        rgd = rs->rs_rbm.rgd;
        if (rgd) {
                spin_lock(&rgd->rd_rsspin);
                __rs_deltree(rs);
                BUG_ON(rs->rs_free);
                spin_unlock(&rgd->rd_rsspin);
        }
}

/**
 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 */
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
        down_write(&ip->i_rw_mutex);
        if ((wcount == NULL) || (atomic_read(wcount) <= 1))
                gfs2_rs_deltree(&ip->i_res);
        up_write(&ip->i_rw_mutex);
        gfs2_qa_delete(ip, wcount);
}

/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
        struct rb_node *n;
        struct gfs2_blkreserv *rs;

        spin_lock(&rgd->rd_rsspin);
        while ((n = rb_first(&rgd->rd_rstree))) {
                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                __rs_deltree(rs);
        }
        spin_unlock(&rgd->rd_rsspin);
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
        struct rb_node *n;
        struct gfs2_rgrpd *rgd;
        struct gfs2_glock *gl;

        while ((n = rb_first(&sdp->sd_rindex_tree))) {
                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
                gl = rgd->rd_gl;

                rb_erase(n, &sdp->sd_rindex_tree);

                if (gl) {
                        glock_clear_object(gl, rgd);
                        gfs2_rgrp_brelse(rgd);
                        gfs2_glock_put(gl);
                }

                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
                rgd->rd_bits = NULL;
                return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
}

static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;

        fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
        fs_info(sdp, "ri_length = %u\n", rgd->rd_length);
        fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
        fs_info(sdp, "ri_data = %u\n", rgd->rd_data);
        fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;

        if (!length)
                return -EINVAL;

        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
        if (!rgd->rd_bits)
                return -ENOMEM;

        bytes_left = rgd->rd_bitbytes;

        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;

                bi->bi_flags = 0;
                /* small rgrp; bitmap stored completely in header block */
                if (length == 1) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_rgrp);
                        bi->bi_start = 0;
                        bi->bi_bytes = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* header block */
                } else if (x == 0) {
                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
                        bi->bi_offset = sizeof(struct gfs2_rgrp);
                        bi->bi_start = 0;
                        bi->bi_bytes = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* last block */
                } else if (x + 1 == length) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_bytes = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                /* other blocks */
                } else {
                        bytes = sdp->sd_sb.sb_bsize -
                                sizeof(struct gfs2_meta_header);
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_bytes = bytes;
                        bi->bi_blocks = bytes * GFS2_NBBY;
                }

                bytes_left -= bytes;
        }

        if (bytes_left) {
                gfs2_consist_rgrpd(rgd);
                return -EIO;
        }
        bi = rgd->rd_bits + (length - 1);
        if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
                if (gfs2_consist_rgrpd(rgd)) {
                        gfs2_rindex_print(rgd);
                        fs_err(sdp, "start=%u len=%u offset=%u\n",
                               bi->bi_start, bi->bi_bytes, bi->bi_offset);
                }
                return -EIO;
        }

        return 0;
}

/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
        u64 total_data = 0;
        struct inode *inode = sdp->sd_rindex;
        struct gfs2_inode *ip = GFS2_I(inode);
        char buf[sizeof(struct gfs2_rindex)];
        int error, rgrps;

        for (rgrps = 0;; rgrps++) {
                loff_t pos = rgrps * sizeof(struct gfs2_rindex);

                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
                        break;
                error = gfs2_internal_read(ip, buf, &pos,
                                           sizeof(struct gfs2_rindex));
                if (error != sizeof(struct gfs2_rindex))
                        break;
                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
        }
        return total_data;
}

static int rgd_insert(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;

        /* Figure out where to put new node */
        while (*newn) {
                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
                                                  rd_node);

                parent = *newn;
                if (rgd->rd_addr < cur->rd_addr)
                        newn = &((*newn)->rb_left);
                else if (rgd->rd_addr > cur->rd_addr)
                        newn = &((*newn)->rb_right);
                else
                        return -EEXIST;
        }

        rb_link_node(&rgd->rd_node, parent, newn);
        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
        sdp->sd_rgrps++;
        return 0;
}

/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        const unsigned bsize = sdp->sd_sb.sb_bsize;
        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
        struct gfs2_rindex buf;
        int error;
        struct gfs2_rgrpd *rgd;

        if (pos >= i_size_read(&ip->i_inode))
                return 1;

        error = gfs2_internal_read(ip, (char *)&buf, &pos,
                                   sizeof(struct gfs2_rindex));

        if (error != sizeof(struct gfs2_rindex))
                return (error == 0) ? 1 : error;

        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
        error = -ENOMEM;
        if (!rgd)
                return error;

        rgd->rd_sbd = sdp;
        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
        rgd->rd_length = be32_to_cpu(buf.ri_length);
        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
        spin_lock_init(&rgd->rd_rsspin);

        error = compute_bitstructs(rgd);
        if (error)
                goto fail;

        error = gfs2_glock_get(sdp, rgd->rd_addr,
                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
        if (error)
                goto fail;

        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
        if (rgd->rd_data > sdp->sd_max_rg_data)
                sdp->sd_max_rg_data = rgd->rd_data;
        spin_lock(&sdp->sd_rindex_spin);
        error = rgd_insert(rgd);
        spin_unlock(&sdp->sd_rindex_spin);
        if (!error) {
                glock_set_object(rgd->rd_gl, rgd);
                rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
                rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
                                                    rgd->rd_length) * bsize) - 1;
                return 0;
        }

        error = 0; /* someone else read in the rgrp; free it and ignore it */
        gfs2_glock_put(rgd->rd_gl);

fail:
        kfree(rgd->rd_bits);
        rgd->rd_bits = NULL;
        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        return error;
}

/**
 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 * @sdp: the GFS2 superblock
 *
 * The purpose of this function is to select a subset of the resource groups
 * and mark them as PREFERRED. We do it in such a way that each node prefers
 * to use a unique set of rgrps to minimize glock contention.
 */
static void set_rgrp_preferences(struct gfs2_sbd *sdp)
{
        struct gfs2_rgrpd *rgd, *first;
        int i;

        /* Skip an initial number of rgrps, based on this node's journal ID.
           That should start each node out on its own set. */
        rgd = gfs2_rgrpd_get_first(sdp);
        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
                rgd = gfs2_rgrpd_get_next(rgd);
        first = rgd;

        do {
                rgd->rd_flags |= GFS2_RDF_PREFERRED;
                for (i = 0; i < sdp->sd_journals; i++) {
                        rgd = gfs2_rgrpd_get_next(rgd);
                        if (!rgd || rgd == first)
                                break;
                }
        } while (rgd && rgd != first);
}
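
/*
 * Worked example of the striding above: with sd_journals == 3, the
 * node with journal ID 1 starts at rgrp 1 and then marks every third
 * rgrp (1, 4, 7, ...) as PREFERRED, while journal IDs 0 and 2 get the
 * interleaved sets starting at rgrps 0 and 2 respectively.
 */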

/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        int error;

        do {
                error = read_rindex_entry(ip);
        } while (error == 0);

        if (error < 0)
                return error;

        set_rgrp_preferences(sdp);

        sdp->sd_rindex_uptodate = 1;
        return 0;
}

/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
        struct gfs2_glock *gl = ip->i_gl;
        struct gfs2_holder ri_gh;
        int error = 0;
        int unlock_required = 0;

        /* Read new copy from disk if we don't have the latest */
        if (!sdp->sd_rindex_uptodate) {
                if (!gfs2_glock_is_locked_by_me(gl)) {
                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
                        if (error)
                                return error;
                        unlock_required = 1;
                }
                if (!sdp->sd_rindex_uptodate)
                        error = gfs2_ri_update(ip);
                if (unlock_required)
                        gfs2_glock_dq_uninit(&ri_gh);
        }

        return error;
}

static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
        const struct gfs2_rgrp *str = buf;
        u32 rg_flags;

        rg_flags = be32_to_cpu(str->rg_flags);
        rg_flags &= ~GFS2_RDF_MASK;
        rgd->rd_flags &= GFS2_RDF_MASK;
        rgd->rd_flags |= rg_flags;
        rgd->rd_free = be32_to_cpu(str->rg_free);
        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
        /* rd_data0, rd_data and rd_bitbytes already set from rindex */
}

static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
{
        const struct gfs2_rgrp *str = buf;

        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
        rgl->rl_flags = str->rg_flags;
        rgl->rl_free = str->rg_free;
        rgl->rl_dinodes = str->rg_dinodes;
        rgl->rl_igeneration = str->rg_igeneration;
        rgl->__pad = 0UL;
}

static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
        struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
        struct gfs2_rgrp *str = buf;
        u32 crc;

        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
        str->rg_free = cpu_to_be32(rgd->rd_free);
        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
        if (next == NULL)
                str->rg_skip = 0;
        else if (next->rd_addr > rgd->rd_addr)
                str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
        str->rg_data0 = cpu_to_be64(rgd->rd_data0);
        str->rg_data = cpu_to_be32(rgd->rd_data);
        str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
        str->rg_crc = 0;
        crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
        str->rg_crc = cpu_to_be32(crc);

        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
}

static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
        int valid = 1;

        if (rgl->rl_flags != str->rg_flags) {
                printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
                       (unsigned long long)rgd->rd_addr,
                       be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
                valid = 0;
        }
        if (rgl->rl_free != str->rg_free) {
                printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
                       (unsigned long long)rgd->rd_addr,
                       be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
                valid = 0;
        }
        if (rgl->rl_dinodes != str->rg_dinodes) {
                printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
                       (unsigned long long)rgd->rd_addr,
                       be32_to_cpu(rgl->rl_dinodes),
                       be32_to_cpu(str->rg_dinodes));
                valid = 0;
        }
        if (rgl->rl_igeneration != str->rg_igeneration) {
                printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
                       "%llu/%llu", (unsigned long long)rgd->rd_addr,
                       (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
                       (unsigned long long)be64_to_cpu(str->rg_igeneration));
                valid = 0;
        }
        return valid;
}

static u32 count_unlinked(struct gfs2_rgrpd *rgd)
{
        struct gfs2_bitmap *bi;
        const u32 length = rgd->rd_length;
        const u8 *buffer = NULL;
        u32 i, goal, count = 0;

        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
                goal = 0;
                buffer = bi->bi_bh->b_data + bi->bi_offset;
                WARN_ON(!buffer_uptodate(bi->bi_bh));
                while (goal < bi->bi_blocks) {
                        goal = gfs2_bitfit(buffer, bi->bi_bytes, goal,
                                           GFS2_BLKST_UNLINKED);
                        if (goal == BFITNOENT)
                                break;
                        count++;
                        goal++;
                }
        }

        return count;
}

/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
 *
 * Returns: errno
 */

static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
        unsigned int length = rgd->rd_length;
        struct gfs2_bitmap *bi;
        unsigned int x, y;
        int error;

        if (rgd->rd_bits[0].bi_bh != NULL)
                return 0;

        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
                if (error)
                        goto fail;
        }

        for (y = length; y--;) {
                bi = rgd->rd_bits + y;
                error = gfs2_meta_wait(sdp, bi->bi_bh);
                if (error)
                        goto fail;
                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
                                              GFS2_METATYPE_RG)) {
                        error = -EIO;
                        goto fail;
                }
        }

        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
                for (x = 0; x < length; x++)
                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
                /* max out the rgrp allocation failure point */
                rgd->rd_extfail_pt = rgd->rd_free;
        }
        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
                                     rgd->rd_bits[0].bi_bh->b_data);
        } else if (sdp->sd_args.ar_rgrplvb) {
                if (!gfs2_rgrp_lvb_valid(rgd)) {
                        gfs2_consist_rgrpd(rgd);
                        error = -EIO;
                        goto fail;
                }
                if (rgd->rd_rgl->rl_unlinked == 0)
                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        }
        return 0;

fail:
        while (x--) {
                bi = rgd->rd_bits + x;
                brelse(bi->bi_bh);
                bi->bi_bh = NULL;
                gfs2_assert_warn(sdp, !bi->bi_clone);
        }

        return error;
}

static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
        u32 rl_flags;

        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
                return 0;

        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
                return gfs2_rgrp_bh_get(rgd);

        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
        rl_flags &= ~GFS2_RDF_MASK;
        rgd->rd_flags &= GFS2_RDF_MASK;
        rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
        if (rgd->rd_rgl->rl_unlinked == 0)
                rgd->rd_flags &= ~GFS2_RDF_CHECK;
        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
        rgd->rd_free_clone = rgd->rd_free;
        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
        return 0;
}

int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        struct gfs2_sbd *sdp = rgd->rd_sbd;

        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
                return 0;
        return gfs2_rgrp_bh_get(rgd);
}

/**
 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @rgd: The resource group
 *
 */

void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
{
        int x, length = rgd->rd_length;

        for (x = 0; x < length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
                if (bi->bi_bh) {
                        brelse(bi->bi_bh);
                        bi->bi_bh = NULL;
                }
        }
}

/**
 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
 * @gh: The glock holder for the resource group
 *
 */

void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);

        if (rgd && demote_requested)
                gfs2_rgrp_brelse(rgd);
}

int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                            struct buffer_head *bh,
                            const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
        struct super_block *sb = sdp->sd_vfs;
        u64 blk;
        sector_t start = 0;
        sector_t nr_blks = 0;
        int rv;
        unsigned int x;
        u32 trimmed = 0;
        u8 diff;

        for (x = 0; x < bi->bi_bytes; x++) {
                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
                clone += bi->bi_offset;
                clone += x;
                if (bh) {
                        const u8 *orig = bh->b_data + bi->bi_offset + x;
                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
                } else {
                        diff = ~(*clone | (*clone >> 1));
                }
                diff &= 0x55;
                if (diff == 0)
                        continue;
                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
                while (diff) {
                        if (diff & 1) {
                                if (nr_blks == 0)
                                        goto start_new_extent;
                                if ((start + nr_blks) != blk) {
                                        if (nr_blks >= minlen) {
                                                rv = sb_issue_discard(sb,
                                                        start, nr_blks,
                                                        GFP_NOFS, 0);
                                                if (rv)
                                                        goto fail;
                                                trimmed += nr_blks;
                                        }
                                        nr_blks = 0;
start_new_extent:
                                        start = blk;
                                }
                                nr_blks++;
                        }
                        diff >>= 2;
                        blk++;
                }
        }
        if (nr_blks >= minlen) {
                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
                if (rv)
                        goto fail;
                trimmed += nr_blks;
        }
        if (ptrimmed)
                *ptrimmed = trimmed;
        return 0;

fail:
        if (sdp->sd_args.ar_discard)
                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
        sdp->sd_args.ar_discard = 0;
        return -EIO;
}
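
/*
 * The bit trick in the diff computation above: for a two-bit block
 * state, (pair | (pair >> 1)) has its low bit set iff the pair is
 * non-zero, i.e. iff the block is not GFS2_BLKST_FREE, and the
 * inverted form ~(pair | (pair >> 1)) flags free blocks. Masking
 * with 0x55 keeps one such flag bit per block in the byte, and the
 * inner loop walks those flags to build contiguous discard extents.
 */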
1385
1386/**
1387 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1388 * @filp: Any file on the filesystem
1389 * @argp: Pointer to the arguments (also used to pass result)
1390 *
1391 * Returns: 0 on success, otherwise error code
1392 */
1393
1394int gfs2_fitrim(struct file *filp, void __user *argp)
1395{
1396        struct inode *inode = file_inode(filp);
1397        struct gfs2_sbd *sdp = GFS2_SB(inode);
1398        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1399        struct buffer_head *bh;
1400        struct gfs2_rgrpd *rgd;
1401        struct gfs2_rgrpd *rgd_end;
1402        struct gfs2_holder gh;
1403        struct fstrim_range r;
1404        int ret = 0;
1405        u64 amt;
1406        u64 trimmed = 0;
1407        u64 start, end, minlen;
1408        unsigned int x;
1409        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1410
1411        if (!capable(CAP_SYS_ADMIN))
1412                return -EPERM;
1413
1414        if (!blk_queue_discard(q))
1415                return -EOPNOTSUPP;
1416
1417        if (copy_from_user(&r, argp, sizeof(r)))
1418                return -EFAULT;
1419
1420        ret = gfs2_rindex_update(sdp);
1421        if (ret)
1422                return ret;
1423
1424        start = r.start >> bs_shift;
1425        end = start + (r.len >> bs_shift);
1426        minlen = max_t(u64, r.minlen,
1427                       q->limits.discard_granularity) >> bs_shift;
1428
1429        if (end <= start || minlen > sdp->sd_max_rg_data)
1430                return -EINVAL;
1431
1432        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1433        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1434
1435        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1436            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1437                return -EINVAL; /* start is beyond the end of the fs */
1438
1439        while (1) {
1440
1441                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1442                if (ret)
1443                        goto out;
1444
1445                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1446                        /* Trim each bitmap in the rgrp */
1447                        for (x = 0; x < rgd->rd_length; x++) {
1448                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1449                                ret = gfs2_rgrp_send_discards(sdp,
1450                                                rgd->rd_data0, NULL, bi, minlen,
1451                                                &amt);
1452                                if (ret) {
1453                                        gfs2_glock_dq_uninit(&gh);
1454                                        goto out;
1455                                }
1456                                trimmed += amt;
1457                        }
1458
1459                        /* Mark rgrp as having been trimmed */
1460                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1461                        if (ret == 0) {
1462                                bh = rgd->rd_bits[0].bi_bh;
1463                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1464                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1465                                gfs2_rgrp_out(rgd, bh->b_data);
1466                                gfs2_trans_end(sdp);
1467                        }
1468                }
1469                gfs2_glock_dq_uninit(&gh);
1470
1471                if (rgd == rgd_end)
1472                        break;
1473
1474                rgd = gfs2_rgrpd_get_next(rgd);
1475        }
1476
1477out:
1478        r.len = trimmed << bs_shift;
1479        if (copy_to_user(argp, &r, sizeof(r)))
1480                return -EFAULT;
1481
1482        return ret;
1483}
1484
1485/**
1486 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1487 * @ip: the inode structure
1488 *
1489 */
1490static void rs_insert(struct gfs2_inode *ip)
1491{
1492        struct rb_node **newn, *parent = NULL;
1493        int rc;
1494        struct gfs2_blkreserv *rs = &ip->i_res;
1495        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1496        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1497
1498        BUG_ON(gfs2_rs_active(rs));
1499
1500        spin_lock(&rgd->rd_rsspin);
1501        newn = &rgd->rd_rstree.rb_node;
1502        while (*newn) {
1503                struct gfs2_blkreserv *cur =
1504                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1505
1506                parent = *newn;
1507                rc = rs_cmp(fsblock, rs->rs_free, cur);
1508                if (rc > 0)
1509                        newn = &((*newn)->rb_right);
1510                else if (rc < 0)
1511                        newn = &((*newn)->rb_left);
1512                else {
1513                        spin_unlock(&rgd->rd_rsspin);
1514                        WARN_ON(1);
1515                        return;
1516                }
1517        }
1518
1519        rb_link_node(&rs->rs_node, parent, newn);
1520        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1521
1522        /* Do our rgrp accounting for the reservation */
1523        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1524        spin_unlock(&rgd->rd_rsspin);
1525        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1526}
1527
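    /*
     * Editor's note: the tree is ordered by disk block range. rs_cmp()
     * (defined earlier in this file) is assumed to return <0, >0 or 0
     * depending on whether [fsblock, fsblock + rs_free) falls left of,
     * right of, or overlaps @cur, which is why an overlap (rc == 0) in
     * rs_insert() above is treated as a bug.
     */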
1528/**
1529 * rgd_free - return the number of free blocks we can allocate.
1530 * @rgd: the resource group
1531 * @rs: our own reservation; its blocks remain allocatable by us
1532 * This function returns the number of free blocks for an rgrp.
1533 * That's the clone-free blocks (blocks that are free, not including
1534 * those still being used by unlinked files that haven't been deleted).
1535 *
1536 * It also subtracts any blocks reserved by someone else, but does not
1537 * include free blocks that are still part of our current reservation,
1538 * because obviously we can (and will) allocate them.
1539 */
1540static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
1541{
1542        u32 tot_reserved, tot_free;
1543
1544        if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
1545                return 0;
1546        tot_reserved = rgd->rd_reserved - rs->rs_free;
1547
1548        if (rgd->rd_free_clone < tot_reserved)
1549                tot_reserved = 0;
1550
1551        tot_free = rgd->rd_free_clone - tot_reserved;
1552
1553        return tot_free;
1554}
1555
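    /*
     * Worked example (editor's addition): with rd_free_clone = 100,
     * rd_reserved = 30 and rs->rs_free = 10, tot_reserved = 30 - 10 = 20
     * and rgd_free() returns 100 - 20 = 80: everyone else's reserved
     * blocks are excluded, while our own 10 remain allocatable.
     */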
1556/**
1557 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1558 * @rgd: the resource group descriptor
1559 * @ip: pointer to the inode for which we're reserving blocks
1560 * @ap: the allocation parameters
1561 *
1562 */
1563
1564static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1565                           const struct gfs2_alloc_parms *ap)
1566{
1567        struct gfs2_rbm rbm = { .rgd = rgd, };
1568        u64 goal;
1569        struct gfs2_blkreserv *rs = &ip->i_res;
1570        u32 extlen;
1571        u32 free_blocks = rgd_free(rgd, rs);
1572        int ret;
1573        struct inode *inode = &ip->i_inode;
1574
1575        if (S_ISDIR(inode->i_mode))
1576                extlen = 1;
1577        else {
1578                extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
1579                extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
1580        }
1581        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1582                return;
1583
1584        /* Find bitmap block that contains bits for goal block */
1585        if (rgrp_contains_block(rgd, ip->i_goal))
1586                goal = ip->i_goal;
1587        else
1588                goal = rgd->rd_last_alloc + rgd->rd_data0;
1589
1590        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1591                return;
1592
1593        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
1594        if (ret == 0) {
1595                rs->rs_rbm = rbm;
1596                rs->rs_free = extlen;
1597                rs_insert(ip);
1598        } else {
1599                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1600                        rgd->rd_last_alloc = 0;
1601        }
1602}
1603
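    /*
     * Worked example of the extent sizing above (editor's addition),
     * taking RGRP_RSRV_MINBLKS as 32 for illustration: for a regular
     * file with i_sizehint = 100, ap->target = 10 and free_blocks = 64,
     * extlen = clamp(max(100, 10), 32, 64) = 64, i.e. the reservation
     * request is capped by the space actually available in the rgrp.
     */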
1604/**
1605 * gfs2_next_unreserved_block - Return next block that is not reserved
1606 * @rgd: The resource group
1607 * @block: The starting block
1608 * @length: The required length
1609 * @ip: Ignore any reservations for this inode
1610 *
1611 * If the block does not appear in any reservation, then return the
1612 * block number unchanged. If it does appear in a reservation, then
1613 * keep looking through the tree of reservations in order to find the
1614 * first block number which is not reserved.
1615 */
1616
1617static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1618                                      u32 length,
1619                                      const struct gfs2_inode *ip)
1620{
1621        struct gfs2_blkreserv *rs;
1622        struct rb_node *n;
1623        int rc;
1624
1625        spin_lock(&rgd->rd_rsspin);
1626        n = rgd->rd_rstree.rb_node;
1627        while (n) {
1628                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1629                rc = rs_cmp(block, length, rs);
1630                if (rc < 0)
1631                        n = n->rb_left;
1632                else if (rc > 0)
1633                        n = n->rb_right;
1634                else
1635                        break;
1636        }
1637
1638        if (n) {
1639                while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
1640                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1641                        n = n->rb_right;
1642                        if (n == NULL)
1643                                break;
1644                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1645                }
1646        }
1647
1648        spin_unlock(&rgd->rd_rsspin);
1649        return block;
1650}
1651
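    /*
     * Example (editor's addition): if another inode holds a reservation
     * covering blocks [100, 120) and we ask about block 110 with length
     * 4, the lookup finds the overlap and the walk advances the result
     * to at least 120, the first block past that reservation (further
     * still if the next reservation starts right there).
     */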
1652/**
1653 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1654 * @rbm: The current position in the resource group
1655 * @ip: The inode for which we are searching for blocks
1656 * @minext: The minimum extent length
1657 * @maxext: A pointer to the maximum extent structure
1658 *
1659 * This checks the current position in the rgrp to see whether there is
1660 * a reservation covering this block. If not then this function is a
1661 * no-op. If there is, then the position is moved to the end of the
1662 * contiguous reservation(s) so that we are pointing at the first
1663 * non-reserved block.
1664 *
1665 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1666 */
1667
1668static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1669                                             const struct gfs2_inode *ip,
1670                                             u32 minext,
1671                                             struct gfs2_extent *maxext)
1672{
1673        u64 block = gfs2_rbm_to_block(rbm);
1674        u32 extlen = 1;
1675        u64 nblock;
1676        int ret;
1677
1678        /*
1679         * If we have a minimum extent length, then skip over any extent
1680         * which is less than the min extent length in size.
1681         */
1682        if (minext) {
1683                extlen = gfs2_free_extlen(rbm, minext);
1684                if (extlen <= maxext->len)
1685                        goto fail;
1686        }
1687
1688        /*
1689         * Check the extent which has been found against the reservations
1690         * and skip if parts of it are already reserved
1691         */
1692        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1693        if (nblock == block) {
1694                if (!minext || extlen >= minext)
1695                        return 0;
1696
1697                if (extlen > maxext->len) {
1698                        maxext->len = extlen;
1699                        maxext->rbm = *rbm;
1700                }
1701fail:
1702                nblock = block + extlen;
1703        }
1704        ret = gfs2_rbm_from_block(rbm, nblock);
1705        if (ret < 0)
1706                return ret;
1707        return 1;
1708}
1709
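    /*
     * Worked trace (editor's addition): with minext = 32, suppose the
     * free extent at the current position is only 16 blocks long and
     * none of it is reserved. Then nblock == block but 16 < 32, so the
     * extent is recorded in @maxext if it is the largest seen so far,
     * @rbm is advanced past the 16 blocks, and 1 is returned so that
     * the caller keeps searching.
     */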
1710/**
1711 * gfs2_rbm_find - Look for blocks of a particular state
1712 * @rbm: Value/result starting position and final position
1713 * @state: The state which we want to find
1714 * @minext: Pointer to the requested extent length (NULL for a single block)
1715 *          This is updated to be the actual reservation size.
1716 * @ip: If set, check for reservations
1717 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1718 *          around until we've reached the starting point.
1719 *
1720 * Side effects:
1721 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1722 *   has no free blocks in it.
1723 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1724 *   has come up short on a free block search.
1725 *
1726 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1727 */
1728
1729static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1730                         const struct gfs2_inode *ip, bool nowrap)
1731{
1732        struct buffer_head *bh;
1733        int initial_bii;
1734        u32 initial_offset;
1735        int first_bii = rbm->bii;
1736        u32 first_offset = rbm->offset;
1737        u32 offset;
1738        u8 *buffer;
1739        int n = 0;
1740        int iters = rbm->rgd->rd_length;
1741        int ret;
1742        struct gfs2_bitmap *bi;
1743        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1744
1745        /* If we are not starting at the beginning of a bitmap, then we
1746         * need to add one to the bitmap count to ensure that we search
1747         * the starting bitmap twice.
1748         */
1749        if (rbm->offset != 0)
1750                iters++;
1751
1752        while(1) {
1753                bi = rbm_bi(rbm);
1754                if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
1755                    test_bit(GBF_FULL, &bi->bi_flags) &&
1756                    (state == GFS2_BLKST_FREE))
1757                        goto next_bitmap;
1758
1759                bh = bi->bi_bh;
1760                buffer = bh->b_data + bi->bi_offset;
1761                WARN_ON(!buffer_uptodate(bh));
1762                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1763                        buffer = bi->bi_clone + bi->bi_offset;
1764                initial_offset = rbm->offset;
1765                offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);
1766                if (offset == BFITNOENT)
1767                        goto bitmap_full;
1768                rbm->offset = offset;
1769                if (ip == NULL)
1770                        return 0;
1771
1772                initial_bii = rbm->bii;
1773                ret = gfs2_reservation_check_and_update(rbm, ip,
1774                                                        minext ? *minext : 0,
1775                                                        &maxext);
1776                if (ret == 0)
1777                        return 0;
1778                if (ret > 0) {
1779                        n += (rbm->bii - initial_bii);
1780                        goto next_iter;
1781                }
1782                if (ret == -E2BIG) {
1783                        rbm->bii = 0;
1784                        rbm->offset = 0;
1785                        n += (rbm->bii - initial_bii);
1786                        goto res_covered_end_of_rgrp;
1787                }
1788                return ret;
1789
1790bitmap_full:    /* Mark bitmap as full and fall through */
1791                if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1792                        set_bit(GBF_FULL, &bi->bi_flags);
1793
1794next_bitmap:    /* Find next bitmap in the rgrp */
1795                rbm->offset = 0;
1796                rbm->bii++;
1797                if (rbm->bii == rbm->rgd->rd_length)
1798                        rbm->bii = 0;
1799res_covered_end_of_rgrp:
1800                if ((rbm->bii == 0) && nowrap)
1801                        break;
1802                n++;
1803next_iter:
1804                if (n >= iters)
1805                        break;
1806        }
1807
1808        if (minext == NULL || state != GFS2_BLKST_FREE)
1809                return -ENOSPC;
1810
1811        /* If the extent was too small, and it's smaller than the smallest
1812           to have failed before, remember for future reference that it's
1813           useless to search this rgrp again for this amount or more. */
1814        if ((first_offset == 0) && (first_bii == 0) &&
1815            (*minext < rbm->rgd->rd_extfail_pt))
1816                rbm->rgd->rd_extfail_pt = *minext;
1817
1818        /* If the maximum extent we found is big enough to fulfill the
1819           minimum requirements, use it anyway. */
1820        if (maxext.len) {
1821                *rbm = maxext.rbm;
1822                *minext = maxext.len;
1823                return 0;
1824        }
1825
1826        return -ENOSPC;
1827}
1828
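    /*
     * Typical call (editor's addition), as used by rg_mblk_search()
     * above: look for an extent of at least "extlen" free blocks from
     * the current position without wrapping, falling back to the
     * longest extent found (with *minext updated) if nothing long
     * enough exists:
     *
     *         u32 minext = extlen;
     *         ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, ip, true);
     */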
1829/**
1830 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1831 * @rgd: The rgrp
1832 * @last_unlinked: block address of the last dinode we unlinked
1833 * @skip: block address we should explicitly not unlink
1834 *
1835 * Returns: nothing; any unlinked, in-use dinodes found are queued to
1836 *          the delete workqueue for eventual disposal.
1837 */
1838
1839static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1840{
1841        u64 block;
1842        struct gfs2_sbd *sdp = rgd->rd_sbd;
1843        struct gfs2_glock *gl;
1844        struct gfs2_inode *ip;
1845        int error;
1846        int found = 0;
1847        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1848
1849        while (1) {
1850                down_write(&sdp->sd_log_flush_lock);
1851                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1852                                      true);
1853                up_write(&sdp->sd_log_flush_lock);
1854                if (error == -ENOSPC)
1855                        break;
1856                if (WARN_ON_ONCE(error))
1857                        break;
1858
1859                block = gfs2_rbm_to_block(&rbm);
1860                if (gfs2_rbm_from_block(&rbm, block + 1))
1861                        break;
1862                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1863                        continue;
1864                if (block == skip)
1865                        continue;
1866                *last_unlinked = block;
1867
1868                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
1869                if (error)
1870                        continue;
1871
1872                /* If the inode is already in cache, we can ignore it here
1873                 * because the existing inode disposal code will deal with
1874                 * it when all refs have gone away. Accessing gl_object like
1875                 * this is not safe in general. Here it is ok because we do
1876                 * not dereference the pointer, and we only need an approx
1877                 * answer to whether it is NULL or not.
1878                 */
1879                ip = gl->gl_object;
1880
1881                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1882                        gfs2_glock_put(gl);
1883                else
1884                        found++;
1885
1886                /* Limit reclaim to sensible number of tasks */
1887                if (found > NR_CPUS)
1888                        return;
1889        }
1890
1891        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1892        return;
1893}
1894
1895/**
1896 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1897 * @rgd: The rgrp in question
1898 * @loops: An indication of how picky we can be (0=very, 1=less so)
1899 *
1900 * This function uses the recently added glock statistics in order to
1901 * figure out whether a particular resource group is suffering from
1902 * contention from multiple nodes. This is done purely on the basis
1903 * of timings, since this is the only data we have to work with and
1904 * our aim here is to reject a resource group which is highly contended
1905 * but (very important) not to do this too often in order to ensure that
1906 * we do not end up introducing fragmentation by changing resource
1907 * groups when not actually required.
1908 *
1909 * The calculation is fairly simple, we want to know whether the SRTTB
1910 * (i.e. smoothed round trip time for blocking operations) to acquire
1911 * the lock for this rgrp's glock is significantly greater than the
1912 * time taken for resource groups on average. We introduce a margin in
1913 * the form of the variable @var which is computed as the sum of the two
1914 * respective variances, and multiplied by a factor depending on @loops
1915 * and whether we have a lot of data to base the decision on. This is
1916 * then tested against the square difference of the means in order to
1917 * decide whether the result is statistically significant or not.
1918 *
1919 * Returns: A boolean verdict on the congestion status
1920 */
1921
1922static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1923{
1924        const struct gfs2_glock *gl = rgd->rd_gl;
1925        const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1926        struct gfs2_lkstats *st;
1927        u64 r_dcount, l_dcount;
1928        u64 l_srttb, a_srttb = 0;
1929        s64 srttb_diff;
1930        u64 sqr_diff;
1931        u64 var;
1932        int cpu, nonzero = 0;
1933
1934        preempt_disable();
1935        for_each_present_cpu(cpu) {
1936                st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1937                if (st->stats[GFS2_LKS_SRTTB]) {
1938                        a_srttb += st->stats[GFS2_LKS_SRTTB];
1939                        nonzero++;
1940                }
1941        }
1942        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1943        if (nonzero)
1944                do_div(a_srttb, nonzero);
1945        r_dcount = st->stats[GFS2_LKS_DCOUNT];
1946        var = st->stats[GFS2_LKS_SRTTVARB] +
1947              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1948        preempt_enable();
1949
1950        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1951        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1952
1953        if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1954                return false;
1955
1956        srttb_diff = a_srttb - l_srttb;
1957        sqr_diff = srttb_diff * srttb_diff;
1958
1959        var *= 2;
1960        if (l_dcount < 8 || r_dcount < 8)
1961                var *= 2;
1962        if (loops == 1)
1963                var *= 2;
1964
1965        return ((srttb_diff < 0) && (sqr_diff > var));
1966}
1967
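    /*
     * Editor's summary of the test above: let d = a_srttb - l_srttb.
     * The rgrp is reported congested iff d < 0 (this rgrp's glock is
     * slower than the per-cpu average) and d * d > k * (SRTTVARB of
     * this cpu + SRTTVARB of the glock), where k starts at 2 and is
     * doubled once if either sample count is below 8 and once more
     * when @loops == 1, i.e. k is 2, 4 or 8.
     */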
1968/**
1969 * gfs2_rgrp_used_recently - check if an rgrp's glock was used recently
1970 * @rs: The block reservation with the rgrp to test
1971 * @msecs: The time limit in milliseconds
1972 *
1973 * Returns: True if the rgrp glock has been used within the time limit
1974 */
1975static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1976                                    u64 msecs)
1977{
1978        u64 tdiff;
1979
1980        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1981                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1982
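            /* tdiff is in nanoseconds; scale the ms limit by 10^6 to match */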
1983        return tdiff > (msecs * 1000 * 1000);
1984}
1985
1986static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1987{
1988        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1989        u32 skip;
1990
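            /* Pick a uniformly random rgrp index (Orlov-style) to spread
               new directories across the filesystem */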
1991        get_random_bytes(&skip, sizeof(skip));
1992        return skip % sdp->sd_rgrps;
1993}
1994
1995static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1996{
1997        struct gfs2_rgrpd *rgd = *pos;
1998        struct gfs2_sbd *sdp = rgd->rd_sbd;
1999
2000        rgd = gfs2_rgrpd_get_next(rgd);
2001        if (rgd == NULL)
2002                rgd = gfs2_rgrpd_get_first(sdp);
2003        *pos = rgd;
2004        if (rgd != begin) /* If we didn't wrap */
2005                return true;
2006        return false;
2007}
2008
2009/**
2010 * fast_to_acquire - determine if a resource group will be fast to acquire
2011 * @rgd: The rgrp in question
2012 * If this is one of our preferred rgrps, it should be quicker to acquire,
2013 * because we tried to set ourselves up as dlm lock master.
2014 */
2015static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
2016{
2017        struct gfs2_glock *gl = rgd->rd_gl;
2018
2019        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
2020            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
2021            !test_bit(GLF_DEMOTE, &gl->gl_flags))
2022                return 1;
2023        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
2024                return 1;
2025        return 0;
2026}
2027
2028/**
2029 * gfs2_inplace_reserve - Reserve space in the filesystem
2030 * @ip: the inode to reserve space for
2031 * @ap: the allocation parameters
2032 *
2033 * We try our best to find an rgrp that has at least ap->target blocks
2034 * available. After a couple of passes (loops == 2), the prospects of finding
2035 * such an rgrp diminish. At this stage, we return the first rgrp that has
2036 * at least ap->min_target blocks available. Either way, we set ap->allowed to
2037 * the number of blocks available in the chosen rgrp.
2038 *
2039 * Returns: 0 on success,
2040 *          -ENOSPC if a suitable rgrp can't be found
2041 *          errno otherwise
2042 */
2043
2044int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
2045{
2046        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2047        struct gfs2_rgrpd *begin = NULL;
2048        struct gfs2_blkreserv *rs = &ip->i_res;
2049        int error = 0, rg_locked, flags = 0;
2050        u64 last_unlinked = NO_BLOCK;
2051        int loops = 0;
2052        u32 free_blocks, skip = 0;
2053
2054        if (sdp->sd_args.ar_rgrplvb)
2055                flags |= GL_SKIP;
2056        if (gfs2_assert_warn(sdp, ap->target))
2057                return -EINVAL;
2058        if (gfs2_rs_active(rs)) {
2059                begin = rs->rs_rbm.rgd;
2060        } else if (rs->rs_rbm.rgd &&
2061                   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
2062                begin = rs->rs_rbm.rgd;
2063        } else {
2064                check_and_update_goal(ip);
2065                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
2066        }
2067        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
2068                skip = gfs2_orlov_skip(ip);
2069        if (rs->rs_rbm.rgd == NULL)
2070                return -EBADSLT;
2071
2072        while (loops < 3) {
2073                rg_locked = 1;
2074
2075                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
2076                        rg_locked = 0;
2077                        if (skip && skip--)
2078                                goto next_rgrp;
2079                        if (!gfs2_rs_active(rs)) {
2080                                if (loops == 0 &&
2081                                    !fast_to_acquire(rs->rs_rbm.rgd))
2082                                        goto next_rgrp;
2083                                if ((loops < 2) &&
2084                                    gfs2_rgrp_used_recently(rs, 1000) &&
2085                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2086                                        goto next_rgrp;
2087                        }
2088                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2089                                                   LM_ST_EXCLUSIVE, flags,
2090                                                   &ip->i_rgd_gh);
2091                        if (unlikely(error))
2092                                return error;
2093                        if (!gfs2_rs_active(rs) && (loops < 2) &&
2094                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2095                                goto skip_rgrp;
2096                        if (sdp->sd_args.ar_rgrplvb) {
2097                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
2098                                if (unlikely(error)) {
2099                                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2100                                        return error;
2101                                }
2102                        }
2103                }
2104
2105                /* Skip unusable resource groups */
2106                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2107                                                 GFS2_RDF_ERROR)) ||
2108                    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2109                        goto skip_rgrp;
2110
2111                if (sdp->sd_args.ar_rgrplvb)
2112                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2113
2114                /* Get a reservation if we don't already have one */
2115                if (!gfs2_rs_active(rs))
2116                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2117
2118                /* Skip rgrps when we can't get a reservation on first pass */
2119                if (!gfs2_rs_active(rs) && (loops < 1))
2120                        goto check_rgrp;
2121
2122                /* If rgrp has enough free space, use it */
2123                free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
2124                if (free_blocks >= ap->target ||
2125                    (loops == 2 && ap->min_target &&
2126                     free_blocks >= ap->min_target)) {
2127                        ap->allowed = free_blocks;
2128                        return 0;
2129                }
2130check_rgrp:
2131                /* Check for unlinked inodes which can be reclaimed */
2132                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2133                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2134                                        ip->i_no_addr);
2135skip_rgrp:
2136                /* Drop reservation, if we couldn't use reserved rgrp */
2137                if (gfs2_rs_active(rs))
2138                        gfs2_rs_deltree(rs);
2139
2140                /* Unlock rgrp if required */
2141                if (!rg_locked)
2142                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2143next_rgrp:
2144                /* Find the next rgrp, and continue looking */
2145                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2146                        continue;
2147                if (skip)
2148                        continue;
2149
2150                /* If we've scanned all the rgrps, but found no free blocks
2151                 * then this checks for some less likely conditions before
2152                 * trying again.
2153                 */
2154                loops++;
2155                /* Check that fs hasn't grown if writing to rindex */
2156                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2157                        error = gfs2_ri_update(ip);
2158                        if (error)
2159                                return error;
2160                }
2161                /* Flushing the log may release space */
2162                if (loops == 2)
2163                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
2164                                       GFS2_LFC_INPLACE_RESERVE);
2165        }
2166
2167        return -ENOSPC;
2168}
2169
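    /*
     * Illustrative caller sequence (editor's sketch, not part of the
     * original source; quota, transaction and error handling elided):
     *
     *         struct gfs2_alloc_parms ap = { .target = blocks_wanted, };
     *
     *         error = gfs2_inplace_reserve(ip, &ap);
     *         ...
     *         error = gfs2_alloc_blocks(ip, &bn, &n, false, NULL);
     *         ...
     *         gfs2_inplace_release(ip);
     */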
2170/**
2171 * gfs2_inplace_release - release an inplace reservation
2172 * @ip: the inode the reservation was taken out on
2173 *
2174 * Release a reservation made by gfs2_inplace_reserve().
2175 */
2176
2177void gfs2_inplace_release(struct gfs2_inode *ip)
2178{
2179        if (gfs2_holder_initialized(&ip->i_rgd_gh))
2180                gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2181}
2182
2183/**
2184 * gfs2_alloc_extent - allocate an extent from a given bitmap
2185 * @rbm: the position of the first block of the extent
2186 * @dinode: TRUE if the first block we allocate is for a dinode
2187 * @n: The extent length (value/result)
2188 *
2189 * Add the bitmap buffer to the transaction.
2190 * Set the found bits to the new allocation state (used or dinode).
2191 */
2192static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2193                             unsigned int *n)
2194{
2195        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2196        const unsigned int elen = *n;
2197        u64 block;
2198        int ret;
2199
2200        *n = 1;
2201        block = gfs2_rbm_to_block(rbm);
2202        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2203        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2204        block++;
2205        while (*n < elen) {
2206                ret = gfs2_rbm_from_block(&pos, block);
2207                if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
2208                        break;
2209                gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2210                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2211                (*n)++;
2212                block++;
2213        }
2214}
2215
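    /*
     * Example (editor's addition): with *n == 4 requested and the bitmap
     * reading FREE, FREE, USED from @rbm onwards, the first block is
     * claimed at the requested state, the following FREE block is
     * extended as USED, the USED block stops the loop, and *n comes
     * back as 2.
     */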
2216/**
2217 * rgblk_free - Change alloc state of given block(s)
2218 * @sdp: the filesystem
2219 * @rgd: the resource group the blocks are in
2220 * @bstart: the start of a run of blocks to free
2221 * @blen: the length of the block run (all must lie within ONE RG!)
2222 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2223 */
2224
2225static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
2226                       u64 bstart, u32 blen, unsigned char new_state)
2227{
2228        struct gfs2_rbm rbm;
2229        struct gfs2_bitmap *bi, *bi_prev = NULL;
2230
2231        rbm.rgd = rgd;
2232        if (WARN_ON_ONCE(gfs2_rbm_from_block(&rbm, bstart)))
2233                return;
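            /*
             * Editor's note: free-block searches read bi_clone when it
             * exists (see gfs2_rbm_find() above), so copying the bitmap
             * before the first change keeps just-freed blocks from being
             * handed out again before this transaction reaches the log.
             */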
2234        while (blen--) {
2235                bi = rbm_bi(&rbm);
2236                if (bi != bi_prev) {
2237                        if (!bi->bi_clone) {
2238                                bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2239                                                      GFP_NOFS | __GFP_NOFAIL);
2240                                memcpy(bi->bi_clone + bi->bi_offset,
2241                                       bi->bi_bh->b_data + bi->bi_offset,
2242                                       bi->bi_bytes);
2243                        }
2244                        gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2245                        bi_prev = bi;
2246                }
2247                gfs2_setbit(&rbm, false, new_state);
2248                gfs2_rbm_incr(&rbm);
2249        }
2250}
2251
2252/**
2253 * gfs2_rgrp_dump - print out an rgrp
2254 * @seq: The iterator
2255 * @gl: The glock in question
2256 *
2257 */
2258
2259void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
2260{
2261        struct gfs2_rgrpd *rgd = gl->gl_object;
2262        struct gfs2_blkreserv *trs;
2263        const struct rb_node *n;
2264
2265        if (rgd == NULL)
2266                return;
2267        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2268                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2269                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2270                       rgd->rd_reserved, rgd->rd_extfail_pt);
2271        if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
2272                struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
2273
2274                gfs2_print_dbg(seq, "  L: f:%02x b:%u i:%u\n",
2275                               be32_to_cpu(rgl->rl_flags),
2276                               be32_to_cpu(rgl->rl_free),
2277                               be32_to_cpu(rgl->rl_dinodes));
2278        }
2279        spin_lock(&rgd->rd_rsspin);
2280        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2281                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2282                dump_rs(seq, trs);
2283        }
2284        spin_unlock(&rgd->rd_rsspin);
2285}
2286
2287static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2288{
2289        struct gfs2_sbd *sdp = rgd->rd_sbd;
2290        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2291                (unsigned long long)rgd->rd_addr);
2292        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2293        gfs2_rgrp_dump(NULL, rgd->rd_gl);
2294        rgd->rd_flags |= GFS2_RDF_ERROR;
2295}
2296
2297/**
2298 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2299 * @ip: The inode we have just allocated blocks for
2300 * @rbm: The start of the allocated blocks
2301 * @len: The extent length
2302 *
2303 * Adjusts a reservation after an allocation has taken place. If the
2304 * reservation does not match the allocation, or if it is now empty
2305 * then it is removed.
2306 */
2307
2308static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2309                                    const struct gfs2_rbm *rbm, unsigned len)
2310{
2311        struct gfs2_blkreserv *rs = &ip->i_res;
2312        struct gfs2_rgrpd *rgd = rbm->rgd;
2313        unsigned rlen;
2314        u64 block;
2315        int ret;
2316
2317        spin_lock(&rgd->rd_rsspin);
2318        if (gfs2_rs_active(rs)) {
2319                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2320                        block = gfs2_rbm_to_block(rbm);
2321                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2322                        rlen = min(rs->rs_free, len);
2323                        rs->rs_free -= rlen;
2324                        rgd->rd_reserved -= rlen;
2325                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2326                        if (rs->rs_free && !ret)
2327                                goto out;
2328                        /* We used up our block reservation, so we should
2329                           reserve more blocks next time. */
2330                        atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
2331                }
2332                __rs_deltree(rs);
2333        }
2334out:
2335        spin_unlock(&rgd->rd_rsspin);
2336}
2337
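    /*
     * Worked trace (editor's addition): if the reservation starts at
     * the allocated position with rs_free = 20 and len = 5, it is moved
     * forward 5 blocks and shrunk to rs_free = 15, and rd_reserved
     * drops by 5. If the allocation did not start at the reservation,
     * or nothing remains of it, the whole reservation is dropped.
     */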
2338/**
2339 * gfs2_set_alloc_start - Set starting point for block allocation
2340 * @rbm: The rbm which will be set to the required location
2341 * @ip: The gfs2 inode
2342 * @dinode: Flag to say if allocation includes a new inode
2343 *
2344 * This sets the starting point from the reservation if one is active
2345 * otherwise it falls back to guessing a start point based on the
2346 * inode's goal block or the last allocation point in the rgrp.
2347 */
2348
2349static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2350                                 const struct gfs2_inode *ip, bool dinode)
2351{
2352        u64 goal;
2353
2354        if (gfs2_rs_active(&ip->i_res)) {
2355                *rbm = ip->i_res.rs_rbm;
2356                return;
2357        }
2358
2359        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2360                goal = ip->i_goal;
2361        else
2362                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2363
2364        if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
2365                rbm->bii = 0;
2366                rbm->offset = 0;
2367        }
2368}
2369
2370/**
2371 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2372 * @ip: the inode to allocate the block for
2373 * @bn: Used to return the starting block number
2374 * @nblocks: requested number of blocks/extent length (value/result)
2375 * @dinode: 1 if we're allocating a dinode block, else 0
2376 * @generation: the generation number of the inode
2377 *
2378 * Returns: 0 or error
2379 */
2380
2381int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2382                      bool dinode, u64 *generation)
2383{
2384        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2385        struct buffer_head *dibh;
2386        struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
2387        unsigned int ndata;
2388        u64 block; /* block, within the file system scope */
2389        int error;
2390
2391        gfs2_set_alloc_start(&rbm, ip, dinode);
2392        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
2393
2394        if (error == -ENOSPC) {
2395                gfs2_set_alloc_start(&rbm, ip, dinode);
2396                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
2397        }
2398
2399        /* Since all blocks are reserved in advance, this shouldn't happen */
2400        if (error) {
2401                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2402                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2403                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2404                        rbm.rgd->rd_extfail_pt);
2405                goto rgrp_error;
2406        }
2407
2408        gfs2_alloc_extent(&rbm, dinode, nblocks);
2409        block = gfs2_rbm_to_block(&rbm);
2410        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2411        if (gfs2_rs_active(&ip->i_res))
2412                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2413        ndata = *nblocks;
2414        if (dinode)
2415                ndata--;
2416
2417        if (!dinode) {
2418                ip->i_goal = block + ndata - 1;
2419                error = gfs2_meta_inode_buffer(ip, &dibh);
2420                if (error == 0) {
2421                        struct gfs2_dinode *di =
2422                                (struct gfs2_dinode *)dibh->b_data;
2423                        gfs2_trans_add_meta(ip->i_gl, dibh);
2424                        di->di_goal_meta = di->di_goal_data =
2425                                cpu_to_be64(ip->i_goal);
2426                        brelse(dibh);
2427                }
2428        }
2429        if (rbm.rgd->rd_free < *nblocks) {
2430                fs_warn(sdp, "nblocks=%u\n", *nblocks);
2431                goto rgrp_error;
2432        }
2433
2434        rbm.rgd->rd_free -= *nblocks;
2435        if (dinode) {
2436                rbm.rgd->rd_dinodes++;
2437                *generation = rbm.rgd->rd_igeneration++;
2438                if (*generation == 0)
2439                        *generation = rbm.rgd->rd_igeneration++;
2440        }
2441
2442        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2443        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2444
2445        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2446        if (dinode)
2447                gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2448
2449        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2450
2451        rbm.rgd->rd_free_clone -= *nblocks;
2452        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2453                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2454        *bn = block;
2455        return 0;
2456
2457rgrp_error:
2458        gfs2_rgrp_error(rbm.rgd);
2459        return -EIO;
2460}
2461
2462/**
2463 * __gfs2_free_blocks - free a contiguous run of block(s)
2464 * @ip: the inode these blocks are being freed from
2465 * @rgd: the resource group the blocks are in
2466 * @bstart: first block of a run of contiguous blocks
2467 * @blen: the length of the block run
2468 * @meta: 1 if the blocks represent metadata
2469 *
2470 */
2471
2472void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2473                        u64 bstart, u32 blen, int meta)
2474{
2475        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2476
2477        rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
2478        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2479        rgd->rd_free += blen;
2480        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2481        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2482        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2483
2484        /* Directories keep their data in the metadata address space */
2485        if (meta || ip->i_depth)
2486                gfs2_meta_wipe(ip, bstart, blen);
2487}
2488
2489/**
2490 * gfs2_free_meta - free a contiguous run of metadata block(s)
2491 * @ip: the inode these blocks are being freed from
2492 * @rgd: the resource group the blocks are in
2493 * @bstart: first block of a run of contiguous blocks
2494 * @blen: the length of the block run
2495 *
2496 */
2497
2498void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2499                    u64 bstart, u32 blen)
2500{
2501        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2502
2503        __gfs2_free_blocks(ip, rgd, bstart, blen, 1);
2504        gfs2_statfs_change(sdp, 0, +blen, 0);
2505        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2506}
2507
2508void gfs2_unlink_di(struct inode *inode)
2509{
2510        struct gfs2_inode *ip = GFS2_I(inode);
2511        struct gfs2_sbd *sdp = GFS2_SB(inode);
2512        struct gfs2_rgrpd *rgd;
2513        u64 blkno = ip->i_no_addr;
2514
2515        rgd = gfs2_blk2rgrpd(sdp, blkno, true);
2516        if (!rgd)
2517                return;
2518        rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2519        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2520        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2521        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2522        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
2523}
2524
2525void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2526{
2527        struct gfs2_sbd *sdp = rgd->rd_sbd;
2528
2529        rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2530        if (!rgd->rd_dinodes)
2531                gfs2_consist_rgrpd(rgd);
2532        rgd->rd_dinodes--;
2533        rgd->rd_free++;
2534
2535        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2536        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2537        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
2538
2539        gfs2_statfs_change(sdp, 0, +1, -1);
2540        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2541        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2542        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2543}
2544
2545/**
2546 * gfs2_check_blk_type - Check the type of a block
2547 * @sdp: The superblock
2548 * @no_addr: The block number to check
2549 * @type: The block type we are looking for
2550 *
2551 * Returns: 0 if the block type matches the expected type
2552 *          -ESTALE if it doesn't match
2553 *          or -ve errno if something went wrong while checking
2554 */
2555
2556int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2557{
2558        struct gfs2_rgrpd *rgd;
2559        struct gfs2_holder rgd_gh;
2560        struct gfs2_rbm rbm;
2561        int error = -EINVAL;
2562
2563        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2564        if (!rgd)
2565                goto fail;
2566
2567        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2568        if (error)
2569                goto fail;
2570
2571        rbm.rgd = rgd;
2572        error = gfs2_rbm_from_block(&rbm, no_addr);
2573        if (WARN_ON_ONCE(error))
2574                goto fail;
2575
2576        if (gfs2_testbit(&rbm, false) != type)
2577                error = -ESTALE;
2578
2579        gfs2_glock_dq_uninit(&rgd_gh);
2580fail:
2581        return error;
2582}
2583
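    /*
     * Typical use (editor's addition): verifying that a block believed
     * to hold a dinode is still allocated as one before instantiating
     * the inode:
     *
     *         error = gfs2_check_blk_type(sdp, no_addr, GFS2_BLKST_DINODE);
     */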
2584/**
2585 * gfs2_rlist_add - add a RG to a list of RGs
2586 * @ip: the inode
2587 * @rlist: the list of resource groups
2588 * @block: the block
2589 *
2590 * Figure out what RG a block belongs to and add that RG to the list
2591 *
2592 * FIXME: Don't use NOFAIL
2593 *
2594 */
2595
2596void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2597                    u64 block)
2598{
2599        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2600        struct gfs2_rgrpd *rgd;
2601        struct gfs2_rgrpd **tmp;
2602        unsigned int new_space;
2603        unsigned int x;
2604
2605        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2606                return;
2607
2608        /*
2609         * The resource group last accessed is kept in the last position.
2610         */
2611
2612        if (rlist->rl_rgrps) {
2613                rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
2614                if (rgrp_contains_block(rgd, block))
2615                        return;
2616                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2617        } else {
2618                rgd = ip->i_res.rs_rbm.rgd;
2619                if (!rgd || !rgrp_contains_block(rgd, block))
2620                        rgd = gfs2_blk2rgrpd(sdp, block, 1);
2621        }
2622
2623        if (!rgd) {
2624                fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
2625                       (unsigned long long)block);
2626                return;
2627        }
2628
2629        for (x = 0; x < rlist->rl_rgrps; x++) {
2630                if (rlist->rl_rgd[x] == rgd) {
2631                        swap(rlist->rl_rgd[x],
2632                             rlist->rl_rgd[rlist->rl_rgrps - 1]);
2633                        return;
2634                }
2635        }
2636
2637        if (rlist->rl_rgrps == rlist->rl_space) {
2638                new_space = rlist->rl_space + 10;
2639
2640                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2641                              GFP_NOFS | __GFP_NOFAIL);
2642
2643                if (rlist->rl_rgd) {
2644                        memcpy(tmp, rlist->rl_rgd,
2645                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2646                        kfree(rlist->rl_rgd);
2647                }
2648
2649                rlist->rl_space = new_space;
2650                rlist->rl_rgd = tmp;
2651        }
2652
2653        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2654}
2655
2656/**
2657 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2658 *      and initialize an array of glock holders for them
2659 * @rlist: the list of resource groups
2660 *
2661 * FIXME: Don't use NOFAIL
2662 *
2663 */
2664
2665void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
2666{
2667        unsigned int x;
2668
2669        rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
2670                                      sizeof(struct gfs2_holder),
2671                                      GFP_NOFS | __GFP_NOFAIL);
2672        for (x = 0; x < rlist->rl_rgrps; x++)
2673                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2674                                LM_ST_EXCLUSIVE, 0,
2675                                &rlist->rl_ghs[x]);
2676}
2677
2678/**
2679 * gfs2_rlist_free - free a resource group list
2680 * @rlist: the list of resource groups
2681 *
2682 */
2683
2684void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2685{
2686        unsigned int x;
2687
2688        kfree(rlist->rl_rgd);
2689
2690        if (rlist->rl_ghs) {
2691                for (x = 0; x < rlist->rl_rgrps; x++)
2692                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2693                kfree(rlist->rl_ghs);
2694                rlist->rl_ghs = NULL;
2695        }
2696}
2697
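    /*
     * Editor's sketch of the rlist life cycle, assuming the
     * gfs2_glock_nq_m()/gfs2_glock_dq_m() helpers from glock.c
     * (error handling elided):
     *
     *         struct gfs2_rgrp_list rlist = {};
     *
     *         gfs2_rlist_add(ip, &rlist, block);      (once per block)
     *         gfs2_rlist_alloc(&rlist);
     *         gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
     *         ...                                     (free the blocks)
     *         gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
     *         gfs2_rlist_free(&rlist);
     */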
2698