linux/fs/gfs2/rgrp.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   5 */
   6
   7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9#include <linux/slab.h>
  10#include <linux/spinlock.h>
  11#include <linux/completion.h>
  12#include <linux/buffer_head.h>
  13#include <linux/fs.h>
  14#include <linux/gfs2_ondisk.h>
  15#include <linux/prefetch.h>
  16#include <linux/blkdev.h>
  17#include <linux/rbtree.h>
  18#include <linux/random.h>
  19
  20#include "gfs2.h"
  21#include "incore.h"
  22#include "glock.h"
  23#include "glops.h"
  24#include "lops.h"
  25#include "meta_io.h"
  26#include "quota.h"
  27#include "rgrp.h"
  28#include "super.h"
  29#include "trans.h"
  30#include "util.h"
  31#include "log.h"
  32#include "inode.h"
  33#include "trace_gfs2.h"
  34#include "dir.h"
  35
  36#define BFITNOENT ((u32)~0)
  37#define NO_BLOCK ((u64)~0)
  38
  39#if BITS_PER_LONG == 32
  40#define LBITMASK   (0x55555555UL)
  41#define LBITSKIP55 (0x55555555UL)
  42#define LBITSKIP00 (0x00000000UL)
  43#else
  44#define LBITMASK   (0x5555555555555555UL)
  45#define LBITSKIP55 (0x5555555555555555UL)
  46#define LBITSKIP00 (0x0000000000000000UL)
  47#endif
  48
  49/*
  50 * These routines are used by the resource group routines (rgrp.c)
  51 * to keep track of block allocation.  Each block is represented by two
  52 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  53 *
  54 * 0 = Free
  55 * 1 = Used (not metadata)
  56 * 2 = Unlinked (still in use) inode
  57 * 3 = Used (metadata)
  58 */
  59
  60struct gfs2_extent {
  61        struct gfs2_rbm rbm;
  62        u32 len;
  63};
  64
  65static const char valid_change[16] = {
  66                /* current */
  67        /* n */ 0, 1, 1, 1,
  68        /* e */ 1, 0, 0, 0,
  69        /* w */ 0, 0, 0, 1,
  70                1, 0, 0, 0
  71};
  72
  73static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  74                         const struct gfs2_inode *ip, bool nowrap);
  75
  76
  77/**
  78 * gfs2_setbit - Set a bit in the bitmaps
  79 * @rbm: The position of the bit to set
  80 * @do_clone: Also set the clone bitmap, if it exists
  81 * @new_state: the new state of the block
  82 *
  83 */
  84
  85static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
  86                               unsigned char new_state)
  87{
  88        unsigned char *byte1, *byte2, *end, cur_state;
  89        struct gfs2_bitmap *bi = rbm_bi(rbm);
  90        unsigned int buflen = bi->bi_bytes;
  91        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
  92
  93        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
  94        end = bi->bi_bh->b_data + bi->bi_offset + buflen;
  95
  96        BUG_ON(byte1 >= end);
  97
  98        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
  99
 100        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
 101                struct gfs2_sbd *sdp = rbm->rgd->rd_sbd;
 102
 103                fs_warn(sdp, "buf_blk = 0x%x old_state=%d, new_state=%d\n",
 104                        rbm->offset, cur_state, new_state);
 105                fs_warn(sdp, "rgrp=0x%llx bi_start=0x%x biblk: 0x%llx\n",
 106                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start,
 107                        (unsigned long long)bi->bi_bh->b_blocknr);
 108                fs_warn(sdp, "bi_offset=0x%x bi_bytes=0x%x block=0x%llx\n",
 109                        bi->bi_offset, bi->bi_bytes,
 110                        (unsigned long long)gfs2_rbm_to_block(rbm));
 111                dump_stack();
 112                gfs2_consist_rgrpd(rbm->rgd);
 113                return;
 114        }
 115        *byte1 ^= (cur_state ^ new_state) << bit;
 116
 117        if (do_clone && bi->bi_clone) {
 118                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
 119                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 120                *byte2 ^= (cur_state ^ new_state) << bit;
 121        }
 122}
 123
 124/**
 125 * gfs2_testbit - test a bit in the bitmaps
 126 * @rbm: The bit to test
 127 * @use_clone: If true, test the clone bitmap, not the official bitmap.
 128 *
 129 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
 130 * not the "real" bitmaps, to avoid allocating recently freed blocks.
 131 *
 132 * Returns: The two bit block state of the requested bit
 133 */
 134
 135static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
 136{
 137        struct gfs2_bitmap *bi = rbm_bi(rbm);
 138        const u8 *buffer;
 139        const u8 *byte;
 140        unsigned int bit;
 141
 142        if (use_clone && bi->bi_clone)
 143                buffer = bi->bi_clone;
 144        else
 145                buffer = bi->bi_bh->b_data;
 146        buffer += bi->bi_offset;
 147        byte = buffer + (rbm->offset / GFS2_NBBY);
 148        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 149
 150        return (*byte >> bit) & GFS2_BIT_MASK;
 151}
 152
 153/**
 154 * gfs2_bit_search
 155 * @ptr: Pointer to bitmap data
 156 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 157 * @state: The state we are searching for
 158 *
 159 * We xor the bitmap data with a patter which is the bitwise opposite
 160 * of what we are looking for, this gives rise to a pattern of ones
 161 * wherever there is a match. Since we have two bits per entry, we
 162 * take this pattern, shift it down by one place and then and it with
 163 * the original. All the even bit positions (0,2,4, etc) then represent
 164 * successful matches, so we mask with 0x55555..... to remove the unwanted
 165 * odd bit positions.
 166 *
 167 * This allows searching of a whole u64 at once (32 blocks) with a
 168 * single test (on 64 bit arches).
 169 */
 170
 171static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 172{
 173        u64 tmp;
 174        static const u64 search[] = {
 175                [0] = 0xffffffffffffffffULL,
 176                [1] = 0xaaaaaaaaaaaaaaaaULL,
 177                [2] = 0x5555555555555555ULL,
 178                [3] = 0x0000000000000000ULL,
 179        };
 180        tmp = le64_to_cpu(*ptr) ^ search[state];
 181        tmp &= (tmp >> 1);
 182        tmp &= mask;
 183        return tmp;
 184}
 185
 186/**
 187 * rs_cmp - multi-block reservation range compare
 188 * @blk: absolute file system block number of the new reservation
 189 * @len: number of blocks in the new reservation
 190 * @rs: existing reservation to compare against
 191 *
 192 * returns: 1 if the block range is beyond the reach of the reservation
 193 *         -1 if the block range is before the start of the reservation
 194 *          0 if the block range overlaps with the reservation
 195 */
 196static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 197{
 198        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 199
 200        if (blk >= startblk + rs->rs_free)
 201                return 1;
 202        if (blk + len - 1 < startblk)
 203                return -1;
 204        return 0;
 205}
 206
 207/**
 208 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 209 *       a block in a given allocation state.
 210 * @buf: the buffer that holds the bitmaps
 211 * @len: the length (in bytes) of the buffer
 212 * @goal: start search at this block's bit-pair (within @buffer)
 213 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 214 *
 215 * Scope of @goal and returned block number is only within this bitmap buffer,
 216 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 217 * beginning of a bitmap block buffer, skipping any header structures, but
 218 * headers are always a multiple of 64 bits long so that the buffer is
 219 * always aligned to a 64 bit boundary.
 220 *
 221 * The size of the buffer is in bytes, but is it assumed that it is
 222 * always ok to read a complete multiple of 64 bits at the end
 223 * of the block in case the end is no aligned to a natural boundary.
 224 *
 225 * Return: the block number (bitmap buffer scope) that was found
 226 */
 227
 228static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 229                       u32 goal, u8 state)
 230{
 231        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 232        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 233        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 234        u64 tmp;
 235        u64 mask = 0x5555555555555555ULL;
 236        u32 bit;
 237
 238        /* Mask off bits we don't care about at the start of the search */
 239        mask <<= spoint;
 240        tmp = gfs2_bit_search(ptr, mask, state);
 241        ptr++;
 242        while(tmp == 0 && ptr < end) {
 243                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 244                ptr++;
 245        }
 246        /* Mask off any bits which are more than len bytes from the start */
 247        if (ptr == end && (len & (sizeof(u64) - 1)))
 248                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 249        /* Didn't find anything, so return */
 250        if (tmp == 0)
 251                return BFITNOENT;
 252        ptr--;
 253        bit = __ffs64(tmp);
 254        bit /= 2;       /* two bits per entry in the bitmap */
 255        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 256}
 257
 258/**
 259 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 260 * @rbm: The rbm with rgd already set correctly
 261 * @block: The block number (filesystem relative)
 262 *
 263 * This sets the bi and offset members of an rbm based on a
 264 * resource group and a filesystem relative block number. The
 265 * resource group must be set in the rbm on entry, the bi and
 266 * offset members will be set by this function.
 267 *
 268 * Returns: 0 on success, or an error code
 269 */
 270
 271static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 272{
 273        if (!rgrp_contains_block(rbm->rgd, block))
 274                return -E2BIG;
 275        rbm->bii = 0;
 276        rbm->offset = block - rbm->rgd->rd_data0;
 277        /* Check if the block is within the first block */
 278        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
 279                return 0;
 280
 281        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
 282        rbm->offset += (sizeof(struct gfs2_rgrp) -
 283                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
 284        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 285        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 286        return 0;
 287}
 288
 289/**
 290 * gfs2_rbm_incr - increment an rbm structure
 291 * @rbm: The rbm with rgd already set correctly
 292 *
 293 * This function takes an existing rbm structure and increments it to the next
 294 * viable block offset.
 295 *
 296 * Returns: If incrementing the offset would cause the rbm to go past the
 297 *          end of the rgrp, true is returned, otherwise false.
 298 *
 299 */
 300
 301static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
 302{
 303        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
 304                rbm->offset++;
 305                return false;
 306        }
 307        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
 308                return true;
 309
 310        rbm->offset = 0;
 311        rbm->bii++;
 312        return false;
 313}
 314
 315/**
 316 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 317 * @rbm: Position to search (value/result)
 318 * @n_unaligned: Number of unaligned blocks to check
 319 * @len: Decremented for each block found (terminate on zero)
 320 *
 321 * Returns: true if a non-free block is encountered
 322 */
 323
 324static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 325{
 326        u32 n;
 327        u8 res;
 328
 329        for (n = 0; n < n_unaligned; n++) {
 330                res = gfs2_testbit(rbm, true);
 331                if (res != GFS2_BLKST_FREE)
 332                        return true;
 333                (*len)--;
 334                if (*len == 0)
 335                        return true;
 336                if (gfs2_rbm_incr(rbm))
 337                        return true;
 338        }
 339
 340        return false;
 341}
 342
 343/**
 344 * gfs2_free_extlen - Return extent length of free blocks
 345 * @rrbm: Starting position
 346 * @len: Max length to check
 347 *
 348 * Starting at the block specified by the rbm, see how many free blocks
 349 * there are, not reading more than len blocks ahead. This can be done
 350 * using memchr_inv when the blocks are byte aligned, but has to be done
 351 * on a block by block basis in case of unaligned blocks. Also this
 352 * function can cope with bitmap boundaries (although it must stop on
 353 * a resource group boundary)
 354 *
 355 * Returns: Number of free blocks in the extent
 356 */
 357
 358static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 359{
 360        struct gfs2_rbm rbm = *rrbm;
 361        u32 n_unaligned = rbm.offset & 3;
 362        u32 size = len;
 363        u32 bytes;
 364        u32 chunk_size;
 365        u8 *ptr, *start, *end;
 366        u64 block;
 367        struct gfs2_bitmap *bi;
 368
 369        if (n_unaligned &&
 370            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
 371                goto out;
 372
 373        n_unaligned = len & 3;
 374        /* Start is now byte aligned */
 375        while (len > 3) {
 376                bi = rbm_bi(&rbm);
 377                start = bi->bi_bh->b_data;
 378                if (bi->bi_clone)
 379                        start = bi->bi_clone;
 380                start += bi->bi_offset;
 381                end = start + bi->bi_bytes;
 382                BUG_ON(rbm.offset & 3);
 383                start += (rbm.offset / GFS2_NBBY);
 384                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
 385                ptr = memchr_inv(start, 0, bytes);
 386                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
 387                chunk_size *= GFS2_NBBY;
 388                BUG_ON(len < chunk_size);
 389                len -= chunk_size;
 390                block = gfs2_rbm_to_block(&rbm);
 391                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
 392                        n_unaligned = 0;
 393                        break;
 394                }
 395                if (ptr) {
 396                        n_unaligned = 3;
 397                        break;
 398                }
 399                n_unaligned = len & 3;
 400        }
 401
 402        /* Deal with any bits left over at the end */
 403        if (n_unaligned)
 404                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
 405out:
 406        return size - len;
 407}
 408
 409/**
 410 * gfs2_bitcount - count the number of bits in a certain state
 411 * @rgd: the resource group descriptor
 412 * @buffer: the buffer that holds the bitmaps
 413 * @buflen: the length (in bytes) of the buffer
 414 * @state: the state of the block we're looking for
 415 *
 416 * Returns: The number of bits
 417 */
 418
 419static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 420                         unsigned int buflen, u8 state)
 421{
 422        const u8 *byte = buffer;
 423        const u8 *end = buffer + buflen;
 424        const u8 state1 = state << 2;
 425        const u8 state2 = state << 4;
 426        const u8 state3 = state << 6;
 427        u32 count = 0;
 428
 429        for (; byte < end; byte++) {
 430                if (((*byte) & 0x03) == state)
 431                        count++;
 432                if (((*byte) & 0x0C) == state1)
 433                        count++;
 434                if (((*byte) & 0x30) == state2)
 435                        count++;
 436                if (((*byte) & 0xC0) == state3)
 437                        count++;
 438        }
 439
 440        return count;
 441}
 442
 443/**
 444 * gfs2_rgrp_verify - Verify that a resource group is consistent
 445 * @rgd: the rgrp
 446 *
 447 */
 448
 449void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 450{
 451        struct gfs2_sbd *sdp = rgd->rd_sbd;
 452        struct gfs2_bitmap *bi = NULL;
 453        u32 length = rgd->rd_length;
 454        u32 count[4], tmp;
 455        int buf, x;
 456
 457        memset(count, 0, 4 * sizeof(u32));
 458
 459        /* Count # blocks in each of 4 possible allocation states */
 460        for (buf = 0; buf < length; buf++) {
 461                bi = rgd->rd_bits + buf;
 462                for (x = 0; x < 4; x++)
 463                        count[x] += gfs2_bitcount(rgd,
 464                                                  bi->bi_bh->b_data +
 465                                                  bi->bi_offset,
 466                                                  bi->bi_bytes, x);
 467        }
 468
 469        if (count[0] != rgd->rd_free) {
 470                if (gfs2_consist_rgrpd(rgd))
 471                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 472                               count[0], rgd->rd_free);
 473                return;
 474        }
 475
 476        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 477        if (count[1] != tmp) {
 478                if (gfs2_consist_rgrpd(rgd))
 479                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 480                               count[1], tmp);
 481                return;
 482        }
 483
 484        if (count[2] + count[3] != rgd->rd_dinodes) {
 485                if (gfs2_consist_rgrpd(rgd))
 486                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 487                               count[2] + count[3], rgd->rd_dinodes);
 488                return;
 489        }
 490}
 491
 492/**
 493 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 494 * @sdp: The GFS2 superblock
 495 * @blk: The data block number
 496 * @exact: True if this needs to be an exact match
 497 *
 498 * The @exact argument should be set to true by most callers. The exception
 499 * is when we need to match blocks which are not represented by the rgrp
 500 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
 501 * there for alignment purposes. Another way of looking at it is that @exact
 502 * matches only valid data/metadata blocks, but with @exact false, it will
 503 * match any block within the extent of the rgrp.
 504 *
 505 * Returns: The resource group, or NULL if not found
 506 */
 507
 508struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 509{
 510        struct rb_node *n, *next;
 511        struct gfs2_rgrpd *cur;
 512
 513        spin_lock(&sdp->sd_rindex_spin);
 514        n = sdp->sd_rindex_tree.rb_node;
 515        while (n) {
 516                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 517                next = NULL;
 518                if (blk < cur->rd_addr)
 519                        next = n->rb_left;
 520                else if (blk >= cur->rd_data0 + cur->rd_data)
 521                        next = n->rb_right;
 522                if (next == NULL) {
 523                        spin_unlock(&sdp->sd_rindex_spin);
 524                        if (exact) {
 525                                if (blk < cur->rd_addr)
 526                                        return NULL;
 527                                if (blk >= cur->rd_data0 + cur->rd_data)
 528                                        return NULL;
 529                        }
 530                        return cur;
 531                }
 532                n = next;
 533        }
 534        spin_unlock(&sdp->sd_rindex_spin);
 535
 536        return NULL;
 537}
 538
 539/**
 540 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 541 * @sdp: The GFS2 superblock
 542 *
 543 * Returns: The first rgrp in the filesystem
 544 */
 545
 546struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 547{
 548        const struct rb_node *n;
 549        struct gfs2_rgrpd *rgd;
 550
 551        spin_lock(&sdp->sd_rindex_spin);
 552        n = rb_first(&sdp->sd_rindex_tree);
 553        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 554        spin_unlock(&sdp->sd_rindex_spin);
 555
 556        return rgd;
 557}
 558
 559/**
 560 * gfs2_rgrpd_get_next - get the next RG
 561 * @rgd: the resource group descriptor
 562 *
 563 * Returns: The next rgrp
 564 */
 565
 566struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 567{
 568        struct gfs2_sbd *sdp = rgd->rd_sbd;
 569        const struct rb_node *n;
 570
 571        spin_lock(&sdp->sd_rindex_spin);
 572        n = rb_next(&rgd->rd_node);
 573        if (n == NULL)
 574                n = rb_first(&sdp->sd_rindex_tree);
 575
 576        if (unlikely(&rgd->rd_node == n)) {
 577                spin_unlock(&sdp->sd_rindex_spin);
 578                return NULL;
 579        }
 580        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 581        spin_unlock(&sdp->sd_rindex_spin);
 582        return rgd;
 583}
 584
 585void check_and_update_goal(struct gfs2_inode *ip)
 586{
 587        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 588        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 589                ip->i_goal = ip->i_no_addr;
 590}
 591
 592void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 593{
 594        int x;
 595
 596        for (x = 0; x < rgd->rd_length; x++) {
 597                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 598                kfree(bi->bi_clone);
 599                bi->bi_clone = NULL;
 600        }
 601}
 602
 603/**
 604 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
 605 *                 plus a quota allocations data structure, if necessary
 606 * @ip: the inode for this reservation
 607 */
 608int gfs2_rsqa_alloc(struct gfs2_inode *ip)
 609{
 610        return gfs2_qa_alloc(ip);
 611}
 612
 613static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 614{
 615        struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
 616
 617        gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
 618                       (unsigned long long)ip->i_no_addr,
 619                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 620                       rs->rs_rbm.offset, rs->rs_free);
 621}
 622
 623/**
 624 * __rs_deltree - remove a multi-block reservation from the rgd tree
 625 * @rs: The reservation to remove
 626 *
 627 */
 628static void __rs_deltree(struct gfs2_blkreserv *rs)
 629{
 630        struct gfs2_rgrpd *rgd;
 631
 632        if (!gfs2_rs_active(rs))
 633                return;
 634
 635        rgd = rs->rs_rbm.rgd;
 636        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 637        rb_erase(&rs->rs_node, &rgd->rd_rstree);
 638        RB_CLEAR_NODE(&rs->rs_node);
 639
 640        if (rs->rs_free) {
 641                u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) +
 642                                 rs->rs_free - 1;
 643                struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, };
 644                struct gfs2_bitmap *start, *last;
 645
 646                /* return reserved blocks to the rgrp */
 647                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 648                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
 649                /* The rgrp extent failure point is likely not to increase;
 650                   it will only do so if the freed blocks are somehow
 651                   contiguous with a span of free blocks that follows. Still,
 652                   it will force the number to be recalculated later. */
 653                rgd->rd_extfail_pt += rs->rs_free;
 654                rs->rs_free = 0;
 655                if (gfs2_rbm_from_block(&last_rbm, last_block))
 656                        return;
 657                start = rbm_bi(&rs->rs_rbm);
 658                last = rbm_bi(&last_rbm);
 659                do
 660                        clear_bit(GBF_FULL, &start->bi_flags);
 661                while (start++ != last);
 662        }
 663}
 664
 665/**
 666 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 667 * @rs: The reservation to remove
 668 *
 669 */
 670void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 671{
 672        struct gfs2_rgrpd *rgd;
 673
 674        rgd = rs->rs_rbm.rgd;
 675        if (rgd) {
 676                spin_lock(&rgd->rd_rsspin);
 677                __rs_deltree(rs);
 678                BUG_ON(rs->rs_free);
 679                spin_unlock(&rgd->rd_rsspin);
 680        }
 681}
 682
 683/**
 684 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 685 * @ip: The inode for this reservation
 686 * @wcount: The inode's write count, or NULL
 687 *
 688 */
 689void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
 690{
 691        down_write(&ip->i_rw_mutex);
 692        if ((wcount == NULL) || (atomic_read(wcount) <= 1))
 693                gfs2_rs_deltree(&ip->i_res);
 694        up_write(&ip->i_rw_mutex);
 695        gfs2_qa_delete(ip, wcount);
 696}
 697
 698/**
 699 * return_all_reservations - return all reserved blocks back to the rgrp.
 700 * @rgd: the rgrp that needs its space back
 701 *
 702 * We previously reserved a bunch of blocks for allocation. Now we need to
 703 * give them back. This leave the reservation structures in tact, but removes
 704 * all of their corresponding "no-fly zones".
 705 */
 706static void return_all_reservations(struct gfs2_rgrpd *rgd)
 707{
 708        struct rb_node *n;
 709        struct gfs2_blkreserv *rs;
 710
 711        spin_lock(&rgd->rd_rsspin);
 712        while ((n = rb_first(&rgd->rd_rstree))) {
 713                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 714                __rs_deltree(rs);
 715        }
 716        spin_unlock(&rgd->rd_rsspin);
 717}
 718
 719void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 720{
 721        struct rb_node *n;
 722        struct gfs2_rgrpd *rgd;
 723        struct gfs2_glock *gl;
 724
 725        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 726                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 727                gl = rgd->rd_gl;
 728
 729                rb_erase(n, &sdp->sd_rindex_tree);
 730
 731                if (gl) {
 732                        glock_clear_object(gl, rgd);
 733                        gfs2_rgrp_brelse(rgd);
 734                        gfs2_glock_put(gl);
 735                }
 736
 737                gfs2_free_clones(rgd);
 738                kfree(rgd->rd_bits);
 739                rgd->rd_bits = NULL;
 740                return_all_reservations(rgd);
 741                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 742        }
 743}
 744
 745static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 746{
 747        struct gfs2_sbd *sdp = rgd->rd_sbd;
 748
 749        fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 750        fs_info(sdp, "ri_length = %u\n", rgd->rd_length);
 751        fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 752        fs_info(sdp, "ri_data = %u\n", rgd->rd_data);
 753        fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes);
 754}
 755
 756/**
 757 * gfs2_compute_bitstructs - Compute the bitmap sizes
 758 * @rgd: The resource group descriptor
 759 *
 760 * Calculates bitmap descriptors, one for each block that contains bitmap data
 761 *
 762 * Returns: errno
 763 */
 764
 765static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 766{
 767        struct gfs2_sbd *sdp = rgd->rd_sbd;
 768        struct gfs2_bitmap *bi;
 769        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 770        u32 bytes_left, bytes;
 771        int x;
 772
 773        if (!length)
 774                return -EINVAL;
 775
 776        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 777        if (!rgd->rd_bits)
 778                return -ENOMEM;
 779
 780        bytes_left = rgd->rd_bitbytes;
 781
 782        for (x = 0; x < length; x++) {
 783                bi = rgd->rd_bits + x;
 784
 785                bi->bi_flags = 0;
 786                /* small rgrp; bitmap stored completely in header block */
 787                if (length == 1) {
 788                        bytes = bytes_left;
 789                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 790                        bi->bi_start = 0;
 791                        bi->bi_bytes = bytes;
 792                        bi->bi_blocks = bytes * GFS2_NBBY;
 793                /* header block */
 794                } else if (x == 0) {
 795                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 796                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 797                        bi->bi_start = 0;
 798                        bi->bi_bytes = bytes;
 799                        bi->bi_blocks = bytes * GFS2_NBBY;
 800                /* last block */
 801                } else if (x + 1 == length) {
 802                        bytes = bytes_left;
 803                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 804                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 805                        bi->bi_bytes = bytes;
 806                        bi->bi_blocks = bytes * GFS2_NBBY;
 807                /* other blocks */
 808                } else {
 809                        bytes = sdp->sd_sb.sb_bsize -
 810                                sizeof(struct gfs2_meta_header);
 811                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 812                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 813                        bi->bi_bytes = bytes;
 814                        bi->bi_blocks = bytes * GFS2_NBBY;
 815                }
 816
 817                bytes_left -= bytes;
 818        }
 819
 820        if (bytes_left) {
 821                gfs2_consist_rgrpd(rgd);
 822                return -EIO;
 823        }
 824        bi = rgd->rd_bits + (length - 1);
 825        if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
 826                if (gfs2_consist_rgrpd(rgd)) {
 827                        gfs2_rindex_print(rgd);
 828                        fs_err(sdp, "start=%u len=%u offset=%u\n",
 829                               bi->bi_start, bi->bi_bytes, bi->bi_offset);
 830                }
 831                return -EIO;
 832        }
 833
 834        return 0;
 835}
 836
 837/**
 838 * gfs2_ri_total - Total up the file system space, according to the rindex.
 839 * @sdp: the filesystem
 840 *
 841 */
 842u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 843{
 844        u64 total_data = 0;     
 845        struct inode *inode = sdp->sd_rindex;
 846        struct gfs2_inode *ip = GFS2_I(inode);
 847        char buf[sizeof(struct gfs2_rindex)];
 848        int error, rgrps;
 849
 850        for (rgrps = 0;; rgrps++) {
 851                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 852
 853                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 854                        break;
 855                error = gfs2_internal_read(ip, buf, &pos,
 856                                           sizeof(struct gfs2_rindex));
 857                if (error != sizeof(struct gfs2_rindex))
 858                        break;
 859                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 860        }
 861        return total_data;
 862}
 863
 864static int rgd_insert(struct gfs2_rgrpd *rgd)
 865{
 866        struct gfs2_sbd *sdp = rgd->rd_sbd;
 867        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 868
 869        /* Figure out where to put new node */
 870        while (*newn) {
 871                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 872                                                  rd_node);
 873
 874                parent = *newn;
 875                if (rgd->rd_addr < cur->rd_addr)
 876                        newn = &((*newn)->rb_left);
 877                else if (rgd->rd_addr > cur->rd_addr)
 878                        newn = &((*newn)->rb_right);
 879                else
 880                        return -EEXIST;
 881        }
 882
 883        rb_link_node(&rgd->rd_node, parent, newn);
 884        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 885        sdp->sd_rgrps++;
 886        return 0;
 887}
 888
 889/**
 890 * read_rindex_entry - Pull in a new resource index entry from the disk
 891 * @ip: Pointer to the rindex inode
 892 *
 893 * Returns: 0 on success, > 0 on EOF, error code otherwise
 894 */
 895
 896static int read_rindex_entry(struct gfs2_inode *ip)
 897{
 898        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 899        const unsigned bsize = sdp->sd_sb.sb_bsize;
 900        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 901        struct gfs2_rindex buf;
 902        int error;
 903        struct gfs2_rgrpd *rgd;
 904
 905        if (pos >= i_size_read(&ip->i_inode))
 906                return 1;
 907
 908        error = gfs2_internal_read(ip, (char *)&buf, &pos,
 909                                   sizeof(struct gfs2_rindex));
 910
 911        if (error != sizeof(struct gfs2_rindex))
 912                return (error == 0) ? 1 : error;
 913
 914        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 915        error = -ENOMEM;
 916        if (!rgd)
 917                return error;
 918
 919        rgd->rd_sbd = sdp;
 920        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
 921        rgd->rd_length = be32_to_cpu(buf.ri_length);
 922        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 923        rgd->rd_data = be32_to_cpu(buf.ri_data);
 924        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 925        spin_lock_init(&rgd->rd_rsspin);
 926
 927        error = compute_bitstructs(rgd);
 928        if (error)
 929                goto fail;
 930
 931        error = gfs2_glock_get(sdp, rgd->rd_addr,
 932                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 933        if (error)
 934                goto fail;
 935
 936        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 937        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 938        if (rgd->rd_data > sdp->sd_max_rg_data)
 939                sdp->sd_max_rg_data = rgd->rd_data;
 940        spin_lock(&sdp->sd_rindex_spin);
 941        error = rgd_insert(rgd);
 942        spin_unlock(&sdp->sd_rindex_spin);
 943        if (!error) {
 944                glock_set_object(rgd->rd_gl, rgd);
 945                rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
 946                rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
 947                                                    rgd->rd_length) * bsize) - 1;
 948                return 0;
 949        }
 950
 951        error = 0; /* someone else read in the rgrp; free it and ignore it */
 952        gfs2_glock_put(rgd->rd_gl);
 953
 954fail:
 955        kfree(rgd->rd_bits);
 956        rgd->rd_bits = NULL;
 957        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 958        return error;
 959}
 960
 961/**
 962 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 963 * @sdp: the GFS2 superblock
 964 *
 965 * The purpose of this function is to select a subset of the resource groups
 966 * and mark them as PREFERRED. We do it in such a way that each node prefers
 967 * to use a unique set of rgrps to minimize glock contention.
 968 */
 969static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 970{
 971        struct gfs2_rgrpd *rgd, *first;
 972        int i;
 973
 974        /* Skip an initial number of rgrps, based on this node's journal ID.
 975           That should start each node out on its own set. */
 976        rgd = gfs2_rgrpd_get_first(sdp);
 977        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 978                rgd = gfs2_rgrpd_get_next(rgd);
 979        first = rgd;
 980
 981        do {
 982                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 983                for (i = 0; i < sdp->sd_journals; i++) {
 984                        rgd = gfs2_rgrpd_get_next(rgd);
 985                        if (!rgd || rgd == first)
 986                                break;
 987                }
 988        } while (rgd && rgd != first);
 989}
 990
 991/**
 992 * gfs2_ri_update - Pull in a new resource index from the disk
 993 * @ip: pointer to the rindex inode
 994 *
 995 * Returns: 0 on successful update, error code otherwise
 996 */
 997
 998static int gfs2_ri_update(struct gfs2_inode *ip)
 999{
1000        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1001        int error;
1002
1003        do {
1004                error = read_rindex_entry(ip);
1005        } while (error == 0);
1006
1007        if (error < 0)
1008                return error;
1009
1010        set_rgrp_preferences(sdp);
1011
1012        sdp->sd_rindex_uptodate = 1;
1013        return 0;
1014}
1015
1016/**
1017 * gfs2_rindex_update - Update the rindex if required
1018 * @sdp: The GFS2 superblock
1019 *
1020 * We grab a lock on the rindex inode to make sure that it doesn't
1021 * change whilst we are performing an operation. We keep this lock
1022 * for quite long periods of time compared to other locks. This
1023 * doesn't matter, since it is shared and it is very, very rarely
1024 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1025 *
1026 * This makes sure that we're using the latest copy of the resource index
1027 * special file, which might have been updated if someone expanded the
1028 * filesystem (via gfs2_grow utility), which adds new resource groups.
1029 *
1030 * Returns: 0 on succeess, error code otherwise
1031 */
1032
1033int gfs2_rindex_update(struct gfs2_sbd *sdp)
1034{
1035        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1036        struct gfs2_glock *gl = ip->i_gl;
1037        struct gfs2_holder ri_gh;
1038        int error = 0;
1039        int unlock_required = 0;
1040
1041        /* Read new copy from disk if we don't have the latest */
1042        if (!sdp->sd_rindex_uptodate) {
1043                if (!gfs2_glock_is_locked_by_me(gl)) {
1044                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1045                        if (error)
1046                                return error;
1047                        unlock_required = 1;
1048                }
1049                if (!sdp->sd_rindex_uptodate)
1050                        error = gfs2_ri_update(ip);
1051                if (unlock_required)
1052                        gfs2_glock_dq_uninit(&ri_gh);
1053        }
1054
1055        return error;
1056}
1057
1058static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1059{
1060        const struct gfs2_rgrp *str = buf;
1061        u32 rg_flags;
1062
1063        rg_flags = be32_to_cpu(str->rg_flags);
1064        rg_flags &= ~GFS2_RDF_MASK;
1065        rgd->rd_flags &= GFS2_RDF_MASK;
1066        rgd->rd_flags |= rg_flags;
1067        rgd->rd_free = be32_to_cpu(str->rg_free);
1068        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1069        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1070        /* rd_data0, rd_data and rd_bitbytes already set from rindex */
1071}
1072
1073static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1074{
1075        const struct gfs2_rgrp *str = buf;
1076
1077        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1078        rgl->rl_flags = str->rg_flags;
1079        rgl->rl_free = str->rg_free;
1080        rgl->rl_dinodes = str->rg_dinodes;
1081        rgl->rl_igeneration = str->rg_igeneration;
1082        rgl->__pad = 0UL;
1083}
1084
1085static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1086{
1087        struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
1088        struct gfs2_rgrp *str = buf;
1089        u32 crc;
1090
1091        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1092        str->rg_free = cpu_to_be32(rgd->rd_free);
1093        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1094        if (next == NULL)
1095                str->rg_skip = 0;
1096        else if (next->rd_addr > rgd->rd_addr)
1097                str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
1098        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1099        str->rg_data0 = cpu_to_be64(rgd->rd_data0);
1100        str->rg_data = cpu_to_be32(rgd->rd_data);
1101        str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
1102        str->rg_crc = 0;
1103        crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
1104        str->rg_crc = cpu_to_be32(crc);
1105
1106        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1107        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
1108}
1109
1110static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1111{
1112        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1113        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1114        int valid = 1;
1115
1116        if (rgl->rl_flags != str->rg_flags) {
1117                printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
1118                       (unsigned long long)rgd->rd_addr,
1119                       be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
1120                valid = 0;
1121        }
1122        if (rgl->rl_free != str->rg_free) {
1123                printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
1124                       (unsigned long long)rgd->rd_addr,
1125                       be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
1126                valid = 0;
1127        }
1128        if (rgl->rl_dinodes != str->rg_dinodes) {
1129                printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
1130                       (unsigned long long)rgd->rd_addr,
1131                       be32_to_cpu(rgl->rl_dinodes),
1132                       be32_to_cpu(str->rg_dinodes));
1133                valid = 0;
1134        }
1135        if (rgl->rl_igeneration != str->rg_igeneration) {
1136                printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
1137                       "%llu/%llu", (unsigned long long)rgd->rd_addr,
1138                       (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
1139                       (unsigned long long)be64_to_cpu(str->rg_igeneration));
1140                valid = 0;
1141        }
1142        return valid;
1143}
1144
1145static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1146{
1147        struct gfs2_bitmap *bi;
1148        const u32 length = rgd->rd_length;
1149        const u8 *buffer = NULL;
1150        u32 i, goal, count = 0;
1151
1152        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1153                goal = 0;
1154                buffer = bi->bi_bh->b_data + bi->bi_offset;
1155                WARN_ON(!buffer_uptodate(bi->bi_bh));
1156                while (goal < bi->bi_blocks) {
1157                        goal = gfs2_bitfit(buffer, bi->bi_bytes, goal,
1158                                           GFS2_BLKST_UNLINKED);
1159                        if (goal == BFITNOENT)
1160                                break;
1161                        count++;
1162                        goal++;
1163                }
1164        }
1165
1166        return count;
1167}
1168
1169
1170/**
1171 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1172 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1173 *
1174 * Read in all of a Resource Group's header and bitmap blocks.
1175 * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
1176 *
1177 * Returns: errno
1178 */
1179
1180static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1181{
1182        struct gfs2_sbd *sdp = rgd->rd_sbd;
1183        struct gfs2_glock *gl = rgd->rd_gl;
1184        unsigned int length = rgd->rd_length;
1185        struct gfs2_bitmap *bi;
1186        unsigned int x, y;
1187        int error;
1188
1189        if (rgd->rd_bits[0].bi_bh != NULL)
1190                return 0;
1191
1192        for (x = 0; x < length; x++) {
1193                bi = rgd->rd_bits + x;
1194                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
1195                if (error)
1196                        goto fail;
1197        }
1198
1199        for (y = length; y--;) {
1200                bi = rgd->rd_bits + y;
1201                error = gfs2_meta_wait(sdp, bi->bi_bh);
1202                if (error)
1203                        goto fail;
1204                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1205                                              GFS2_METATYPE_RG)) {
1206                        error = -EIO;
1207                        goto fail;
1208                }
1209        }
1210
1211        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1212                for (x = 0; x < length; x++)
1213                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1214                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1215                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1216                rgd->rd_free_clone = rgd->rd_free;
1217                /* max out the rgrp allocation failure point */
1218                rgd->rd_extfail_pt = rgd->rd_free;
1219        }
1220        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1221                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1222                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1223                                     rgd->rd_bits[0].bi_bh->b_data);
1224        }
1225        else if (sdp->sd_args.ar_rgrplvb) {
1226                if (!gfs2_rgrp_lvb_valid(rgd)){
1227                        gfs2_consist_rgrpd(rgd);
1228                        error = -EIO;
1229                        goto fail;
1230                }
1231                if (rgd->rd_rgl->rl_unlinked == 0)
1232                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1233        }
1234        return 0;
1235
1236fail:
1237        while (x--) {
1238                bi = rgd->rd_bits + x;
1239                brelse(bi->bi_bh);
1240                bi->bi_bh = NULL;
1241                gfs2_assert_warn(sdp, !bi->bi_clone);
1242        }
1243
1244        return error;
1245}
1246
1247static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1248{
1249        u32 rl_flags;
1250
1251        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1252                return 0;
1253
1254        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1255                return gfs2_rgrp_bh_get(rgd);
1256
1257        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1258        rl_flags &= ~GFS2_RDF_MASK;
1259        rgd->rd_flags &= GFS2_RDF_MASK;
1260        rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
1261        if (rgd->rd_rgl->rl_unlinked == 0)
1262                rgd->rd_flags &= ~GFS2_RDF_CHECK;
1263        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1264        rgd->rd_free_clone = rgd->rd_free;
1265        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1266        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1267        return 0;
1268}
1269
1270int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1271{
1272        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1273        struct gfs2_sbd *sdp = rgd->rd_sbd;
1274
1275        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1276                return 0;
1277        return gfs2_rgrp_bh_get(rgd);
1278}
1279
1280/**
1281 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1282 * @rgd: The resource group
1283 *
1284 */
1285
1286void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1287{
1288        int x, length = rgd->rd_length;
1289
1290        for (x = 0; x < length; x++) {
1291                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1292                if (bi->bi_bh) {
1293                        brelse(bi->bi_bh);
1294                        bi->bi_bh = NULL;
1295                }
1296        }
1297
1298}
1299
1300/**
1301 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1302 * @gh: The glock holder for the resource group
1303 *
1304 */
1305
1306void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1307{
1308        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1309        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1310                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1311
1312        if (rgd && demote_requested)
1313                gfs2_rgrp_brelse(rgd);
1314}
1315
1316int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1317                             struct buffer_head *bh,
1318                             const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1319{
1320        struct super_block *sb = sdp->sd_vfs;
1321        u64 blk;
1322        sector_t start = 0;
1323        sector_t nr_blks = 0;
1324        int rv;
1325        unsigned int x;
1326        u32 trimmed = 0;
1327        u8 diff;
1328
1329        for (x = 0; x < bi->bi_bytes; x++) {
1330                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1331                clone += bi->bi_offset;
1332                clone += x;
1333                if (bh) {
1334                        const u8 *orig = bh->b_data + bi->bi_offset + x;
1335                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1336                } else {
1337                        diff = ~(*clone | (*clone >> 1));
1338                }
1339                diff &= 0x55;
1340                if (diff == 0)
1341                        continue;
1342                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1343                while(diff) {
1344                        if (diff & 1) {
1345                                if (nr_blks == 0)
1346                                        goto start_new_extent;
1347                                if ((start + nr_blks) != blk) {
1348                                        if (nr_blks >= minlen) {
1349                                                rv = sb_issue_discard(sb,
1350                                                        start, nr_blks,
1351                                                        GFP_NOFS, 0);
1352                                                if (rv)
1353                                                        goto fail;
1354                                                trimmed += nr_blks;
1355                                        }
1356                                        nr_blks = 0;
1357start_new_extent:
1358                                        start = blk;
1359                                }
1360                                nr_blks++;
1361                        }
1362                        diff >>= 2;
1363                        blk++;
1364                }
1365        }
1366        if (nr_blks >= minlen) {
1367                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1368                if (rv)
1369                        goto fail;
1370                trimmed += nr_blks;
1371        }
1372        if (ptrimmed)
1373                *ptrimmed = trimmed;
1374        return 0;
1375
1376fail:
1377        if (sdp->sd_args.ar_discard)
1378                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
1379        sdp->sd_args.ar_discard = 0;
1380        return -EIO;
1381}
1382
1383/**
1384 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1385 * @filp: Any file on the filesystem
1386 * @argp: Pointer to the arguments (also used to pass result)
1387 *
1388 * Returns: 0 on success, otherwise error code
1389 */
1390
1391int gfs2_fitrim(struct file *filp, void __user *argp)
1392{
1393        struct inode *inode = file_inode(filp);
1394        struct gfs2_sbd *sdp = GFS2_SB(inode);
1395        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1396        struct buffer_head *bh;
1397        struct gfs2_rgrpd *rgd;
1398        struct gfs2_rgrpd *rgd_end;
1399        struct gfs2_holder gh;
1400        struct fstrim_range r;
1401        int ret = 0;
1402        u64 amt;
1403        u64 trimmed = 0;
1404        u64 start, end, minlen;
1405        unsigned int x;
1406        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1407
1408        if (!capable(CAP_SYS_ADMIN))
1409                return -EPERM;
1410
1411        if (!blk_queue_discard(q))
1412                return -EOPNOTSUPP;
1413
1414        if (copy_from_user(&r, argp, sizeof(r)))
1415                return -EFAULT;
1416
1417        ret = gfs2_rindex_update(sdp);
1418        if (ret)
1419                return ret;
1420
1421        start = r.start >> bs_shift;
1422        end = start + (r.len >> bs_shift);
1423        minlen = max_t(u64, r.minlen,
1424                       q->limits.discard_granularity) >> bs_shift;
1425
1426        if (end <= start || minlen > sdp->sd_max_rg_data)
1427                return -EINVAL;
1428
1429        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1430        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1431
1432        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1433            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1434                return -EINVAL; /* start is beyond the end of the fs */
1435
1436        while (1) {
1437
1438                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1439                if (ret)
1440                        goto out;
1441
1442                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1443                        /* Trim each bitmap in the rgrp */
1444                        for (x = 0; x < rgd->rd_length; x++) {
1445                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1446                                ret = gfs2_rgrp_send_discards(sdp,
1447                                                rgd->rd_data0, NULL, bi, minlen,
1448                                                &amt);
1449                                if (ret) {
1450                                        gfs2_glock_dq_uninit(&gh);
1451                                        goto out;
1452                                }
1453                                trimmed += amt;
1454                        }
1455
1456                        /* Mark rgrp as having been trimmed */
1457                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1458                        if (ret == 0) {
1459                                bh = rgd->rd_bits[0].bi_bh;
1460                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1461                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1462                                gfs2_rgrp_out(rgd, bh->b_data);
1463                                gfs2_trans_end(sdp);
1464                        }
1465                }
1466                gfs2_glock_dq_uninit(&gh);
1467
1468                if (rgd == rgd_end)
1469                        break;
1470
1471                rgd = gfs2_rgrpd_get_next(rgd);
1472        }
1473
1474out:
1475        r.len = trimmed << bs_shift;
1476        if (copy_to_user(argp, &r, sizeof(r)))
1477                return -EFAULT;
1478
1479        return ret;
1480}
1481
1482/**
1483 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1484 * @ip: the inode structure
1485 *
1486 */
1487static void rs_insert(struct gfs2_inode *ip)
1488{
1489        struct rb_node **newn, *parent = NULL;
1490        int rc;
1491        struct gfs2_blkreserv *rs = &ip->i_res;
1492        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1493        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1494
1495        BUG_ON(gfs2_rs_active(rs));
1496
1497        spin_lock(&rgd->rd_rsspin);
1498        newn = &rgd->rd_rstree.rb_node;
1499        while (*newn) {
1500                struct gfs2_blkreserv *cur =
1501                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1502
1503                parent = *newn;
1504                rc = rs_cmp(fsblock, rs->rs_free, cur);
1505                if (rc > 0)
1506                        newn = &((*newn)->rb_right);
1507                else if (rc < 0)
1508                        newn = &((*newn)->rb_left);
1509                else {
1510                        spin_unlock(&rgd->rd_rsspin);
1511                        WARN_ON(1);
1512                        return;
1513                }
1514        }
1515
1516        rb_link_node(&rs->rs_node, parent, newn);
1517        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1518
1519        /* Do our rgrp accounting for the reservation */
1520        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1521        spin_unlock(&rgd->rd_rsspin);
1522        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1523}
1524
1525/**
1526 * rgd_free - return the number of free blocks we can allocate.
1527 * @rgd: the resource group
1528 *
1529 * This function returns the number of free blocks for an rgrp.
1530 * That's the clone-free blocks (blocks that are free, not including those
1531 * still being used for unlinked files that haven't been deleted.)
1532 *
1533 * It also subtracts any blocks reserved by someone else, but does not
1534 * include free blocks that are still part of our current reservation,
1535 * because obviously we can (and will) allocate them.
1536 */
1537static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
1538{
1539        u32 tot_reserved, tot_free;
1540
1541        if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
1542                return 0;
1543        tot_reserved = rgd->rd_reserved - rs->rs_free;
1544
1545        if (rgd->rd_free_clone < tot_reserved)
1546                tot_reserved = 0;
1547
1548        tot_free = rgd->rd_free_clone - tot_reserved;
1549
1550        return tot_free;
1551}
1552
1553/**
1554 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1555 * @rgd: the resource group descriptor
1556 * @ip: pointer to the inode for which we're reserving blocks
1557 * @ap: the allocation parameters
1558 *
1559 */
1560
1561static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1562                           const struct gfs2_alloc_parms *ap)
1563{
1564        struct gfs2_rbm rbm = { .rgd = rgd, };
1565        u64 goal;
1566        struct gfs2_blkreserv *rs = &ip->i_res;
1567        u32 extlen;
1568        u32 free_blocks = rgd_free(rgd, rs);
1569        int ret;
1570        struct inode *inode = &ip->i_inode;
1571
1572        if (S_ISDIR(inode->i_mode))
1573                extlen = 1;
1574        else {
1575                extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
1576                extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
1577        }
1578        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1579                return;
1580
1581        /* Find bitmap block that contains bits for goal block */
1582        if (rgrp_contains_block(rgd, ip->i_goal))
1583                goal = ip->i_goal;
1584        else
1585                goal = rgd->rd_last_alloc + rgd->rd_data0;
1586
1587        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1588                return;
1589
1590        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
1591        if (ret == 0) {
1592                rs->rs_rbm = rbm;
1593                rs->rs_free = extlen;
1594                rs_insert(ip);
1595        } else {
1596                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1597                        rgd->rd_last_alloc = 0;
1598        }
1599}
1600
1601/**
1602 * gfs2_next_unreserved_block - Return next block that is not reserved
1603 * @rgd: The resource group
1604 * @block: The starting block
1605 * @length: The required length
1606 * @ip: Ignore any reservations for this inode
1607 *
1608 * If the block does not appear in any reservation, then return the
1609 * block number unchanged. If it does appear in the reservation, then
1610 * keep looking through the tree of reservations in order to find the
1611 * first block number which is not reserved.
1612 */
1613
1614static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1615                                      u32 length,
1616                                      const struct gfs2_inode *ip)
1617{
1618        struct gfs2_blkreserv *rs;
1619        struct rb_node *n;
1620        int rc;
1621
1622        spin_lock(&rgd->rd_rsspin);
1623        n = rgd->rd_rstree.rb_node;
1624        while (n) {
1625                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1626                rc = rs_cmp(block, length, rs);
1627                if (rc < 0)
1628                        n = n->rb_left;
1629                else if (rc > 0)
1630                        n = n->rb_right;
1631                else
1632                        break;
1633        }
1634
1635        if (n) {
1636                while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
1637                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1638                        n = n->rb_right;
1639                        if (n == NULL)
1640                                break;
1641                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1642                }
1643        }
1644
1645        spin_unlock(&rgd->rd_rsspin);
1646        return block;
1647}
1648
1649/**
1650 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1651 * @rbm: The current position in the resource group
1652 * @ip: The inode for which we are searching for blocks
1653 * @minext: The minimum extent length
1654 * @maxext: A pointer to the maximum extent structure
1655 *
1656 * This checks the current position in the rgrp to see whether there is
1657 * a reservation covering this block. If not then this function is a
1658 * no-op. If there is, then the position is moved to the end of the
1659 * contiguous reservation(s) so that we are pointing at the first
1660 * non-reserved block.
1661 *
1662 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1663 */
1664
1665static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1666                                             const struct gfs2_inode *ip,
1667                                             u32 minext,
1668                                             struct gfs2_extent *maxext)
1669{
1670        u64 block = gfs2_rbm_to_block(rbm);
1671        u32 extlen = 1;
1672        u64 nblock;
1673        int ret;
1674
1675        /*
1676         * If we have a minimum extent length, then skip over any extent
1677         * which is less than the min extent length in size.
1678         */
1679        if (minext) {
1680                extlen = gfs2_free_extlen(rbm, minext);
1681                if (extlen <= maxext->len)
1682                        goto fail;
1683        }
1684
1685        /*
1686         * Check the extent which has been found against the reservations
1687         * and skip if parts of it are already reserved
1688         */
1689        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1690        if (nblock == block) {
1691                if (!minext || extlen >= minext)
1692                        return 0;
1693
1694                if (extlen > maxext->len) {
1695                        maxext->len = extlen;
1696                        maxext->rbm = *rbm;
1697                }
1698fail:
1699                nblock = block + extlen;
1700        }
1701        ret = gfs2_rbm_from_block(rbm, nblock);
1702        if (ret < 0)
1703                return ret;
1704        return 1;
1705}
1706
1707/**
1708 * gfs2_rbm_find - Look for blocks of a particular state
1709 * @rbm: Value/result starting position and final position
1710 * @state: The state which we want to find
1711 * @minext: Pointer to the requested extent length (NULL for a single block)
1712 *          This is updated to be the actual reservation size.
1713 * @ip: If set, check for reservations
1714 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1715 *          around until we've reached the starting point.
1716 *
1717 * Side effects:
1718 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1719 *   has no free blocks in it.
1720 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1721 *   has come up short on a free block search.
1722 *
1723 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1724 */
1725
1726static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1727                         const struct gfs2_inode *ip, bool nowrap)
1728{
1729        bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
1730        struct buffer_head *bh;
1731        int last_bii;
1732        u32 offset;
1733        u8 *buffer;
1734        bool wrapped = false;
1735        int ret;
1736        struct gfs2_bitmap *bi;
1737        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1738
1739        /*
1740         * Determine the last bitmap to search.  If we're not starting at the
1741         * beginning of a bitmap, we need to search that bitmap twice to scan
1742         * the entire resource group.
1743         */
1744        last_bii = rbm->bii - (rbm->offset == 0);
1745
1746        while(1) {
1747                bi = rbm_bi(rbm);
1748                if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
1749                    test_bit(GBF_FULL, &bi->bi_flags) &&
1750                    (state == GFS2_BLKST_FREE))
1751                        goto next_bitmap;
1752
1753                bh = bi->bi_bh;
1754                buffer = bh->b_data + bi->bi_offset;
1755                WARN_ON(!buffer_uptodate(bh));
1756                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1757                        buffer = bi->bi_clone + bi->bi_offset;
1758                offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);
1759                if (offset == BFITNOENT) {
1760                        if (state == GFS2_BLKST_FREE && rbm->offset == 0)
1761                                set_bit(GBF_FULL, &bi->bi_flags);
1762                        goto next_bitmap;
1763                }
1764                rbm->offset = offset;
1765                if (ip == NULL)
1766                        return 0;
1767
1768                ret = gfs2_reservation_check_and_update(rbm, ip,
1769                                                        minext ? *minext : 0,
1770                                                        &maxext);
1771                if (ret == 0)
1772                        return 0;
1773                if (ret > 0)
1774                        goto next_iter;
1775                if (ret == -E2BIG) {
1776                        rbm->bii = 0;
1777                        rbm->offset = 0;
1778                        goto res_covered_end_of_rgrp;
1779                }
1780                return ret;
1781
1782next_bitmap:    /* Find next bitmap in the rgrp */
1783                rbm->offset = 0;
1784                rbm->bii++;
1785                if (rbm->bii == rbm->rgd->rd_length)
1786                        rbm->bii = 0;
1787res_covered_end_of_rgrp:
1788                if (rbm->bii == 0) {
1789                        if (wrapped)
1790                                break;
1791                        wrapped = true;
1792                        if (nowrap)
1793                                break;
1794                }
1795next_iter:
1796                /* Have we scanned the entire resource group? */
1797                if (wrapped && rbm->bii > last_bii)
1798                        break;
1799        }
1800
1801        if (minext == NULL || state != GFS2_BLKST_FREE)
1802                return -ENOSPC;
1803
1804        /* If the extent was too small, and it's smaller than the smallest
1805           to have failed before, remember for future reference that it's
1806           useless to search this rgrp again for this amount or more. */
1807        if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
1808            *minext < rbm->rgd->rd_extfail_pt)
1809                rbm->rgd->rd_extfail_pt = *minext;
1810
1811        /* If the maximum extent we found is big enough to fulfill the
1812           minimum requirements, use it anyway. */
1813        if (maxext.len) {
1814                *rbm = maxext.rbm;
1815                *minext = maxext.len;
1816                return 0;
1817        }
1818
1819        return -ENOSPC;
1820}
1821
1822/**
1823 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1824 * @rgd: The rgrp
1825 * @last_unlinked: block address of the last dinode we unlinked
1826 * @skip: block address we should explicitly not unlink
1827 *
1828 * Returns: 0 if no error
1829 *          The inode, if one has been found, in inode.
1830 */
1831
1832static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1833{
1834        u64 block;
1835        struct gfs2_sbd *sdp = rgd->rd_sbd;
1836        struct gfs2_glock *gl;
1837        struct gfs2_inode *ip;
1838        int error;
1839        int found = 0;
1840        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1841
1842        while (1) {
1843                down_write(&sdp->sd_log_flush_lock);
1844                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1845                                      true);
1846                up_write(&sdp->sd_log_flush_lock);
1847                if (error == -ENOSPC)
1848                        break;
1849                if (WARN_ON_ONCE(error))
1850                        break;
1851
1852                block = gfs2_rbm_to_block(&rbm);
1853                if (gfs2_rbm_from_block(&rbm, block + 1))
1854                        break;
1855                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1856                        continue;
1857                if (block == skip)
1858                        continue;
1859                *last_unlinked = block;
1860
1861                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
1862                if (error)
1863                        continue;
1864
1865                /* If the inode is already in cache, we can ignore it here
1866                 * because the existing inode disposal code will deal with
1867                 * it when all refs have gone away. Accessing gl_object like
1868                 * this is not safe in general. Here it is ok because we do
1869                 * not dereference the pointer, and we only need an approx
1870                 * answer to whether it is NULL or not.
1871                 */
1872                ip = gl->gl_object;
1873
1874                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1875                        gfs2_glock_put(gl);
1876                else
1877                        found++;
1878
1879                /* Limit reclaim to sensible number of tasks */
1880                if (found > NR_CPUS)
1881                        return;
1882        }
1883
1884        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1885        return;
1886}
1887
1888/**
1889 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1890 * @rgd: The rgrp in question
1891 * @loops: An indication of how picky we can be (0=very, 1=less so)
1892 *
1893 * This function uses the recently added glock statistics in order to
1894 * figure out whether a parciular resource group is suffering from
1895 * contention from multiple nodes. This is done purely on the basis
1896 * of timings, since this is the only data we have to work with and
1897 * our aim here is to reject a resource group which is highly contended
1898 * but (very important) not to do this too often in order to ensure that
1899 * we do not land up introducing fragmentation by changing resource
1900 * groups when not actually required.
1901 *
1902 * The calculation is fairly simple, we want to know whether the SRTTB
1903 * (i.e. smoothed round trip time for blocking operations) to acquire
1904 * the lock for this rgrp's glock is significantly greater than the
1905 * time taken for resource groups on average. We introduce a margin in
1906 * the form of the variable @var which is computed as the sum of the two
1907 * respective variences, and multiplied by a factor depending on @loops
1908 * and whether we have a lot of data to base the decision on. This is
1909 * then tested against the square difference of the means in order to
1910 * decide whether the result is statistically significant or not.
1911 *
1912 * Returns: A boolean verdict on the congestion status
1913 */
1914
1915static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1916{
1917        const struct gfs2_glock *gl = rgd->rd_gl;
1918        const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1919        struct gfs2_lkstats *st;
1920        u64 r_dcount, l_dcount;
1921        u64 l_srttb, a_srttb = 0;
1922        s64 srttb_diff;
1923        u64 sqr_diff;
1924        u64 var;
1925        int cpu, nonzero = 0;
1926
1927        preempt_disable();
1928        for_each_present_cpu(cpu) {
1929                st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1930                if (st->stats[GFS2_LKS_SRTTB]) {
1931                        a_srttb += st->stats[GFS2_LKS_SRTTB];
1932                        nonzero++;
1933                }
1934        }
1935        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1936        if (nonzero)
1937                do_div(a_srttb, nonzero);
1938        r_dcount = st->stats[GFS2_LKS_DCOUNT];
1939        var = st->stats[GFS2_LKS_SRTTVARB] +
1940              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1941        preempt_enable();
1942
1943        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1944        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1945
1946        if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1947                return false;
1948
1949        srttb_diff = a_srttb - l_srttb;
1950        sqr_diff = srttb_diff * srttb_diff;
1951
1952        var *= 2;
1953        if (l_dcount < 8 || r_dcount < 8)
1954                var *= 2;
1955        if (loops == 1)
1956                var *= 2;
1957
1958        return ((srttb_diff < 0) && (sqr_diff > var));
1959}
1960
1961/**
1962 * gfs2_rgrp_used_recently
1963 * @rs: The block reservation with the rgrp to test
1964 * @msecs: The time limit in milliseconds
1965 *
1966 * Returns: True if the rgrp glock has been used within the time limit
1967 */
1968static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1969                                    u64 msecs)
1970{
1971        u64 tdiff;
1972
1973        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1974                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1975
1976        return tdiff > (msecs * 1000 * 1000);
1977}
1978
1979static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1980{
1981        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1982        u32 skip;
1983
1984        get_random_bytes(&skip, sizeof(skip));
1985        return skip % sdp->sd_rgrps;
1986}
1987
1988static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1989{
1990        struct gfs2_rgrpd *rgd = *pos;
1991        struct gfs2_sbd *sdp = rgd->rd_sbd;
1992
1993        rgd = gfs2_rgrpd_get_next(rgd);
1994        if (rgd == NULL)
1995                rgd = gfs2_rgrpd_get_first(sdp);
1996        *pos = rgd;
1997        if (rgd != begin) /* If we didn't wrap */
1998                return true;
1999        return false;
2000}
2001
2002/**
2003 * fast_to_acquire - determine if a resource group will be fast to acquire
2004 *
2005 * If this is one of our preferred rgrps, it should be quicker to acquire,
2006 * because we tried to set ourselves up as dlm lock master.
2007 */
2008static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
2009{
2010        struct gfs2_glock *gl = rgd->rd_gl;
2011
2012        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
2013            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
2014            !test_bit(GLF_DEMOTE, &gl->gl_flags))
2015                return 1;
2016        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
2017                return 1;
2018        return 0;
2019}
2020
2021/**
2022 * gfs2_inplace_reserve - Reserve space in the filesystem
2023 * @ip: the inode to reserve space for
2024 * @ap: the allocation parameters
2025 *
2026 * We try our best to find an rgrp that has at least ap->target blocks
2027 * available. After a couple of passes (loops == 2), the prospects of finding
2028 * such an rgrp diminish. At this stage, we return the first rgrp that has
2029 * at least ap->min_target blocks available. Either way, we set ap->allowed to
2030 * the number of blocks available in the chosen rgrp.
2031 *
2032 * Returns: 0 on success,
2033 *          -ENOMEM if a suitable rgrp can't be found
2034 *          errno otherwise
2035 */
2036
2037int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
2038{
2039        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2040        struct gfs2_rgrpd *begin = NULL;
2041        struct gfs2_blkreserv *rs = &ip->i_res;
2042        int error = 0, rg_locked, flags = 0;
2043        u64 last_unlinked = NO_BLOCK;
2044        int loops = 0;
2045        u32 free_blocks, skip = 0;
2046
2047        if (sdp->sd_args.ar_rgrplvb)
2048                flags |= GL_SKIP;
2049        if (gfs2_assert_warn(sdp, ap->target))
2050                return -EINVAL;
2051        if (gfs2_rs_active(rs)) {
2052                begin = rs->rs_rbm.rgd;
2053        } else if (rs->rs_rbm.rgd &&
2054                   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
2055                begin = rs->rs_rbm.rgd;
2056        } else {
2057                check_and_update_goal(ip);
2058                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
2059        }
2060        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
2061                skip = gfs2_orlov_skip(ip);
2062        if (rs->rs_rbm.rgd == NULL)
2063                return -EBADSLT;
2064
2065        while (loops < 3) {
2066                rg_locked = 1;
2067
2068                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
2069                        rg_locked = 0;
2070                        if (skip && skip--)
2071                                goto next_rgrp;
2072                        if (!gfs2_rs_active(rs)) {
2073                                if (loops == 0 &&
2074                                    !fast_to_acquire(rs->rs_rbm.rgd))
2075                                        goto next_rgrp;
2076                                if ((loops < 2) &&
2077                                    gfs2_rgrp_used_recently(rs, 1000) &&
2078                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2079                                        goto next_rgrp;
2080                        }
2081                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2082                                                   LM_ST_EXCLUSIVE, flags,
2083                                                   &ip->i_rgd_gh);
2084                        if (unlikely(error))
2085                                return error;
2086                        if (!gfs2_rs_active(rs) && (loops < 2) &&
2087                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2088                                goto skip_rgrp;
2089                        if (sdp->sd_args.ar_rgrplvb) {
2090                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
2091                                if (unlikely(error)) {
2092                                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2093                                        return error;
2094                                }
2095                        }
2096                }
2097
2098                /* Skip unusable resource groups */
2099                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2100                                                 GFS2_RDF_ERROR)) ||
2101                    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2102                        goto skip_rgrp;
2103
2104                if (sdp->sd_args.ar_rgrplvb)
2105                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2106
2107                /* Get a reservation if we don't already have one */
2108                if (!gfs2_rs_active(rs))
2109                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2110
2111                /* Skip rgrps when we can't get a reservation on first pass */
2112                if (!gfs2_rs_active(rs) && (loops < 1))
2113                        goto check_rgrp;
2114
2115                /* If rgrp has enough free space, use it */
2116                free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
2117                if (free_blocks >= ap->target ||
2118                    (loops == 2 && ap->min_target &&
2119                     free_blocks >= ap->min_target)) {
2120                        ap->allowed = free_blocks;
2121                        return 0;
2122                }
2123check_rgrp:
2124                /* Check for unlinked inodes which can be reclaimed */
2125                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2126                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2127                                        ip->i_no_addr);
2128skip_rgrp:
2129                /* Drop reservation, if we couldn't use reserved rgrp */
2130                if (gfs2_rs_active(rs))
2131                        gfs2_rs_deltree(rs);
2132
2133                /* Unlock rgrp if required */
2134                if (!rg_locked)
2135                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2136next_rgrp:
2137                /* Find the next rgrp, and continue looking */
2138                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2139                        continue;
2140                if (skip)
2141                        continue;
2142
2143                /* If we've scanned all the rgrps, but found no free blocks
2144                 * then this checks for some less likely conditions before
2145                 * trying again.
2146                 */
2147                loops++;
2148                /* Check that fs hasn't grown if writing to rindex */
2149                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2150                        error = gfs2_ri_update(ip);
2151                        if (error)
2152                                return error;
2153                }
2154                /* Flushing the log may release space */
2155                if (loops == 2)
2156                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
2157                                       GFS2_LFC_INPLACE_RESERVE);
2158        }
2159
2160        return -ENOSPC;
2161}
2162
2163/**
2164 * gfs2_inplace_release - release an inplace reservation
2165 * @ip: the inode the reservation was taken out on
2166 *
2167 * Release a reservation made by gfs2_inplace_reserve().
2168 */
2169
2170void gfs2_inplace_release(struct gfs2_inode *ip)
2171{
2172        if (gfs2_holder_initialized(&ip->i_rgd_gh))
2173                gfs2_glock_dq_uninit(&ip->i_rgd_gh);
2174}
2175
2176/**
2177 * gfs2_alloc_extent - allocate an extent from a given bitmap
2178 * @rbm: the resource group information
2179 * @dinode: TRUE if the first block we allocate is for a dinode
2180 * @n: The extent length (value/result)
2181 *
2182 * Add the bitmap buffer to the transaction.
2183 * Set the found bits to @new_state to change block's allocation state.
2184 */
2185static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2186                             unsigned int *n)
2187{
2188        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2189        const unsigned int elen = *n;
2190        u64 block;
2191        int ret;
2192
2193        *n = 1;
2194        block = gfs2_rbm_to_block(rbm);
2195        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2196        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2197        block++;
2198        while (*n < elen) {
2199                ret = gfs2_rbm_from_block(&pos, block);
2200                if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
2201                        break;
2202                gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2203                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2204                (*n)++;
2205                block++;
2206        }
2207}
2208
2209/**
2210 * rgblk_free - Change alloc state of given block(s)
2211 * @sdp: the filesystem
2212 * @rgd: the resource group the blocks are in
2213 * @bstart: the start of a run of blocks to free
2214 * @blen: the length of the block run (all must lie within ONE RG!)
2215 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2216 */
2217
2218static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
2219                       u64 bstart, u32 blen, unsigned char new_state)
2220{
2221        struct gfs2_rbm rbm;
2222        struct gfs2_bitmap *bi, *bi_prev = NULL;
2223
2224        rbm.rgd = rgd;
2225        if (WARN_ON_ONCE(gfs2_rbm_from_block(&rbm, bstart)))
2226                return;
2227        while (blen--) {
2228                bi = rbm_bi(&rbm);
2229                if (bi != bi_prev) {
2230                        if (!bi->bi_clone) {
2231                                bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2232                                                      GFP_NOFS | __GFP_NOFAIL);
2233                                memcpy(bi->bi_clone + bi->bi_offset,
2234                                       bi->bi_bh->b_data + bi->bi_offset,
2235                                       bi->bi_bytes);
2236                        }
2237                        gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2238                        bi_prev = bi;
2239                }
2240                gfs2_setbit(&rbm, false, new_state);
2241                gfs2_rbm_incr(&rbm);
2242        }
2243}
2244
2245/**
2246 * gfs2_rgrp_dump - print out an rgrp
2247 * @seq: The iterator
2248 * @gl: The glock in question
2249 *
2250 */
2251
2252void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
2253{
2254        struct gfs2_rgrpd *rgd = gl->gl_object;
2255        struct gfs2_blkreserv *trs;
2256        const struct rb_node *n;
2257
2258        if (rgd == NULL)
2259                return;
2260        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2261                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2262                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2263                       rgd->rd_reserved, rgd->rd_extfail_pt);
2264        if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
2265                struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
2266
2267                gfs2_print_dbg(seq, "  L: f:%02x b:%u i:%u\n",
2268                               be32_to_cpu(rgl->rl_flags),
2269                               be32_to_cpu(rgl->rl_free),
2270                               be32_to_cpu(rgl->rl_dinodes));
2271        }
2272        spin_lock(&rgd->rd_rsspin);
2273        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2274                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2275                dump_rs(seq, trs);
2276        }
2277        spin_unlock(&rgd->rd_rsspin);
2278}
2279
2280static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2281{
2282        struct gfs2_sbd *sdp = rgd->rd_sbd;
2283        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2284                (unsigned long long)rgd->rd_addr);
2285        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2286        gfs2_rgrp_dump(NULL, rgd->rd_gl);
2287        rgd->rd_flags |= GFS2_RDF_ERROR;
2288}
2289
2290/**
2291 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2292 * @ip: The inode we have just allocated blocks for
2293 * @rbm: The start of the allocated blocks
2294 * @len: The extent length
2295 *
2296 * Adjusts a reservation after an allocation has taken place. If the
2297 * reservation does not match the allocation, or if it is now empty
2298 * then it is removed.
2299 */
2300
2301static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2302                                    const struct gfs2_rbm *rbm, unsigned len)
2303{
2304        struct gfs2_blkreserv *rs = &ip->i_res;
2305        struct gfs2_rgrpd *rgd = rbm->rgd;
2306        unsigned rlen;
2307        u64 block;
2308        int ret;
2309
2310        spin_lock(&rgd->rd_rsspin);
2311        if (gfs2_rs_active(rs)) {
2312                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2313                        block = gfs2_rbm_to_block(rbm);
2314                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2315                        rlen = min(rs->rs_free, len);
2316                        rs->rs_free -= rlen;
2317                        rgd->rd_reserved -= rlen;
2318                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2319                        if (rs->rs_free && !ret)
2320                                goto out;
2321                        /* We used up our block reservation, so we should
2322                           reserve more blocks next time. */
2323                        atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
2324                }
2325                __rs_deltree(rs);
2326        }
2327out:
2328        spin_unlock(&rgd->rd_rsspin);
2329}
2330
2331/**
2332 * gfs2_set_alloc_start - Set starting point for block allocation
2333 * @rbm: The rbm which will be set to the required location
2334 * @ip: The gfs2 inode
2335 * @dinode: Flag to say if allocation includes a new inode
2336 *
2337 * This sets the starting point from the reservation if one is active
2338 * otherwise it falls back to guessing a start point based on the
2339 * inode's goal block or the last allocation point in the rgrp.
2340 */
2341
2342static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2343                                 const struct gfs2_inode *ip, bool dinode)
2344{
2345        u64 goal;
2346
2347        if (gfs2_rs_active(&ip->i_res)) {
2348                *rbm = ip->i_res.rs_rbm;
2349                return;
2350        }
2351
2352        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2353                goal = ip->i_goal;
2354        else
2355                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2356
2357        if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
2358                rbm->bii = 0;
2359                rbm->offset = 0;
2360        }
2361}
2362
2363/**
2364 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2365 * @ip: the inode to allocate the block for
2366 * @bn: Used to return the starting block number
2367 * @nblocks: requested number of blocks/extent length (value/result)
2368 * @dinode: 1 if we're allocating a dinode block, else 0
2369 * @generation: the generation number of the inode
2370 *
2371 * Returns: 0 or error
2372 */
2373
2374int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2375                      bool dinode, u64 *generation)
2376{
2377        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2378        struct buffer_head *dibh;
2379        struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
2380        unsigned int ndata;
2381        u64 block; /* block, within the file system scope */
2382        int error;
2383
2384        gfs2_set_alloc_start(&rbm, ip, dinode);
2385        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
2386
2387        if (error == -ENOSPC) {
2388                gfs2_set_alloc_start(&rbm, ip, dinode);
2389                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
2390        }
2391
2392        /* Since all blocks are reserved in advance, this shouldn't happen */
2393        if (error) {
2394                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2395                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2396                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2397                        rbm.rgd->rd_extfail_pt);
2398                goto rgrp_error;
2399        }
2400
2401        gfs2_alloc_extent(&rbm, dinode, nblocks);
2402        block = gfs2_rbm_to_block(&rbm);
2403        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2404        if (gfs2_rs_active(&ip->i_res))
2405                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2406        ndata = *nblocks;
2407        if (dinode)
2408                ndata--;
2409
2410        if (!dinode) {
2411                ip->i_goal = block + ndata - 1;
2412                error = gfs2_meta_inode_buffer(ip, &dibh);
2413                if (error == 0) {
2414                        struct gfs2_dinode *di =
2415                                (struct gfs2_dinode *)dibh->b_data;
2416                        gfs2_trans_add_meta(ip->i_gl, dibh);
2417                        di->di_goal_meta = di->di_goal_data =
2418                                cpu_to_be64(ip->i_goal);
2419                        brelse(dibh);
2420                }
2421        }
2422        if (rbm.rgd->rd_free < *nblocks) {
2423                fs_warn(sdp, "nblocks=%u\n", *nblocks);
2424                goto rgrp_error;
2425        }
2426
2427        rbm.rgd->rd_free -= *nblocks;
2428        if (dinode) {
2429                rbm.rgd->rd_dinodes++;
2430                *generation = rbm.rgd->rd_igeneration++;
2431                if (*generation == 0)
2432                        *generation = rbm.rgd->rd_igeneration++;
2433        }
2434
2435        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2436        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2437
2438        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2439        if (dinode)
2440                gfs2_trans_remove_revoke(sdp, block, *nblocks);
2441
2442        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2443
2444        rbm.rgd->rd_free_clone -= *nblocks;
2445        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2446                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2447        *bn = block;
2448        return 0;
2449
2450rgrp_error:
2451        gfs2_rgrp_error(rbm.rgd);
2452        return -EIO;
2453}
2454
2455/**
2456 * __gfs2_free_blocks - free a contiguous run of block(s)
2457 * @ip: the inode these blocks are being freed from
2458 * @rgd: the resource group the blocks are in
2459 * @bstart: first block of a run of contiguous blocks
2460 * @blen: the length of the block run
2461 * @meta: 1 if the blocks represent metadata
2462 *
2463 */
2464
2465void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2466                        u64 bstart, u32 blen, int meta)
2467{
2468        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2469
2470        rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
2471        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2472        rgd->rd_free += blen;
2473        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2474        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2475        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2476
2477        /* Directories keep their data in the metadata address space */
2478        if (meta || ip->i_depth)
2479                gfs2_meta_wipe(ip, bstart, blen);
2480}
2481
2482/**
2483 * gfs2_free_meta - free a contiguous run of data block(s)
2484 * @ip: the inode these blocks are being freed from
2485 * @rgd: the resource group the blocks are in
2486 * @bstart: first block of a run of contiguous blocks
2487 * @blen: the length of the block run
2488 *
2489 */
2490
2491void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
2492                    u64 bstart, u32 blen)
2493{
2494        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2495
2496        __gfs2_free_blocks(ip, rgd, bstart, blen, 1);
2497        gfs2_statfs_change(sdp, 0, +blen, 0);
2498        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2499}
2500
2501void gfs2_unlink_di(struct inode *inode)
2502{
2503        struct gfs2_inode *ip = GFS2_I(inode);
2504        struct gfs2_sbd *sdp = GFS2_SB(inode);
2505        struct gfs2_rgrpd *rgd;
2506        u64 blkno = ip->i_no_addr;
2507
2508        rgd = gfs2_blk2rgrpd(sdp, blkno, true);
2509        if (!rgd)
2510                return;
2511        rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2512        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2513        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2514        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2515        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
2516}
2517
2518void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2519{
2520        struct gfs2_sbd *sdp = rgd->rd_sbd;
2521
2522        rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2523        if (!rgd->rd_dinodes)
2524                gfs2_consist_rgrpd(rgd);
2525        rgd->rd_dinodes--;
2526        rgd->rd_free++;
2527
2528        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2529        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2530        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
2531
2532        gfs2_statfs_change(sdp, 0, +1, -1);
2533        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2534        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2535        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2536}
2537
2538/**
2539 * gfs2_check_blk_type - Check the type of a block
2540 * @sdp: The superblock
2541 * @no_addr: The block number to check
2542 * @type: The block type we are looking for
2543 *
2544 * Returns: 0 if the block type matches the expected type
2545 *          -ESTALE if it doesn't match
2546 *          or -ve errno if something went wrong while checking
2547 */
2548
2549int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2550{
2551        struct gfs2_rgrpd *rgd;
2552        struct gfs2_holder rgd_gh;
2553        struct gfs2_rbm rbm;
2554        int error = -EINVAL;
2555
2556        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2557        if (!rgd)
2558                goto fail;
2559
2560        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2561        if (error)
2562                goto fail;
2563
2564        rbm.rgd = rgd;
2565        error = gfs2_rbm_from_block(&rbm, no_addr);
2566        if (WARN_ON_ONCE(error))
2567                goto fail;
2568
2569        if (gfs2_testbit(&rbm, false) != type)
2570                error = -ESTALE;
2571
2572        gfs2_glock_dq_uninit(&rgd_gh);
2573fail:
2574        return error;
2575}
2576
2577/**
2578 * gfs2_rlist_add - add a RG to a list of RGs
2579 * @ip: the inode
2580 * @rlist: the list of resource groups
2581 * @block: the block
2582 *
2583 * Figure out what RG a block belongs to and add that RG to the list
2584 *
2585 * FIXME: Don't use NOFAIL
2586 *
2587 */
2588
2589void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2590                    u64 block)
2591{
2592        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2593        struct gfs2_rgrpd *rgd;
2594        struct gfs2_rgrpd **tmp;
2595        unsigned int new_space;
2596        unsigned int x;
2597
2598        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2599                return;
2600
2601        /*
2602         * The resource group last accessed is kept in the last position.
2603         */
2604
2605        if (rlist->rl_rgrps) {
2606                rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
2607                if (rgrp_contains_block(rgd, block))
2608                        return;
2609                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2610        } else {
2611                rgd = ip->i_res.rs_rbm.rgd;
2612                if (!rgd || !rgrp_contains_block(rgd, block))
2613                        rgd = gfs2_blk2rgrpd(sdp, block, 1);
2614        }
2615
2616        if (!rgd) {
2617                fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
2618                       (unsigned long long)block);
2619                return;
2620        }
2621
2622        for (x = 0; x < rlist->rl_rgrps; x++) {
2623                if (rlist->rl_rgd[x] == rgd) {
2624                        swap(rlist->rl_rgd[x],
2625                             rlist->rl_rgd[rlist->rl_rgrps - 1]);
2626                        return;
2627                }
2628        }
2629
2630        if (rlist->rl_rgrps == rlist->rl_space) {
2631                new_space = rlist->rl_space + 10;
2632
2633                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2634                              GFP_NOFS | __GFP_NOFAIL);
2635
2636                if (rlist->rl_rgd) {
2637                        memcpy(tmp, rlist->rl_rgd,
2638                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2639                        kfree(rlist->rl_rgd);
2640                }
2641
2642                rlist->rl_space = new_space;
2643                rlist->rl_rgd = tmp;
2644        }
2645
2646        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2647}
2648
2649/**
2650 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2651 *      and initialize an array of glock holders for them
2652 * @rlist: the list of resource groups
2653 *
2654 * FIXME: Don't use NOFAIL
2655 *
2656 */
2657
2658void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
2659{
2660        unsigned int x;
2661
2662        rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
2663                                      sizeof(struct gfs2_holder),
2664                                      GFP_NOFS | __GFP_NOFAIL);
2665        for (x = 0; x < rlist->rl_rgrps; x++)
2666                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2667                                LM_ST_EXCLUSIVE, 0,
2668                                &rlist->rl_ghs[x]);
2669}
2670
2671/**
2672 * gfs2_rlist_free - free a resource group list
2673 * @rlist: the list of resource groups
2674 *
2675 */
2676
2677void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2678{
2679        unsigned int x;
2680
2681        kfree(rlist->rl_rgd);
2682
2683        if (rlist->rl_ghs) {
2684                for (x = 0; x < rlist->rl_rgrps; x++)
2685                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2686                kfree(rlist->rl_ghs);
2687                rlist->rl_ghs = NULL;
2688        }
2689}
2690
2691