linux/fs/gfs2/rgrp.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/slab.h>
  13#include <linux/spinlock.h>
  14#include <linux/completion.h>
  15#include <linux/buffer_head.h>
  16#include <linux/fs.h>
  17#include <linux/gfs2_ondisk.h>
  18#include <linux/prefetch.h>
  19#include <linux/blkdev.h>
  20#include <linux/rbtree.h>
  21#include <linux/random.h>
  22
  23#include "gfs2.h"
  24#include "incore.h"
  25#include "glock.h"
  26#include "glops.h"
  27#include "lops.h"
  28#include "meta_io.h"
  29#include "quota.h"
  30#include "rgrp.h"
  31#include "super.h"
  32#include "trans.h"
  33#include "util.h"
  34#include "log.h"
  35#include "inode.h"
  36#include "trace_gfs2.h"
  37
  38#define BFITNOENT ((u32)~0)
  39#define NO_BLOCK ((u64)~0)
  40
  41#if BITS_PER_LONG == 32
  42#define LBITMASK   (0x55555555UL)
  43#define LBITSKIP55 (0x55555555UL)
  44#define LBITSKIP00 (0x00000000UL)
  45#else
  46#define LBITMASK   (0x5555555555555555UL)
  47#define LBITSKIP55 (0x5555555555555555UL)
  48#define LBITSKIP00 (0x0000000000000000UL)
  49#endif
  50
  51/*
  52 * These routines are used by the resource group routines (rgrp.c)
  53 * to keep track of block allocation.  Each block is represented by two
  54 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  55 *
  56 * 0 = Free
  57 * 1 = Used (not metadata)
  58 * 2 = Unlinked (still in use) inode
  59 * 3 = Used (metadata)
  60 */
  61
  62struct gfs2_extent {
  63        struct gfs2_rbm rbm;
  64        u32 len;
  65};
  66
  67static const char valid_change[16] = {
  68                /* current */
  69        /* n */ 0, 1, 1, 1,
  70        /* e */ 1, 0, 0, 0,
  71        /* w */ 0, 0, 0, 1,
  72                1, 0, 0, 0
  73};
  74
  75static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
  76                         const struct gfs2_inode *ip, bool nowrap,
  77                         const struct gfs2_alloc_parms *ap);
  78
  79
  80/**
  81 * gfs2_setbit - Set a bit in the bitmaps
  82 * @rbm: The position of the bit to set
  83 * @do_clone: Also set the clone bitmap, if it exists
  84 * @new_state: the new state of the block
  85 *
  86 */
  87
  88static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
  89                               unsigned char new_state)
  90{
  91        unsigned char *byte1, *byte2, *end, cur_state;
  92        struct gfs2_bitmap *bi = rbm_bi(rbm);
  93        unsigned int buflen = bi->bi_len;
  94        const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
  95
  96        byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
  97        end = bi->bi_bh->b_data + bi->bi_offset + buflen;
  98
  99        BUG_ON(byte1 >= end);
 100
 101        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 102
 103        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
 104                pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
 105                        rbm->offset, cur_state, new_state);
 106                pr_warn("rgrp=0x%llx bi_start=0x%x\n",
 107                        (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
 108                pr_warn("bi_offset=0x%x bi_len=0x%x\n",
 109                        bi->bi_offset, bi->bi_len);
 110                dump_stack();
 111                gfs2_consist_rgrpd(rbm->rgd);
 112                return;
 113        }
 114        *byte1 ^= (cur_state ^ new_state) << bit;
 115
 116        if (do_clone && bi->bi_clone) {
 117                byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
 118                cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 119                *byte2 ^= (cur_state ^ new_state) << bit;
 120        }
 121}
 122
 123/**
 124 * gfs2_testbit - test a bit in the bitmaps
 125 * @rbm: The bit to test
 126 *
 127 * Returns: The two bit block state of the requested bit
 128 */
 129
 130static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 131{
 132        struct gfs2_bitmap *bi = rbm_bi(rbm);
 133        const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
 134        const u8 *byte;
 135        unsigned int bit;
 136
 137        byte = buffer + (rbm->offset / GFS2_NBBY);
 138        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 139
 140        return (*byte >> bit) & GFS2_BIT_MASK;
 141}
 142
 143/**
 144 * gfs2_bit_search
 145 * @ptr: Pointer to bitmap data
 146 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 147 * @state: The state we are searching for
 148 *
 149 * We xor the bitmap data with a patter which is the bitwise opposite
 150 * of what we are looking for, this gives rise to a pattern of ones
 151 * wherever there is a match. Since we have two bits per entry, we
 152 * take this pattern, shift it down by one place and then and it with
 153 * the original. All the even bit positions (0,2,4, etc) then represent
 154 * successful matches, so we mask with 0x55555..... to remove the unwanted
 155 * odd bit positions.
 156 *
 157 * This allows searching of a whole u64 at once (32 blocks) with a
 158 * single test (on 64 bit arches).
 159 */
 160
 161static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 162{
 163        u64 tmp;
 164        static const u64 search[] = {
 165                [0] = 0xffffffffffffffffULL,
 166                [1] = 0xaaaaaaaaaaaaaaaaULL,
 167                [2] = 0x5555555555555555ULL,
 168                [3] = 0x0000000000000000ULL,
 169        };
 170        tmp = le64_to_cpu(*ptr) ^ search[state];
 171        tmp &= (tmp >> 1);
 172        tmp &= mask;
 173        return tmp;
 174}
 175
 176/**
 177 * rs_cmp - multi-block reservation range compare
 178 * @blk: absolute file system block number of the new reservation
 179 * @len: number of blocks in the new reservation
 180 * @rs: existing reservation to compare against
 181 *
 182 * returns: 1 if the block range is beyond the reach of the reservation
 183 *         -1 if the block range is before the start of the reservation
 184 *          0 if the block range overlaps with the reservation
 185 */
 186static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 187{
 188        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 189
 190        if (blk >= startblk + rs->rs_free)
 191                return 1;
 192        if (blk + len - 1 < startblk)
 193                return -1;
 194        return 0;
 195}
 196
 197/**
 198 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 199 *       a block in a given allocation state.
 200 * @buf: the buffer that holds the bitmaps
 201 * @len: the length (in bytes) of the buffer
 202 * @goal: start search at this block's bit-pair (within @buffer)
 203 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 204 *
 205 * Scope of @goal and returned block number is only within this bitmap buffer,
 206 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 207 * beginning of a bitmap block buffer, skipping any header structures, but
 208 * headers are always a multiple of 64 bits long so that the buffer is
 209 * always aligned to a 64 bit boundary.
 210 *
 211 * The size of the buffer is in bytes, but is it assumed that it is
 212 * always ok to read a complete multiple of 64 bits at the end
 213 * of the block in case the end is no aligned to a natural boundary.
 214 *
 215 * Return: the block number (bitmap buffer scope) that was found
 216 */
 217
 218static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 219                       u32 goal, u8 state)
 220{
 221        u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 222        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 223        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 224        u64 tmp;
 225        u64 mask = 0x5555555555555555ULL;
 226        u32 bit;
 227
 228        /* Mask off bits we don't care about at the start of the search */
 229        mask <<= spoint;
 230        tmp = gfs2_bit_search(ptr, mask, state);
 231        ptr++;
 232        while(tmp == 0 && ptr < end) {
 233                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 234                ptr++;
 235        }
 236        /* Mask off any bits which are more than len bytes from the start */
 237        if (ptr == end && (len & (sizeof(u64) - 1)))
 238                tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 239        /* Didn't find anything, so return */
 240        if (tmp == 0)
 241                return BFITNOENT;
 242        ptr--;
 243        bit = __ffs64(tmp);
 244        bit /= 2;       /* two bits per entry in the bitmap */
 245        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 246}
 247
 248/**
 249 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 250 * @rbm: The rbm with rgd already set correctly
 251 * @block: The block number (filesystem relative)
 252 *
 253 * This sets the bi and offset members of an rbm based on a
 254 * resource group and a filesystem relative block number. The
 255 * resource group must be set in the rbm on entry, the bi and
 256 * offset members will be set by this function.
 257 *
 258 * Returns: 0 on success, or an error code
 259 */
 260
 261static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 262{
 263        u64 rblock = block - rbm->rgd->rd_data0;
 264
 265        if (WARN_ON_ONCE(rblock > UINT_MAX))
 266                return -EINVAL;
 267        if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
 268                return -E2BIG;
 269
 270        rbm->bii = 0;
 271        rbm->offset = (u32)(rblock);
 272        /* Check if the block is within the first block */
 273        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
 274                return 0;
 275
 276        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
 277        rbm->offset += (sizeof(struct gfs2_rgrp) -
 278                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
 279        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 280        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
 281        return 0;
 282}
 283
 284/**
 285 * gfs2_rbm_incr - increment an rbm structure
 286 * @rbm: The rbm with rgd already set correctly
 287 *
 288 * This function takes an existing rbm structure and increments it to the next
 289 * viable block offset.
 290 *
 291 * Returns: If incrementing the offset would cause the rbm to go past the
 292 *          end of the rgrp, true is returned, otherwise false.
 293 *
 294 */
 295
 296static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
 297{
 298        if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
 299                rbm->offset++;
 300                return false;
 301        }
 302        if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
 303                return true;
 304
 305        rbm->offset = 0;
 306        rbm->bii++;
 307        return false;
 308}
 309
 310/**
 311 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 312 * @rbm: Position to search (value/result)
 313 * @n_unaligned: Number of unaligned blocks to check
 314 * @len: Decremented for each block found (terminate on zero)
 315 *
 316 * Returns: true if a non-free block is encountered
 317 */
 318
 319static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
 320{
 321        u32 n;
 322        u8 res;
 323
 324        for (n = 0; n < n_unaligned; n++) {
 325                res = gfs2_testbit(rbm);
 326                if (res != GFS2_BLKST_FREE)
 327                        return true;
 328                (*len)--;
 329                if (*len == 0)
 330                        return true;
 331                if (gfs2_rbm_incr(rbm))
 332                        return true;
 333        }
 334
 335        return false;
 336}
 337
 338/**
 339 * gfs2_free_extlen - Return extent length of free blocks
 340 * @rrbm: Starting position
 341 * @len: Max length to check
 342 *
 343 * Starting at the block specified by the rbm, see how many free blocks
 344 * there are, not reading more than len blocks ahead. This can be done
 345 * using memchr_inv when the blocks are byte aligned, but has to be done
 346 * on a block by block basis in case of unaligned blocks. Also this
 347 * function can cope with bitmap boundaries (although it must stop on
 348 * a resource group boundary)
 349 *
 350 * Returns: Number of free blocks in the extent
 351 */
 352
 353static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 354{
 355        struct gfs2_rbm rbm = *rrbm;
 356        u32 n_unaligned = rbm.offset & 3;
 357        u32 size = len;
 358        u32 bytes;
 359        u32 chunk_size;
 360        u8 *ptr, *start, *end;
 361        u64 block;
 362        struct gfs2_bitmap *bi;
 363
 364        if (n_unaligned &&
 365            gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
 366                goto out;
 367
 368        n_unaligned = len & 3;
 369        /* Start is now byte aligned */
 370        while (len > 3) {
 371                bi = rbm_bi(&rbm);
 372                start = bi->bi_bh->b_data;
 373                if (bi->bi_clone)
 374                        start = bi->bi_clone;
 375                end = start + bi->bi_bh->b_size;
 376                start += bi->bi_offset;
 377                BUG_ON(rbm.offset & 3);
 378                start += (rbm.offset / GFS2_NBBY);
 379                bytes = min_t(u32, len / GFS2_NBBY, (end - start));
 380                ptr = memchr_inv(start, 0, bytes);
 381                chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
 382                chunk_size *= GFS2_NBBY;
 383                BUG_ON(len < chunk_size);
 384                len -= chunk_size;
 385                block = gfs2_rbm_to_block(&rbm);
 386                if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
 387                        n_unaligned = 0;
 388                        break;
 389                }
 390                if (ptr) {
 391                        n_unaligned = 3;
 392                        break;
 393                }
 394                n_unaligned = len & 3;
 395        }
 396
 397        /* Deal with any bits left over at the end */
 398        if (n_unaligned)
 399                gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
 400out:
 401        return size - len;
 402}
 403
 404/**
 405 * gfs2_bitcount - count the number of bits in a certain state
 406 * @rgd: the resource group descriptor
 407 * @buffer: the buffer that holds the bitmaps
 408 * @buflen: the length (in bytes) of the buffer
 409 * @state: the state of the block we're looking for
 410 *
 411 * Returns: The number of bits
 412 */
 413
 414static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 415                         unsigned int buflen, u8 state)
 416{
 417        const u8 *byte = buffer;
 418        const u8 *end = buffer + buflen;
 419        const u8 state1 = state << 2;
 420        const u8 state2 = state << 4;
 421        const u8 state3 = state << 6;
 422        u32 count = 0;
 423
 424        for (; byte < end; byte++) {
 425                if (((*byte) & 0x03) == state)
 426                        count++;
 427                if (((*byte) & 0x0C) == state1)
 428                        count++;
 429                if (((*byte) & 0x30) == state2)
 430                        count++;
 431                if (((*byte) & 0xC0) == state3)
 432                        count++;
 433        }
 434
 435        return count;
 436}
 437
 438/**
 439 * gfs2_rgrp_verify - Verify that a resource group is consistent
 440 * @rgd: the rgrp
 441 *
 442 */
 443
 444void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 445{
 446        struct gfs2_sbd *sdp = rgd->rd_sbd;
 447        struct gfs2_bitmap *bi = NULL;
 448        u32 length = rgd->rd_length;
 449        u32 count[4], tmp;
 450        int buf, x;
 451
 452        memset(count, 0, 4 * sizeof(u32));
 453
 454        /* Count # blocks in each of 4 possible allocation states */
 455        for (buf = 0; buf < length; buf++) {
 456                bi = rgd->rd_bits + buf;
 457                for (x = 0; x < 4; x++)
 458                        count[x] += gfs2_bitcount(rgd,
 459                                                  bi->bi_bh->b_data +
 460                                                  bi->bi_offset,
 461                                                  bi->bi_len, x);
 462        }
 463
 464        if (count[0] != rgd->rd_free) {
 465                if (gfs2_consist_rgrpd(rgd))
 466                        fs_err(sdp, "free data mismatch:  %u != %u\n",
 467                               count[0], rgd->rd_free);
 468                return;
 469        }
 470
 471        tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 472        if (count[1] != tmp) {
 473                if (gfs2_consist_rgrpd(rgd))
 474                        fs_err(sdp, "used data mismatch:  %u != %u\n",
 475                               count[1], tmp);
 476                return;
 477        }
 478
 479        if (count[2] + count[3] != rgd->rd_dinodes) {
 480                if (gfs2_consist_rgrpd(rgd))
 481                        fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 482                               count[2] + count[3], rgd->rd_dinodes);
 483                return;
 484        }
 485}
 486
 487static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 488{
 489        u64 first = rgd->rd_data0;
 490        u64 last = first + rgd->rd_data;
 491        return first <= block && block < last;
 492}
 493
 494/**
 495 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 496 * @sdp: The GFS2 superblock
 497 * @blk: The data block number
 498 * @exact: True if this needs to be an exact match
 499 *
 500 * Returns: The resource group, or NULL if not found
 501 */
 502
 503struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 504{
 505        struct rb_node *n, *next;
 506        struct gfs2_rgrpd *cur;
 507
 508        spin_lock(&sdp->sd_rindex_spin);
 509        n = sdp->sd_rindex_tree.rb_node;
 510        while (n) {
 511                cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 512                next = NULL;
 513                if (blk < cur->rd_addr)
 514                        next = n->rb_left;
 515                else if (blk >= cur->rd_data0 + cur->rd_data)
 516                        next = n->rb_right;
 517                if (next == NULL) {
 518                        spin_unlock(&sdp->sd_rindex_spin);
 519                        if (exact) {
 520                                if (blk < cur->rd_addr)
 521                                        return NULL;
 522                                if (blk >= cur->rd_data0 + cur->rd_data)
 523                                        return NULL;
 524                        }
 525                        return cur;
 526                }
 527                n = next;
 528        }
 529        spin_unlock(&sdp->sd_rindex_spin);
 530
 531        return NULL;
 532}
 533
 534/**
 535 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 536 * @sdp: The GFS2 superblock
 537 *
 538 * Returns: The first rgrp in the filesystem
 539 */
 540
 541struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 542{
 543        const struct rb_node *n;
 544        struct gfs2_rgrpd *rgd;
 545
 546        spin_lock(&sdp->sd_rindex_spin);
 547        n = rb_first(&sdp->sd_rindex_tree);
 548        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 549        spin_unlock(&sdp->sd_rindex_spin);
 550
 551        return rgd;
 552}
 553
 554/**
 555 * gfs2_rgrpd_get_next - get the next RG
 556 * @rgd: the resource group descriptor
 557 *
 558 * Returns: The next rgrp
 559 */
 560
 561struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 562{
 563        struct gfs2_sbd *sdp = rgd->rd_sbd;
 564        const struct rb_node *n;
 565
 566        spin_lock(&sdp->sd_rindex_spin);
 567        n = rb_next(&rgd->rd_node);
 568        if (n == NULL)
 569                n = rb_first(&sdp->sd_rindex_tree);
 570
 571        if (unlikely(&rgd->rd_node == n)) {
 572                spin_unlock(&sdp->sd_rindex_spin);
 573                return NULL;
 574        }
 575        rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 576        spin_unlock(&sdp->sd_rindex_spin);
 577        return rgd;
 578}
 579
 580void check_and_update_goal(struct gfs2_inode *ip)
 581{
 582        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 583        if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
 584                ip->i_goal = ip->i_no_addr;
 585}
 586
 587void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 588{
 589        int x;
 590
 591        for (x = 0; x < rgd->rd_length; x++) {
 592                struct gfs2_bitmap *bi = rgd->rd_bits + x;
 593                kfree(bi->bi_clone);
 594                bi->bi_clone = NULL;
 595        }
 596}
 597
 598/**
 599 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
 600 * @ip: the inode for this reservation
 601 */
 602int gfs2_rs_alloc(struct gfs2_inode *ip)
 603{
 604        int error = 0;
 605
 606        down_write(&ip->i_rw_mutex);
 607        if (ip->i_res)
 608                goto out;
 609
 610        ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
 611        if (!ip->i_res) {
 612                error = -ENOMEM;
 613                goto out;
 614        }
 615
 616        RB_CLEAR_NODE(&ip->i_res->rs_node);
 617out:
 618        up_write(&ip->i_rw_mutex);
 619        return error;
 620}
 621
 622static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 623{
 624        gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
 625                       (unsigned long long)rs->rs_inum,
 626                       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 627                       rs->rs_rbm.offset, rs->rs_free);
 628}
 629
 630/**
 631 * __rs_deltree - remove a multi-block reservation from the rgd tree
 632 * @rs: The reservation to remove
 633 *
 634 */
 635static void __rs_deltree(struct gfs2_blkreserv *rs)
 636{
 637        struct gfs2_rgrpd *rgd;
 638
 639        if (!gfs2_rs_active(rs))
 640                return;
 641
 642        rgd = rs->rs_rbm.rgd;
 643        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 644        rb_erase(&rs->rs_node, &rgd->rd_rstree);
 645        RB_CLEAR_NODE(&rs->rs_node);
 646
 647        if (rs->rs_free) {
 648                struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
 649
 650                /* return reserved blocks to the rgrp */
 651                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
 652                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
 653                /* The rgrp extent failure point is likely not to increase;
 654                   it will only do so if the freed blocks are somehow
 655                   contiguous with a span of free blocks that follows. Still,
 656                   it will force the number to be recalculated later. */
 657                rgd->rd_extfail_pt += rs->rs_free;
 658                rs->rs_free = 0;
 659                clear_bit(GBF_FULL, &bi->bi_flags);
 660        }
 661}
 662
 663/**
 664 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 665 * @rs: The reservation to remove
 666 *
 667 */
 668void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 669{
 670        struct gfs2_rgrpd *rgd;
 671
 672        rgd = rs->rs_rbm.rgd;
 673        if (rgd) {
 674                spin_lock(&rgd->rd_rsspin);
 675                __rs_deltree(rs);
 676                spin_unlock(&rgd->rd_rsspin);
 677        }
 678}
 679
 680/**
 681 * gfs2_rs_delete - delete a multi-block reservation
 682 * @ip: The inode for this reservation
 683 * @wcount: The inode's write count, or NULL
 684 *
 685 */
 686void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
 687{
 688        down_write(&ip->i_rw_mutex);
 689        if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
 690                gfs2_rs_deltree(ip->i_res);
 691                BUG_ON(ip->i_res->rs_free);
 692                kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
 693                ip->i_res = NULL;
 694        }
 695        up_write(&ip->i_rw_mutex);
 696}
 697
 698/**
 699 * return_all_reservations - return all reserved blocks back to the rgrp.
 700 * @rgd: the rgrp that needs its space back
 701 *
 702 * We previously reserved a bunch of blocks for allocation. Now we need to
 703 * give them back. This leave the reservation structures in tact, but removes
 704 * all of their corresponding "no-fly zones".
 705 */
 706static void return_all_reservations(struct gfs2_rgrpd *rgd)
 707{
 708        struct rb_node *n;
 709        struct gfs2_blkreserv *rs;
 710
 711        spin_lock(&rgd->rd_rsspin);
 712        while ((n = rb_first(&rgd->rd_rstree))) {
 713                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 714                __rs_deltree(rs);
 715        }
 716        spin_unlock(&rgd->rd_rsspin);
 717}
 718
 719void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 720{
 721        struct rb_node *n;
 722        struct gfs2_rgrpd *rgd;
 723        struct gfs2_glock *gl;
 724
 725        while ((n = rb_first(&sdp->sd_rindex_tree))) {
 726                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 727                gl = rgd->rd_gl;
 728
 729                rb_erase(n, &sdp->sd_rindex_tree);
 730
 731                if (gl) {
 732                        spin_lock(&gl->gl_lockref.lock);
 733                        gl->gl_object = NULL;
 734                        spin_unlock(&gl->gl_lockref.lock);
 735                        gfs2_glock_add_to_lru(gl);
 736                        gfs2_glock_put(gl);
 737                }
 738
 739                gfs2_free_clones(rgd);
 740                kfree(rgd->rd_bits);
 741                return_all_reservations(rgd);
 742                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 743        }
 744}
 745
 746static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 747{
 748        pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 749        pr_info("ri_length = %u\n", rgd->rd_length);
 750        pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 751        pr_info("ri_data = %u\n", rgd->rd_data);
 752        pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
 753}
 754
 755/**
 756 * gfs2_compute_bitstructs - Compute the bitmap sizes
 757 * @rgd: The resource group descriptor
 758 *
 759 * Calculates bitmap descriptors, one for each block that contains bitmap data
 760 *
 761 * Returns: errno
 762 */
 763
 764static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 765{
 766        struct gfs2_sbd *sdp = rgd->rd_sbd;
 767        struct gfs2_bitmap *bi;
 768        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 769        u32 bytes_left, bytes;
 770        int x;
 771
 772        if (!length)
 773                return -EINVAL;
 774
 775        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 776        if (!rgd->rd_bits)
 777                return -ENOMEM;
 778
 779        bytes_left = rgd->rd_bitbytes;
 780
 781        for (x = 0; x < length; x++) {
 782                bi = rgd->rd_bits + x;
 783
 784                bi->bi_flags = 0;
 785                /* small rgrp; bitmap stored completely in header block */
 786                if (length == 1) {
 787                        bytes = bytes_left;
 788                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 789                        bi->bi_start = 0;
 790                        bi->bi_len = bytes;
 791                        bi->bi_blocks = bytes * GFS2_NBBY;
 792                /* header block */
 793                } else if (x == 0) {
 794                        bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 795                        bi->bi_offset = sizeof(struct gfs2_rgrp);
 796                        bi->bi_start = 0;
 797                        bi->bi_len = bytes;
 798                        bi->bi_blocks = bytes * GFS2_NBBY;
 799                /* last block */
 800                } else if (x + 1 == length) {
 801                        bytes = bytes_left;
 802                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 803                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 804                        bi->bi_len = bytes;
 805                        bi->bi_blocks = bytes * GFS2_NBBY;
 806                /* other blocks */
 807                } else {
 808                        bytes = sdp->sd_sb.sb_bsize -
 809                                sizeof(struct gfs2_meta_header);
 810                        bi->bi_offset = sizeof(struct gfs2_meta_header);
 811                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
 812                        bi->bi_len = bytes;
 813                        bi->bi_blocks = bytes * GFS2_NBBY;
 814                }
 815
 816                bytes_left -= bytes;
 817        }
 818
 819        if (bytes_left) {
 820                gfs2_consist_rgrpd(rgd);
 821                return -EIO;
 822        }
 823        bi = rgd->rd_bits + (length - 1);
 824        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 825                if (gfs2_consist_rgrpd(rgd)) {
 826                        gfs2_rindex_print(rgd);
 827                        fs_err(sdp, "start=%u len=%u offset=%u\n",
 828                               bi->bi_start, bi->bi_len, bi->bi_offset);
 829                }
 830                return -EIO;
 831        }
 832
 833        return 0;
 834}
 835
 836/**
 837 * gfs2_ri_total - Total up the file system space, according to the rindex.
 838 * @sdp: the filesystem
 839 *
 840 */
 841u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 842{
 843        u64 total_data = 0;     
 844        struct inode *inode = sdp->sd_rindex;
 845        struct gfs2_inode *ip = GFS2_I(inode);
 846        char buf[sizeof(struct gfs2_rindex)];
 847        int error, rgrps;
 848
 849        for (rgrps = 0;; rgrps++) {
 850                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 851
 852                if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 853                        break;
 854                error = gfs2_internal_read(ip, buf, &pos,
 855                                           sizeof(struct gfs2_rindex));
 856                if (error != sizeof(struct gfs2_rindex))
 857                        break;
 858                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 859        }
 860        return total_data;
 861}
 862
 863static int rgd_insert(struct gfs2_rgrpd *rgd)
 864{
 865        struct gfs2_sbd *sdp = rgd->rd_sbd;
 866        struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 867
 868        /* Figure out where to put new node */
 869        while (*newn) {
 870                struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 871                                                  rd_node);
 872
 873                parent = *newn;
 874                if (rgd->rd_addr < cur->rd_addr)
 875                        newn = &((*newn)->rb_left);
 876                else if (rgd->rd_addr > cur->rd_addr)
 877                        newn = &((*newn)->rb_right);
 878                else
 879                        return -EEXIST;
 880        }
 881
 882        rb_link_node(&rgd->rd_node, parent, newn);
 883        rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 884        sdp->sd_rgrps++;
 885        return 0;
 886}
 887
 888/**
 889 * read_rindex_entry - Pull in a new resource index entry from the disk
 890 * @ip: Pointer to the rindex inode
 891 *
 892 * Returns: 0 on success, > 0 on EOF, error code otherwise
 893 */
 894
 895static int read_rindex_entry(struct gfs2_inode *ip)
 896{
 897        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 898        const unsigned bsize = sdp->sd_sb.sb_bsize;
 899        loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 900        struct gfs2_rindex buf;
 901        int error;
 902        struct gfs2_rgrpd *rgd;
 903
 904        if (pos >= i_size_read(&ip->i_inode))
 905                return 1;
 906
 907        error = gfs2_internal_read(ip, (char *)&buf, &pos,
 908                                   sizeof(struct gfs2_rindex));
 909
 910        if (error != sizeof(struct gfs2_rindex))
 911                return (error == 0) ? 1 : error;
 912
 913        rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 914        error = -ENOMEM;
 915        if (!rgd)
 916                return error;
 917
 918        rgd->rd_sbd = sdp;
 919        rgd->rd_addr = be64_to_cpu(buf.ri_addr);
 920        rgd->rd_length = be32_to_cpu(buf.ri_length);
 921        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 922        rgd->rd_data = be32_to_cpu(buf.ri_data);
 923        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 924        spin_lock_init(&rgd->rd_rsspin);
 925
 926        error = compute_bitstructs(rgd);
 927        if (error)
 928                goto fail;
 929
 930        error = gfs2_glock_get(sdp, rgd->rd_addr,
 931                               &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 932        if (error)
 933                goto fail;
 934
 935        rgd->rd_gl->gl_object = rgd;
 936        rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK;
 937        rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr +
 938                                                  rgd->rd_length) * bsize) - 1;
 939        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 940        rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 941        if (rgd->rd_data > sdp->sd_max_rg_data)
 942                sdp->sd_max_rg_data = rgd->rd_data;
 943        spin_lock(&sdp->sd_rindex_spin);
 944        error = rgd_insert(rgd);
 945        spin_unlock(&sdp->sd_rindex_spin);
 946        if (!error)
 947                return 0;
 948
 949        error = 0; /* someone else read in the rgrp; free it and ignore it */
 950        gfs2_glock_put(rgd->rd_gl);
 951
 952fail:
 953        kfree(rgd->rd_bits);
 954        kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 955        return error;
 956}
 957
 958/**
 959 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
 960 * @sdp: the GFS2 superblock
 961 *
 962 * The purpose of this function is to select a subset of the resource groups
 963 * and mark them as PREFERRED. We do it in such a way that each node prefers
 964 * to use a unique set of rgrps to minimize glock contention.
 965 */
 966static void set_rgrp_preferences(struct gfs2_sbd *sdp)
 967{
 968        struct gfs2_rgrpd *rgd, *first;
 969        int i;
 970
 971        /* Skip an initial number of rgrps, based on this node's journal ID.
 972           That should start each node out on its own set. */
 973        rgd = gfs2_rgrpd_get_first(sdp);
 974        for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
 975                rgd = gfs2_rgrpd_get_next(rgd);
 976        first = rgd;
 977
 978        do {
 979                rgd->rd_flags |= GFS2_RDF_PREFERRED;
 980                for (i = 0; i < sdp->sd_journals; i++) {
 981                        rgd = gfs2_rgrpd_get_next(rgd);
 982                        if (!rgd || rgd == first)
 983                                break;
 984                }
 985        } while (rgd && rgd != first);
 986}
 987
 988/**
 989 * gfs2_ri_update - Pull in a new resource index from the disk
 990 * @ip: pointer to the rindex inode
 991 *
 992 * Returns: 0 on successful update, error code otherwise
 993 */
 994
 995static int gfs2_ri_update(struct gfs2_inode *ip)
 996{
 997        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 998        int error;
 999
1000        do {
1001                error = read_rindex_entry(ip);
1002        } while (error == 0);
1003
1004        if (error < 0)
1005                return error;
1006
1007        set_rgrp_preferences(sdp);
1008
1009        sdp->sd_rindex_uptodate = 1;
1010        return 0;
1011}
1012
1013/**
1014 * gfs2_rindex_update - Update the rindex if required
1015 * @sdp: The GFS2 superblock
1016 *
1017 * We grab a lock on the rindex inode to make sure that it doesn't
1018 * change whilst we are performing an operation. We keep this lock
1019 * for quite long periods of time compared to other locks. This
1020 * doesn't matter, since it is shared and it is very, very rarely
1021 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1022 *
1023 * This makes sure that we're using the latest copy of the resource index
1024 * special file, which might have been updated if someone expanded the
1025 * filesystem (via gfs2_grow utility), which adds new resource groups.
1026 *
1027 * Returns: 0 on succeess, error code otherwise
1028 */
1029
1030int gfs2_rindex_update(struct gfs2_sbd *sdp)
1031{
1032        struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1033        struct gfs2_glock *gl = ip->i_gl;
1034        struct gfs2_holder ri_gh;
1035        int error = 0;
1036        int unlock_required = 0;
1037
1038        /* Read new copy from disk if we don't have the latest */
1039        if (!sdp->sd_rindex_uptodate) {
1040                if (!gfs2_glock_is_locked_by_me(gl)) {
1041                        error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1042                        if (error)
1043                                return error;
1044                        unlock_required = 1;
1045                }
1046                if (!sdp->sd_rindex_uptodate)
1047                        error = gfs2_ri_update(ip);
1048                if (unlock_required)
1049                        gfs2_glock_dq_uninit(&ri_gh);
1050        }
1051
1052        return error;
1053}
1054
1055static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1056{
1057        const struct gfs2_rgrp *str = buf;
1058        u32 rg_flags;
1059
1060        rg_flags = be32_to_cpu(str->rg_flags);
1061        rg_flags &= ~GFS2_RDF_MASK;
1062        rgd->rd_flags &= GFS2_RDF_MASK;
1063        rgd->rd_flags |= rg_flags;
1064        rgd->rd_free = be32_to_cpu(str->rg_free);
1065        rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1066        rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1067}
1068
1069static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1070{
1071        struct gfs2_rgrp *str = buf;
1072
1073        str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1074        str->rg_free = cpu_to_be32(rgd->rd_free);
1075        str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1076        str->__pad = cpu_to_be32(0);
1077        str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1078        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1079}
1080
1081static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1082{
1083        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1084        struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1085
1086        if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1087            rgl->rl_dinodes != str->rg_dinodes ||
1088            rgl->rl_igeneration != str->rg_igeneration)
1089                return 0;
1090        return 1;
1091}
1092
1093static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1094{
1095        const struct gfs2_rgrp *str = buf;
1096
1097        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1098        rgl->rl_flags = str->rg_flags;
1099        rgl->rl_free = str->rg_free;
1100        rgl->rl_dinodes = str->rg_dinodes;
1101        rgl->rl_igeneration = str->rg_igeneration;
1102        rgl->__pad = 0UL;
1103}
1104
1105static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1106{
1107        struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1108        u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1109        rgl->rl_unlinked = cpu_to_be32(unlinked);
1110}
1111
1112static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1113{
1114        struct gfs2_bitmap *bi;
1115        const u32 length = rgd->rd_length;
1116        const u8 *buffer = NULL;
1117        u32 i, goal, count = 0;
1118
1119        for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1120                goal = 0;
1121                buffer = bi->bi_bh->b_data + bi->bi_offset;
1122                WARN_ON(!buffer_uptodate(bi->bi_bh));
1123                while (goal < bi->bi_len * GFS2_NBBY) {
1124                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1125                                           GFS2_BLKST_UNLINKED);
1126                        if (goal == BFITNOENT)
1127                                break;
1128                        count++;
1129                        goal++;
1130                }
1131        }
1132
1133        return count;
1134}
1135
1136
1137/**
1138 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1139 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1140 *
1141 * Read in all of a Resource Group's header and bitmap blocks.
1142 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
1143 *
1144 * Returns: errno
1145 */
1146
1147static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1148{
1149        struct gfs2_sbd *sdp = rgd->rd_sbd;
1150        struct gfs2_glock *gl = rgd->rd_gl;
1151        unsigned int length = rgd->rd_length;
1152        struct gfs2_bitmap *bi;
1153        unsigned int x, y;
1154        int error;
1155
1156        if (rgd->rd_bits[0].bi_bh != NULL)
1157                return 0;
1158
1159        for (x = 0; x < length; x++) {
1160                bi = rgd->rd_bits + x;
1161                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
1162                if (error)
1163                        goto fail;
1164        }
1165
1166        for (y = length; y--;) {
1167                bi = rgd->rd_bits + y;
1168                error = gfs2_meta_wait(sdp, bi->bi_bh);
1169                if (error)
1170                        goto fail;
1171                if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1172                                              GFS2_METATYPE_RG)) {
1173                        error = -EIO;
1174                        goto fail;
1175                }
1176        }
1177
1178        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1179                for (x = 0; x < length; x++)
1180                        clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1181                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1182                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1183                rgd->rd_free_clone = rgd->rd_free;
1184                /* max out the rgrp allocation failure point */
1185                rgd->rd_extfail_pt = rgd->rd_free;
1186        }
1187        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1188                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1189                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1190                                     rgd->rd_bits[0].bi_bh->b_data);
1191        }
1192        else if (sdp->sd_args.ar_rgrplvb) {
1193                if (!gfs2_rgrp_lvb_valid(rgd)){
1194                        gfs2_consist_rgrpd(rgd);
1195                        error = -EIO;
1196                        goto fail;
1197                }
1198                if (rgd->rd_rgl->rl_unlinked == 0)
1199                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1200        }
1201        return 0;
1202
1203fail:
1204        while (x--) {
1205                bi = rgd->rd_bits + x;
1206                brelse(bi->bi_bh);
1207                bi->bi_bh = NULL;
1208                gfs2_assert_warn(sdp, !bi->bi_clone);
1209        }
1210
1211        return error;
1212}
1213
1214static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1215{
1216        u32 rl_flags;
1217
1218        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1219                return 0;
1220
1221        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1222                return gfs2_rgrp_bh_get(rgd);
1223
1224        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1225        rl_flags &= ~GFS2_RDF_MASK;
1226        rgd->rd_flags &= GFS2_RDF_MASK;
1227        rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1228        if (rgd->rd_rgl->rl_unlinked == 0)
1229                rgd->rd_flags &= ~GFS2_RDF_CHECK;
1230        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1231        rgd->rd_free_clone = rgd->rd_free;
1232        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1233        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1234        return 0;
1235}
1236
1237int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1238{
1239        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1240        struct gfs2_sbd *sdp = rgd->rd_sbd;
1241
1242        if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1243                return 0;
1244        return gfs2_rgrp_bh_get(rgd);
1245}
1246
1247/**
1248 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1249 * @rgd: The resource group
1250 *
1251 */
1252
1253void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
1254{
1255        int x, length = rgd->rd_length;
1256
1257        for (x = 0; x < length; x++) {
1258                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1259                if (bi->bi_bh) {
1260                        brelse(bi->bi_bh);
1261                        bi->bi_bh = NULL;
1262                }
1263        }
1264
1265}
1266
1267/**
1268 * gfs2_rgrp_go_unlock - Unlock a rgrp glock
1269 * @gh: The glock holder for the resource group
1270 *
1271 */
1272
1273void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1274{
1275        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1276        int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
1277                test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
1278
1279        if (rgd && demote_requested)
1280                gfs2_rgrp_brelse(rgd);
1281}
1282
1283int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1284                             struct buffer_head *bh,
1285                             const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1286{
1287        struct super_block *sb = sdp->sd_vfs;
1288        u64 blk;
1289        sector_t start = 0;
1290        sector_t nr_blks = 0;
1291        int rv;
1292        unsigned int x;
1293        u32 trimmed = 0;
1294        u8 diff;
1295
1296        for (x = 0; x < bi->bi_len; x++) {
1297                const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1298                clone += bi->bi_offset;
1299                clone += x;
1300                if (bh) {
1301                        const u8 *orig = bh->b_data + bi->bi_offset + x;
1302                        diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1303                } else {
1304                        diff = ~(*clone | (*clone >> 1));
1305                }
1306                diff &= 0x55;
1307                if (diff == 0)
1308                        continue;
1309                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1310                while(diff) {
1311                        if (diff & 1) {
1312                                if (nr_blks == 0)
1313                                        goto start_new_extent;
1314                                if ((start + nr_blks) != blk) {
1315                                        if (nr_blks >= minlen) {
1316                                                rv = sb_issue_discard(sb,
1317                                                        start, nr_blks,
1318                                                        GFP_NOFS, 0);
1319                                                if (rv)
1320                                                        goto fail;
1321                                                trimmed += nr_blks;
1322                                        }
1323                                        nr_blks = 0;
1324start_new_extent:
1325                                        start = blk;
1326                                }
1327                                nr_blks++;
1328                        }
1329                        diff >>= 2;
1330                        blk++;
1331                }
1332        }
1333        if (nr_blks >= minlen) {
1334                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1335                if (rv)
1336                        goto fail;
1337                trimmed += nr_blks;
1338        }
1339        if (ptrimmed)
1340                *ptrimmed = trimmed;
1341        return 0;
1342
1343fail:
1344        if (sdp->sd_args.ar_discard)
1345                fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1346        sdp->sd_args.ar_discard = 0;
1347        return -EIO;
1348}
1349
1350/**
1351 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1352 * @filp: Any file on the filesystem
1353 * @argp: Pointer to the arguments (also used to pass result)
1354 *
1355 * Returns: 0 on success, otherwise error code
1356 */
1357
1358int gfs2_fitrim(struct file *filp, void __user *argp)
1359{
1360        struct inode *inode = file_inode(filp);
1361        struct gfs2_sbd *sdp = GFS2_SB(inode);
1362        struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1363        struct buffer_head *bh;
1364        struct gfs2_rgrpd *rgd;
1365        struct gfs2_rgrpd *rgd_end;
1366        struct gfs2_holder gh;
1367        struct fstrim_range r;
1368        int ret = 0;
1369        u64 amt;
1370        u64 trimmed = 0;
1371        u64 start, end, minlen;
1372        unsigned int x;
1373        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1374
1375        if (!capable(CAP_SYS_ADMIN))
1376                return -EPERM;
1377
1378        if (!blk_queue_discard(q))
1379                return -EOPNOTSUPP;
1380
1381        if (copy_from_user(&r, argp, sizeof(r)))
1382                return -EFAULT;
1383
1384        ret = gfs2_rindex_update(sdp);
1385        if (ret)
1386                return ret;
1387
1388        start = r.start >> bs_shift;
1389        end = start + (r.len >> bs_shift);
1390        minlen = max_t(u64, r.minlen,
1391                       q->limits.discard_granularity) >> bs_shift;
1392
1393        if (end <= start || minlen > sdp->sd_max_rg_data)
1394                return -EINVAL;
1395
1396        rgd = gfs2_blk2rgrpd(sdp, start, 0);
1397        rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1398
1399        if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1400            && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1401                return -EINVAL; /* start is beyond the end of the fs */
1402
1403        while (1) {
1404
1405                ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1406                if (ret)
1407                        goto out;
1408
1409                if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1410                        /* Trim each bitmap in the rgrp */
1411                        for (x = 0; x < rgd->rd_length; x++) {
1412                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
1413                                ret = gfs2_rgrp_send_discards(sdp,
1414                                                rgd->rd_data0, NULL, bi, minlen,
1415                                                &amt);
1416                                if (ret) {
1417                                        gfs2_glock_dq_uninit(&gh);
1418                                        goto out;
1419                                }
1420                                trimmed += amt;
1421                        }
1422
1423                        /* Mark rgrp as having been trimmed */
1424                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1425                        if (ret == 0) {
1426                                bh = rgd->rd_bits[0].bi_bh;
1427                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
1428                                gfs2_trans_add_meta(rgd->rd_gl, bh);
1429                                gfs2_rgrp_out(rgd, bh->b_data);
1430                                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1431                                gfs2_trans_end(sdp);
1432                        }
1433                }
1434                gfs2_glock_dq_uninit(&gh);
1435
1436                if (rgd == rgd_end)
1437                        break;
1438
1439                rgd = gfs2_rgrpd_get_next(rgd);
1440        }
1441
1442out:
1443        r.len = trimmed << bs_shift;
1444        if (copy_to_user(argp, &r, sizeof(r)))
1445                return -EFAULT;
1446
1447        return ret;
1448}
1449
1450/**
1451 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1452 * @ip: the inode structure
1453 *
1454 */
1455static void rs_insert(struct gfs2_inode *ip)
1456{
1457        struct rb_node **newn, *parent = NULL;
1458        int rc;
1459        struct gfs2_blkreserv *rs = ip->i_res;
1460        struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1461        u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1462
1463        BUG_ON(gfs2_rs_active(rs));
1464
1465        spin_lock(&rgd->rd_rsspin);
1466        newn = &rgd->rd_rstree.rb_node;
1467        while (*newn) {
1468                struct gfs2_blkreserv *cur =
1469                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1470
1471                parent = *newn;
1472                rc = rs_cmp(fsblock, rs->rs_free, cur);
1473                if (rc > 0)
1474                        newn = &((*newn)->rb_right);
1475                else if (rc < 0)
1476                        newn = &((*newn)->rb_left);
1477                else {
1478                        spin_unlock(&rgd->rd_rsspin);
1479                        WARN_ON(1);
1480                        return;
1481                }
1482        }
1483
1484        rb_link_node(&rs->rs_node, parent, newn);
1485        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1486
1487        /* Do our rgrp accounting for the reservation */
1488        rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1489        spin_unlock(&rgd->rd_rsspin);
1490        trace_gfs2_rs(rs, TRACE_RS_INSERT);
1491}
1492
1493/**
1494 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1495 * @rgd: the resource group descriptor
1496 * @ip: pointer to the inode for which we're reserving blocks
1497 * @ap: the allocation parameters
1498 *
1499 */
1500
1501static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1502                           const struct gfs2_alloc_parms *ap)
1503{
1504        struct gfs2_rbm rbm = { .rgd = rgd, };
1505        u64 goal;
1506        struct gfs2_blkreserv *rs = ip->i_res;
1507        u32 extlen;
1508        u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1509        int ret;
1510        struct inode *inode = &ip->i_inode;
1511
1512        if (S_ISDIR(inode->i_mode))
1513                extlen = 1;
1514        else {
1515                extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1516                extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1517        }
1518        if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1519                return;
1520
1521        /* Find bitmap block that contains bits for goal block */
1522        if (rgrp_contains_block(rgd, ip->i_goal))
1523                goal = ip->i_goal;
1524        else
1525                goal = rgd->rd_last_alloc + rgd->rd_data0;
1526
1527        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1528                return;
1529
1530        ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1531        if (ret == 0) {
1532                rs->rs_rbm = rbm;
1533                rs->rs_free = extlen;
1534                rs->rs_inum = ip->i_no_addr;
1535                rs_insert(ip);
1536        } else {
1537                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1538                        rgd->rd_last_alloc = 0;
1539        }
1540}
1541
1542/**
1543 * gfs2_next_unreserved_block - Return next block that is not reserved
1544 * @rgd: The resource group
1545 * @block: The starting block
1546 * @length: The required length
1547 * @ip: Ignore any reservations for this inode
1548 *
1549 * If the block does not appear in any reservation, then return the
1550 * block number unchanged. If it does appear in the reservation, then
1551 * keep looking through the tree of reservations in order to find the
1552 * first block number which is not reserved.
1553 */
1554
1555static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1556                                      u32 length,
1557                                      const struct gfs2_inode *ip)
1558{
1559        struct gfs2_blkreserv *rs;
1560        struct rb_node *n;
1561        int rc;
1562
1563        spin_lock(&rgd->rd_rsspin);
1564        n = rgd->rd_rstree.rb_node;
1565        while (n) {
1566                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1567                rc = rs_cmp(block, length, rs);
1568                if (rc < 0)
1569                        n = n->rb_left;
1570                else if (rc > 0)
1571                        n = n->rb_right;
1572                else
1573                        break;
1574        }
1575
1576        if (n) {
1577                while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1578                        block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1579                        n = n->rb_right;
1580                        if (n == NULL)
1581                                break;
1582                        rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1583                }
1584        }
1585
1586        spin_unlock(&rgd->rd_rsspin);
1587        return block;
1588}
1589
1590/**
1591 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1592 * @rbm: The current position in the resource group
1593 * @ip: The inode for which we are searching for blocks
1594 * @minext: The minimum extent length
1595 * @maxext: A pointer to the maximum extent structure
1596 *
1597 * This checks the current position in the rgrp to see whether there is
1598 * a reservation covering this block. If not then this function is a
1599 * no-op. If there is, then the position is moved to the end of the
1600 * contiguous reservation(s) so that we are pointing at the first
1601 * non-reserved block.
1602 *
1603 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1604 */
1605
1606static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1607                                             const struct gfs2_inode *ip,
1608                                             u32 minext,
1609                                             struct gfs2_extent *maxext)
1610{
1611        u64 block = gfs2_rbm_to_block(rbm);
1612        u32 extlen = 1;
1613        u64 nblock;
1614        int ret;
1615
1616        /*
1617         * If we have a minimum extent length, then skip over any extent
1618         * which is less than the min extent length in size.
1619         */
1620        if (minext) {
1621                extlen = gfs2_free_extlen(rbm, minext);
1622                if (extlen <= maxext->len)
1623                        goto fail;
1624        }
1625
1626        /*
1627         * Check the extent which has been found against the reservations
1628         * and skip if parts of it are already reserved
1629         */
1630        nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1631        if (nblock == block) {
1632                if (!minext || extlen >= minext)
1633                        return 0;
1634
1635                if (extlen > maxext->len) {
1636                        maxext->len = extlen;
1637                        maxext->rbm = *rbm;
1638                }
1639fail:
1640                nblock = block + extlen;
1641        }
1642        ret = gfs2_rbm_from_block(rbm, nblock);
1643        if (ret < 0)
1644                return ret;
1645        return 1;
1646}
1647
1648/**
1649 * gfs2_rbm_find - Look for blocks of a particular state
1650 * @rbm: Value/result starting position and final position
1651 * @state: The state which we want to find
1652 * @minext: Pointer to the requested extent length (NULL for a single block)
1653 *          This is updated to be the actual reservation size.
1654 * @ip: If set, check for reservations
1655 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1656 *          around until we've reached the starting point.
1657 * @ap: the allocation parameters
1658 *
1659 * Side effects:
1660 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1661 *   has no free blocks in it.
1662 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1663 *   has come up short on a free block search.
1664 *
1665 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1666 */
1667
1668static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1669                         const struct gfs2_inode *ip, bool nowrap,
1670                         const struct gfs2_alloc_parms *ap)
1671{
1672        struct buffer_head *bh;
1673        int initial_bii;
1674        u32 initial_offset;
1675        int first_bii = rbm->bii;
1676        u32 first_offset = rbm->offset;
1677        u32 offset;
1678        u8 *buffer;
1679        int n = 0;
1680        int iters = rbm->rgd->rd_length;
1681        int ret;
1682        struct gfs2_bitmap *bi;
1683        struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1684
1685        /* If we are not starting at the beginning of a bitmap, then we
1686         * need to add one to the bitmap count to ensure that we search
1687         * the starting bitmap twice.
1688         */
1689        if (rbm->offset != 0)
1690                iters++;
1691
1692        while(1) {
1693                bi = rbm_bi(rbm);
1694                if (test_bit(GBF_FULL, &bi->bi_flags) &&
1695                    (state == GFS2_BLKST_FREE))
1696                        goto next_bitmap;
1697
1698                bh = bi->bi_bh;
1699                buffer = bh->b_data + bi->bi_offset;
1700                WARN_ON(!buffer_uptodate(bh));
1701                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1702                        buffer = bi->bi_clone + bi->bi_offset;
1703                initial_offset = rbm->offset;
1704                offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1705                if (offset == BFITNOENT)
1706                        goto bitmap_full;
1707                rbm->offset = offset;
1708                if (ip == NULL)
1709                        return 0;
1710
1711                initial_bii = rbm->bii;
1712                ret = gfs2_reservation_check_and_update(rbm, ip,
1713                                                        minext ? *minext : 0,
1714                                                        &maxext);
1715                if (ret == 0)
1716                        return 0;
1717                if (ret > 0) {
1718                        n += (rbm->bii - initial_bii);
1719                        goto next_iter;
1720                }
1721                if (ret == -E2BIG) {
1722                        rbm->bii = 0;
1723                        rbm->offset = 0;
1724                        n += (rbm->bii - initial_bii);
1725                        goto res_covered_end_of_rgrp;
1726                }
1727                return ret;
1728
1729bitmap_full:    /* Mark bitmap as full and fall through */
1730                if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1731                        set_bit(GBF_FULL, &bi->bi_flags);
1732
1733next_bitmap:    /* Find next bitmap in the rgrp */
1734                rbm->offset = 0;
1735                rbm->bii++;
1736                if (rbm->bii == rbm->rgd->rd_length)
1737                        rbm->bii = 0;
1738res_covered_end_of_rgrp:
1739                if ((rbm->bii == 0) && nowrap)
1740                        break;
1741                n++;
1742next_iter:
1743                if (n >= iters)
1744                        break;
1745        }
1746
1747        if (minext == NULL || state != GFS2_BLKST_FREE)
1748                return -ENOSPC;
1749
1750        /* If the extent was too small, and it's smaller than the smallest
1751           to have failed before, remember for future reference that it's
1752           useless to search this rgrp again for this amount or more. */
1753        if ((first_offset == 0) && (first_bii == 0) &&
1754            (*minext < rbm->rgd->rd_extfail_pt))
1755                rbm->rgd->rd_extfail_pt = *minext;
1756
1757        /* If the maximum extent we found is big enough to fulfill the
1758           minimum requirements, use it anyway. */
1759        if (maxext.len) {
1760                *rbm = maxext.rbm;
1761                *minext = maxext.len;
1762                return 0;
1763        }
1764
1765        return -ENOSPC;
1766}
1767
1768/**
1769 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1770 * @rgd: The rgrp
1771 * @last_unlinked: block address of the last dinode we unlinked
1772 * @skip: block address we should explicitly not unlink
1773 *
1774 * Returns: 0 if no error
1775 *          The inode, if one has been found, in inode.
1776 */
1777
1778static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1779{
1780        u64 block;
1781        struct gfs2_sbd *sdp = rgd->rd_sbd;
1782        struct gfs2_glock *gl;
1783        struct gfs2_inode *ip;
1784        int error;
1785        int found = 0;
1786        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1787
1788        while (1) {
1789                down_write(&sdp->sd_log_flush_lock);
1790                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1791                                      true, NULL);
1792                up_write(&sdp->sd_log_flush_lock);
1793                if (error == -ENOSPC)
1794                        break;
1795                if (WARN_ON_ONCE(error))
1796                        break;
1797
1798                block = gfs2_rbm_to_block(&rbm);
1799                if (gfs2_rbm_from_block(&rbm, block + 1))
1800                        break;
1801                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1802                        continue;
1803                if (block == skip)
1804                        continue;
1805                *last_unlinked = block;
1806
1807                error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1808                if (error)
1809                        continue;
1810
1811                /* If the inode is already in cache, we can ignore it here
1812                 * because the existing inode disposal code will deal with
1813                 * it when all refs have gone away. Accessing gl_object like
1814                 * this is not safe in general. Here it is ok because we do
1815                 * not dereference the pointer, and we only need an approx
1816                 * answer to whether it is NULL or not.
1817                 */
1818                ip = gl->gl_object;
1819
1820                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1821                        gfs2_glock_put(gl);
1822                else
1823                        found++;
1824
1825                /* Limit reclaim to sensible number of tasks */
1826                if (found > NR_CPUS)
1827                        return;
1828        }
1829
1830        rgd->rd_flags &= ~GFS2_RDF_CHECK;
1831        return;
1832}
1833
1834/**
1835 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1836 * @rgd: The rgrp in question
1837 * @loops: An indication of how picky we can be (0=very, 1=less so)
1838 *
1839 * This function uses the recently added glock statistics in order to
1840 * figure out whether a parciular resource group is suffering from
1841 * contention from multiple nodes. This is done purely on the basis
1842 * of timings, since this is the only data we have to work with and
1843 * our aim here is to reject a resource group which is highly contended
1844 * but (very important) not to do this too often in order to ensure that
1845 * we do not land up introducing fragmentation by changing resource
1846 * groups when not actually required.
1847 *
1848 * The calculation is fairly simple, we want to know whether the SRTTB
1849 * (i.e. smoothed round trip time for blocking operations) to acquire
1850 * the lock for this rgrp's glock is significantly greater than the
1851 * time taken for resource groups on average. We introduce a margin in
1852 * the form of the variable @var which is computed as the sum of the two
1853 * respective variences, and multiplied by a factor depending on @loops
1854 * and whether we have a lot of data to base the decision on. This is
1855 * then tested against the square difference of the means in order to
1856 * decide whether the result is statistically significant or not.
1857 *
1858 * Returns: A boolean verdict on the congestion status
1859 */
1860
1861static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1862{
1863        const struct gfs2_glock *gl = rgd->rd_gl;
1864        const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1865        struct gfs2_lkstats *st;
1866        u64 r_dcount, l_dcount;
1867        u64 l_srttb, a_srttb = 0;
1868        s64 srttb_diff;
1869        u64 sqr_diff;
1870        u64 var;
1871        int cpu, nonzero = 0;
1872
1873        preempt_disable();
1874        for_each_present_cpu(cpu) {
1875                st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
1876                if (st->stats[GFS2_LKS_SRTTB]) {
1877                        a_srttb += st->stats[GFS2_LKS_SRTTB];
1878                        nonzero++;
1879                }
1880        }
1881        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1882        if (nonzero)
1883                do_div(a_srttb, nonzero);
1884        r_dcount = st->stats[GFS2_LKS_DCOUNT];
1885        var = st->stats[GFS2_LKS_SRTTVARB] +
1886              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1887        preempt_enable();
1888
1889        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1890        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1891
1892        if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
1893                return false;
1894
1895        srttb_diff = a_srttb - l_srttb;
1896        sqr_diff = srttb_diff * srttb_diff;
1897
1898        var *= 2;
1899        if (l_dcount < 8 || r_dcount < 8)
1900                var *= 2;
1901        if (loops == 1)
1902                var *= 2;
1903
1904        return ((srttb_diff < 0) && (sqr_diff > var));
1905}
1906
1907/**
1908 * gfs2_rgrp_used_recently
1909 * @rs: The block reservation with the rgrp to test
1910 * @msecs: The time limit in milliseconds
1911 *
1912 * Returns: True if the rgrp glock has been used within the time limit
1913 */
1914static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1915                                    u64 msecs)
1916{
1917        u64 tdiff;
1918
1919        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1920                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1921
1922        return tdiff > (msecs * 1000 * 1000);
1923}
1924
1925static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1926{
1927        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1928        u32 skip;
1929
1930        get_random_bytes(&skip, sizeof(skip));
1931        return skip % sdp->sd_rgrps;
1932}
1933
1934static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1935{
1936        struct gfs2_rgrpd *rgd = *pos;
1937        struct gfs2_sbd *sdp = rgd->rd_sbd;
1938
1939        rgd = gfs2_rgrpd_get_next(rgd);
1940        if (rgd == NULL)
1941                rgd = gfs2_rgrpd_get_first(sdp);
1942        *pos = rgd;
1943        if (rgd != begin) /* If we didn't wrap */
1944                return true;
1945        return false;
1946}
1947
1948/**
1949 * fast_to_acquire - determine if a resource group will be fast to acquire
1950 *
1951 * If this is one of our preferred rgrps, it should be quicker to acquire,
1952 * because we tried to set ourselves up as dlm lock master.
1953 */
1954static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1955{
1956        struct gfs2_glock *gl = rgd->rd_gl;
1957
1958        if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1959            !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1960            !test_bit(GLF_DEMOTE, &gl->gl_flags))
1961                return 1;
1962        if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1963                return 1;
1964        return 0;
1965}
1966
1967/**
1968 * gfs2_inplace_reserve - Reserve space in the filesystem
1969 * @ip: the inode to reserve space for
1970 * @ap: the allocation parameters
1971 *
1972 * We try our best to find an rgrp that has at least ap->target blocks
1973 * available. After a couple of passes (loops == 2), the prospects of finding
1974 * such an rgrp diminish. At this stage, we return the first rgrp that has
1975 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1976 * the number of blocks available in the chosen rgrp.
1977 *
1978 * Returns: 0 on success,
1979 *          -ENOMEM if a suitable rgrp can't be found
1980 *          errno otherwise
1981 */
1982
1983int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1984{
1985        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1986        struct gfs2_rgrpd *begin = NULL;
1987        struct gfs2_blkreserv *rs = ip->i_res;
1988        int error = 0, rg_locked, flags = 0;
1989        u64 last_unlinked = NO_BLOCK;
1990        int loops = 0;
1991        u32 skip = 0;
1992
1993        if (sdp->sd_args.ar_rgrplvb)
1994                flags |= GL_SKIP;
1995        if (gfs2_assert_warn(sdp, ap->target))
1996                return -EINVAL;
1997        if (gfs2_rs_active(rs)) {
1998                begin = rs->rs_rbm.rgd;
1999        } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
2000                rs->rs_rbm.rgd = begin = ip->i_rgd;
2001        } else {
2002                check_and_update_goal(ip);
2003                rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
2004        }
2005        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
2006                skip = gfs2_orlov_skip(ip);
2007        if (rs->rs_rbm.rgd == NULL)
2008                return -EBADSLT;
2009
2010        while (loops < 3) {
2011                rg_locked = 1;
2012
2013                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
2014                        rg_locked = 0;
2015                        if (skip && skip--)
2016                                goto next_rgrp;
2017                        if (!gfs2_rs_active(rs)) {
2018                                if (loops == 0 &&
2019                                    !fast_to_acquire(rs->rs_rbm.rgd))
2020                                        goto next_rgrp;
2021                                if ((loops < 2) &&
2022                                    gfs2_rgrp_used_recently(rs, 1000) &&
2023                                    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2024                                        goto next_rgrp;
2025                        }
2026                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2027                                                   LM_ST_EXCLUSIVE, flags,
2028                                                   &rs->rs_rgd_gh);
2029                        if (unlikely(error))
2030                                return error;
2031                        if (!gfs2_rs_active(rs) && (loops < 2) &&
2032                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2033                                goto skip_rgrp;
2034                        if (sdp->sd_args.ar_rgrplvb) {
2035                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
2036                                if (unlikely(error)) {
2037                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2038                                        return error;
2039                                }
2040                        }
2041                }
2042
2043                /* Skip unuseable resource groups */
2044                if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2045                                                 GFS2_RDF_ERROR)) ||
2046                    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2047                        goto skip_rgrp;
2048
2049                if (sdp->sd_args.ar_rgrplvb)
2050                        gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2051
2052                /* Get a reservation if we don't already have one */
2053                if (!gfs2_rs_active(rs))
2054                        rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2055
2056                /* Skip rgrps when we can't get a reservation on first pass */
2057                if (!gfs2_rs_active(rs) && (loops < 1))
2058                        goto check_rgrp;
2059
2060                /* If rgrp has enough free space, use it */
2061                if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2062                    (loops == 2 && ap->min_target &&
2063                     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2064                        ip->i_rgd = rs->rs_rbm.rgd;
2065                        ap->allowed = ip->i_rgd->rd_free_clone;
2066                        return 0;
2067                }
2068check_rgrp:
2069                /* Check for unlinked inodes which can be reclaimed */
2070                if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2071                        try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2072                                        ip->i_no_addr);
2073skip_rgrp:
2074                /* Drop reservation, if we couldn't use reserved rgrp */
2075                if (gfs2_rs_active(rs))
2076                        gfs2_rs_deltree(rs);
2077
2078                /* Unlock rgrp if required */
2079                if (!rg_locked)
2080                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2081next_rgrp:
2082                /* Find the next rgrp, and continue looking */
2083                if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2084                        continue;
2085                if (skip)
2086                        continue;
2087
2088                /* If we've scanned all the rgrps, but found no free blocks
2089                 * then this checks for some less likely conditions before
2090                 * trying again.
2091                 */
2092                loops++;
2093                /* Check that fs hasn't grown if writing to rindex */
2094                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2095                        error = gfs2_ri_update(ip);
2096                        if (error)
2097                                return error;
2098                }
2099                /* Flushing the log may release space */
2100                if (loops == 2)
2101                        gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2102        }
2103
2104        return -ENOSPC;
2105}
2106
2107/**
2108 * gfs2_inplace_release - release an inplace reservation
2109 * @ip: the inode the reservation was taken out on
2110 *
2111 * Release a reservation made by gfs2_inplace_reserve().
2112 */
2113
2114void gfs2_inplace_release(struct gfs2_inode *ip)
2115{
2116        struct gfs2_blkreserv *rs = ip->i_res;
2117
2118        if (rs->rs_rgd_gh.gh_gl)
2119                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2120}
2121
2122/**
2123 * gfs2_get_block_type - Check a block in a RG is of given type
2124 * @rgd: the resource group holding the block
2125 * @block: the block number
2126 *
2127 * Returns: The block type (GFS2_BLKST_*)
2128 */
2129
2130static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2131{
2132        struct gfs2_rbm rbm = { .rgd = rgd, };
2133        int ret;
2134
2135        ret = gfs2_rbm_from_block(&rbm, block);
2136        WARN_ON_ONCE(ret != 0);
2137
2138        return gfs2_testbit(&rbm);
2139}
2140
2141
2142/**
2143 * gfs2_alloc_extent - allocate an extent from a given bitmap
2144 * @rbm: the resource group information
2145 * @dinode: TRUE if the first block we allocate is for a dinode
2146 * @n: The extent length (value/result)
2147 *
2148 * Add the bitmap buffer to the transaction.
2149 * Set the found bits to @new_state to change block's allocation state.
2150 */
2151static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2152                             unsigned int *n)
2153{
2154        struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2155        const unsigned int elen = *n;
2156        u64 block;
2157        int ret;
2158
2159        *n = 1;
2160        block = gfs2_rbm_to_block(rbm);
2161        gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2162        gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2163        block++;
2164        while (*n < elen) {
2165                ret = gfs2_rbm_from_block(&pos, block);
2166                if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2167                        break;
2168                gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2169                gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2170                (*n)++;
2171                block++;
2172        }
2173}
2174
2175/**
2176 * rgblk_free - Change alloc state of given block(s)
2177 * @sdp: the filesystem
2178 * @bstart: the start of a run of blocks to free
2179 * @blen: the length of the block run (all must lie within ONE RG!)
2180 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2181 *
2182 * Returns:  Resource group containing the block(s)
2183 */
2184
2185static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2186                                     u32 blen, unsigned char new_state)
2187{
2188        struct gfs2_rbm rbm;
2189        struct gfs2_bitmap *bi, *bi_prev = NULL;
2190
2191        rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2192        if (!rbm.rgd) {
2193                if (gfs2_consist(sdp))
2194                        fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2195                return NULL;
2196        }
2197
2198        gfs2_rbm_from_block(&rbm, bstart);
2199        while (blen--) {
2200                bi = rbm_bi(&rbm);
2201                if (bi != bi_prev) {
2202                        if (!bi->bi_clone) {
2203                                bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2204                                                      GFP_NOFS | __GFP_NOFAIL);
2205                                memcpy(bi->bi_clone + bi->bi_offset,
2206                                       bi->bi_bh->b_data + bi->bi_offset,
2207                                       bi->bi_len);
2208                        }
2209                        gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2210                        bi_prev = bi;
2211                }
2212                gfs2_setbit(&rbm, false, new_state);
2213                gfs2_rbm_incr(&rbm);
2214        }
2215
2216        return rbm.rgd;
2217}
2218
2219/**
2220 * gfs2_rgrp_dump - print out an rgrp
2221 * @seq: The iterator
2222 * @gl: The glock in question
2223 *
2224 */
2225
2226void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2227{
2228        struct gfs2_rgrpd *rgd = gl->gl_object;
2229        struct gfs2_blkreserv *trs;
2230        const struct rb_node *n;
2231
2232        if (rgd == NULL)
2233                return;
2234        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2235                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2236                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2237                       rgd->rd_reserved, rgd->rd_extfail_pt);
2238        spin_lock(&rgd->rd_rsspin);
2239        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2240                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2241                dump_rs(seq, trs);
2242        }
2243        spin_unlock(&rgd->rd_rsspin);
2244}
2245
2246static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2247{
2248        struct gfs2_sbd *sdp = rgd->rd_sbd;
2249        fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2250                (unsigned long long)rgd->rd_addr);
2251        fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2252        gfs2_rgrp_dump(NULL, rgd->rd_gl);
2253        rgd->rd_flags |= GFS2_RDF_ERROR;
2254}
2255
2256/**
2257 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2258 * @ip: The inode we have just allocated blocks for
2259 * @rbm: The start of the allocated blocks
2260 * @len: The extent length
2261 *
2262 * Adjusts a reservation after an allocation has taken place. If the
2263 * reservation does not match the allocation, or if it is now empty
2264 * then it is removed.
2265 */
2266
2267static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2268                                    const struct gfs2_rbm *rbm, unsigned len)
2269{
2270        struct gfs2_blkreserv *rs = ip->i_res;
2271        struct gfs2_rgrpd *rgd = rbm->rgd;
2272        unsigned rlen;
2273        u64 block;
2274        int ret;
2275
2276        spin_lock(&rgd->rd_rsspin);
2277        if (gfs2_rs_active(rs)) {
2278                if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2279                        block = gfs2_rbm_to_block(rbm);
2280                        ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2281                        rlen = min(rs->rs_free, len);
2282                        rs->rs_free -= rlen;
2283                        rgd->rd_reserved -= rlen;
2284                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2285                        if (rs->rs_free && !ret)
2286                                goto out;
2287                        /* We used up our block reservation, so we should
2288                           reserve more blocks next time. */
2289                        atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
2290                }
2291                __rs_deltree(rs);
2292        }
2293out:
2294        spin_unlock(&rgd->rd_rsspin);
2295}
2296
2297/**
2298 * gfs2_set_alloc_start - Set starting point for block allocation
2299 * @rbm: The rbm which will be set to the required location
2300 * @ip: The gfs2 inode
2301 * @dinode: Flag to say if allocation includes a new inode
2302 *
2303 * This sets the starting point from the reservation if one is active
2304 * otherwise it falls back to guessing a start point based on the
2305 * inode's goal block or the last allocation point in the rgrp.
2306 */
2307
2308static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2309                                 const struct gfs2_inode *ip, bool dinode)
2310{
2311        u64 goal;
2312
2313        if (gfs2_rs_active(ip->i_res)) {
2314                *rbm = ip->i_res->rs_rbm;
2315                return;
2316        }
2317
2318        if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2319                goal = ip->i_goal;
2320        else
2321                goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2322
2323        gfs2_rbm_from_block(rbm, goal);
2324}
2325
2326/**
2327 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2328 * @ip: the inode to allocate the block for
2329 * @bn: Used to return the starting block number
2330 * @nblocks: requested number of blocks/extent length (value/result)
2331 * @dinode: 1 if we're allocating a dinode block, else 0
2332 * @generation: the generation number of the inode
2333 *
2334 * Returns: 0 or error
2335 */
2336
2337int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2338                      bool dinode, u64 *generation)
2339{
2340        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2341        struct buffer_head *dibh;
2342        struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2343        unsigned int ndata;
2344        u64 block; /* block, within the file system scope */
2345        int error;
2346
2347        gfs2_set_alloc_start(&rbm, ip, dinode);
2348        error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2349
2350        if (error == -ENOSPC) {
2351                gfs2_set_alloc_start(&rbm, ip, dinode);
2352                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2353                                      NULL);
2354        }
2355
2356        /* Since all blocks are reserved in advance, this shouldn't happen */
2357        if (error) {
2358                fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2359                        (unsigned long long)ip->i_no_addr, error, *nblocks,
2360                        test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2361                        rbm.rgd->rd_extfail_pt);
2362                goto rgrp_error;
2363        }
2364
2365        gfs2_alloc_extent(&rbm, dinode, nblocks);
2366        block = gfs2_rbm_to_block(&rbm);
2367        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2368        if (gfs2_rs_active(ip->i_res))
2369                gfs2_adjust_reservation(ip, &rbm, *nblocks);
2370        ndata = *nblocks;
2371        if (dinode)
2372                ndata--;
2373
2374        if (!dinode) {
2375                ip->i_goal = block + ndata - 1;
2376                error = gfs2_meta_inode_buffer(ip, &dibh);
2377                if (error == 0) {
2378                        struct gfs2_dinode *di =
2379                                (struct gfs2_dinode *)dibh->b_data;
2380                        gfs2_trans_add_meta(ip->i_gl, dibh);
2381                        di->di_goal_meta = di->di_goal_data =
2382                                cpu_to_be64(ip->i_goal);
2383                        brelse(dibh);
2384                }
2385        }
2386        if (rbm.rgd->rd_free < *nblocks) {
2387                pr_warn("nblocks=%u\n", *nblocks);
2388                goto rgrp_error;
2389        }
2390
2391        rbm.rgd->rd_free -= *nblocks;
2392        if (dinode) {
2393                rbm.rgd->rd_dinodes++;
2394                *generation = rbm.rgd->rd_igeneration++;
2395                if (*generation == 0)
2396                        *generation = rbm.rgd->rd_igeneration++;
2397        }
2398
2399        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2400        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2401        gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2402
2403        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2404        if (dinode)
2405                gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2406
2407        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2408
2409        rbm.rgd->rd_free_clone -= *nblocks;
2410        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2411                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2412        *bn = block;
2413        return 0;
2414
2415rgrp_error:
2416        gfs2_rgrp_error(rbm.rgd);
2417        return -EIO;
2418}
2419
2420/**
2421 * __gfs2_free_blocks - free a contiguous run of block(s)
2422 * @ip: the inode these blocks are being freed from
2423 * @bstart: first block of a run of contiguous blocks
2424 * @blen: the length of the block run
2425 * @meta: 1 if the blocks represent metadata
2426 *
2427 */
2428
2429void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2430{
2431        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2432        struct gfs2_rgrpd *rgd;
2433
2434        rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2435        if (!rgd)
2436                return;
2437        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2438        rgd->rd_free += blen;
2439        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2440        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2441        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2442        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2443
2444        /* Directories keep their data in the metadata address space */
2445        if (meta || ip->i_depth)
2446                gfs2_meta_wipe(ip, bstart, blen);
2447}
2448
2449/**
2450 * gfs2_free_meta - free a contiguous run of data block(s)
2451 * @ip: the inode these blocks are being freed from
2452 * @bstart: first block of a run of contiguous blocks
2453 * @blen: the length of the block run
2454 *
2455 */
2456
2457void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2458{
2459        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2460
2461        __gfs2_free_blocks(ip, bstart, blen, 1);
2462        gfs2_statfs_change(sdp, 0, +blen, 0);
2463        gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2464}
2465
2466void gfs2_unlink_di(struct inode *inode)
2467{
2468        struct gfs2_inode *ip = GFS2_I(inode);
2469        struct gfs2_sbd *sdp = GFS2_SB(inode);
2470        struct gfs2_rgrpd *rgd;
2471        u64 blkno = ip->i_no_addr;
2472
2473        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2474        if (!rgd)
2475                return;
2476        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2477        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2478        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2479        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2480        update_rgrp_lvb_unlinked(rgd, 1);
2481}
2482
2483static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2484{
2485        struct gfs2_sbd *sdp = rgd->rd_sbd;
2486        struct gfs2_rgrpd *tmp_rgd;
2487
2488        tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2489        if (!tmp_rgd)
2490                return;
2491        gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2492
2493        if (!rgd->rd_dinodes)
2494                gfs2_consist_rgrpd(rgd);
2495        rgd->rd_dinodes--;
2496        rgd->rd_free++;
2497
2498        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2499        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2500        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2501        update_rgrp_lvb_unlinked(rgd, -1);
2502
2503        gfs2_statfs_change(sdp, 0, +1, -1);
2504}
2505
2506
2507void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2508{
2509        gfs2_free_uninit_di(rgd, ip->i_no_addr);
2510        trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2511        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2512        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2513}
2514
2515/**
2516 * gfs2_check_blk_type - Check the type of a block
2517 * @sdp: The superblock
2518 * @no_addr: The block number to check
2519 * @type: The block type we are looking for
2520 *
2521 * Returns: 0 if the block type matches the expected type
2522 *          -ESTALE if it doesn't match
2523 *          or -ve errno if something went wrong while checking
2524 */
2525
2526int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2527{
2528        struct gfs2_rgrpd *rgd;
2529        struct gfs2_holder rgd_gh;
2530        int error = -EINVAL;
2531
2532        rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2533        if (!rgd)
2534                goto fail;
2535
2536        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2537        if (error)
2538                goto fail;
2539
2540        if (gfs2_get_block_type(rgd, no_addr) != type)
2541                error = -ESTALE;
2542
2543        gfs2_glock_dq_uninit(&rgd_gh);
2544fail:
2545        return error;
2546}
2547
2548/**
2549 * gfs2_rlist_add - add a RG to a list of RGs
2550 * @ip: the inode
2551 * @rlist: the list of resource groups
2552 * @block: the block
2553 *
2554 * Figure out what RG a block belongs to and add that RG to the list
2555 *
2556 * FIXME: Don't use NOFAIL
2557 *
2558 */
2559
2560void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2561                    u64 block)
2562{
2563        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2564        struct gfs2_rgrpd *rgd;
2565        struct gfs2_rgrpd **tmp;
2566        unsigned int new_space;
2567        unsigned int x;
2568
2569        if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2570                return;
2571
2572        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2573                rgd = ip->i_rgd;
2574        else
2575                rgd = gfs2_blk2rgrpd(sdp, block, 1);
2576        if (!rgd) {
2577                fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2578                return;
2579        }
2580        ip->i_rgd = rgd;
2581
2582        for (x = 0; x < rlist->rl_rgrps; x++)
2583                if (rlist->rl_rgd[x] == rgd)
2584                        return;
2585
2586        if (rlist->rl_rgrps == rlist->rl_space) {
2587                new_space = rlist->rl_space + 10;
2588
2589                tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2590                              GFP_NOFS | __GFP_NOFAIL);
2591
2592                if (rlist->rl_rgd) {
2593                        memcpy(tmp, rlist->rl_rgd,
2594                               rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2595                        kfree(rlist->rl_rgd);
2596                }
2597
2598                rlist->rl_space = new_space;
2599                rlist->rl_rgd = tmp;
2600        }
2601
2602        rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2603}
2604
2605/**
2606 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2607 *      and initialize an array of glock holders for them
2608 * @rlist: the list of resource groups
2609 * @state: the lock state to acquire the RG lock in
2610 *
2611 * FIXME: Don't use NOFAIL
2612 *
2613 */
2614
2615void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2616{
2617        unsigned int x;
2618
2619        rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2620                                GFP_NOFS | __GFP_NOFAIL);
2621        for (x = 0; x < rlist->rl_rgrps; x++)
2622                gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2623                                state, 0,
2624                                &rlist->rl_ghs[x]);
2625}
2626
2627/**
2628 * gfs2_rlist_free - free a resource group list
2629 * @rlist: the list of resource groups
2630 *
2631 */
2632
2633void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2634{
2635        unsigned int x;
2636
2637        kfree(rlist->rl_rgd);
2638
2639        if (rlist->rl_ghs) {
2640                for (x = 0; x < rlist->rl_rgrps; x++)
2641                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
2642                kfree(rlist->rl_ghs);
2643                rlist->rl_ghs = NULL;
2644        }
2645}
2646
2647