LXR linux/fs/gfs2/bmap.c

   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/spinlock.h>
  11#include <linux/completion.h>
  12#include <linux/buffer_head.h>
  13#include <linux/blkdev.h>
  14#include <linux/gfs2_ondisk.h>
  15#include <linux/crc32.h>
  16#include <linux/iomap.h>
  17
  18#include "gfs2.h"
  19#include "incore.h"
  20#include "bmap.h"
  21#include "glock.h"
  22#include "inode.h"
  23#include "meta_io.h"
  24#include "quota.h"
  25#include "rgrp.h"
  26#include "log.h"
  27#include "super.h"
  28#include "trans.h"
  29#include "dir.h"
  30#include "util.h"
  31#include "trace_gfs2.h"
  32
  33/* This doesn't need to be that large as max 64 bit pointers in a 4k
  34 * block is 512, so __u16 is fine for that. It saves stack space to
  35 * keep it small.
  36 */
  37struct metapath {
  38        struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
  39        __u16 mp_list[GFS2_MAX_META_HEIGHT];
  40        int mp_fheight; /* find_metapath height */
  41        int mp_aheight; /* actual height (lookup height) */
  42};
  43
  44/**
  45 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  46 * @ip: the inode
  47 * @dibh: the dinode buffer
  48 * @block: the block number that was allocated
  49 * @page: The (optional) page. This is looked up if @page is NULL
  50 *
  51 * Returns: errno
  52 */
  53
  54static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  55                               u64 block, struct page *page)
  56{
  57        struct inode *inode = &ip->i_inode;
  58        struct buffer_head *bh;
  59        int release = 0;
  60
  61        if (!page || page->index) {
  62                page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
  63                if (!page)
  64                        return -ENOMEM;
  65                release = 1;
  66        }
  67
  68        if (!PageUptodate(page)) {
  69                void *kaddr = kmap(page);
  70                u64 dsize = i_size_read(inode);
  71 
  72                if (dsize > gfs2_max_stuffed_size(ip))
  73                        dsize = gfs2_max_stuffed_size(ip);
  74
  75                memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  76                memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  77                kunmap(page);
  78
  79                SetPageUptodate(page);
  80        }
  81
  82        if (!page_has_buffers(page))
  83                create_empty_buffers(page, BIT(inode->i_blkbits),
  84                                     BIT(BH_Uptodate));
  85
  86        bh = page_buffers(page);
  87
  88        if (!buffer_mapped(bh))
  89                map_bh(bh, inode->i_sb, block);
  90
  91        set_buffer_uptodate(bh);
  92        if (!gfs2_is_jdata(ip))
  93                mark_buffer_dirty(bh);
  94        if (!gfs2_is_writeback(ip))
  95                gfs2_trans_add_data(ip->i_gl, bh);
  96
  97        if (release) {
  98                unlock_page(page);
  99                put_page(page);
 100        }
 101
 102        return 0;
 103}
 104
 105/**
 106 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 107 * @ip: The GFS2 inode to unstuff
 108 * @page: The (optional) page. This is looked up if the @page is NULL
 109 *
 110 * This routine unstuffs a dinode and returns it to a "normal" state such
 111 * that the height can be grown in the traditional way.
 112 *
 113 * Returns: errno
 114 */
 115
 116int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 117{
 118        struct buffer_head *bh, *dibh;
 119        struct gfs2_dinode *di;
 120        u64 block = 0;
 121        int isdir = gfs2_is_dir(ip);
 122        int error;
 123
 124        down_write(&ip->i_rw_mutex);
 125
 126        error = gfs2_meta_inode_buffer(ip, &dibh);
 127        if (error)
 128                goto out;
 129
 130        if (i_size_read(&ip->i_inode)) {
 131                /* Get a free block, fill it with the stuffed data,
 132                   and write it out to disk */
 133
 134                unsigned int n = 1;
 135                error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 136                if (error)
 137                        goto out_brelse;
 138                if (isdir) {
 139                        gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 140                        error = gfs2_dir_get_new_buffer(ip, block, &bh);
 141                        if (error)
 142                                goto out_brelse;
 143                        gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 144                                              dibh, sizeof(struct gfs2_dinode));
 145                        brelse(bh);
 146                } else {
 147                        error = gfs2_unstuffer_page(ip, dibh, block, page);
 148                        if (error)
 149                                goto out_brelse;
 150                }
 151        }
 152
 153        /*  Set up the pointer to the new block  */
 154
 155        gfs2_trans_add_meta(ip->i_gl, dibh);
 156        di = (struct gfs2_dinode *)dibh->b_data;
 157        gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 158
 159        if (i_size_read(&ip->i_inode)) {
 160                *(__be64 *)(di + 1) = cpu_to_be64(block);
 161                gfs2_add_inode_blocks(&ip->i_inode, 1);
 162                di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 163        }
 164
 165        ip->i_height = 1;
 166        di->di_height = cpu_to_be16(1);
 167
 168out_brelse:
 169        brelse(dibh);
 170out:
 171        up_write(&ip->i_rw_mutex);
 172        return error;
 173}
 174
 175
 176/**
 177 * find_metapath - Find path through the metadata tree
 178 * @sdp: The superblock
 179 * @mp: The metapath to return the result in
 180 * @block: The disk block to look up
 181 * @height: The pre-calculated height of the metadata tree
 182 *
 183 *   This routine returns a struct metapath structure that defines a path
 184 *   through the metadata of inode "ip" to get to block "block".
 185 *
 186 *   Example:
 187 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 188 *   filesystem with a blocksize of 4096.
 189 *
 190 *   find_metapath() would return a struct metapath structure set to:
 191 *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
 192 *   and mp_list[2] = 165.
 193 *
 194 *   That means that in order to get to the block containing the byte at
 195 *   offset 101342453, we would load the indirect block pointed to by pointer
 196 *   0 in the dinode.  We would then load the indirect block pointed to by
 197 *   pointer 48 in that indirect block.  We would then load the data block
 198 *   pointed to by pointer 165 in that indirect block.
 199 *
 200 *             ----------------------------------------
 201 *             | Dinode |                             |
 202 *             |        |                            4|
 203 *             |        |0 1 2 3 4 5                 9|
 204 *             |        |                            6|
 205 *             ----------------------------------------
 206 *                       |
 207 *                       |
 208 *                       V
 209 *             ----------------------------------------
 210 *             | Indirect Block                       |
 211 *             |                                     5|
 212 *             |            4 4 4 4 4 5 5            1|
 213 *             |0           5 6 7 8 9 0 1            2|
 214 *             ----------------------------------------
 215 *                                |
 216 *                                |
 217 *                                V
 218 *             ----------------------------------------
 219 *             | Indirect Block                       |
 220 *             |                         1 1 1 1 1   5|
 221 *             |                         6 6 6 6 6   1|
 222 *             |0                        3 4 5 6 7   2|
 223 *             ----------------------------------------
 224 *                                           |
 225 *                                           |
 226 *                                           V
 227 *             ----------------------------------------
 228 *             | Data block containing offset         |
 229 *             |            101342453                 |
 230 *             |                                      |
 231 *             |                                      |
 232 *             ----------------------------------------
 233 *
 234 */
 235
 236static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 237                          struct metapath *mp, unsigned int height)
 238{
 239        unsigned int i;
 240
 241        mp->mp_fheight = height;
 242        for (i = height; i--;)
 243                mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 244}
 245
 246static inline unsigned int metapath_branch_start(const struct metapath *mp)
 247{
 248        if (mp->mp_list[0] == 0)
 249                return 2;
 250        return 1;
 251}
 252
 253/**
 254 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 255 * @height: The metadata height (0 = dinode)
 256 * @mp: The metapath
 257 */
 258static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 259{
 260        struct buffer_head *bh = mp->mp_bh[height];
 261        if (height == 0)
 262                return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 263        return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 264}
 265
 266/**
 267 * metapointer - Return pointer to start of metadata in a buffer
 268 * @height: The metadata height (0 = dinode)
 269 * @mp: The metapath
 270 *
 271 * Return a pointer to the block number of the next height of the metadata
 272 * tree given a buffer containing the pointer to the current height of the
 273 * metadata tree.
 274 */
 275
 276static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 277{
 278        __be64 *p = metaptr1(height, mp);
 279        return p + mp->mp_list[height];
 280}
 281
 282static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 283{
 284        const __be64 *t;
 285
 286        for (t = start; t < end; t++) {
 287                struct buffer_head *rabh;
 288
 289                if (!*t)
 290                        continue;
 291
 292                rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 293                if (trylock_buffer(rabh)) {
 294                        if (!buffer_uptodate(rabh)) {
 295                                rabh->b_end_io = end_buffer_read_sync;
 296                                submit_bh(REQ_OP_READ,
 297                                          REQ_RAHEAD | REQ_META | REQ_PRIO,
 298                                          rabh);
 299                                continue;
 300                        }
 301                        unlock_buffer(rabh);
 302                }
 303                brelse(rabh);
 304        }
 305}
 306
 307static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 308                             unsigned int x, unsigned int h)
 309{
 310        for (; x < h; x++) {
 311                __be64 *ptr = metapointer(x, mp);
 312                u64 dblock = be64_to_cpu(*ptr);
 313                int ret;
 314
 315                if (!dblock)
 316                        break;
 317                ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
 318                if (ret)
 319                        return ret;
 320        }
 321        mp->mp_aheight = x + 1;
 322        return 0;
 323}
 324
 325/**
 326 * lookup_metapath - Walk the metadata tree to a specific point
 327 * @ip: The inode
 328 * @mp: The metapath
 329 *
 330 * Assumes that the inode's buffer has already been looked up and
 331 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 332 * by find_metapath().
 333 *
 334 * If this function encounters part of the tree which has not been
 335 * allocated, it returns the current height of the tree at the point
 336 * at which it found the unallocated block. Blocks which are found are
 337 * added to the mp->mp_bh[] list.
 338 *
 339 * Returns: error
 340 */
 341
 342static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 343{
 344        return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 345}
 346
 347/**
 348 * fillup_metapath - fill up buffers for the metadata path to a specific height
 349 * @ip: The inode
 350 * @mp: The metapath
 351 * @h: The height to which it should be mapped
 352 *
 353 * Similar to lookup_metapath, but does lookups for a range of heights
 354 *
 355 * Returns: error or the number of buffers filled
 356 */
 357
 358static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 359{
 360        unsigned int x = 0;
 361        int ret;
 362
 363        if (h) {
 364                /* find the first buffer we need to look up. */
 365                for (x = h - 1; x > 0; x--) {
 366                        if (mp->mp_bh[x])
 367                                break;
 368                }
 369        }
 370        ret = __fillup_metapath(ip, mp, x, h);
 371        if (ret)
 372                return ret;
 373        return mp->mp_aheight - x - 1;
 374}
 375
 376static inline void release_metapath(struct metapath *mp)
 377{
 378        int i;
 379
 380        for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 381                if (mp->mp_bh[i] == NULL)
 382                        break;
 383                brelse(mp->mp_bh[i]);
 384        }
 385}
 386
 387/**
 388 * gfs2_extent_length - Returns length of an extent of blocks
 389 * @start: Start of the buffer
 390 * @len: Length of the buffer in bytes
 391 * @ptr: Current position in the buffer
 392 * @limit: Max extent length to return (0 = unlimited)
 393 * @eob: Set to 1 if we hit "end of block"
 394 *
 395 * If the first block is zero (unallocated) it will return the number of
 396 * unallocated blocks in the extent, otherwise it will return the number
 397 * of contiguous blocks in the extent.
 398 *
 399 * Returns: The length of the extent (minimum of one block)
 400 */
 401
 402static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
 403{
 404        const __be64 *end = (start + len);
 405        const __be64 *first = ptr;
 406        u64 d = be64_to_cpu(*ptr);
 407
 408        *eob = 0;
 409        do {
 410                ptr++;
 411                if (ptr >= end)
 412                        break;
 413                if (limit && --limit == 0)
 414                        break;
 415                if (d)
 416                        d++;
 417        } while(be64_to_cpu(*ptr) == d);
 418        if (ptr >= end)
 419                *eob = 1;
 420        return (ptr - first);
 421}
 422
 423static inline void bmap_lock(struct gfs2_inode *ip, int create)
 424{
 425        if (create)
 426                down_write(&ip->i_rw_mutex);
 427        else
 428                down_read(&ip->i_rw_mutex);
 429}
 430
 431static inline void bmap_unlock(struct gfs2_inode *ip, int create)
 432{
 433        if (create)
 434                up_write(&ip->i_rw_mutex);
 435        else
 436                up_read(&ip->i_rw_mutex);
 437}
 438
 439static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 440                                         struct gfs2_glock *gl, unsigned int i,
 441                                         unsigned offset, u64 bn)
 442{
 443        __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 444                       ((i > 1) ? sizeof(struct gfs2_meta_header) :
 445                                 sizeof(struct gfs2_dinode)));
 446        BUG_ON(i < 1);
 447        BUG_ON(mp->mp_bh[i] != NULL);
 448        mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 449        gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 450        gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 451        gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 452        ptr += offset;
 453        *ptr = cpu_to_be64(bn);
 454        return ptr;
 455}
 456
 457enum alloc_state {
 458        ALLOC_DATA = 0,
 459        ALLOC_GROW_DEPTH = 1,
 460        ALLOC_GROW_HEIGHT = 2,
 461        /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 462};
 463
  464/**
  465 * gfs2_iomap_alloc - Build a metadata tree of the requested height
  466 * @inode: The GFS2 inode
  467 * @iomap: The iomap. On entry its length (in bytes) bounds the number of
  468 *         data blocks to allocate; on return its addr, length and flags
  469 *         describe the newly allocated extent (IOMAP_F_NEW is set)
  470 * @flags: iomap flags. With IOMAP_ZERO set, the newly allocated data
  471 *         blocks are zeroed on disk
  472 * @mp: The metapath, with proper height information calculated; mp_bh[0]
  473 *      must already hold the dinode buffer
  474 *
  475 * In this routine we may have to alloc:
  476 *   i) Indirect blocks to grow the metadata tree height
  477 *  ii) Indirect blocks to fill in lower part of the metadata tree
  478 * iii) Data blocks
  479 *
  480 * The function is in two parts. The first part works out the total
  481 * number of blocks which we need. The second part does the actual
  482 * allocation asking for an extent at a time (if enough contiguous free
  483 * blocks are available, there will only be one request per bmap call)
  484 * and uses the state machine to initialise the blocks in order.
  485 *
  486 * Returns: errno on error
  487 */
  488
  489static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
  490                            unsigned flags, struct metapath *mp)
  491{
  492        struct gfs2_inode *ip = GFS2_I(inode);
  493        struct gfs2_sbd *sdp = GFS2_SB(inode);
  494        struct super_block *sb = sdp->sd_vfs;
  495        struct buffer_head *dibh = mp->mp_bh[0];
  496        u64 bn;
  497        unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
  498        unsigned dblks = 0;
  499        unsigned ptrs_per_blk;
  500        const unsigned end_of_metadata = mp->mp_fheight - 1;
  501        int ret;
  502        enum alloc_state state;
  503        __be64 *ptr;
  504        __be64 zero_bn = 0;
  505        size_t maxlen = iomap->length >> inode->i_blkbits; /* request size in blocks */
  506
  507        BUG_ON(mp->mp_aheight < 1);
  508        BUG_ON(dibh == NULL);
  509
  510        gfs2_trans_add_meta(ip->i_gl, dibh);
  511
  512        if (mp->mp_fheight == mp->mp_aheight) {
  513                struct buffer_head *bh;
  514                int eob;
  515
  516                /* Bottom indirect block exists, find unalloced extent size */
  517                ptr = metapointer(end_of_metadata, mp);
  518                bh = mp->mp_bh[end_of_metadata];
  519                dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
  520                                           maxlen, &eob);
  521                BUG_ON(dblks < 1);
  522                state = ALLOC_DATA;
  523        } else {
  524                /* Need to allocate indirect blocks */
  525                ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
  526                        sdp->sd_diptrs;
  527                dblks = min(maxlen, (size_t)(ptrs_per_blk -
  528                                             mp->mp_list[end_of_metadata]));
  529                if (mp->mp_fheight == ip->i_height) {
  530                        /* Writing into existing tree, extend tree down */
  531                        iblks = mp->mp_fheight - mp->mp_aheight;
  532                        state = ALLOC_GROW_DEPTH;
  533                } else {
  534                        /* Building up tree height */
  535                        state = ALLOC_GROW_HEIGHT;
  536                        iblks = mp->mp_fheight - ip->i_height;
  537                        branch_start = metapath_branch_start(mp);
  538                        iblks += (mp->mp_fheight - branch_start);
  539                }
  540        }
  541
  542        /* start of the second part of the function (state machine) */
  543
  544        blks = dblks + iblks;
  545        i = mp->mp_aheight;
  546        do {
  547                int error;
  548                n = blks - alloced; /* ask for everything still outstanding */
  549                error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
  550                if (error)
  551                        return error;
  552                alloced += n;
  553                if (state != ALLOC_DATA || gfs2_is_jdata(ip))
  554                        gfs2_trans_add_unrevoke(sdp, bn, n);
  555                switch (state) {
  556                /* Growing height of tree */
  557                case ALLOC_GROW_HEIGHT:
  558                        if (i == 1) {
  559                                ptr = (__be64 *)(dibh->b_data +
  560                                                 sizeof(struct gfs2_dinode));
  561                                zero_bn = *ptr; /* save the dinode's first pointer */
  562                        }
  563                        for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
  564                             i++, n--)
  565                                gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
  566                        if (i - 1 == mp->mp_fheight - ip->i_height) {
  567                                i--;
     /*
      * All new top levels exist: move the dinode's old pointer area
      * into the lowest new indirect block, clear the dinode's tail
      * apart from its first pointer, and restore the saved first
      * pointer at the start of the copied area.
      */
  568                                gfs2_buffer_copy_tail(mp->mp_bh[i],
  569                                                sizeof(struct gfs2_meta_header),
  570                                                dibh, sizeof(struct gfs2_dinode));
  571                                gfs2_buffer_clear_tail(dibh,
  572                                                sizeof(struct gfs2_dinode) +
  573                                                sizeof(__be64));
  574                                ptr = (__be64 *)(mp->mp_bh[i]->b_data +
  575                                        sizeof(struct gfs2_meta_header));
  576                                *ptr = zero_bn;
  577                                state = ALLOC_GROW_DEPTH;
  578                                for(i = branch_start; i < mp->mp_fheight; i++) {
  579                                        if (mp->mp_bh[i] == NULL)
  580                                                break;
  581                                        brelse(mp->mp_bh[i]);
  582                                        mp->mp_bh[i] = NULL;
  583                                }
  584                                i = branch_start;
  585                        }
  586                        if (n == 0)
  587                                break;
  588                /* fall through - branching from existing tree */
  589                case ALLOC_GROW_DEPTH:
  590                        if (i > 1 && i < mp->mp_fheight)
  591                                gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
  592                        for (; i < mp->mp_fheight && n > 0; i++, n--)
  593                                gfs2_indirect_init(mp, ip->i_gl, i,
  594                                                   mp->mp_list[i-1], bn++);
  595                        if (i == mp->mp_fheight)
  596                                state = ALLOC_DATA;
  597                        if (n == 0)
  598                                break;
  599                /* fall through - tree complete, adding data blocks */
  600                case ALLOC_DATA:
  601                        BUG_ON(n > dblks);
  602                        BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
  603                        gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
  604                        dblks = n; /* data blocks actually obtained */
  605                        ptr = metapointer(end_of_metadata, mp);
  606                        iomap->addr = bn << inode->i_blkbits;
  607                        iomap->flags |= IOMAP_F_NEW;
  608                        while (n-- > 0)
  609                                *ptr++ = cpu_to_be64(bn++);
  610                        if (flags & IOMAP_ZERO) {
  611                                ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
  612                                                       dblks, GFP_NOFS);
  613                                if (ret) {
  614                                        fs_err(sdp,
  615                                               "Failed to zero data buffers\n");
  616                                        flags &= ~IOMAP_ZERO;
  617                                }
  618                        }
  619                        break;
  620                }
  621        } while (iomap->addr == IOMAP_NULL_ADDR); /* only ALLOC_DATA sets iomap->addr */
  622
  623        iomap->length = (u64)dblks << inode->i_blkbits;
  624        ip->i_height = mp->mp_fheight;
  625        gfs2_add_inode_blocks(&ip->i_inode, alloced);
  626        gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
  627        return 0;
  628}
 629
  630/**
  631 * hole_size - figure out the size of a hole
  632 * @inode: The inode
  633 * @lblock: The logical starting block number
  634 * @mp: The metapath to @lblock; note that its mp_list[] entries may be
     *      advanced by this function while it scans upwards
  635 *
     * Counts runs of zero pointers from the bottom of the tree upwards,
     * starting at the position given by @mp, and never reports a hole that
     * extends beyond the last block covered by the inode's size.
     *
  636 * Returns: The hole size in bytes
  637 *
  638 */
  639static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
  640{
  641        struct gfs2_inode *ip = GFS2_I(inode);
  642        struct gfs2_sbd *sdp = GFS2_SB(inode);
  643        struct metapath mp_eof;
  644        u64 factor = 1; /* blocks covered by one pointer at the current height */
  645        int hgt;
  646        u64 holesz = 0;
  647        const __be64 *first, *end, *ptr;
  648        const struct buffer_head *bh;
  649        u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits; /* block holding the last byte */
  650        int zeroptrs;
  651        bool done = false;
  652
  653        /* Get another metapath, to the very last byte */
  654        find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
  655        for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
  656                bh = mp->mp_bh[hgt];
  657                if (bh) {
  658                        zeroptrs = 0;
  659                        first = metapointer(hgt, mp);
  660                        end = (const __be64 *)(bh->b_data + bh->b_size);
  661
     /* Count zero pointers up to the first allocated one */
  662                        for (ptr = first; ptr < end; ptr++) {
  663                                if (*ptr) {
  664                                        done = true;
  665                                        break;
  666                                } else {
  667                                        zeroptrs++;
  668                                }
  669                        }
  670                } else {
  671                        zeroptrs = sdp->sd_inptrs; /* no buffer at this height: count a full block of pointers */
  672                }
  673                if (factor * zeroptrs >= lblock_stop - lblock + 1) { /* hole reaches the last block: clamp */
  674                        holesz = lblock_stop - lblock + 1;
  675                        break;
  676                }
  677                holesz += factor * zeroptrs;
  678
  679                factor *= sdp->sd_inptrs;
     /* Step to the next pointer in the parent, but not past the path to the last block */
  680                if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
  681                        (mp->mp_list[hgt - 1])++;
  682        }
  683        return holesz << inode->i_blkbits;
  684}
 685
 686static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 687{
 688        struct gfs2_inode *ip = GFS2_I(inode);
 689
 690        iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 691                      sizeof(struct gfs2_dinode);
 692        iomap->offset = 0;
 693        iomap->length = i_size_read(inode);
 694        iomap->type = IOMAP_MAPPED;
 695        iomap->flags = IOMAP_F_DATA_INLINE;
 696}
 697
  698/**
  699 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
  700 * @inode: The inode
  701 * @pos: Starting position in bytes
  702 * @length: Length to map, in bytes
  703 * @flags: iomap flags (IOMAP_WRITE, IOMAP_REPORT and IOMAP_ZERO are acted on)
  704 * @iomap: The iomap structure
  705 *
     * The inode's i_rw_mutex is taken for write when IOMAP_WRITE is set and
     * for read otherwise. With IOMAP_WRITE, missing blocks are allocated;
     * with IOMAP_REPORT, the size of the hole at @pos is reported instead.
     *
  706 * Returns: 0 on success, -EINVAL if @length is zero, -ENOENT for an
     *          IOMAP_REPORT request at or beyond the end of the file, or
     *          another errno passed up from a helper
  707 */
  708int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
  709                     unsigned flags, struct iomap *iomap)
  710{
  711        struct gfs2_inode *ip = GFS2_I(inode);
  712        struct gfs2_sbd *sdp = GFS2_SB(inode);
  713        struct metapath mp = { .mp_aheight = 1, };
  714        unsigned int factor = sdp->sd_sb.sb_bsize;
  715        const u64 *arr = sdp->sd_heightsize;
  716        __be64 *ptr;
  717        sector_t lblock;
  718        sector_t lend;
  719        int ret = 0;
  720        int eob;
  721        unsigned int len;
  722        struct buffer_head *bh;
  723        u8 height;
  724
  725        trace_gfs2_iomap_start(ip, pos, length, flags);
  726        if (!length) {
  727                ret = -EINVAL;
  728                goto out;
  729        }
  730
  731        if (gfs2_is_stuffed(ip)) {
  732                if (flags & IOMAP_REPORT) {
  733                        gfs2_stuffed_iomap(inode, iomap);
  734                        if (pos >= iomap->length)
  735                                ret = -ENOENT;
  736                        goto out;
  737                }
  738                BUG_ON(!(flags & IOMAP_WRITE)); /* a stuffed inode is otherwise only mapped for writing */
  739        }
  740
  741        lblock = pos >> inode->i_blkbits;
  742        lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits; /* end block, rounded up */
  743
     /* Start out describing a hole; refined below once a mapping is found */
  744        iomap->offset = lblock << inode->i_blkbits;
  745        iomap->addr = IOMAP_NULL_ADDR;
  746        iomap->type = IOMAP_HOLE;
  747        iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
  748        iomap->flags = IOMAP_F_MERGED;
  749        bmap_lock(ip, flags & IOMAP_WRITE);
  750
  751        /*
  752         * Directory data blocks have a struct gfs2_meta_header header, so the
  753         * remaining size is smaller than the filesystem block size.  Logical
  754         * block numbers for directories are in units of this remaining size!
  755         */
  756        if (gfs2_is_dir(ip)) {
  757                factor = sdp->sd_jbsize;
  758                arr = sdp->sd_jheightsize;
  759        }
  760
  761        ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
  762        if (ret)
  763                goto out_release;
  764
     /*
      * Find the tree height needed to reach lblock; presumably arr[h] is
      * the largest size in bytes addressable at height h - confirm.
      */
  765        height = ip->i_height;
  766        while ((lblock + 1) * factor > arr[height])
  767                height++;
  768        find_metapath(sdp, lblock, &mp, height);
  769        if (height > ip->i_height || gfs2_is_stuffed(ip))
  770                goto do_alloc;
  771
  772        ret = lookup_metapath(ip, &mp);
  773        if (ret)
  774                goto out_release;
  775
  776        if (mp.mp_aheight != ip->i_height) /* the walk stopped at an unallocated pointer */
  777                goto do_alloc;
  778
  779        ptr = metapointer(ip->i_height - 1, &mp);
  780        if (*ptr == 0)
  781                goto do_alloc;
  782
  783        iomap->type = IOMAP_MAPPED;
  784        iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
  785
  786        bh = mp.mp_bh[ip->i_height - 1];
  787        len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
  788        if (eob)
  789                iomap->flags |= IOMAP_F_BOUNDARY;
  790        iomap->length = (u64)len << inode->i_blkbits;
  791
  792out_release:
  793        release_metapath(&mp);
  794        bmap_unlock(ip, flags & IOMAP_WRITE);
  795out:
  796        trace_gfs2_iomap_end(ip, iomap, ret);
  797        return ret;
  798
     /* The block is not mapped: allocate it (write) or size the hole (report) */
  799do_alloc:
  800        if (flags & IOMAP_WRITE) {
  801                ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
  802        } else if (flags & IOMAP_REPORT) {
  803                loff_t size = i_size_read(inode);
  804                if (pos >= size)
  805                        ret = -ENOENT;
  806                else if (height <= ip->i_height)
  807                        iomap->length = hole_size(inode, lblock, &mp);
  808                else
  809                        iomap->length = size - pos;
  810        }
  811        goto out_release;
  812}
 813
 814/**
 815 * gfs2_block_map - Map a block from an inode to a disk block
 816 * @inode: The inode
 817 * @lblock: The logical block number
 818 * @bh_map: The bh to be mapped
 819 * @create: True if its ok to alloc blocks to satify the request
 820 *
 821 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
 822 * read of metadata will be required before the next block can be
 823 * mapped. Sets buffer_new() if new blocks were allocated.
 824 *
 825 * Returns: errno
 826 */
 827
 828int gfs2_block_map(struct inode *inode, sector_t lblock,
 829                   struct buffer_head *bh_map, int create)
 830{
 831        struct gfs2_inode *ip = GFS2_I(inode);
 832        struct iomap iomap;
 833        int ret, flags = 0;
 834
 835        clear_buffer_mapped(bh_map);
 836        clear_buffer_new(bh_map);
 837        clear_buffer_boundary(bh_map);
 838        trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 839
 840        if (create)
 841                flags |= IOMAP_WRITE;
 842        if (buffer_zeronew(bh_map))
 843                flags |= IOMAP_ZERO;
 844        ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
 845                               bh_map->b_size, flags, &iomap);
 846        if (ret) {
 847                if (!create && ret == -ENOENT) {
 848                        /* Return unmapped buffer beyond the end of file.  */
 849                        ret = 0;
 850                }
 851                goto out;
 852        }
 853
 854        if (iomap.length > bh_map->b_size) {
 855                iomap.length = bh_map->b_size;
 856                iomap.flags &= ~IOMAP_F_BOUNDARY;
 857        }
 858        if (iomap.addr != IOMAP_NULL_ADDR)
 859                map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 860        bh_map->b_size = iomap.length;
 861        if (iomap.flags & IOMAP_F_BOUNDARY)
 862                set_buffer_boundary(bh_map);
 863        if (iomap.flags & IOMAP_F_NEW)
 864                set_buffer_new(bh_map);
 865
 866out:
 867        trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 868        return ret;
 869}
 870
 871/*
 872 * Deprecated: do not use in new code
 873 */
 874int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 875{
 876        struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 877        int ret;
 878        int create = *new;
 879
 880        BUG_ON(!extlen);
 881        BUG_ON(!dblock);
 882        BUG_ON(!new);
 883
 884        bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 885        ret = gfs2_block_map(inode, lblock, &bh, create);
 886        *extlen = bh.b_size >> inode->i_blkbits;
 887        *dblock = bh.b_blocknr;
 888        if (buffer_new(&bh))
 889                *new = 1;
 890        else
 891                *new = 0;
 892        return ret;
 893}
 894
 895/**
 896 * gfs2_block_zero_range - Deal with zeroing out data
 897 *
 898 * This is partly borrowed from ext3.
 899 */
 900static int gfs2_block_zero_range(struct inode *inode, loff_t from,
 901                                 unsigned int length)
 902{
 903        struct address_space *mapping = inode->i_mapping;
 904        struct gfs2_inode *ip = GFS2_I(inode);
 905        unsigned long index = from >> PAGE_SHIFT;
 906        unsigned offset = from & (PAGE_SIZE-1);
 907        unsigned blocksize, iblock, pos;
 908        struct buffer_head *bh;
 909        struct page *page;
 910        int err;
 911
 912        page = find_or_create_page(mapping, index, GFP_NOFS);
 913        if (!page)
 914                return 0;
 915
 916        blocksize = inode->i_sb->s_blocksize;
 917        iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 918
 919        if (!page_has_buffers(page))
 920                create_empty_buffers(page, blocksize, 0);
 921
 922        /* Find the buffer that contains "offset" */
 923        bh = page_buffers(page);
 924        pos = blocksize;
 925        while (offset >= pos) {
 926                bh = bh->b_this_page;
 927                iblock++;
 928                pos += blocksize;
 929        }
 930
 931        err = 0;
 932
 933        if (!buffer_mapped(bh)) {
 934                gfs2_block_map(inode, iblock, bh, 0);
 935                /* unmapped? It's a hole - nothing to do */
 936                if (!buffer_mapped(bh))
 937                        goto unlock;
 938        }
 939
 940        /* Ok, it's mapped. Make sure it's up-to-date */
 941        if (PageUptodate(page))
 942                set_buffer_uptodate(bh);
 943
 944        if (!buffer_uptodate(bh)) {
 945                err = -EIO;
 946                ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 947                wait_on_buffer(bh);
 948                /* Uhhuh. Read error. Complain and punt. */
 949                if (!buffer_uptodate(bh))
 950                        goto unlock;
 951                err = 0;
 952        }
 953
 954        if (!gfs2_is_writeback(ip))
 955                gfs2_trans_add_data(ip->i_gl, bh);
 956
 957        zero_user(page, offset, length);
 958        mark_buffer_dirty(bh);
 959unlock:
 960        unlock_page(page);
 961        put_page(page);
 962        return err;
 963}
 964
 965#define GFS2_JTRUNC_REVOKES 8192
 966
 967/**
 968 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 969 * @inode: The inode being truncated
 970 * @oldsize: The original (larger) size
 971 * @newsize: The new smaller size
 972 *
 973 * With jdata files, we have to journal a revoke for each block which is
 974 * truncated. As a result, we need to split this into separate transactions
 975 * if the number of pages being truncated gets too large.
 976 */
 977
 978static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
 979{
 980        struct gfs2_sbd *sdp = GFS2_SB(inode);
 981        u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
 982        u64 chunk;
 983        int error;
 984
 985        while (oldsize != newsize) {
 986                struct gfs2_trans *tr;
 987                unsigned int offs;
 988
 989                chunk = oldsize - newsize;
 990                if (chunk > max_chunk)
 991                        chunk = max_chunk;
 992
 993                offs = oldsize & ~PAGE_MASK;
 994                if (offs && chunk > PAGE_SIZE)
 995                        chunk = offs + ((chunk - offs) & PAGE_MASK);
 996
 997                truncate_pagecache(inode, oldsize - chunk);
 998                oldsize -= chunk;
 999
1000                tr = current->journal_info;

1001                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1002                        continue;
1003
1004                gfs2_trans_end(sdp);
1005                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1006                if (error)
1007                        return error;
1008        }
1009
1010        return 0;
1011}
1012
1013static int trunc_start(struct inode *inode, u64 newsize)
1014{
1015        struct gfs2_inode *ip = GFS2_I(inode);
1016        struct gfs2_sbd *sdp = GFS2_SB(inode);
1017        struct buffer_head *dibh = NULL;
1018        int journaled = gfs2_is_jdata(ip);
1019        u64 oldsize = inode->i_size;
1020        int error;
1021
1022        if (journaled)
1023                error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1024        else
1025                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1026        if (error)
1027                return error;
1028
1029        error = gfs2_meta_inode_buffer(ip, &dibh);
1030        if (error)
1031                goto out;
1032
1033        gfs2_trans_add_meta(ip->i_gl, dibh);
1034
1035        if (gfs2_is_stuffed(ip)) {
1036                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1037        } else {
1038                unsigned int blocksize = i_blocksize(inode);
1039                unsigned int offs = newsize & (blocksize - 1);
1040                if (offs) {
1041                        error = gfs2_block_zero_range(inode, newsize,
1042                                                      blocksize - offs);
1043                        if (error)
1044                                goto out;
1045                }
1046                ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1047        }
1048
1049        i_size_write(inode, newsize);
1050        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1051        gfs2_dinode_out(ip, dibh->b_data);
1052
1053        if (journaled)
1054                error = gfs2_journaled_truncate(inode, oldsize, newsize);
1055        else
1056                truncate_pagecache(inode, newsize);
1057
1058out:
1059        brelse(dibh);
1060        if (current->journal_info)
1061                gfs2_trans_end(sdp);
1062        return error;
1063}
1064
1065/**
1066 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1067 * @ip: inode
1068 * @rg_gh: holder of resource group glock
1069 * @bh: buffer head to sweep
1070 * @start: starting point in bh
1071 * @end: end point in bh
1072 * @meta: true if bh points to metadata (rather than data)
1073 * @btotal: place to keep count of total blocks freed
1074 *
1075 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1076 * free, and free them all. However, we do it one rgrp at a time. If this
1077 * block has references to multiple rgrps, we break it into individual
1078 * transactions. This allows other processes to use the rgrps while we're
1079 * focused on a single one, for better concurrency / performance.
1080 * At every transaction boundary, we rewrite the inode into the journal.
1081 * That way the bitmaps are kept consistent with the inode and we can recover
1082 * if we're interrupted by power-outages.
1083 *
1084 * Returns: 0, or return code if an error occurred.
1085 *          *btotal has the total number of blocks freed
1086 */
1087static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1088                              struct buffer_head *bh, __be64 *start, __be64 *end,
1089                              bool meta, u32 *btotal)
1090{
1091        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1092        struct gfs2_rgrpd *rgd;
1093        struct gfs2_trans *tr;
1094        __be64 *p;
1095        int blks_outside_rgrp;
1096        u64 bn, bstart, isize_blks;
1097        s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1098        int ret = 0;
1099        bool buf_in_tr = false; /* buffer was added to transaction */
1100
1101more_rgrps:
1102        rgd = NULL;
1103        if (gfs2_holder_initialized(rd_gh)) {
1104                rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1105                gfs2_assert_withdraw(sdp,
1106                             gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1107        }
1108        blks_outside_rgrp = 0;
1109        bstart = 0;
1110        blen = 0;
1111
1112        for (p = start; p < end; p++) {
1113                if (!*p)
1114                        continue;
1115                bn = be64_to_cpu(*p);
1116
1117                if (rgd) {
1118                        if (!rgrp_contains_block(rgd, bn)) {
1119                                blks_outside_rgrp++;
1120                                continue;
1121                        }
1122                } else {
1123                        rgd = gfs2_blk2rgrpd(sdp, bn, true);
1124                        if (unlikely(!rgd)) {
1125                                ret = -EIO;
1126                                goto out;
1127                        }
1128                        ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1129                                                 0, rd_gh);
1130                        if (ret)
1131                                goto out;
1132
1133                        /* Must be done with the rgrp glock held: */
1134                        if (gfs2_rs_active(&ip->i_res) &&
1135                            rgd == ip->i_res.rs_rbm.rgd)
1136                                gfs2_rs_deltree(&ip->i_res);
1137                }
1138
1139                /* The size of our transactions will be unknown until we
1140                   actually process all the metadata blocks that relate to
1141                   the rgrp. So we estimate. We know it can't be more than
1142                   the dinode's i_blocks and we don't want to exceed the
1143                   journal flush threshold, sd_log_thresh2. */
1144                if (current->journal_info == NULL) {
1145                        unsigned int jblocks_rqsted, revokes;
1146
1147                        jblocks_rqsted = rgd->rd_length + RES_DINODE +
1148                                RES_INDIRECT;
1149                        isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1150                        if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1151                                jblocks_rqsted +=
1152                                        atomic_read(&sdp->sd_log_thresh2);
1153                        else
1154                                jblocks_rqsted += isize_blks;
1155                        revokes = jblocks_rqsted;
1156                        if (meta)
1157                                revokes += end - start;
1158                        else if (ip->i_depth)
1159                                revokes += sdp->sd_inptrs;
1160                        ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1161                        if (ret)
1162                                goto out_unlock;
1163                        down_write(&ip->i_rw_mutex);
1164                }
1165                /* check if we will exceed the transaction blocks requested */
1166                tr = current->journal_info;
1167                if (tr->tr_num_buf_new + RES_STATFS +
1168                    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1169                        /* We set blks_outside_rgrp to ensure the loop will
1170                           be repeated for the same rgrp, but with a new
1171                           transaction. */
1172                        blks_outside_rgrp++;
1173                        /* This next part is tricky. If the buffer was added
1174                           to the transaction, we've already set some block
1175                           pointers to 0, so we better follow through and free
1176                           them, or we will introduce corruption (so break).
1177                           This may be impossible, or at least rare, but I
1178                           decided to cover the case regardless.
1179
1180                           If the buffer was not added to the transaction
1181                           (this call), doing so would exceed our transaction
1182                           size, so we need to end the transaction and start a
1183                           new one (so goto). */
1184
1185                        if (buf_in_tr)
1186                                break;
1187                        goto out_unlock;
1188                }
1189
1190                gfs2_trans_add_meta(ip->i_gl, bh);
1191                buf_in_tr = true;
1192                *p = 0;
1193                if (bstart + blen == bn) {
1194                        blen++;
1195                        continue;
1196                }
1197                if (bstart) {
1198                        __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1199                        (*btotal) += blen;
1200                        gfs2_add_inode_blocks(&ip->i_inode, -blen);
1201                }
1202                bstart = bn;
1203                blen = 1;
1204        }
1205        if (bstart) {
1206                __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1207                (*btotal) += blen;
1208                gfs2_add_inode_blocks(&ip->i_inode, -blen);
1209        }
1210out_unlock:
1211        if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1212                                            outside the rgrp we just processed,
1213                                            do it all over again. */
1214                if (current->journal_info) {
1215                        struct buffer_head *dibh;
1216
1217                        ret = gfs2_meta_inode_buffer(ip, &dibh);
1218                        if (ret)
1219                                goto out;
1220
1221                        /* Every transaction boundary, we rewrite the dinode
1222                           to keep its di_blocks current in case of failure. */
1223                        ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1224                                current_time(&ip->i_inode);
1225                        gfs2_trans_add_meta(ip->i_gl, dibh);
1226                        gfs2_dinode_out(ip, dibh->b_data);
1227                        brelse(dibh);
1228                        up_write(&ip->i_rw_mutex);
1229                        gfs2_trans_end(sdp);
1230                }
1231                gfs2_glock_dq_uninit(rd_gh);
1232                cond_resched();
1233                goto more_rgrps;
1234        }
1235out:
1236        return ret;
1237}
1238
1239static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1240{
1241        if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1242                return false;
1243        return true;
1244}
1245
1246/**
1247 * find_nonnull_ptr - find a non-null pointer given a metapath and height
1248 * @mp: starting metapath
1249 * @h: desired height to search
1250 *
1251 * Assumes the metapath is valid (with buffers) out to height h.
1252 * Returns: true if a non-null pointer was found in the metapath buffer
1253 *          false if all remaining pointers are NULL in the buffer
1254 */
1255static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1256                             unsigned int h,
1257                             __u16 *end_list, unsigned int end_aligned)
1258{
1259        struct buffer_head *bh = mp->mp_bh[h];
1260        __be64 *first, *ptr, *end;
1261
1262        first = metaptr1(h, mp);
1263        ptr = first + mp->mp_list[h];
1264        end = (__be64 *)(bh->b_data + bh->b_size);
1265        if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1266                bool keep_end = h < end_aligned;
1267                end = first + end_list[h] + keep_end;
1268        }
1269
1270        while (ptr < end) {
1271                if (*ptr) { /* if we have a non-null pointer */
1272                        mp->mp_list[h] = ptr - first;
1273                        h++;
1274                        if (h < GFS2_MAX_META_HEIGHT)
1275                                mp->mp_list[h] = 0;
1276                        return true;
1277                }
1278                ptr++;
1279        }
1280        return false;
1281}
1282
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
        /* Strip a metapath with all buffers read in */
        DEALLOC_MP_FULL = 0,
        /* lower the metapath strip height */
        DEALLOC_MP_LOWER = 1,
        /* Fill in the metapath to the given height. */
        DEALLOC_FILL_MP = 2,
        /* process complete */
        DEALLOC_DONE = 3,
};
1289
1290static inline void
1291metapointer_range(struct metapath *mp, int height,
1292                  __u16 *start_list, unsigned int start_aligned,
1293                  __u16 *end_list, unsigned int end_aligned,
1294                  __be64 **start, __be64 **end)
1295{
1296        struct buffer_head *bh = mp->mp_bh[height];
1297        __be64 *first;
1298
1299        first = metaptr1(height, mp);
1300        *start = first;
1301        if (mp_eq_to_hgt(mp, start_list, height)) {
1302                bool keep_start = height < start_aligned;
1303                *start = first + start_list[height] + keep_start;
1304        }
1305        *end = (__be64 *)(bh->b_data + bh->b_size);
1306        if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1307                bool keep_end = height < end_aligned;
1308                *end = first + end_list[height] + keep_end;
1309        }
1310}
1311
1312static inline bool walk_done(struct gfs2_sbd *sdp,
1313                             struct metapath *mp, int height,
1314                             __u16 *end_list, unsigned int end_aligned)
1315{
1316        __u16 end;
1317
1318        if (end_list) {
1319                bool keep_end = height < end_aligned;
1320                if (!mp_eq_to_hgt(mp, end_list, height))
1321                        return false;
1322                end = end_list[height] + keep_end;
1323        } else
1324                end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1325        return mp->mp_list[height] >= end;
1326}
1327
1328/**
1329 * punch_hole - deallocate blocks in a file
1330 * @ip: inode to truncate
1331 * @offset: the start of the hole
1332 * @length: the size of the hole (or 0 for truncate)
1333 *
1334 * Punch a hole into a file or truncate a file at a given position.  This
1335 * function operates in whole blocks (@offset and @length are rounded
1336 * accordingly); partially filled blocks must be cleared otherwise.
1337 *
1338 * This function works from the bottom up, and from the right to the left. In
1339 * other words, it strips off the highest layer (data) before stripping any of
1340 * the metadata. Doing it this way is best in case the operation is interrupted
1341 * by power failure, etc.  The dinode is rewritten in every transaction to
1342 * guarantee integrity.
1343 */
1344static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1345{
1346        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1347        struct metapath mp = {};
1348        struct buffer_head *dibh, *bh;
1349        struct gfs2_holder rd_gh;
1350        unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1351        u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1352        __u16 start_list[GFS2_MAX_META_HEIGHT];
1353        __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1354        unsigned int start_aligned, uninitialized_var(end_aligned);
1355        unsigned int strip_h = ip->i_height - 1;
1356        u32 btotal = 0;
1357        int ret, state;
1358        int mp_h; /* metapath buffers are read in to this height */
1359        u64 prev_bnr = 0;
1360        __be64 *start, *end;
1361
1362        /*
1363         * The start position of the hole is defined by lblock, start_list, and
1364         * start_aligned.  The end position of the hole is defined by lend,
1365         * end_list, and end_aligned.
1366         *
1367         * start_aligned and end_aligned define down to which height the start
1368         * and end positions are aligned to the metadata tree (i.e., the
1369         * position is a multiple of the metadata granularity at the height
1370         * above).  This determines at which heights additional meta pointers
1371         * needs to be preserved for the remaining data.
1372         */
1373
1374        if (length) {
1375                u64 maxsize = sdp->sd_heightsize[ip->i_height];
1376                u64 end_offset = offset + length;
1377                u64 lend;
1378
1379                /*
1380                 * Clip the end at the maximum file size for the given height:
1381                 * that's how far the metadata goes; files bigger than that
1382                 * will have additional layers of indirection.
1383                 */
1384                if (end_offset > maxsize)
1385                        end_offset = maxsize;
1386                lend = end_offset >> bsize_shift;
1387
1388                if (lblock >= lend)
1389                        return 0;
1390
1391                find_metapath(sdp, lend, &mp, ip->i_height);
1392                end_list = __end_list;
1393                memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1394
1395                for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1396                        if (end_list[mp_h])
1397                                break;
1398                }
1399                end_aligned = mp_h;
1400        }
1401
1402        find_metapath(sdp, lblock, &mp, ip->i_height);
1403        memcpy(start_list, mp.mp_list, sizeof(start_list));
1404
1405        for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1406                if (start_list[mp_h])
1407                        break;
1408        }
1409        start_aligned = mp_h;
1410
1411        ret = gfs2_meta_inode_buffer(ip, &dibh);
1412        if (ret)
1413                return ret;
1414
1415        mp.mp_bh[0] = dibh;
1416        ret = lookup_metapath(ip, &mp);
1417        if (ret)
1418                goto out_metapath;
1419
1420        /* issue read-ahead on metadata */
1421        for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1422                metapointer_range(&mp, mp_h, start_list, start_aligned,
1423                                  end_list, end_aligned, &start, &end);
1424                gfs2_metapath_ra(ip->i_gl, start, end);
1425        }
1426
1427        if (mp.mp_aheight == ip->i_height)
1428                state = DEALLOC_MP_FULL; /* We have a complete metapath */
1429        else
1430                state = DEALLOC_FILL_MP; /* deal with partial metapath */
1431
1432        ret = gfs2_rindex_update(sdp);
1433        if (ret)
1434                goto out_metapath;
1435
1436        ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1437        if (ret)
1438                goto out_metapath;
1439        gfs2_holder_mark_uninitialized(&rd_gh);
1440
1441        mp_h = strip_h;
1442
1443        while (state != DEALLOC_DONE) {
1444                switch (state) {
1445                /* Truncate a full metapath at the given strip height.
1446                 * Note that strip_h == mp_h in order to be in this state. */
1447                case DEALLOC_MP_FULL:
1448                        bh = mp.mp_bh[mp_h];
1449                        gfs2_assert_withdraw(sdp, bh);
1450                        if (gfs2_assert_withdraw(sdp,
1451                                                 prev_bnr != bh->b_blocknr)) {
1452                                printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1453                                       "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1454                                       sdp->sd_fsname,
1455                                       (unsigned long long)ip->i_no_addr,
1456                                       prev_bnr, ip->i_height, strip_h, mp_h);
1457                        }
1458                        prev_bnr = bh->b_blocknr;
1459
1460                        if (gfs2_metatype_check(sdp, bh,
1461                                                (mp_h ? GFS2_METATYPE_IN :
1462                                                        GFS2_METATYPE_DI))) {
1463                                ret = -EIO;
1464                                goto out;
1465                        }
1466
1467                        /*
1468                         * Below, passing end_aligned as 0 gives us the
1469                         * metapointer range excluding the end point: the end
1470                         * point is the first metapath we must not deallocate!
1471                         */
1472
1473                        metapointer_range(&mp, mp_h, start_list, start_aligned,
1474                                          end_list, 0 /* end_aligned */,
1475                                          &start, &end);
1476                        ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1477                                                 start, end,
1478                                                 mp_h != ip->i_height - 1,
1479                                                 &btotal);
1480
1481                        /* If we hit an error or just swept dinode buffer,
1482                           just exit. */
1483                        if (ret || !mp_h) {
1484                                state = DEALLOC_DONE;
1485                                break;
1486                        }
1487                        state = DEALLOC_MP_LOWER;
1488                        break;
1489
1490                /* lower the metapath strip height */
1491                case DEALLOC_MP_LOWER:
1492                        /* We're done with the current buffer, so release it,
1493                           unless it's the dinode buffer. Then back up to the
1494                           previous pointer. */
1495                        if (mp_h) {
1496                                brelse(mp.mp_bh[mp_h]);
1497                                mp.mp_bh[mp_h] = NULL;
1498                        }
1499                        /* If we can't get any lower in height, we've stripped
1500                           off all we can. Next step is to back up and start
1501                           stripping the previous level of metadata. */
1502                        if (mp_h == 0) {
1503                                strip_h--;
1504                                memcpy(mp.mp_list, start_list, sizeof(start_list));
1505                                mp_h = strip_h;
1506                                state = DEALLOC_FILL_MP;
1507                                break;
1508                        }
1509                        mp.mp_list[mp_h] = 0;
1510                        mp_h--; /* search one metadata height down */
1511                        mp.mp_list[mp_h]++;
1512                        if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1513                                break;
1514                        /* Here we've found a part of the metapath that is not
1515                         * allocated. We need to search at that height for the
1516                         * next non-null pointer. */
1517                        if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1518                                state = DEALLOC_FILL_MP;
1519                                mp_h++;
1520                        }
1521                        /* No more non-null pointers at this height. Back up
1522                           to the previous height and try again. */
1523                        break; /* loop around in the same state */
1524
1525                /* Fill the metapath with buffers to the given height. */
1526                case DEALLOC_FILL_MP:
1527                        /* Fill the buffers out to the current height. */
1528                        ret = fillup_metapath(ip, &mp, mp_h);
1529                        if (ret < 0)
1530                                goto out;
1531
1532                        /* issue read-ahead on metadata */
1533                        if (mp.mp_aheight > 1) {
1534                                for (; ret > 1; ret--) {
1535                                        metapointer_range(&mp, mp.mp_aheight - ret,
1536                                                          start_list, start_aligned,
1537                                                          end_list, end_aligned,
1538                                                          &start, &end);
1539                                        gfs2_metapath_ra(ip->i_gl, start, end);
1540                                }
1541                        }
1542
1543                        /* If buffers found for the entire strip height */
1544                        if (mp.mp_aheight - 1 == strip_h) {
1545                                state = DEALLOC_MP_FULL;
1546                                break;
1547                        }
1548                        if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1549                                mp_h = mp.mp_aheight - 1;
1550
1551                        /* If we find a non-null block pointer, crawl a bit
1552                           higher up in the metapath and try again, otherwise
1553                           we need to look lower for a new starting point. */
1554                        if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1555                                mp_h++;
1556                        else
1557                                state = DEALLOC_MP_LOWER;
1558                        break;
1559                }
1560        }
1561
1562        if (btotal) {
1563                if (current->journal_info == NULL) {
1564                        ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1565                                               RES_QUOTA, 0);
1566                        if (ret)
1567                                goto out;
1568                        down_write(&ip->i_rw_mutex);
1569                }
1570                gfs2_statfs_change(sdp, 0, +btotal, 0);
1571                gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1572                                  ip->i_inode.i_gid);
1573                ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1574                gfs2_trans_add_meta(ip->i_gl, dibh);
1575                gfs2_dinode_out(ip, dibh->b_data);
1576                up_write(&ip->i_rw_mutex);
1577                gfs2_trans_end(sdp);
1578        }
1579
1580out:
1581        if (gfs2_holder_initialized(&rd_gh))
1582                gfs2_glock_dq_uninit(&rd_gh);
1583        if (current->journal_info) {
1584                up_write(&ip->i_rw_mutex);
1585                gfs2_trans_end(sdp);
1586                cond_resched();
1587        }
1588        gfs2_quota_unhold(ip);
1589out_metapath:
1590        release_metapath(&mp);
1591        return ret;
1592}
1593
1594static int trunc_end(struct gfs2_inode *ip)
1595{
1596        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1597        struct buffer_head *dibh;
1598        int error;
1599
1600        error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1601        if (error)
1602                return error;
1603
1604        down_write(&ip->i_rw_mutex);
1605
1606        error = gfs2_meta_inode_buffer(ip, &dibh);
1607        if (error)
1608                goto out;
1609
1610        if (!i_size_read(&ip->i_inode)) {
1611                ip->i_height = 0;
1612                ip->i_goal = ip->i_no_addr;
1613                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1614                gfs2_ordered_del_inode(ip);
1615        }
1616        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1617        ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1618
1619        gfs2_trans_add_meta(ip->i_gl, dibh);
1620        gfs2_dinode_out(ip, dibh->b_data);
1621        brelse(dibh);
1622
1623out:
1624        up_write(&ip->i_rw_mutex);
1625        gfs2_trans_end(sdp);
1626        return error;
1627}
1628
1629/**
1630 * do_shrink - make a file smaller
1631 * @inode: the inode
1632 * @newsize: the size to make the file
1633 *
1634 * Called with an exclusive lock on @inode. The @size must
1635 * be equal to or smaller than the current inode size.
1636 *
1637 * Returns: errno
1638 */
1639
1640static int do_shrink(struct inode *inode, u64 newsize)
1641{
1642        struct gfs2_inode *ip = GFS2_I(inode);
1643        int error;
1644
1645        error = trunc_start(inode, newsize);
1646        if (error < 0)
1647                return error;
1648        if (gfs2_is_stuffed(ip))
1649                return 0;
1650
1651        error = punch_hole(ip, newsize, 0);
1652        if (error == 0)
1653                error = trunc_end(ip);
1654
1655        return error;
1656}
1657
1658void gfs2_trim_blocks(struct inode *inode)
1659{
1660        int ret;
1661
1662        ret = do_shrink(inode, inode->i_size);
1663        WARN_ON(ret != 0);
1664}
1665
1666/**
1667 * do_grow - Touch and update inode size
1668 * @inode: The inode
1669 * @size: The new size
1670 *
1671 * This function updates the timestamps on the inode and
1672 * may also increase the size of the inode. This function
1673 * must not be called with @size any smaller than the current
1674 * inode size.
1675 *
1676 * Although it is not strictly required to unstuff files here,
1677 * earlier versions of GFS2 have a bug in the stuffed file reading
1678 * code which will result in a buffer overrun if the size is larger
1679 * than the max stuffed file size. In order to prevent this from
1680 * occurring, such files are unstuffed, but in other cases we can
1681 * just update the inode size directly.
1682 *
1683 * Returns: 0 on success, or -ve on error
1684 */
1685
1686static int do_grow(struct inode *inode, u64 size)
1687{
1688        struct gfs2_inode *ip = GFS2_I(inode);
1689        struct gfs2_sbd *sdp = GFS2_SB(inode);
1690        struct gfs2_alloc_parms ap = { .target = 1, };
1691        struct buffer_head *dibh;
1692        int error;
1693        int unstuff = 0;
1694
1695        if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1696                error = gfs2_quota_lock_check(ip, &ap);
1697                if (error)
1698                        return error;
1699
1700                error = gfs2_inplace_reserve(ip, &ap);
1701                if (error)
1702                        goto do_grow_qunlock;
1703                unstuff = 1;
1704        }
1705
1706        error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1707                                 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1708                                  0 : RES_QUOTA), 0);
1709        if (error)
1710                goto do_grow_release;
1711
1712        if (unstuff) {
1713                error = gfs2_unstuff_dinode(ip, NULL);
1714                if (error)
1715                        goto do_end_trans;
1716        }
1717
1718        error = gfs2_meta_inode_buffer(ip, &dibh);
1719        if (error)
1720                goto do_end_trans;
1721
1722        i_size_write(inode, size);
1723        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1724        gfs2_trans_add_meta(ip->i_gl, dibh);
1725        gfs2_dinode_out(ip, dibh->b_data);
1726        brelse(dibh);
1727
1728do_end_trans:
1729        gfs2_trans_end(sdp);
1730do_grow_release:
1731        if (unstuff) {
1732                gfs2_inplace_release(ip);
1733do_grow_qunlock:
1734                gfs2_quota_unlock(ip);
1735        }
1736        return error;
1737}
1738
1739/**
1740 * gfs2_setattr_size - make a file a given size
1741 * @inode: the inode
1742 * @newsize: the size to make the file
1743 *
1744 * The file size can grow, shrink, or stay the same size. This
1745 * is called holding i_mutex and an exclusive glock on the inode
1746 * in question.
1747 *
1748 * Returns: errno
1749 */
1750
1751int gfs2_setattr_size(struct inode *inode, u64 newsize)
1752{
1753        struct gfs2_inode *ip = GFS2_I(inode);
1754        int ret;
1755
1756        BUG_ON(!S_ISREG(inode->i_mode));
1757
1758        ret = inode_newsize_ok(inode, newsize);
1759        if (ret)
1760                return ret;
1761
1762        inode_dio_wait(inode);
1763
1764        ret = gfs2_rsqa_alloc(ip);
1765        if (ret)
1766                goto out;
1767
1768        if (newsize >= inode->i_size) {
1769                ret = do_grow(inode, newsize);
1770                goto out;
1771        }
1772
1773        ret = do_shrink(inode, newsize);
1774out:
1775        gfs2_rsqa_delete(ip, NULL);
1776        return ret;
1777}
1778
1779int gfs2_truncatei_resume(struct gfs2_inode *ip)
1780{
1781        int error;
1782        error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1783        if (!error)
1784                error = trunc_end(ip);
1785        return error;
1786}
1787
1788int gfs2_file_dealloc(struct gfs2_inode *ip)
1789{
1790        return punch_hole(ip, 0, 0);
1791}
1792
1793/**
1794 * gfs2_free_journal_extents - Free cached journal bmap info
1795 * @jd: The journal
1796 *
1797 */
1798
1799void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1800{
1801        struct gfs2_journal_extent *jext;
1802
1803        while(!list_empty(&jd->extent_list)) {
1804                jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1805                list_del(&jext->list);
1806                kfree(jext);
1807        }
1808}
1809
1810/**
1811 * gfs2_add_jextent - Add or merge a new extent to extent cache
1812 * @jd: The journal descriptor
1813 * @lblock: The logical block at start of new extent
1814 * @dblock: The physical block at start of new extent
1815 * @blocks: Size of extent in fs blocks
1816 *
1817 * Returns: 0 on success or -ENOMEM
1818 */
1819
1820static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1821{
1822        struct gfs2_journal_extent *jext;
1823
1824        if (!list_empty(&jd->extent_list)) {
1825                jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1826                if ((jext->dblock + jext->blocks) == dblock) {
1827                        jext->blocks += blocks;
1828                        return 0;
1829                }
1830        }
1831
1832        jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1833        if (jext == NULL)
1834                return -ENOMEM;
1835        jext->dblock = dblock;
1836        jext->lblock = lblock;
1837        jext->blocks = blocks;
1838        list_add_tail(&jext->list, &jd->extent_list);
1839        jd->nr_extents++;
1840        return 0;
1841}
1842
1843/**
1844 * gfs2_map_journal_extents - Cache journal bmap info
1845 * @sdp: The super block
1846 * @jd: The journal to map
1847 *
1848 * Create a reusable "extent" mapping from all logical
1849 * blocks to all physical blocks for the given journal.  This will save
1850 * us time when writing journal blocks.  Most journals will have only one
1851 * extent that maps all their logical blocks.  That's because gfs2.mkfs
1852 * arranges the journal blocks sequentially to maximize performance.
1853 * So the extent would map the first block for the entire file length.
1854 * However, gfs2_jadd can happen while file activity is happening, so
1855 * those journals may not be sequential.  Less likely is the case where
1856 * the users created their own journals by mounting the metafs and
1857 * laying it out.  But it's still possible.  These journals might have
1858 * several extents.
1859 *
1860 * Returns: 0 on success, or error on failure
1861 */
1862
1863int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1864{
1865        u64 lblock = 0;
1866        u64 lblock_stop;
1867        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1868        struct buffer_head bh;
1869        unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1870        u64 size;
1871        int rc;
1872
1873        lblock_stop = i_size_read(jd->jd_inode) >> shift;
1874        size = (lblock_stop - lblock) << shift;
1875        jd->nr_extents = 0;
1876        WARN_ON(!list_empty(&jd->extent_list));
1877
1878        do {
1879                bh.b_state = 0;
1880                bh.b_blocknr = 0;
1881                bh.b_size = size;
1882                rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1883                if (rc || !buffer_mapped(&bh))
1884                        goto fail;
1885                rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1886                if (rc)
1887                        goto fail;
1888                size -= bh.b_size;
1889                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1890        } while(size > 0);
1891
1892        fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1893                jd->nr_extents);
1894        return 0;
1895
1896fail:
1897        fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1898                rc, jd->jd_jid,
1899                (unsigned long long)(i_size_read(jd->jd_inode) - size),
1900                jd->nr_extents);
1901        fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1902                rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1903                bh.b_state, (unsigned long long)bh.b_size);
1904        gfs2_free_journal_extents(jd);
1905        return rc;
1906}
1907
1908/**
1909 * gfs2_write_alloc_required - figure out if a write will require an allocation
1910 * @ip: the file being written to
1911 * @offset: the offset to write to
1912 * @len: the number of bytes being written
1913 *
1914 * Returns: 1 if an alloc is required, 0 otherwise
1915 */
1916
1917int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1918                              unsigned int len)
1919{
1920        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1921        struct buffer_head bh;
1922        unsigned int shift;
1923        u64 lblock, lblock_stop, size;
1924        u64 end_of_file;
1925
1926        if (!len)
1927                return 0;
1928
1929        if (gfs2_is_stuffed(ip)) {
1930                if (offset + len > gfs2_max_stuffed_size(ip))
1931                        return 1;
1932                return 0;
1933        }
1934
1935        shift = sdp->sd_sb.sb_bsize_shift;
1936        BUG_ON(gfs2_is_dir(ip));
1937        end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
1938        lblock = offset >> shift;
1939        lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1940        if (lblock_stop > end_of_file)
1941                return 1;
1942
1943        size = (lblock_stop - lblock) << shift;
1944        do {
1945                bh.b_state = 0;
1946                bh.b_size = size;
1947                gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1948                if (!buffer_mapped(&bh))
1949                        return 1;
1950                size -= bh.b_size;
1951                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1952        } while(size > 0);
1953
1954        return 0;
1955}
1956
1957static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1958{
1959        struct gfs2_inode *ip = GFS2_I(inode);
1960        struct buffer_head *dibh;
1961        int error;
1962
1963        if (offset >= inode->i_size)
1964                return 0;
1965        if (offset + length > inode->i_size)
1966                length = inode->i_size - offset;
1967
1968        error = gfs2_meta_inode_buffer(ip, &dibh);
1969        if (error)
1970                return error;
1971        gfs2_trans_add_meta(ip->i_gl, dibh);
1972        memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1973               length);
1974        brelse(dibh);
1975        return 0;
1976}
1977
1978static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
1979                                         loff_t length)
1980{
1981        struct gfs2_sbd *sdp = GFS2_SB(inode);
1982        loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1983        int error;
1984
1985        while (length) {
1986                struct gfs2_trans *tr;
1987                loff_t chunk;
1988                unsigned int offs;
1989
1990                chunk = length;
1991                if (chunk > max_chunk)
1992                        chunk = max_chunk;
1993
1994                offs = offset & ~PAGE_MASK;
1995                if (offs && chunk > PAGE_SIZE)
1996                        chunk = offs + ((chunk - offs) & PAGE_MASK);
1997
1998                truncate_pagecache_range(inode, offset, chunk);
1999                offset += chunk;
2000                length -= chunk;

2001
2002                tr = current->journal_info;
2003                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2004                        continue;
2005
2006                gfs2_trans_end(sdp);
2007                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2008                if (error)
2009                        return error;
2010        }
2011        return 0;
2012}
2013
2014int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2015{
2016        struct inode *inode = file_inode(file);
2017        struct gfs2_inode *ip = GFS2_I(inode);
2018        struct gfs2_sbd *sdp = GFS2_SB(inode);
2019        int error;
2020
2021        if (gfs2_is_jdata(ip))
2022                error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2023                                         GFS2_JTRUNC_REVOKES);
2024        else
2025                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2026        if (error)
2027                return error;
2028
2029        if (gfs2_is_stuffed(ip)) {
2030                error = stuffed_zero_range(inode, offset, length);
2031                if (error)
2032                        goto out;
2033        } else {
2034                unsigned int start_off, end_off, blocksize;
2035
2036                blocksize = i_blocksize(inode);
2037                start_off = offset & (blocksize - 1);
2038                end_off = (offset + length) & (blocksize - 1);
2039                if (start_off) {
2040                        unsigned int len = length;
2041                        if (length > blocksize - start_off)
2042                                len = blocksize - start_off;
2043                        error = gfs2_block_zero_range(inode, offset, len);
2044                        if (error)
2045                                goto out;
2046                        if (start_off + length < blocksize)
2047                                end_off = 0;
2048                }
2049                if (end_off) {
2050                        error = gfs2_block_zero_range(inode,
2051                                offset + length - end_off, end_off);
2052                        if (error)
2053                                goto out;
2054                }
2055        }
2056
2057        if (gfs2_is_jdata(ip)) {
2058                BUG_ON(!current->journal_info);
2059                gfs2_journaled_truncate_range(inode, offset, length);
2060        } else
2061                truncate_pagecache_range(inode, offset, offset + length - 1);
2062
2063        file_update_time(file);
2064        mark_inode_dirty(inode);
2065
2066        if (current->journal_info)
2067                gfs2_trans_end(sdp);
2068
2069        if (!gfs2_is_stuffed(ip))
2070                error = punch_hole(ip, offset, length);
2071
2072out:
2073        if (current->journal_info)
2074                gfs2_trans_end(sdp);
2075        return error;
2076}
2077