linux/fs/gfs2/bmap.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5 */
   6
   7#include <linux/spinlock.h>
   8#include <linux/completion.h>
   9#include <linux/buffer_head.h>
  10#include <linux/blkdev.h>
  11#include <linux/gfs2_ondisk.h>
  12#include <linux/crc32.h>
  13#include <linux/iomap.h>
  14#include <linux/ktime.h>
  15
  16#include "gfs2.h"
  17#include "incore.h"
  18#include "bmap.h"
  19#include "glock.h"
  20#include "inode.h"
  21#include "meta_io.h"
  22#include "quota.h"
  23#include "rgrp.h"
  24#include "log.h"
  25#include "super.h"
  26#include "trans.h"
  27#include "dir.h"
  28#include "util.h"
  29#include "aops.h"
  30#include "trace_gfs2.h"
  31
  32/* This doesn't need to be that large as max 64 bit pointers in a 4k
  33 * block is 512, so __u16 is fine for that. It saves stack space to
  34 * keep it small.
  35 */
  36struct metapath {
  37        struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
  38        __u16 mp_list[GFS2_MAX_META_HEIGHT];
  39        int mp_fheight; /* find_metapath height */
  40        int mp_aheight; /* actual height (lookup height) */
  41};
  42
  43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
  44
  45/**
  46 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  47 * @ip: the inode
  48 * @dibh: the dinode buffer
  49 * @block: the block number that was allocated
  50 * @page: The (optional) page. This is looked up if @page is NULL
  51 *
  52 * Returns: errno
  53 */
  54
  55static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  56                               u64 block, struct page *page)
  57{
  58        struct inode *inode = &ip->i_inode;
  59
  60        if (!PageUptodate(page)) {
  61                void *kaddr = kmap(page);
  62                u64 dsize = i_size_read(inode);
  63 
  64                if (dsize > gfs2_max_stuffed_size(ip))
  65                        dsize = gfs2_max_stuffed_size(ip);
  66
  67                memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  68                memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  69                kunmap(page);
  70
  71                SetPageUptodate(page);
  72        }
  73
  74        if (gfs2_is_jdata(ip)) {
  75                struct buffer_head *bh;
  76
  77                if (!page_has_buffers(page))
  78                        create_empty_buffers(page, BIT(inode->i_blkbits),
  79                                             BIT(BH_Uptodate));
  80
  81                bh = page_buffers(page);
  82                if (!buffer_mapped(bh))
  83                        map_bh(bh, inode->i_sb, block);
  84
  85                set_buffer_uptodate(bh);
  86                gfs2_trans_add_data(ip->i_gl, bh);
  87        } else {
  88                set_page_dirty(page);
  89                gfs2_ordered_add_inode(ip);
  90        }
  91
  92        return 0;
  93}
  94
  95static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
  96{
  97        struct buffer_head *bh, *dibh;
  98        struct gfs2_dinode *di;
  99        u64 block = 0;
 100        int isdir = gfs2_is_dir(ip);
 101        int error;
 102
 103        error = gfs2_meta_inode_buffer(ip, &dibh);
 104        if (error)
 105                return error;
 106
 107        if (i_size_read(&ip->i_inode)) {
 108                /* Get a free block, fill it with the stuffed data,
 109                   and write it out to disk */
 110
 111                unsigned int n = 1;
 112                error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 113                if (error)
 114                        goto out_brelse;
 115                if (isdir) {
 116                        gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
 117                        error = gfs2_dir_get_new_buffer(ip, block, &bh);
 118                        if (error)
 119                                goto out_brelse;
 120                        gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 121                                              dibh, sizeof(struct gfs2_dinode));
 122                        brelse(bh);
 123                } else {
 124                        error = gfs2_unstuffer_page(ip, dibh, block, page);
 125                        if (error)
 126                                goto out_brelse;
 127                }
 128        }
 129
 130        /*  Set up the pointer to the new block  */
 131
 132        gfs2_trans_add_meta(ip->i_gl, dibh);
 133        di = (struct gfs2_dinode *)dibh->b_data;
 134        gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 135
 136        if (i_size_read(&ip->i_inode)) {
 137                *(__be64 *)(di + 1) = cpu_to_be64(block);
 138                gfs2_add_inode_blocks(&ip->i_inode, 1);
 139                di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 140        }
 141
 142        ip->i_height = 1;
 143        di->di_height = cpu_to_be16(1);
 144
 145out_brelse:
 146        brelse(dibh);
 147        return error;
 148}
 149
 150/**
 151 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 152 * @ip: The GFS2 inode to unstuff
 153 *
 154 * This routine unstuffs a dinode and returns it to a "normal" state such
 155 * that the height can be grown in the traditional way.
 156 *
 157 * Returns: errno
 158 */
 159
 160int gfs2_unstuff_dinode(struct gfs2_inode *ip)
 161{
 162        struct inode *inode = &ip->i_inode;
 163        struct page *page;
 164        int error;
 165
 166        down_write(&ip->i_rw_mutex);
 167        page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
 168        error = -ENOMEM;
 169        if (!page)
 170                goto out;
 171        error = __gfs2_unstuff_inode(ip, page);
 172        unlock_page(page);
 173        put_page(page);
 174out:
 175        up_write(&ip->i_rw_mutex);
 176        return error;
 177}
 178
 179/**
 180 * find_metapath - Find path through the metadata tree
 181 * @sdp: The superblock
 182 * @block: The disk block to look up
 183 * @mp: The metapath to return the result in
 184 * @height: The pre-calculated height of the metadata tree
 185 *
 186 *   This routine returns a struct metapath structure that defines a path
 187 *   through the metadata of inode "ip" to get to block "block".
 188 *
 189 *   Example:
 190 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 191 *   filesystem with a blocksize of 4096.
 192 *
 193 *   find_metapath() would return a struct metapath structure set to:
 194 *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 195 *
 196 *   That means that in order to get to the block containing the byte at
 197 *   offset 101342453, we would load the indirect block pointed to by pointer
 198 *   0 in the dinode.  We would then load the indirect block pointed to by
 199 *   pointer 48 in that indirect block.  We would then load the data block
 200 *   pointed to by pointer 165 in that indirect block.
 201 *
 202 *             ----------------------------------------
 203 *             | Dinode |                             |
 204 *             |        |                            4|
 205 *             |        |0 1 2 3 4 5                 9|
 206 *             |        |                            6|
 207 *             ----------------------------------------
 208 *                       |
 209 *                       |
 210 *                       V
 211 *             ----------------------------------------
 212 *             | Indirect Block                       |
 213 *             |                                     5|
 214 *             |            4 4 4 4 4 5 5            1|
 215 *             |0           5 6 7 8 9 0 1            2|
 216 *             ----------------------------------------
 217 *                                |
 218 *                                |
 219 *                                V
 220 *             ----------------------------------------
 221 *             | Indirect Block                       |
 222 *             |                         1 1 1 1 1   5|
 223 *             |                         6 6 6 6 6   1|
 224 *             |0                        3 4 5 6 7   2|
 225 *             ----------------------------------------
 226 *                                           |
 227 *                                           |
 228 *                                           V
 229 *             ----------------------------------------
 230 *             | Data block containing offset         |
 231 *             |            101342453                 |
 232 *             |                                      |
 233 *             |                                      |
 234 *             ----------------------------------------
 235 *
 236 */
 237
 238static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 239                          struct metapath *mp, unsigned int height)
 240{
 241        unsigned int i;
 242
 243        mp->mp_fheight = height;
 244        for (i = height; i--;)
 245                mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 246}
 247
 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
 249{
 250        if (mp->mp_list[0] == 0)
 251                return 2;
 252        return 1;
 253}
 254
 255/**
 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 257 * @height: The metadata height (0 = dinode)
 258 * @mp: The metapath
 259 */
 260static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 261{
 262        struct buffer_head *bh = mp->mp_bh[height];
 263        if (height == 0)
 264                return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 265        return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 266}
 267
 268/**
 269 * metapointer - Return pointer to start of metadata in a buffer
 270 * @height: The metadata height (0 = dinode)
 271 * @mp: The metapath
 272 *
 273 * Return a pointer to the block number of the next height of the metadata
 274 * tree given a buffer containing the pointer to the current height of the
 275 * metadata tree.
 276 */
 277
 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 279{
 280        __be64 *p = metaptr1(height, mp);
 281        return p + mp->mp_list[height];
 282}
 283
 284static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
 285{
 286        const struct buffer_head *bh = mp->mp_bh[height];
 287        return (const __be64 *)(bh->b_data + bh->b_size);
 288}
 289
 290static void clone_metapath(struct metapath *clone, struct metapath *mp)
 291{
 292        unsigned int hgt;
 293
 294        *clone = *mp;
 295        for (hgt = 0; hgt < mp->mp_aheight; hgt++)
 296                get_bh(clone->mp_bh[hgt]);
 297}
 298
 299static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 300{
 301        const __be64 *t;
 302
 303        for (t = start; t < end; t++) {
 304                struct buffer_head *rabh;
 305
 306                if (!*t)
 307                        continue;
 308
 309                rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 310                if (trylock_buffer(rabh)) {
 311                        if (!buffer_uptodate(rabh)) {
 312                                rabh->b_end_io = end_buffer_read_sync;
 313                                submit_bh(REQ_OP_READ,
 314                                          REQ_RAHEAD | REQ_META | REQ_PRIO,
 315                                          rabh);
 316                                continue;
 317                        }
 318                        unlock_buffer(rabh);
 319                }
 320                brelse(rabh);
 321        }
 322}
 323
 324static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 325                             unsigned int x, unsigned int h)
 326{
 327        for (; x < h; x++) {
 328                __be64 *ptr = metapointer(x, mp);
 329                u64 dblock = be64_to_cpu(*ptr);
 330                int ret;
 331
 332                if (!dblock)
 333                        break;
 334                ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]);
 335                if (ret)
 336                        return ret;
 337        }
 338        mp->mp_aheight = x + 1;
 339        return 0;
 340}
 341
 342/**
 343 * lookup_metapath - Walk the metadata tree to a specific point
 344 * @ip: The inode
 345 * @mp: The metapath
 346 *
 347 * Assumes that the inode's buffer has already been looked up and
 348 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 349 * by find_metapath().
 350 *
 351 * If this function encounters part of the tree which has not been
 352 * allocated, it returns the current height of the tree at the point
 353 * at which it found the unallocated block. Blocks which are found are
 354 * added to the mp->mp_bh[] list.
 355 *
 356 * Returns: error
 357 */
 358
 359static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 360{
 361        return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 362}
 363
 364/**
 365 * fillup_metapath - fill up buffers for the metadata path to a specific height
 366 * @ip: The inode
 367 * @mp: The metapath
 368 * @h: The height to which it should be mapped
 369 *
 370 * Similar to lookup_metapath, but does lookups for a range of heights
 371 *
 372 * Returns: error or the number of buffers filled
 373 */
 374
 375static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 376{
 377        unsigned int x = 0;
 378        int ret;
 379
 380        if (h) {
 381                /* find the first buffer we need to look up. */
 382                for (x = h - 1; x > 0; x--) {
 383                        if (mp->mp_bh[x])
 384                                break;
 385                }
 386        }
 387        ret = __fillup_metapath(ip, mp, x, h);
 388        if (ret)
 389                return ret;
 390        return mp->mp_aheight - x - 1;
 391}
 392
 393static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
 394{
 395        sector_t factor = 1, block = 0;
 396        int hgt;
 397
 398        for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
 399                if (hgt < mp->mp_aheight)
 400                        block += mp->mp_list[hgt] * factor;
 401                factor *= sdp->sd_inptrs;
 402        }
 403        return block;
 404}
 405
 406static void release_metapath(struct metapath *mp)
 407{
 408        int i;
 409
 410        for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 411                if (mp->mp_bh[i] == NULL)
 412                        break;
 413                brelse(mp->mp_bh[i]);
 414                mp->mp_bh[i] = NULL;
 415        }
 416}
 417
 418/**
 419 * gfs2_extent_length - Returns length of an extent of blocks
 420 * @bh: The metadata block
 421 * @ptr: Current position in @bh
 422 * @limit: Max extent length to return
 423 * @eob: Set to 1 if we hit "end of block"
 424 *
 425 * Returns: The length of the extent (minimum of one block)
 426 */
 427
 428static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
 429{
 430        const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
 431        const __be64 *first = ptr;
 432        u64 d = be64_to_cpu(*ptr);
 433
 434        *eob = 0;
 435        do {
 436                ptr++;
 437                if (ptr >= end)
 438                        break;
 439                d++;
 440        } while(be64_to_cpu(*ptr) == d);
 441        if (ptr >= end)
 442                *eob = 1;
 443        return ptr - first;
 444}
 445
 446enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };
 447
 448/*
 449 * gfs2_metadata_walker - walk an indirect block
 450 * @mp: Metapath to indirect block
 451 * @ptrs: Number of pointers to look at
 452 *
 453 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 454 * indirect block to follow.
 455 */
 456typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
 457                                                   unsigned int ptrs);
 458
 459/*
 460 * gfs2_walk_metadata - walk a tree of indirect blocks
 461 * @inode: The inode
 462 * @mp: Starting point of walk
 463 * @max_len: Maximum number of blocks to walk
 464 * @walker: Called during the walk
 465 *
 466 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 467 * past the end of metadata, and a negative error code otherwise.
 468 */
 469
 470static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
 471                u64 max_len, gfs2_metadata_walker walker)
 472{
 473        struct gfs2_inode *ip = GFS2_I(inode);
 474        struct gfs2_sbd *sdp = GFS2_SB(inode);
            /* Number of blocks spanned by one pointer at height hgt. */
 475        u64 factor = 1;
 476        unsigned int hgt;
 477        int ret;
 478
 479        /*
 480         * The walk starts in the lowest allocated indirect block, which may be
 481         * before the position indicated by @mp.  Adjust @max_len accordingly
 482         * to avoid a short walk.
 483         */
 484        for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
 485                max_len += mp->mp_list[hgt] * factor;
 486                mp->mp_list[hgt] = 0;
 487                factor *= sdp->sd_inptrs;
 488        }
 489
            /* Here hgt == mp->mp_aheight - 1, the lowest height with a buffer. */
 490        for (;;) {
 491                u16 start = mp->mp_list[hgt];
 492                enum walker_status status;
 493                unsigned int ptrs;
 494                u64 len;
 495
 496                /* Walk indirect block. */
                    /* Height 0 is the dinode, which holds sd_diptrs pointers. */
 497                ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
 498                len = ptrs * factor;
                    /* Do not let the walker look past the remaining budget. */
 499                if (len > max_len)
 500                        ptrs = DIV_ROUND_UP_ULL(max_len, factor);
 501                status = walker(mp, ptrs);
 502                switch (status) {
 503                case WALK_STOP:
 504                        return 1;
 505                case WALK_FOLLOW:
 506                        BUG_ON(mp->mp_aheight == mp->mp_fheight);
                            /*
                             * The walker moved mp_list[hgt] forward; only the
                             * pointers it skipped count against max_len.
                             */
 507                        ptrs = mp->mp_list[hgt] - start;
 508                        len = ptrs * factor;
 509                        break;
 510                case WALK_CONTINUE:
 511                        break;
 512                }
                    /* Budget used up: leave the loop and return 0. */
 513                if (len >= max_len)
 514                        break;
 515                max_len -= len;
 516                if (status == WALK_FOLLOW)
 517                        goto fill_up_metapath;
 518
 519lower_metapath:
 520                /* Decrease height of metapath. */
 521                brelse(mp->mp_bh[hgt]);
 522                mp->mp_bh[hgt] = NULL;
 523                mp->mp_list[hgt] = 0;
                    /* The dinode itself was just finished: nothing left to walk. */
 524                if (!hgt)
 525                        break;
 526                hgt--;
 527                factor *= sdp->sd_inptrs;
 528
 529                /* Advance in metadata tree. */
 530                (mp->mp_list[hgt])++;
 531                if (hgt) {
                            /* Ran off the end of this indirect block: go up again. */
 532                        if (mp->mp_list[hgt] >= sdp->sd_inptrs)
 533                                goto lower_metapath;
 534                } else {
                            /* Ran off the end of the dinode: end of metadata. */
 535                        if (mp->mp_list[hgt] >= sdp->sd_diptrs)
 536                                break;
 537                }
 538
 539fill_up_metapath:
 540                /* Increase height of metapath. */
 541                ret = fillup_metapath(ip, mp, ip->i_height - 1);
 542                if (ret < 0)
 543                        return ret;
                    /* ret levels were descended; shrink factor once per level. */
 544                hgt += ret;
 545                for (; ret; ret--)
 546                        do_div(factor, sdp->sd_inptrs);
 547                mp->mp_aheight = hgt + 1;
 548        }
 549        return 0;
 550}
 551
 552static enum walker_status gfs2_hole_walker(struct metapath *mp,
 553                                           unsigned int ptrs)
 554{
 555        const __be64 *start, *ptr, *end;
 556        unsigned int hgt;
 557
 558        hgt = mp->mp_aheight - 1;
 559        start = metapointer(hgt, mp);
 560        end = start + ptrs;
 561
 562        for (ptr = start; ptr < end; ptr++) {
 563                if (*ptr) {
 564                        mp->mp_list[hgt] += ptr - start;
 565                        if (mp->mp_aheight == mp->mp_fheight)
 566                                return WALK_STOP;
 567                        return WALK_FOLLOW;
 568                }
 569        }
 570        return WALK_CONTINUE;
 571}
 572
 573/**
 574 * gfs2_hole_size - figure out the size of a hole
 575 * @inode: The inode
 576 * @lblock: The logical starting block number
 577 * @len: How far to look (in blocks)
 578 * @mp: The metapath at lblock
 579 * @iomap: The iomap to store the hole size in
 580 *
 581 * This function modifies @mp.
 582 *
 583 * Returns: errno on error
 584 */
 585static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
 586                          struct metapath *mp, struct iomap *iomap)
 587{
 588        struct metapath clone;
 589        u64 hole_size;
 590        int ret;
 591
 592        clone_metapath(&clone, mp);
 593        ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
 594        if (ret < 0)
 595                goto out;
 596
 597        if (ret == 1)
 598                hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
 599        else
 600                hole_size = len;
 601        iomap->length = hole_size << inode->i_blkbits;
 602        ret = 0;
 603
 604out:
 605        release_metapath(&clone);
 606        return ret;
 607}
 608
 609static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 610                                         struct gfs2_glock *gl, unsigned int i,
 611                                         unsigned offset, u64 bn)
 612{
 613        __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 614                       ((i > 1) ? sizeof(struct gfs2_meta_header) :
 615                                 sizeof(struct gfs2_dinode)));
 616        BUG_ON(i < 1);
 617        BUG_ON(mp->mp_bh[i] != NULL);
 618        mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 619        gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 620        gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 621        gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 622        ptr += offset;
 623        *ptr = cpu_to_be64(bn);
 624        return ptr;
 625}
 626
 627enum alloc_state {
 628        ALLOC_DATA = 0,
 629        ALLOC_GROW_DEPTH = 1,
 630        ALLOC_GROW_HEIGHT = 2,
 631        /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 632};
 633
 634/**
 635 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 636 * @inode: The GFS2 inode
 637 * @iomap: The iomap structure
 638 * @mp: The metapath, with proper height information calculated
 639 *
 640 * In this routine we may have to alloc:
 641 *   i) Indirect blocks to grow the metadata tree height
 642 *  ii) Indirect blocks to fill in lower part of the metadata tree
 643 * iii) Data blocks
 644 *
 645 * This function is called after __gfs2_iomap_get, which works out the
 646 * total number of blocks which we need via gfs2_alloc_size.
 647 *
 648 * We then do the actual allocation asking for an extent at a time (if
 649 * enough contiguous free blocks are available, there will only be one
 650 * allocation request per call) and uses the state machine to initialise
 651 * the blocks in order.
 652 *
 653 * Right now, this function will allocate at most one indirect block
 654 * worth of data -- with a default block size of 4K, that's slightly
 655 * less than 2M.  If this limitation is ever removed to allow huge
 656 * allocations, we would probably still want to limit the iomap size we
 657 * return to avoid stalling other tasks during huge writes; the next
 658 * iomap iteration would then find the blocks already allocated.
 659 *
 660 * Returns: errno on error
 661 */
 662
 663static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 664                              struct metapath *mp)
 665{
 666        struct gfs2_inode *ip = GFS2_I(inode);
 667        struct gfs2_sbd *sdp = GFS2_SB(inode);
 668        struct buffer_head *dibh = mp->mp_bh[0];
 669        u64 bn;
 670        unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
            /* Number of data blocks requested, derived from iomap->length. */
 671        size_t dblks = iomap->length >> inode->i_blkbits;
            /* Height of the bottom indirect block, whose pointers name data blocks. */
 672        const unsigned end_of_metadata = mp->mp_fheight - 1;
 673        int ret;
 674        enum alloc_state state;
 675        __be64 *ptr;
            /* Saved copy of the dinode's first pointer while the tree grows taller. */
 676        __be64 zero_bn = 0;
 677
 678        BUG_ON(mp->mp_aheight < 1);
 679        BUG_ON(dibh == NULL);
 680        BUG_ON(dblks < 1);
 681
 682        gfs2_trans_add_meta(ip->i_gl, dibh);
 683
 684        down_write(&ip->i_rw_mutex);
 685
 686        if (mp->mp_fheight == mp->mp_aheight) {
 687                /* Bottom indirect block exists */
 688                state = ALLOC_DATA;
 689        } else {
 690                /* Need to allocate indirect blocks */
 691                if (mp->mp_fheight == ip->i_height) {
 692                        /* Writing into existing tree, extend tree down */
                            /* One indirect block for each missing height. */
 693                        iblks = mp->mp_fheight - mp->mp_aheight;
 694                        state = ALLOC_GROW_DEPTH;
 695                } else {
 696                        /* Building up tree height */
 697                        state = ALLOC_GROW_HEIGHT;
                            /*
                             * Blocks for the added levels, plus blocks for the
                             * branch from branch_start down to the bottom.
                             */
 698                        iblks = mp->mp_fheight - ip->i_height;
 699                        branch_start = metapath_branch_start(mp);
 700                        iblks += (mp->mp_fheight - branch_start);
 701                }
 702        }
 703
 704        /* start of the second part of the function (state machine) */
 705
 706        blks = dblks + iblks;
            /* i is the height index of the next metadata buffer to create. */
 707        i = mp->mp_aheight;
 708        do {
                    /*
                     * Ask for everything still outstanding.  n is passed by
                     * pointer and afterwards added to alloced, i.e. it is
                     * treated as the number of blocks actually obtained.
                     */
 709                n = blks - alloced;
 710                ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 711                if (ret)
 712                        goto out;
 713                alloced += n;
 714                if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 715                        gfs2_trans_remove_revoke(sdp, bn, n);
 716                switch (state) {
 717                /* Growing height of tree */
 718                case ALLOC_GROW_HEIGHT:
                            /*
                             * Remember the dinode's first pointer: the
                             * gfs2_indirect_init() call for i == 1 overwrites
                             * slot 0 of the dinode.
                             */
 719                        if (i == 1) {
 720                                ptr = (__be64 *)(dibh->b_data +
 721                                                 sizeof(struct gfs2_dinode));
 722                                zero_bn = *ptr;
 723                        }
                            /* Chain the new levels, each hanging off slot 0 of its parent. */
 724                        for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
 725                             i++, n--)
 726                                gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
                            /*
                             * All new levels exist: move the dinode's pointer
                             * area into the lowest new block, keep only the
                             * dinode's first pointer, and restore the saved
                             * first pointer in the new block.
                             */
 727                        if (i - 1 == mp->mp_fheight - ip->i_height) {
 728                                i--;
 729                                gfs2_buffer_copy_tail(mp->mp_bh[i],
 730                                                sizeof(struct gfs2_meta_header),
 731                                                dibh, sizeof(struct gfs2_dinode));
 732                                gfs2_buffer_clear_tail(dibh,
 733                                                sizeof(struct gfs2_dinode) +
 734                                                sizeof(__be64));
 735                                ptr = (__be64 *)(mp->mp_bh[i]->b_data +
 736                                        sizeof(struct gfs2_meta_header));
 737                                *ptr = zero_bn;
 738                                state = ALLOC_GROW_DEPTH;
                                    /*
                                     * Drop the buffers from branch_start down;
                                     * the ALLOC_GROW_DEPTH code below creates
                                     * that branch starting at i = branch_start.
                                     */
 739                                for(i = branch_start; i < mp->mp_fheight; i++) {
 740                                        if (mp->mp_bh[i] == NULL)
 741                                                break;
 742                                        brelse(mp->mp_bh[i]);
 743                                        mp->mp_bh[i] = NULL;
 744                                }
 745                                i = branch_start;
 746                        }
                            /* This allocation is used up: go round for more blocks. */
 747                        if (n == 0)
 748                                break;
 749                        fallthrough;    /* To branching from existing tree */
 750                case ALLOC_GROW_DEPTH:
 751                        if (i > 1 && i < mp->mp_fheight)
 752                                gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
                            /* Each new block is linked from the slot the metapath selects. */
 753                        for (; i < mp->mp_fheight && n > 0; i++, n--)
 754                                gfs2_indirect_init(mp, ip->i_gl, i,
 755                                                   mp->mp_list[i-1], bn++);
 756                        if (i == mp->mp_fheight)
 757                                state = ALLOC_DATA;
 758                        if (n == 0)
 759                                break;
 760                        fallthrough;    /* To tree complete, adding data blocks */
 761                case ALLOC_DATA:
 762                        BUG_ON(n > dblks);
 763                        BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
 764                        gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
                            /* The mapping covers only the n blocks left for data. */
 765                        dblks = n;
 766                        ptr = metapointer(end_of_metadata, mp);
 767                        iomap->addr = bn << inode->i_blkbits;
 768                        iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
                            /* Fill consecutive slots with consecutive block numbers. */
 769                        while (n-- > 0)
 770                                *ptr++ = cpu_to_be64(bn++);
 771                        break;
 772                }
            /*
             * Repeat until the ALLOC_DATA case has stored an address; this
             * assumes the caller passed iomap->addr == IOMAP_NULL_ADDR.
             */
 773        } while (iomap->addr == IOMAP_NULL_ADDR);
 774
 775        iomap->type = IOMAP_MAPPED;
 776        iomap->length = (u64)dblks << inode->i_blkbits;
 777        ip->i_height = mp->mp_fheight;
 778        gfs2_add_inode_blocks(&ip->i_inode, alloced);
 779        gfs2_dinode_out(ip, dibh->b_data);
 780out:
 781        up_write(&ip->i_rw_mutex);
 782        return ret;
 783}
 784
 785#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE /* GFS2 name for the IOMAP_F_PRIVATE flag bit */
 786
 787/**
 788 * gfs2_alloc_size - Compute the maximum allocation size
 789 * @inode: The inode
 790 * @mp: The metapath
 791 * @size: Requested size in blocks
 792 *
 793 * Compute the maximum size of the next allocation at @mp.
     *
     * The result never exceeds @size.  If the inode is stuffed, or the metapath
     * lookup height differs from the height that was searched for, the result is
     * only an upper bound derived from the pointer slots left after the current
     * index.  Otherwise it is the exact length of the run of zero pointers that
     * starts at the metapath position in the lowest-level block.
 794 *
 795 * Returns: size in blocks
 796 */
 797static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
 798{
 799        struct gfs2_inode *ip = GFS2_I(inode);
 800        struct gfs2_sbd *sdp = GFS2_SB(inode);
 801        const __be64 *first, *ptr, *end;
 802
 803        /*
 804         * For writes to stuffed files, this function is called twice via
 805         * __gfs2_iomap_get, before and after unstuffing. The size we return the
 806         * first time needs to be large enough to get the reservation and
 807         * allocation sizes right.  The size we return the second time must
 808         * be exact or else __gfs2_iomap_alloc won't do the right thing.
 809         */
 810
 811        if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
                    /*
                     * Upper bound only: clamp @size to the slots that follow
                     * index mp_list[mp_fheight - 1].  NOTE(review): sd_inptrs
                     * and sd_diptrs are presumably the pointer capacities of
                     * an indirect block and of the dinode -- confirm.
                     */
 812                unsigned int maxsize = mp->mp_fheight > 1 ?
 813                        sdp->sd_inptrs : sdp->sd_diptrs;
 814                maxsize -= mp->mp_list[mp->mp_fheight - 1];
 815                if (size > maxsize)
 816                        size = maxsize;
 817                return size;
 818        }
 819
            /*
             * Exact answer: count zero pointers from the metapath position,
             * stopping at @size entries, at metaend(), or at the first
             * non-zero pointer, whichever comes first.
             */
 820        first = metapointer(ip->i_height - 1, mp);
 821        end = metaend(ip->i_height - 1, mp);
 822        if (end - first > size)
 823                end = first + size;
 824        for (ptr = first; ptr < end; ptr++) {
 825                if (*ptr)
 826                        break;
 827        }
 828        return ptr - first;
 829}
 830
 831/**
 832 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 833 * @inode: The inode
 834 * @pos: Starting position in bytes
 835 * @length: Length to map, in bytes
 836 * @flags: iomap flags
 837 * @iomap: The iomap structure
 838 * @mp: The metapath
 839 *
     * Runs under a read hold on the inode's i_rw_mutex and allocates nothing.
     * The dinode buffer is stored in @mp->mp_bh[0]; the callers in this file
     * call release_metapath() on @mp afterwards, on success and on failure.
     *
     * On success @iomap describes one of:
     *  - IOMAP_INLINE: the inode is stuffed, the data sits in the dinode block;
     *  - IOMAP_MAPPED: an allocated extent starting at the block holding @pos;
     *  - IOMAP_HOLE:   no block is allocated at @pos.
     * The exception is an IOMAP_WRITE | IOMAP_DIRECT request that finds no
     * block: iomap->type and iomap->addr are then left exactly as passed in.
     *
 840 * Returns: errno
     *          (-EINVAL for a zero @length, -ENOENT for an IOMAP_REPORT request
     *          that finds nothing allocated at or beyond i_size, or the error
     *          from reading the dinode or walking the metadata tree)
 841 */
 842static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 843                            unsigned flags, struct iomap *iomap,
 844                            struct metapath *mp)
 845{
 846        struct gfs2_inode *ip = GFS2_I(inode);
 847        struct gfs2_sbd *sdp = GFS2_SB(inode);
 848        loff_t size = i_size_read(inode);
 849        __be64 *ptr;
 850        sector_t lblock;
 851        sector_t lblock_stop;
 852        int ret;
 853        int eob;
 854        u64 len;
 855        struct buffer_head *dibh = NULL, *bh;
 856        u8 height;
 857
 858        if (!length)
 859                return -EINVAL;
 860
 861        down_read(&ip->i_rw_mutex);
 862
 863        ret = gfs2_meta_inode_buffer(ip, &dibh);
 864        if (ret)
 865                goto unlock;
            /* The dinode buffer is level 0 of the metapath. */
 866        mp->mp_bh[0] = dibh;
 867
 868        if (gfs2_is_stuffed(ip)) {
 869                if (flags & IOMAP_WRITE) {
 870                        loff_t max_size = gfs2_max_stuffed_size(ip);
 871
                            /*
                             * The write does not fit in the dinode, so map it
                             * as blocks; gfs2_iomap_begin_write() tests the
                             * same condition and does the unstuffing.
                             */
 872                        if (pos + length > max_size)
 873                                goto unstuff;
 874                        iomap->length = max_size;
 875                } else {
 876                        if (pos >= size) {
 877                                if (flags & IOMAP_REPORT) {
 878                                        ret = -ENOENT;
 879                                        goto unlock;
 880                                } else {
 881                                        iomap->offset = pos;
 882                                        iomap->length = length;
 883                                        goto hole_found;
 884                                }
 885                        }
 886                        iomap->length = size;
 887                }
                    /* Inline data starts right behind struct gfs2_dinode. */
 888                iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 889                              sizeof(struct gfs2_dinode);
 890                iomap->type = IOMAP_INLINE;
 891                iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
 892                goto out;
 893        }
 894
 895unstuff:
            /* Block-align the request: blocks lblock..lblock_stop, len total. */
 896        lblock = pos >> inode->i_blkbits;
 897        iomap->offset = lblock << inode->i_blkbits;
 898        lblock_stop = (pos + length - 1) >> inode->i_blkbits;
 899        len = lblock_stop - lblock + 1;
 900        iomap->length = len << inode->i_blkbits;
 901
            /*
             * Pick the smallest height, starting from the inode's current
             * one, whose sd_heightsize[] entry covers the end of lblock.  A
             * result above ip->i_height means no pointer can exist for
             * lblock yet.
             */
 902        height = ip->i_height;
 903        while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 904                height++;
 905        find_metapath(sdp, lblock, mp, height);
 906        if (height > ip->i_height || gfs2_is_stuffed(ip))
 907                goto do_alloc;
 908
 909        ret = lookup_metapath(ip, mp);
 910        if (ret)
 911                goto unlock;
 912
            /* Lookup height differs from the inode height: nothing mapped. */
 913        if (mp->mp_aheight != ip->i_height)
 914                goto do_alloc;
 915
 916        ptr = metapointer(ip->i_height - 1, mp);
 917        if (*ptr == 0)
 918                goto do_alloc;
 919
            /* A pointer exists: report the extent that starts at it. */
 920        bh = mp->mp_bh[ip->i_height - 1];
 921        len = gfs2_extent_length(bh, ptr, len, &eob);
 922
 923        iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 924        iomap->length = len << inode->i_blkbits;
 925        iomap->type = IOMAP_MAPPED;
 926        iomap->flags |= IOMAP_F_MERGED;
 927        if (eob)
 928                iomap->flags |= IOMAP_F_GFS2_BOUNDARY;
 929
 930out:
 931        iomap->bdev = inode->i_sb->s_bdev;
 932unlock:
 933        up_read(&ip->i_rw_mutex);
 934        return ret;
 935
 936do_alloc:
            /*
             * Reached whenever no pointer exists for the block at @pos.
             * Despite the label name nothing is allocated here; the mapping
             * length is only narrowed and, except for direct writes, the
             * result is reported as a hole.
             */
 937        if (flags & IOMAP_REPORT) {
 938                if (pos >= size)
 939                        ret = -ENOENT;
 940                else if (height == ip->i_height)
 941                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 942                else
 943                        iomap->length = size - pos;
 944        } else if (flags & IOMAP_WRITE) {
 945                u64 alloc_size;
 946
 947                if (flags & IOMAP_DIRECT)
 948                        goto out;  /* (see gfs2_file_direct_write) */
 949
                    /* Trim the mapping to what one allocation may cover. */
 950                len = gfs2_alloc_size(inode, mp, len);
 951                alloc_size = len << inode->i_blkbits;
 952                if (alloc_size < iomap->length)
 953                        iomap->length = alloc_size;
 954        } else {
 955                if (pos < size && height == ip->i_height)
 956                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 957        }
 958hole_found:
 959        iomap->addr = IOMAP_NULL_ADDR;
 960        iomap->type = IOMAP_HOLE;
 961        goto out;
 962}
 963
    /*
     * gfs2_write_lock - take the glock(s) held across a buffered write
     * @inode: The inode about to be written
     *
     * Acquires the inode glock in LM_ST_EXCLUSIVE through the holder embedded
     * in the inode (ip->i_gh).  If @inode is the rindex inode, the glock of the
     * statfs inode is acquired as well, exclusive and with GL_NOCACHE.
     * gfs2_write_unlock() is the counterpart.
     *
     * Returns: 0 with the glock(s) held, or an error with nothing held
     */
 964static int gfs2_write_lock(struct inode *inode)
 965{
 966        struct gfs2_inode *ip = GFS2_I(inode);
 967        struct gfs2_sbd *sdp = GFS2_SB(inode);
 968        int error;
 969
 970        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
 971        error = gfs2_glock_nq(&ip->i_gh);
 972        if (error)
 973                goto out_uninit;
 974        if (&ip->i_inode == sdp->sd_rindex) {
 975                struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 976
 977                error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
 978                                           GL_NOCACHE, &m_ip->i_gh);
 979                if (error)
 980                        goto out_unlock;
 981        }
 982        return 0;
 983
    /* Unwind in reverse order: dequeue the inode glock, then drop its holder. */
 984out_unlock:
 985        gfs2_glock_dq(&ip->i_gh);
 986out_uninit:
 987        gfs2_holder_uninit(&ip->i_gh);
 988        return error;
 989}
 990
    /*
     * gfs2_write_unlock - release the glock(s) taken by gfs2_write_lock()
     * @inode: The inode that was written
     *
     * For the rindex inode the statfs inode's holder is dequeued and
     * uninitialised first, then the inode's own holder (ip->i_gh) follows.
     */
 991static void gfs2_write_unlock(struct inode *inode)
 992{
 993        struct gfs2_sbd *sdp = GFS2_SB(inode);
 994        struct gfs2_inode *ip = GFS2_I(inode);
 995        bool is_rindex = (sdp->sd_rindex == &ip->i_inode);
 996
 997        /* The statfs glock was acquired last, so it is dropped first. */
 998        if (is_rindex)
 999                gfs2_glock_dq_uninit(&GFS2_I(sdp->sd_statfs_inode)->i_gh);
1000
1001        gfs2_glock_dq_uninit(&ip->i_gh);
1002}
1003
    /*
     * gfs2_iomap_page_prepare - ->page_prepare hook of gfs2_iomap_page_ops
     * @inode: The inode being written
     * @pos: Start of the write, in bytes
     * @len: Number of bytes to be written
     * @iomap: The current mapping (not used here)
     *
     * Begins a transaction sized for the dinode plus every filesystem block
     * that the byte range [@pos, @pos + @len) touches.
     *
     * Returns: the result of gfs2_trans_begin()
     */
1004static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
1005                                   unsigned len, struct iomap *iomap)
1006{
1007        unsigned int mask = i_blocksize(inode) - 1;
1008        /* Offset into the first block plus @len, rounded up to whole blocks. */
1009        loff_t span = (pos & mask) + len + mask;
1010        unsigned int nr_blocks = span >> inode->i_blkbits;
1011
1012        return gfs2_trans_begin(GFS2_SB(inode), RES_DINODE + nr_blocks, 0);
1013}
1014
    /*
     * gfs2_iomap_page_done - ->page_done hook of gfs2_iomap_page_ops
     * @inode: The inode being written
     * @pos: Start of the write, in bytes
     * @copied: Number of bytes copied into @page
     * @page: The page that was written; may be NULL
     * @iomap: The current mapping (not used here)
     *
     * Ends the running transaction taken from current->journal_info, which is
     * presumably the one begun by gfs2_iomap_page_prepare().  Before that, the
     * written part of the page is added via gfs2_page_add_databufs() unless the
     * inode is stuffed, and the inode is marked I_DIRTY_DATASYNC if the
     * transaction picked up new buffers.
     */
1015static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
1016                                 unsigned copied, struct page *page,
1017                                 struct iomap *iomap)
1018{
1019        struct gfs2_trans *tr = current->journal_info;
1020        struct gfs2_inode *ip = GFS2_I(inode);
1021        struct gfs2_sbd *sdp = GFS2_SB(inode);
1022
1023        if (page && !gfs2_is_stuffed(ip))
1024                gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
1025
            /* tr must be inspected before gfs2_trans_end() finishes with it. */
1026        if (tr->tr_num_buf_new)
1027                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1028
1029        gfs2_trans_end(sdp);
1030}
1031
    /*
     * Per-page hooks that bracket each page of a write with its own
     * transaction.  gfs2_iomap_begin_write() installs them in iomap->page_ops
     * only for stuffed and jdata inodes.
     */
1032static const struct iomap_page_ops gfs2_iomap_page_ops = {
1033        .page_prepare = gfs2_iomap_page_prepare,
1034        .page_done = gfs2_iomap_page_done,
1035};
1036
    /*
     * gfs2_iomap_begin_write - prepare a mapping for a buffered write or zeroing
     * @inode: The inode
     * @pos: Start of the request, in bytes
     * @length: Length of the request, in bytes
     * @flags: iomap flags, passed on when the mapping is looked up again
     * @iomap: Mapping produced by __gfs2_iomap_get(); updated in place
     * @mp: Metapath that belongs to @iomap
     *
     * If the inode has to be unstuffed (a stuffed inode whose write ends beyond
     * gfs2_max_stuffed_size()) or @iomap is a hole, the quota lock is taken,
     * blocks are reserved and, inside one transaction, the inode is unstuffed
     * and/or the hole is allocated.  On success the quota lock and the block
     * reservation stay held; gfs2_iomap_end() is where they are released.
     *
     * Stuffed and jdata inodes additionally get gfs2_iomap_page_ops installed.
     *
     * Returns: 0 on success, or an error with the transaction ended and the
     *          reservation and quota lock released
     */
1037static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
1038                                  loff_t length, unsigned flags,
1039                                  struct iomap *iomap,
1040                                  struct metapath *mp)
1041{
1042        struct gfs2_inode *ip = GFS2_I(inode);
1043        struct gfs2_sbd *sdp = GFS2_SB(inode);
1044        bool unstuff;
1045        int ret;
1046
1047        unstuff = gfs2_is_stuffed(ip) &&
1048                  pos + length > gfs2_max_stuffed_size(ip);
1049
1050        if (unstuff || iomap->type == IOMAP_HOLE) {
1051                unsigned int data_blocks, ind_blocks;
1052                struct gfs2_alloc_parms ap = {};
1053                unsigned int rblocks;
1054                struct gfs2_trans *tr;
1055
1056                gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
1057                                       &ind_blocks);
1058                ap.target = data_blocks + ind_blocks;
1059                ret = gfs2_quota_lock_check(ip, &ap);
1060                if (ret)
1061                        return ret;
1062
1063                ret = gfs2_inplace_reserve(ip, &ap);
1064                if (ret)
1065                        goto out_qunlock;
1066
                    /* Add up the journal blocks this transaction may need. */
1067                rblocks = RES_DINODE + ind_blocks;
1068                if (gfs2_is_jdata(ip))
1069                        rblocks += data_blocks;
1070                if (ind_blocks || data_blocks)
1071                        rblocks += RES_STATFS + RES_QUOTA;
1072                if (inode == sdp->sd_rindex)
1073                        rblocks += 2 * RES_STATFS;
1074                rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
1075
1076                ret = gfs2_trans_begin(sdp, rblocks,
1077                                       iomap->length >> inode->i_blkbits);
1078                if (ret)
1079                        goto out_trans_fail;
1080
1081                if (unstuff) {
1082                        ret = gfs2_unstuff_dinode(ip);
1083                        if (ret)
1084                                goto out_trans_end;
                            /*
                             * The inode is no longer stuffed, so the earlier
                             * lookup is stale: drop the metapath and map the
                             * same range again.
                             */
1085                        release_metapath(mp);
1086                        ret = __gfs2_iomap_get(inode, iomap->offset,
1087                                               iomap->length, flags, iomap, mp);
1088                        if (ret)
1089                                goto out_trans_end;
1090                }
1091
1092                if (iomap->type == IOMAP_HOLE) {
1093                        ret = __gfs2_iomap_alloc(inode, iomap, mp);
1094                        if (ret) {
                                    /*
                                     * punch_hole() runs only after the
                                     * transaction has ended and the
                                     * reservation is released; presumably it
                                     * frees whatever was partially allocated.
                                     */
1095                                gfs2_trans_end(sdp);
1096                                gfs2_inplace_release(ip);
1097                                punch_hole(ip, iomap->offset, iomap->length);
1098                                goto out_qunlock;
1099                        }
1100                }
1101
1102                tr = current->journal_info;
1103                if (tr->tr_num_buf_new)
1104                        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1105
1106                gfs2_trans_end(sdp);
1107        }
1108
1109        if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
1110                iomap->page_ops = &gfs2_iomap_page_ops;
1111        return 0;
1112
1113out_trans_end:
1114        gfs2_trans_end(sdp);
1115out_trans_fail:
1116        gfs2_inplace_release(ip);
1117out_qunlock:
1118        gfs2_quota_unlock(ip);
1119        return ret;
1120}
1121
    /*
     * gfs2_iomap_need_write_lock - does this request use gfs2_write_lock()?
     * @flags: iomap flags of the request
     *
     * Returns: true for writes that are not direct I/O, false otherwise
     */
1122static inline bool gfs2_iomap_need_write_lock(unsigned flags)
1123{
1124        if (flags & IOMAP_DIRECT)
                    return false;
            return (flags & IOMAP_WRITE) != 0;
1125}
1126
    /*
     * gfs2_iomap_begin - ->iomap_begin of gfs2_iomap_ops
     * @inode: The inode
     * @pos: Start of the request, in bytes
     * @length: Length of the request, in bytes
     * @flags: iomap flags
     * @iomap: The mapping to fill in
     * @srcmap: Not used here
     *
     * Looks up the mapping with __gfs2_iomap_get().  Buffered writes, and
     * zeroing of ranges that are not holes, are then passed on to
     * gfs2_iomap_begin_write(), which may unstuff the inode or allocate
     * blocks.  Direct writes are only accepted on IOMAP_MAPPED extents.
     *
     * For buffered writes the glock(s) from gfs2_write_lock() stay held when 0
     * is returned; gfs2_iomap_end() drops them.  On error they are dropped
     * here.
     *
     * Returns: 0 on success, -ENOTBLK for a direct write that does not hit a
     *          mapped extent, or another errno
     */
1127static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1128                            unsigned flags, struct iomap *iomap,
1129                            struct iomap *srcmap)
1130{
1131        struct gfs2_inode *ip = GFS2_I(inode);
1132        struct metapath mp = { .mp_aheight = 1, };
1133        int ret;
1134
1135        if (gfs2_is_jdata(ip))
1136                iomap->flags |= IOMAP_F_BUFFER_HEAD;
1137
1138        trace_gfs2_iomap_start(ip, pos, length, flags);
1139        if (gfs2_iomap_need_write_lock(flags)) {
1140                ret = gfs2_write_lock(inode);
1141                if (ret)
1142                        goto out;
1143        }
1144
1145        ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
1146        if (ret)
1147                goto out_unlock;
1148
1149        switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
1150        case IOMAP_WRITE:
1151                if (flags & IOMAP_DIRECT) {
1152                        /*
1153                         * Silently fall back to buffered I/O for stuffed files
1154                         * or if we've got a hole (see gfs2_file_direct_write).
1155                         */
1156                        if (iomap->type != IOMAP_MAPPED)
1157                                ret = -ENOTBLK;
1158                        goto out_unlock;
1159                }
1160                break;
1161        case IOMAP_ZERO:
                    /* Zeroing a hole needs no allocation. */
1162                if (iomap->type == IOMAP_HOLE)
1163                        goto out_unlock;
1164                break;
1165        default:
                    /* Neither a write nor a zeroing request: lookup only. */
1166                goto out_unlock;
1167        }
1168
1169        ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
1170
1171out_unlock:
            /* On success the write lock is kept for gfs2_iomap_end(). */
1172        if (ret && gfs2_iomap_need_write_lock(flags))
1173                gfs2_write_unlock(inode);
1174        release_metapath(&mp);
1175out:
1176        trace_gfs2_iomap_end(ip, iomap, ret);
1177        return ret;
1178}
1179
    /*
     * gfs2_iomap_end - ->iomap_end of gfs2_iomap_ops
     * @inode: The inode
     * @pos: Start of the request, in bytes
     * @length: Length of the mapping that was handed out, in bytes
     * @written: Number of bytes actually processed
     * @flags: iomap flags
     * @iomap: The mapping that was handed out
     *
     * Does nothing for direct writes, for zeroing of holes and for requests
     * that are neither writes nor zeroing.  Otherwise it releases the block
     * reservation and, where held, the quota lock; frees newly allocated
     * blocks that lie beyond what was written; flags the inode and its glock
     * as dirty if anything was written; and drops the glock(s) taken by
     * gfs2_write_lock().
     *
     * Returns: 0 (always)
     */
1180static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1181                          ssize_t written, unsigned flags, struct iomap *iomap)
1182{
1183        struct gfs2_inode *ip = GFS2_I(inode);
1184        struct gfs2_sbd *sdp = GFS2_SB(inode);
1185
1186        switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
1187        case IOMAP_WRITE:
1188                if (flags & IOMAP_DIRECT)
1189                        return 0;
1190                break;
1191        case IOMAP_ZERO:
1192                 if (iomap->type == IOMAP_HOLE)
1193                         return 0;
1194                 break;
1195        default:
1196                 return 0;
1197        }
1198
1199        if (!gfs2_is_stuffed(ip))
1200                gfs2_ordered_add_inode(ip);
1201
1202        if (inode == sdp->sd_rindex)
1203                adjust_fs_space(inode);
1204
1205        gfs2_inplace_release(ip);
1206
            /*
             * NOTE(review): presumably true only when gfs2_iomap_begin_write()
             * actually took the quota lock -- confirm against quota.c.
             */
1207        if (ip->i_qadata && ip->i_qadata->qa_qd_num)
1208                gfs2_quota_unlock(ip);
1209
1210        if (length != written && (iomap->flags & IOMAP_F_NEW)) {
1211                /* Deallocate blocks that were just allocated. */
1212                loff_t blockmask = i_blocksize(inode) - 1;
1213                loff_t end = (pos + length) & ~blockmask;
1214
                    /*
                     * Only whole blocks past the written data are freed: the
                     * start is rounded up, the end is rounded down.
                     */
1215                pos = (pos + written + blockmask) & ~blockmask;
1216                if (pos < end) {
1217                        truncate_pagecache_range(inode, pos, end - 1);
1218                        punch_hole(ip, pos, end - pos);
1219                }
1220        }
1221
1222        if (unlikely(!written))
1223                goto out_unlock;
1224
1225        if (iomap->flags & IOMAP_F_SIZE_CHANGED)
1226                mark_inode_dirty(inode);
1227        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
1228
1229out_unlock:
1230        if (gfs2_iomap_need_write_lock(flags))
1231                gfs2_write_unlock(inode);
1232        return 0;
1233}
1234
    /*
     * iomap operations of GFS2; gfs2_block_zero_range() in this file passes
     * them to iomap_zero_range().  Not static, so presumably also used from
     * other files.
     */
1235const struct iomap_ops gfs2_iomap_ops = {
1236        .iomap_begin = gfs2_iomap_begin,
1237        .iomap_end = gfs2_iomap_end,
1238};
1239
1240/**
1241 * gfs2_block_map - Map one or more blocks of an inode to a disk block
1242 * @inode: The inode
1243 * @lblock: The logical block number
1244 * @bh_map: The bh to be mapped
1245 * @create: True if it's ok to alloc blocks to satisfy the request
1246 *
1247 * The size of the requested mapping is defined in bh_map->b_size.
1248 *
1249 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1250 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
1251 * bh_map->b_size to indicate the size of the mapping when @lblock and
1252 * successive blocks are mapped, up to the requested size.
1253 *
1254 * Sets buffer_boundary() if a read of metadata will be required
1255 * before the next block can be mapped. Sets buffer_new() if new
1256 * blocks were allocated.
1257 *
1258 * Returns: errno
1259 */
1260
1261int gfs2_block_map(struct inode *inode, sector_t lblock,
1262                   struct buffer_head *bh_map, int create)
1263{
1264        struct gfs2_inode *ip = GFS2_I(inode);
1265        loff_t pos = (loff_t)lblock << inode->i_blkbits;
1266        loff_t length = bh_map->b_size;
1267        struct iomap iomap = { };
1268        int ret;
1269
1270        clear_buffer_mapped(bh_map);
1271        clear_buffer_new(bh_map);
1272        clear_buffer_boundary(bh_map);
1273        trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
1274
1275        if (!create)
1276                ret = gfs2_iomap_get(inode, pos, length, &iomap);
1277        else
1278                ret = gfs2_iomap_alloc(inode, pos, length, &iomap);
1279        if (ret)
1280                goto out;
1281
            /*
             * A mapping clamped to the requested size no longer ends where
             * the extent ends, so the boundary hint is dropped with it.
             */
1282        if (iomap.length > bh_map->b_size) {
1283                iomap.length = bh_map->b_size;
1284                iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
1285        }
1286        if (iomap.addr != IOMAP_NULL_ADDR)
1287                map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
1288        bh_map->b_size = iomap.length;
1289        if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
1290                set_buffer_boundary(bh_map);
1291        if (iomap.flags & IOMAP_F_NEW)
1292                set_buffer_new(bh_map);
1293
1294out:
1295        trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
1296        return ret;
1297}
1298
    /**
     * gfs2_get_extent - Look up an existing extent of an inode
     * @inode: The inode
     * @lblock: First logical block of the lookup
     * @dblock: Set to the disk block that @lblock maps to
     * @extlen: In: maximum number of blocks to look up; out: reduced to the
     *          length of the mapped extent when that is shorter
     *
     * Nothing is allocated.
     *
     * Returns: 0 on success, -EIO if the lookup does not yield a mapped extent
     *          (a hole, or inline data of a stuffed inode), or the error from
     *          gfs2_iomap_get()
     */
1299int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
1300                    unsigned int *extlen)
1301{
1302        unsigned int blkbits = inode->i_blkbits;
1303        struct iomap iomap = { };
1304        unsigned int len;
1305        int ret;
1306
            /*
             * Widen *extlen before shifting.  As an unsigned int the byte
             * length would be computed in 32 bits and wrap for large extents;
             * a wrap to 0 would even be rejected with -EINVAL.
             */
1307        ret = gfs2_iomap_get(inode, lblock << blkbits,
1308                             (loff_t)*extlen << blkbits, &iomap);
1309        if (ret)
1310                return ret;
1311        if (iomap.type != IOMAP_MAPPED)
1312                return -EIO;
1313        *dblock = iomap.addr >> blkbits;
1314        len = iomap.length >> blkbits;
1315        if (len < *extlen)
1316                *extlen = len;
1317        return 0;
1318}
1319
    /**
     * gfs2_alloc_extent - Look up an extent of an inode, allocating if needed
     * @inode: The inode
     * @lblock: First logical block of the request
     * @dblock: Set to the disk block that @lblock maps to
     * @extlen: In: maximum number of blocks wanted; out: reduced to the length
     *          of the resulting extent when that is shorter
     * @new: Set to whether the mapping carries IOMAP_F_NEW
     *
     * Returns: 0 on success, -EIO if the result is not a mapped extent, or the
     *          error from gfs2_iomap_alloc()
     */
1320int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
1321                      unsigned int *extlen, bool *new)
1322{
1323        unsigned int blkbits = inode->i_blkbits;
1324        struct iomap iomap = { };
1325        unsigned int len;
1326        int ret;
1327
            /*
             * Widen *extlen before shifting.  As an unsigned int the byte
             * length would be computed in 32 bits and wrap for large extents;
             * a wrap to 0 would even be rejected with -EINVAL.
             */
1328        ret = gfs2_iomap_alloc(inode, lblock << blkbits,
1329                               (loff_t)*extlen << blkbits, &iomap);
1330        if (ret)
1331                return ret;
1332        if (iomap.type != IOMAP_MAPPED)
1333                return -EIO;
1334        *dblock = iomap.addr >> blkbits;
1335        len = iomap.length >> blkbits;
1336        if (len < *extlen)
1337                *extlen = len;
1338        *new = iomap.flags & IOMAP_F_NEW;
1339        return 0;
1340}
1341
1342/*
     * gfs2_block_zero_range - zero a byte range of an inode through iomap
     * @inode: The inode
     * @from: First byte to zero
     * @length: Number of bytes to zero
     *
1343 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
1344 * uses iomap write to perform its actions, which begin their own transactions
1345 * (iomap_begin, page_prepare, etc.)
     *
     * The rule is enforced with a BUG_ON() on current->journal_info.
     *
     * Returns: the result of iomap_zero_range()
1346 */
1347static int gfs2_block_zero_range(struct inode *inode, loff_t from,
1348                                 unsigned int length)
1349{
1350        BUG_ON(current->journal_info);
1351        return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
1352}
1353
    /*
     * Revoke count requested for each transaction while a jdata file is being
     * truncated (third argument of gfs2_trans_begin()).  Multiplied by the
     * block size it is also the largest chunk gfs2_journaled_truncate()
     * removes from the page cache in one step.
     */
1354#define GFS2_JTRUNC_REVOKES 8192
1355
1356/**
1357 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1358 * @inode: The inode being truncated
1359 * @oldsize: The original (larger) size
1360 * @newsize: The new smaller size
1361 *
1362 * With jdata files, we have to journal a revoke for each block which is
1363 * truncated. As a result, we need to split this into separate transactions
1364 * if the number of pages being truncated gets too large.
     *
     * Must be called with a transaction open: current->journal_info is
     * dereferenced unconditionally.  The page cache is cut back from @oldsize
     * towards @newsize in chunks of at most GFS2_JTRUNC_REVOKES blocks.  After
     * each chunk, a transaction that has TR_TOUCHED set is ended and a new one
     * is begun.
     *
     * Returns: 0 with a transaction still open, or the error from
     *          gfs2_trans_begin(), in which case no transaction is open
1365 */
1366
1367static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1368{
1369        struct gfs2_sbd *sdp = GFS2_SB(inode);
1370        u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1371        u64 chunk;
1372        int error;
1373
1374        while (oldsize != newsize) {
1375                struct gfs2_trans *tr;
1376                unsigned int offs;
1377
1378                chunk = oldsize - newsize;
1379                if (chunk > max_chunk)
1380                        chunk = max_chunk;
1381
                    /*
                     * If oldsize is not page aligned and more than a page is
                     * being removed, shrink the chunk so that the new end
                     * (oldsize - chunk) falls on a page boundary.
                     */
1382                offs = oldsize & ~PAGE_MASK;
1383                if (offs && chunk > PAGE_SIZE)
1384                        chunk = offs + ((chunk - offs) & PAGE_MASK);
1385
1386                truncate_pagecache(inode, oldsize - chunk);
1387                oldsize -= chunk;
1388
                    /* An untouched transaction is reused for the next chunk. */
1389                tr = current->journal_info;
1390                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1391                        continue;
1392
1393                gfs2_trans_end(sdp);
1394                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1395                if (error)
1396                        return error;
1397        }
1398
1399        return 0;
1400}
1401
    /*
     * trunc_start - first step of shrinking an inode to @newsize
     * @inode: The inode
     * @newsize: The new, smaller size in bytes
     *
     * For an unstuffed inode the remainder of the block that contains
     * @newsize is zeroed first, before any transaction is open.  Then, inside
     * a transaction, the dinode is journaled with the new size and timestamps.
     * A stuffed inode has the dinode buffer cleared beyond @newsize; an
     * unstuffed one gets GFS2_DIF_TRUNC_IN_PROG set.  Finally the page cache
     * is truncated, in chunks for jdata inodes.
     *
     * Returns: 0 on success or errno
     */
1402static int trunc_start(struct inode *inode, u64 newsize)
1403{
1404        struct gfs2_inode *ip = GFS2_I(inode);
1405        struct gfs2_sbd *sdp = GFS2_SB(inode);
1406        struct buffer_head *dibh = NULL;
1407        int journaled = gfs2_is_jdata(ip);
1408        u64 oldsize = inode->i_size;
1409        int error;
1410
1411        if (!gfs2_is_stuffed(ip)) {
1412                unsigned int blocksize = i_blocksize(inode);
1413                unsigned int offs = newsize & (blocksize - 1);
                    /*
                     * Must precede gfs2_trans_begin() below:
                     * gfs2_block_zero_range() BUG()s on an open transaction.
                     */
1414                if (offs) {
1415                        error = gfs2_block_zero_range(inode, newsize,
1416                                                      blocksize - offs);
1417                        if (error)
1418                                return error;
1419                }
1420        }
1421        if (journaled)
1422                error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1423        else
1424                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1425        if (error)
1426                return error;
1427
1428        error = gfs2_meta_inode_buffer(ip, &dibh);
1429        if (error)
1430                goto out;
1431
1432        gfs2_trans_add_meta(ip->i_gl, dibh);
1433
1434        if (gfs2_is_stuffed(ip))
1435                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1436        else
1437                ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1438
1439        i_size_write(inode, newsize);
1440        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1441        gfs2_dinode_out(ip, dibh->b_data);
1442
1443        if (journaled)
1444                error = gfs2_journaled_truncate(inode, oldsize, newsize);
1445        else
1446                truncate_pagecache(inode, newsize);
1447
1448out:
1449        brelse(dibh);
            /*
             * gfs2_journaled_truncate() returns without an open transaction
             * when its gfs2_trans_begin() fails, hence the check.
             */
1450        if (current->journal_info)
1451                gfs2_trans_end(sdp);
1452        return error;
1453}
1454
    /*
     * gfs2_iomap_get - look up the mapping of a byte range without allocating
     * @inode: The inode
     * @pos: Start of the range, in bytes
     * @length: Length of the range, in bytes
     * @iomap: Filled in with the mapping
     *
     * Calls __gfs2_iomap_get() with no iomap flags on a private metapath.
     *
     * Returns: the result of __gfs2_iomap_get()
     */
1455int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
1456                   struct iomap *iomap)
1457{
1458        struct metapath path = { .mp_aheight = 1, };
1459        int err = __gfs2_iomap_get(inode, pos, length, 0, iomap, &path);
1460
1461        /* The lookup leaves buffer references in the metapath; drop them. */
1462        release_metapath(&path);
1463        return err;
1464}
1465
    /*
     * gfs2_iomap_alloc - look up a byte range, allocating blocks for a hole
     * @inode: The inode
     * @pos: Start of the range, in bytes
     * @length: Length of the range, in bytes
     * @iomap: Filled in with the mapping
     *
     * Looks the range up with IOMAP_WRITE set and, if the result is a hole,
     * hands it to __gfs2_iomap_alloc().  No locking, reservation or
     * transaction handling happens here.
     *
     * Returns: 0 on success or errno
     */
1466int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
1467                     struct iomap *iomap)
1468{
1469        struct metapath path = { .mp_aheight = 1, };
1470        int err;
1471
1472        err = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &path);
1473        if (err)
1474                goto out;
1475        if (iomap->type == IOMAP_HOLE)
1476                err = __gfs2_iomap_alloc(inode, iomap, &path);
    out:
            release_metapath(&path);
            return err;
1477}
1478
1479/**
1480 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1481 * @ip: inode
1482 * @rd_gh: holder of resource group glock
1483 * @bh: buffer head to sweep
1484 * @start: starting point in bh
1485 * @end: end point in bh
1486 * @meta: true if bh points to metadata (rather than data)
1487 * @btotal: place to keep count of total blocks freed
1488 *
1489 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1490 * free, and free them all. However, we do it one rgrp at a time. If this
1491 * block has references to multiple rgrps, we break it into individual
1492 * transactions. This allows other processes to use the rgrps while we're
1493 * focused on a single one, for better concurrency / performance.
1494 * At every transaction boundary, we rewrite the inode into the journal.
1495 * That way the bitmaps are kept consistent with the inode and we can recover
1496 * if we're interrupted by power-outages.
1497 *
     * NOTE: when this function returns, the last transaction begun here, the
     * write hold on ip->i_rw_mutex taken with it, and the rgrp glock in @rd_gh
     * may all still be held.  Nothing on the final return path releases them,
     * so that is left to the caller (not visible in this part of the file).
     *
1498 * Returns: 0, or return code if an error occurred.
1499 *          *btotal has the total number of blocks freed
1500 */
1501static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1502                              struct buffer_head *bh, __be64 *start, __be64 *end,
1503                              bool meta, u32 *btotal)
1504{
1505        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1506        struct gfs2_rgrpd *rgd;
1507        struct gfs2_trans *tr;
1508        __be64 *p;
1509        int blks_outside_rgrp;
1510        u64 bn, bstart, isize_blks;
1511        s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1512        int ret = 0;
1513        bool buf_in_tr = false; /* buffer was added to transaction */
1514
1515more_rgrps:
            /* Keep working on the rgrp whose glock is already held, if any. */
1516        rgd = NULL;
1517        if (gfs2_holder_initialized(rd_gh)) {
1518                rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1519                gfs2_assert_withdraw(sdp,
1520                             gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1521        }
1522        blks_outside_rgrp = 0;
1523        bstart = 0;
1524        blen = 0;
1525
1526        for (p = start; p < end; p++) {
1527                if (!*p)
1528                        continue;
1529                bn = be64_to_cpu(*p);
1530
1531                if (rgd) {
                            /* Belongs to another rgrp: leave it for a later pass. */
1532                        if (!rgrp_contains_block(rgd, bn)) {
1533                                blks_outside_rgrp++;
1534                                continue;
1535                        }
1536                } else {
                            /* First pointer of this pass selects the rgrp. */
1537                        rgd = gfs2_blk2rgrpd(sdp, bn, true);
1538                        if (unlikely(!rgd)) {
1539                                ret = -EIO;
1540                                goto out;
1541                        }
1542                        ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1543                                                 LM_FLAG_NODE_SCOPE, rd_gh);
1544                        if (ret)
1545                                goto out;
1546
1547                        /* Must be done with the rgrp glock held: */
1548                        if (gfs2_rs_active(&ip->i_res) &&
1549                            rgd == ip->i_res.rs_rgd)
1550                                gfs2_rs_deltree(&ip->i_res);
1551                }
1552
1553                /* The size of our transactions will be unknown until we
1554                   actually process all the metadata blocks that relate to
1555                   the rgrp. So we estimate. We know it can't be more than
1556                   the dinode's i_blocks and we don't want to exceed the
1557                   journal flush threshold, sd_log_thresh2. */
1558                if (current->journal_info == NULL) {
1559                        unsigned int jblocks_rqsted, revokes;
1560
1561                        jblocks_rqsted = rgd->rd_length + RES_DINODE +
1562                                RES_INDIRECT;
1563                        isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1564                        if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1565                                jblocks_rqsted +=
1566                                        atomic_read(&sdp->sd_log_thresh2);
1567                        else
1568                                jblocks_rqsted += isize_blks;
1569                        revokes = jblocks_rqsted;
1570                        if (meta)
1571                                revokes += end - start;
1572                        else if (ip->i_depth)
1573                                revokes += sdp->sd_inptrs;
1574                        ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1575                        if (ret)
1576                                goto out_unlock;
                            /* Held for as long as this transaction is open. */
1577                        down_write(&ip->i_rw_mutex);
1578                }
1579                /* check if we will exceed the transaction blocks requested */
1580                tr = current->journal_info;
1581                if (tr->tr_num_buf_new + RES_STATFS +
1582                    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1583                        /* We set blks_outside_rgrp to ensure the loop will
1584                           be repeated for the same rgrp, but with a new
1585                           transaction. */
1586                        blks_outside_rgrp++;
1587                        /* This next part is tricky. If the buffer was added
1588                           to the transaction, we've already set some block
1589                           pointers to 0, so we better follow through and free
1590                           them, or we will introduce corruption (so break).
1591                           This may be impossible, or at least rare, but I
1592                           decided to cover the case regardless.
1593
1594                           If the buffer was not added to the transaction
1595                           (this call), doing so would exceed our transaction
1596                           size, so we need to end the transaction and start a
1597                           new one (so goto). */
1598
1599                        if (buf_in_tr)
1600                                break;
1601                        goto out_unlock;
1602                }
1603
1604                gfs2_trans_add_meta(ip->i_gl, bh);
1605                buf_in_tr = true;
1606                *p = 0;
                    /*
                     * Collect consecutive block numbers in the run
                     * [bstart, bstart + blen) so that each run is freed with
                     * one __gfs2_free_blocks() call.
                     */
1607                if (bstart + blen == bn) {
1608                        blen++;
1609                        continue;
1610                }
1611                if (bstart) {
1612                        __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
1613                        (*btotal) += blen;
1614                        gfs2_add_inode_blocks(&ip->i_inode, -blen);
1615                }
1616                bstart = bn;
1617                blen = 1;
1618        }
            /* Free the run that was still being collected when the loop ended. */
1619        if (bstart) {
1620                __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
1621                (*btotal) += blen;
1622                gfs2_add_inode_blocks(&ip->i_inode, -blen);
1623        }
1624out_unlock:
1625        if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1626                                            outside the rgrp we just processed,
1627                                            do it all over again. */
1628                if (current->journal_info) {
1629                        struct buffer_head *dibh;
1630
1631                        ret = gfs2_meta_inode_buffer(ip, &dibh);
1632                        if (ret)
1633                                goto out;
1634
1635                        /* Every transaction boundary, we rewrite the dinode
1636                           to keep its di_blocks current in case of failure. */
1637                        ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1638                                current_time(&ip->i_inode);
1639                        gfs2_trans_add_meta(ip->i_gl, dibh);
1640                        gfs2_dinode_out(ip, dibh->b_data);
1641                        brelse(dibh);
1642                        up_write(&ip->i_rw_mutex);
1643                        gfs2_trans_end(sdp);
1644                        buf_in_tr = false;
1645                }
                    /* Give up this rgrp so the next pass can pick another. */
1646                gfs2_glock_dq_uninit(rd_gh);
1647                cond_resched();
1648                goto more_rgrps;
1649        }
1650out:
1651        return ret;
1652}
1653
1654static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1655{
1656        if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1657                return false;
1658        return true;
1659}
1660
1661/**
1662 * find_nonnull_ptr - find a non-null pointer given a metapath and height
1663 * @sdp: The superblock
1664 * @mp: starting metapath
1665 * @h: desired height to search
1666 * @end_list: See punch_hole().
1667 * @end_aligned: See punch_hole().
1668 *
1669 * Assumes the metapath is valid (with buffers) out to height h.
1670 * Returns: true if a non-null pointer was found in the metapath buffer
1671 *          false if all remaining pointers are NULL in the buffer
1672 */
1673static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1674                             unsigned int h,
1675                             __u16 *end_list, unsigned int end_aligned)
1676{
1677        struct buffer_head *bh = mp->mp_bh[h];
1678        __be64 *first, *ptr, *end;
1679
1680        first = metaptr1(h, mp);
1681        ptr = first + mp->mp_list[h];
1682        end = (__be64 *)(bh->b_data + bh->b_size);
1683        if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1684                bool keep_end = h < end_aligned;
1685                end = first + end_list[h] + keep_end;
1686        }
1687
1688        while (ptr < end) {
1689                if (*ptr) { /* if we have a non-null pointer */
1690                        mp->mp_list[h] = ptr - first;
1691                        h++;
1692                        if (h < GFS2_MAX_META_HEIGHT)
1693                                mp->mp_list[h] = 0;
1694                        return true;
1695                }
1696                ptr++;
1697        }
1698        return false;
1699}
1700
/* States of the deallocation state machine driven by punch_hole(). */
enum dealloc_states {
        /* Strip a metapath with all buffers read in */
        DEALLOC_MP_FULL = 0,
        /* lower the metapath strip height */
        DEALLOC_MP_LOWER = 1,
        /* Fill in the metapath to the given height. */
        DEALLOC_FILL_MP = 2,
        /* process complete */
        DEALLOC_DONE = 3,
};
1707
1708static inline void
1709metapointer_range(struct metapath *mp, int height,
1710                  __u16 *start_list, unsigned int start_aligned,
1711                  __u16 *end_list, unsigned int end_aligned,
1712                  __be64 **start, __be64 **end)
1713{
1714        struct buffer_head *bh = mp->mp_bh[height];
1715        __be64 *first;
1716
1717        first = metaptr1(height, mp);
1718        *start = first;
1719        if (mp_eq_to_hgt(mp, start_list, height)) {
1720                bool keep_start = height < start_aligned;
1721                *start = first + start_list[height] + keep_start;
1722        }
1723        *end = (__be64 *)(bh->b_data + bh->b_size);
1724        if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1725                bool keep_end = height < end_aligned;
1726                *end = first + end_list[height] + keep_end;
1727        }
1728}
1729
1730static inline bool walk_done(struct gfs2_sbd *sdp,
1731                             struct metapath *mp, int height,
1732                             __u16 *end_list, unsigned int end_aligned)
1733{
1734        __u16 end;
1735
1736        if (end_list) {
1737                bool keep_end = height < end_aligned;
1738                if (!mp_eq_to_hgt(mp, end_list, height))
1739                        return false;
1740                end = end_list[height] + keep_end;
1741        } else
1742                end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1743        return mp->mp_list[height] >= end;
1744}
1745
1746/**
1747 * punch_hole - deallocate blocks in a file
1748 * @ip: inode to truncate
1749 * @offset: the start of the hole
1750 * @length: the size of the hole (or 0 for truncate)
1751 *
1752 * Punch a hole into a file or truncate a file at a given position.  This
1753 * function operates in whole blocks (@offset and @length are rounded
1754 * accordingly); partially filled blocks must be cleared otherwise.
1755 *
1756 * This function works from the bottom up, and from the right to the left. In
1757 * other words, it strips off the highest layer (data) before stripping any of
1758 * the metadata. Doing it this way is best in case the operation is interrupted
1759 * by power failure, etc.  The dinode is rewritten in every transaction to
1760 * guarantee integrity.
1761 */
1762static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1763{
1764        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1765        u64 maxsize = sdp->sd_heightsize[ip->i_height];
1766        struct metapath mp = {};
1767        struct buffer_head *dibh, *bh;
1768        struct gfs2_holder rd_gh;
1769        unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1770        u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1771        __u16 start_list[GFS2_MAX_META_HEIGHT];
1772        __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1773        unsigned int start_aligned, end_aligned;
1774        unsigned int strip_h = ip->i_height - 1;
1775        u32 btotal = 0;
1776        int ret, state;
1777        int mp_h; /* metapath buffers are read in to this height */
1778        u64 prev_bnr = 0;
1779        __be64 *start, *end;
1780
1781        if (offset >= maxsize) {
1782                /*
1783                 * The starting point lies beyond the allocated meta-data;
1784                 * there are no blocks do deallocate.
1785                 */
1786                return 0;
1787        }
1788
1789        /*
1790         * The start position of the hole is defined by lblock, start_list, and
1791         * start_aligned.  The end position of the hole is defined by lend,
1792         * end_list, and end_aligned.
1793         *
1794         * start_aligned and end_aligned define down to which height the start
1795         * and end positions are aligned to the metadata tree (i.e., the
1796         * position is a multiple of the metadata granularity at the height
1797         * above).  This determines at which heights additional meta pointers
1798         * needs to be preserved for the remaining data.
1799         */
1800
1801        if (length) {
1802                u64 end_offset = offset + length;
1803                u64 lend;
1804
1805                /*
1806                 * Clip the end at the maximum file size for the given height:
1807                 * that's how far the metadata goes; files bigger than that
1808                 * will have additional layers of indirection.
1809                 */
1810                if (end_offset > maxsize)
1811                        end_offset = maxsize;
1812                lend = end_offset >> bsize_shift;
1813
1814                if (lblock >= lend)
1815                        return 0;
1816
1817                find_metapath(sdp, lend, &mp, ip->i_height);
1818                end_list = __end_list;
1819                memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1820
1821                for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1822                        if (end_list[mp_h])
1823                                break;
1824                }
1825                end_aligned = mp_h;
1826        }
1827
1828        find_metapath(sdp, lblock, &mp, ip->i_height);
1829        memcpy(start_list, mp.mp_list, sizeof(start_list));
1830
1831        for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1832                if (start_list[mp_h])
1833                        break;
1834        }
1835        start_aligned = mp_h;
1836
1837        ret = gfs2_meta_inode_buffer(ip, &dibh);
1838        if (ret)
1839                return ret;
1840
1841        mp.mp_bh[0] = dibh;
1842        ret = lookup_metapath(ip, &mp);
1843        if (ret)
1844                goto out_metapath;
1845
1846        /* issue read-ahead on metadata */
1847        for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1848                metapointer_range(&mp, mp_h, start_list, start_aligned,
1849                                  end_list, end_aligned, &start, &end);
1850                gfs2_metapath_ra(ip->i_gl, start, end);
1851        }
1852
1853        if (mp.mp_aheight == ip->i_height)
1854                state = DEALLOC_MP_FULL; /* We have a complete metapath */
1855        else
1856                state = DEALLOC_FILL_MP; /* deal with partial metapath */
1857
1858        ret = gfs2_rindex_update(sdp);
1859        if (ret)
1860                goto out_metapath;
1861
1862        ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1863        if (ret)
1864                goto out_metapath;
1865        gfs2_holder_mark_uninitialized(&rd_gh);
1866
1867        mp_h = strip_h;
1868
1869        while (state != DEALLOC_DONE) {
1870                switch (state) {
1871                /* Truncate a full metapath at the given strip height.
1872                 * Note that strip_h == mp_h in order to be in this state. */
1873                case DEALLOC_MP_FULL:
1874                        bh = mp.mp_bh[mp_h];
1875                        gfs2_assert_withdraw(sdp, bh);
1876                        if (gfs2_assert_withdraw(sdp,
1877                                                 prev_bnr != bh->b_blocknr)) {
1878                                fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
1879                                         "s_h:%u, mp_h:%u\n",
1880                                       (unsigned long long)ip->i_no_addr,
1881                                       prev_bnr, ip->i_height, strip_h, mp_h);
1882                        }
1883                        prev_bnr = bh->b_blocknr;
1884
1885                        if (gfs2_metatype_check(sdp, bh,
1886                                                (mp_h ? GFS2_METATYPE_IN :
1887                                                        GFS2_METATYPE_DI))) {
1888                                ret = -EIO;
1889                                goto out;
1890                        }
1891
1892                        /*
1893                         * Below, passing end_aligned as 0 gives us the
1894                         * metapointer range excluding the end point: the end
1895                         * point is the first metapath we must not deallocate!
1896                         */
1897
1898                        metapointer_range(&mp, mp_h, start_list, start_aligned,
1899                                          end_list, 0 /* end_aligned */,
1900                                          &start, &end);
1901                        ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1902                                                 start, end,
1903                                                 mp_h != ip->i_height - 1,
1904                                                 &btotal);
1905
1906                        /* If we hit an error or just swept dinode buffer,
1907                           just exit. */
1908                        if (ret || !mp_h) {
1909                                state = DEALLOC_DONE;
1910                                break;
1911                        }
1912                        state = DEALLOC_MP_LOWER;
1913                        break;
1914
1915                /* lower the metapath strip height */
1916                case DEALLOC_MP_LOWER:
1917                        /* We're done with the current buffer, so release it,
1918                           unless it's the dinode buffer. Then back up to the
1919                           previous pointer. */
1920                        if (mp_h) {
1921                                brelse(mp.mp_bh[mp_h]);
1922                                mp.mp_bh[mp_h] = NULL;
1923                        }
1924                        /* If we can't get any lower in height, we've stripped
1925                           off all we can. Next step is to back up and start
1926                           stripping the previous level of metadata. */
1927                        if (mp_h == 0) {
1928                                strip_h--;
1929                                memcpy(mp.mp_list, start_list, sizeof(start_list));
1930                                mp_h = strip_h;
1931                                state = DEALLOC_FILL_MP;
1932                                break;
1933                        }
1934                        mp.mp_list[mp_h] = 0;
1935                        mp_h--; /* search one metadata height down */
1936                        mp.mp_list[mp_h]++;
1937                        if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1938                                break;
1939                        /* Here we've found a part of the metapath that is not
1940                         * allocated. We need to search at that height for the
1941                         * next non-null pointer. */
1942                        if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1943                                state = DEALLOC_FILL_MP;
1944                                mp_h++;
1945                        }
1946                        /* No more non-null pointers at this height. Back up
1947                           to the previous height and try again. */
1948                        break; /* loop around in the same state */
1949
1950                /* Fill the metapath with buffers to the given height. */
1951                case DEALLOC_FILL_MP:
1952                        /* Fill the buffers out to the current height. */
1953                        ret = fillup_metapath(ip, &mp, mp_h);
1954                        if (ret < 0)
1955                                goto out;
1956
1957                        /* On the first pass, issue read-ahead on metadata. */
1958                        if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1959                                unsigned int height = mp.mp_aheight - 1;
1960
1961                                /* No read-ahead for data blocks. */
1962                                if (mp.mp_aheight - 1 == strip_h)
1963                                        height--;
1964
1965                                for (; height >= mp.mp_aheight - ret; height--) {
1966                                        metapointer_range(&mp, height,
1967                                                          start_list, start_aligned,
1968                                                          end_list, end_aligned,
1969                                                          &start, &end);
1970                                        gfs2_metapath_ra(ip->i_gl, start, end);
1971                                }
1972                        }
1973
1974                        /* If buffers found for the entire strip height */
1975                        if (mp.mp_aheight - 1 == strip_h) {
1976                                state = DEALLOC_MP_FULL;
1977                                break;
1978                        }
1979                        if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1980                                mp_h = mp.mp_aheight - 1;
1981
1982                        /* If we find a non-null block pointer, crawl a bit
1983                           higher up in the metapath and try again, otherwise
1984                           we need to look lower for a new starting point. */
1985                        if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1986                                mp_h++;
1987                        else
1988                                state = DEALLOC_MP_LOWER;
1989                        break;
1990                }
1991        }
1992
1993        if (btotal) {
1994                if (current->journal_info == NULL) {
1995                        ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1996                                               RES_QUOTA, 0);
1997                        if (ret)
1998                                goto out;
1999                        down_write(&ip->i_rw_mutex);
2000                }
2001                gfs2_statfs_change(sdp, 0, +btotal, 0);
2002                gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
2003                                  ip->i_inode.i_gid);
2004                ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
2005                gfs2_trans_add_meta(ip->i_gl, dibh);
2006                gfs2_dinode_out(ip, dibh->b_data);
2007                up_write(&ip->i_rw_mutex);
2008                gfs2_trans_end(sdp);
2009        }
2010
2011out:
2012        if (gfs2_holder_initialized(&rd_gh))
2013                gfs2_glock_dq_uninit(&rd_gh);
2014        if (current->journal_info) {
2015                up_write(&ip->i_rw_mutex);
2016                gfs2_trans_end(sdp);
2017                cond_resched();
2018        }
2019        gfs2_quota_unhold(ip);
2020out_metapath:
2021        release_metapath(&mp);
2022        return ret;
2023}
2024
2025static int trunc_end(struct gfs2_inode *ip)
2026{
2027        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2028        struct buffer_head *dibh;
2029        int error;
2030
2031        error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2032        if (error)
2033                return error;
2034
2035        down_write(&ip->i_rw_mutex);
2036
2037        error = gfs2_meta_inode_buffer(ip, &dibh);
2038        if (error)
2039                goto out;
2040
2041        if (!i_size_read(&ip->i_inode)) {
2042                ip->i_height = 0;
2043                ip->i_goal = ip->i_no_addr;
2044                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
2045                gfs2_ordered_del_inode(ip);
2046        }
2047        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
2048        ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
2049
2050        gfs2_trans_add_meta(ip->i_gl, dibh);
2051        gfs2_dinode_out(ip, dibh->b_data);
2052        brelse(dibh);
2053
2054out:
2055        up_write(&ip->i_rw_mutex);
2056        gfs2_trans_end(sdp);
2057        return error;
2058}
2059
2060/**
2061 * do_shrink - make a file smaller
2062 * @inode: the inode
2063 * @newsize: the size to make the file
2064 *
2065 * Called with an exclusive lock on @inode. The @size must
2066 * be equal to or smaller than the current inode size.
2067 *
2068 * Returns: errno
2069 */
2070
2071static int do_shrink(struct inode *inode, u64 newsize)
2072{
2073        struct gfs2_inode *ip = GFS2_I(inode);
2074        int error;
2075
2076        error = trunc_start(inode, newsize);
2077        if (error < 0)
2078                return error;
2079        if (gfs2_is_stuffed(ip))
2080                return 0;
2081
2082        error = punch_hole(ip, newsize, 0);
2083        if (error == 0)
2084                error = trunc_end(ip);
2085
2086        return error;
2087}
2088
2089void gfs2_trim_blocks(struct inode *inode)
2090{
2091        int ret;
2092
2093        ret = do_shrink(inode, inode->i_size);
2094        WARN_ON(ret != 0);
2095}
2096
2097/**
2098 * do_grow - Touch and update inode size
2099 * @inode: The inode
2100 * @size: The new size
2101 *
2102 * This function updates the timestamps on the inode and
2103 * may also increase the size of the inode. This function
2104 * must not be called with @size any smaller than the current
2105 * inode size.
2106 *
2107 * Although it is not strictly required to unstuff files here,
2108 * earlier versions of GFS2 have a bug in the stuffed file reading
2109 * code which will result in a buffer overrun if the size is larger
2110 * than the max stuffed file size. In order to prevent this from
2111 * occurring, such files are unstuffed, but in other cases we can
2112 * just update the inode size directly.
2113 *
2114 * Returns: 0 on success, or -ve on error
2115 */
2116
2117static int do_grow(struct inode *inode, u64 size)
2118{
2119        struct gfs2_inode *ip = GFS2_I(inode);
2120        struct gfs2_sbd *sdp = GFS2_SB(inode);
2121        struct gfs2_alloc_parms ap = { .target = 1, };
2122        struct buffer_head *dibh;
2123        int error;
2124        int unstuff = 0;
2125
2126        if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
2127                error = gfs2_quota_lock_check(ip, &ap);
2128                if (error)
2129                        return error;
2130
2131                error = gfs2_inplace_reserve(ip, &ap);
2132                if (error)
2133                        goto do_grow_qunlock;
2134                unstuff = 1;
2135        }
2136
2137        error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
2138                                 (unstuff &&
2139                                  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
2140                                 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2141                                  0 : RES_QUOTA), 0);
2142        if (error)
2143                goto do_grow_release;
2144
2145        if (unstuff) {
2146                error = gfs2_unstuff_dinode(ip);
2147                if (error)
2148                        goto do_end_trans;
2149        }
2150
2151        error = gfs2_meta_inode_buffer(ip, &dibh);
2152        if (error)
2153                goto do_end_trans;
2154
2155        truncate_setsize(inode, size);
2156        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
2157        gfs2_trans_add_meta(ip->i_gl, dibh);
2158        gfs2_dinode_out(ip, dibh->b_data);
2159        brelse(dibh);
2160
2161do_end_trans:
2162        gfs2_trans_end(sdp);
2163do_grow_release:
2164        if (unstuff) {
2165                gfs2_inplace_release(ip);
2166do_grow_qunlock:
2167                gfs2_quota_unlock(ip);
2168        }
2169        return error;
2170}
2171
2172/**
2173 * gfs2_setattr_size - make a file a given size
2174 * @inode: the inode
2175 * @newsize: the size to make the file
2176 *
2177 * The file size can grow, shrink, or stay the same size. This
2178 * is called holding i_rwsem and an exclusive glock on the inode
2179 * in question.
2180 *
2181 * Returns: errno
2182 */
2183
2184int gfs2_setattr_size(struct inode *inode, u64 newsize)
2185{
2186        struct gfs2_inode *ip = GFS2_I(inode);
2187        int ret;
2188
2189        BUG_ON(!S_ISREG(inode->i_mode));
2190
2191        ret = inode_newsize_ok(inode, newsize);
2192        if (ret)
2193                return ret;
2194
2195        inode_dio_wait(inode);
2196
2197        ret = gfs2_qa_get(ip);
2198        if (ret)
2199                goto out;
2200
2201        if (newsize >= inode->i_size) {
2202                ret = do_grow(inode, newsize);
2203                goto out;
2204        }
2205
2206        ret = do_shrink(inode, newsize);
2207out:
2208        gfs2_rs_delete(ip, NULL);
2209        gfs2_qa_put(ip);
2210        return ret;
2211}
2212
2213int gfs2_truncatei_resume(struct gfs2_inode *ip)
2214{
2215        int error;
2216        error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
2217        if (!error)
2218                error = trunc_end(ip);
2219        return error;
2220}
2221
/*
 * gfs2_file_dealloc - deallocate all of a file's blocks
 *
 * Truncates from offset 0 (a length of 0 means "truncate" to punch_hole()).
 *
 * Returns: the result of punch_hole()
 */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
        int error = punch_hole(ip, 0, 0);

        return error;
}
2226
2227/**
2228 * gfs2_free_journal_extents - Free cached journal bmap info
2229 * @jd: The journal
2230 *
2231 */
2232
2233void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2234{
2235        struct gfs2_journal_extent *jext;
2236
2237        while(!list_empty(&jd->extent_list)) {
2238                jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2239                list_del(&jext->list);
2240                kfree(jext);
2241        }
2242}
2243
2244/**
2245 * gfs2_add_jextent - Add or merge a new extent to extent cache
2246 * @jd: The journal descriptor
2247 * @lblock: The logical block at start of new extent
2248 * @dblock: The physical block at start of new extent
2249 * @blocks: Size of extent in fs blocks
2250 *
2251 * Returns: 0 on success or -ENOMEM
2252 */
2253
2254static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2255{
2256        struct gfs2_journal_extent *jext;
2257
2258        if (!list_empty(&jd->extent_list)) {
2259                jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
2260                if ((jext->dblock + jext->blocks) == dblock) {
2261                        jext->blocks += blocks;
2262                        return 0;
2263                }
2264        }
2265
2266        jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2267        if (jext == NULL)
2268                return -ENOMEM;
2269        jext->dblock = dblock;
2270        jext->lblock = lblock;
2271        jext->blocks = blocks;
2272        list_add_tail(&jext->list, &jd->extent_list);
2273        jd->nr_extents++;
2274        return 0;
2275}
2276
2277/**
2278 * gfs2_map_journal_extents - Cache journal bmap info
2279 * @sdp: The super block
2280 * @jd: The journal to map
2281 *
2282 * Create a reusable "extent" mapping from all logical
2283 * blocks to all physical blocks for the given journal.  This will save
2284 * us time when writing journal blocks.  Most journals will have only one
2285 * extent that maps all their logical blocks.  That's because gfs2.mkfs
2286 * arranges the journal blocks sequentially to maximize performance.
2287 * So the extent would map the first block for the entire file length.
2288 * However, gfs2_jadd can happen while file activity is happening, so
2289 * those journals may not be sequential.  Less likely is the case where
2290 * the users created their own journals by mounting the metafs and
2291 * laying it out.  But it's still possible.  These journals might have
2292 * several extents.
2293 *
2294 * Returns: 0 on success, or error on failure
2295 */
2296
2297int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2298{
2299        u64 lblock = 0;
2300        u64 lblock_stop;
2301        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2302        struct buffer_head bh;
2303        unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2304        u64 size;
2305        int rc;
2306        ktime_t start, end;
2307
2308        start = ktime_get();
2309        lblock_stop = i_size_read(jd->jd_inode) >> shift;
2310        size = (lblock_stop - lblock) << shift;
2311        jd->nr_extents = 0;
2312        WARN_ON(!list_empty(&jd->extent_list));
2313
2314        do {
2315                bh.b_state = 0;
2316                bh.b_blocknr = 0;
2317                bh.b_size = size;
2318                rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2319                if (rc || !buffer_mapped(&bh))
2320                        goto fail;
2321                rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2322                if (rc)
2323                        goto fail;
2324                size -= bh.b_size;
2325                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2326        } while(size > 0);
2327
2328        end = ktime_get();
2329        fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2330                jd->nr_extents, ktime_ms_delta(end, start));
2331        return 0;
2332
2333fail:
2334        fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2335                rc, jd->jd_jid,
2336                (unsigned long long)(i_size_read(jd->jd_inode) - size),
2337                jd->nr_extents);
2338        fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2339                rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2340                bh.b_state, (unsigned long long)bh.b_size);
2341        gfs2_free_journal_extents(jd);
2342        return rc;
2343}
2344
2345/**
2346 * gfs2_write_alloc_required - figure out if a write will require an allocation
2347 * @ip: the file being written to
2348 * @offset: the offset to write to
2349 * @len: the number of bytes being written
2350 *
2351 * Returns: 1 if an alloc is required, 0 otherwise
2352 */
2353
2354int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2355                              unsigned int len)
2356{
2357        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2358        struct buffer_head bh;
2359        unsigned int shift;
2360        u64 lblock, lblock_stop, size;
2361        u64 end_of_file;
2362
2363        if (!len)
2364                return 0;
2365
2366        if (gfs2_is_stuffed(ip)) {
2367                if (offset + len > gfs2_max_stuffed_size(ip))
2368                        return 1;
2369                return 0;
2370        }
2371
2372        shift = sdp->sd_sb.sb_bsize_shift;
2373        BUG_ON(gfs2_is_dir(ip));
2374        end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
2375        lblock = offset >> shift;
2376        lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2377        if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
2378                return 1;
2379
2380        size = (lblock_stop - lblock) << shift;
2381        do {
2382                bh.b_state = 0;
2383                bh.b_size = size;
2384                gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2385                if (!buffer_mapped(&bh))
2386                        return 1;
2387                size -= bh.b_size;
2388                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2389        } while(size > 0);
2390
2391        return 0;
2392}
2393
2394static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2395{
2396        struct gfs2_inode *ip = GFS2_I(inode);
2397        struct buffer_head *dibh;
2398        int error;
2399
2400        if (offset >= inode->i_size)
2401                return 0;
2402        if (offset + length > inode->i_size)
2403                length = inode->i_size - offset;
2404
2405        error = gfs2_meta_inode_buffer(ip, &dibh);
2406        if (error)
2407                return error;
2408        gfs2_trans_add_meta(ip->i_gl, dibh);
2409        memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2410               length);
2411        brelse(dibh);
2412        return 0;
2413}
2414
2415static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2416                                         loff_t length)
2417{
2418        struct gfs2_sbd *sdp = GFS2_SB(inode);
2419        loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2420        int error;
2421
2422        while (length) {
2423                struct gfs2_trans *tr;
2424                loff_t chunk;
2425                unsigned int offs;
2426
2427                chunk = length;
2428                if (chunk > max_chunk)
2429                        chunk = max_chunk;
2430
2431                offs = offset & ~PAGE_MASK;
2432                if (offs && chunk > PAGE_SIZE)
2433                        chunk = offs + ((chunk - offs) & PAGE_MASK);
2434
2435                truncate_pagecache_range(inode, offset, chunk);
2436                offset += chunk;
2437                length -= chunk;
2438
2439                tr = current->journal_info;
2440                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2441                        continue;
2442
2443                gfs2_trans_end(sdp);
2444                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2445                if (error)
2446                        return error;
2447        }
2448        return 0;
2449}
2450
2451int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2452{
2453        struct inode *inode = file_inode(file);
2454        struct gfs2_inode *ip = GFS2_I(inode);
2455        struct gfs2_sbd *sdp = GFS2_SB(inode);
2456        unsigned int blocksize = i_blocksize(inode);
2457        loff_t start, end;
2458        int error;
2459
2460        if (!gfs2_is_stuffed(ip)) {
2461                unsigned int start_off, end_len;
2462
2463                start_off = offset & (blocksize - 1);
2464                end_len = (offset + length) & (blocksize - 1);
2465                if (start_off) {
2466                        unsigned int len = length;
2467                        if (length > blocksize - start_off)
2468                                len = blocksize - start_off;
2469                        error = gfs2_block_zero_range(inode, offset, len);
2470                        if (error)
2471                                goto out;
2472                        if (start_off + length < blocksize)
2473                                end_len = 0;
2474                }
2475                if (end_len) {
2476                        error = gfs2_block_zero_range(inode,
2477                                offset + length - end_len, end_len);
2478                        if (error)
2479                                goto out;
2480                }
2481        }
2482
2483        start = round_down(offset, blocksize);
2484        end = round_up(offset + length, blocksize) - 1;
2485        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
2486        if (error)
2487                return error;
2488
2489        if (gfs2_is_jdata(ip))
2490                error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2491                                         GFS2_JTRUNC_REVOKES);
2492        else
2493                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2494        if (error)
2495                return error;
2496
2497        if (gfs2_is_stuffed(ip)) {
2498                error = stuffed_zero_range(inode, offset, length);
2499                if (error)
2500                        goto out;
2501        }
2502
2503        if (gfs2_is_jdata(ip)) {
2504                BUG_ON(!current->journal_info);
2505                gfs2_journaled_truncate_range(inode, offset, length);
2506        } else
2507                truncate_pagecache_range(inode, offset, offset + length - 1);
2508
2509        file_update_time(file);
2510        mark_inode_dirty(inode);
2511
2512        if (current->journal_info)
2513                gfs2_trans_end(sdp);
2514
2515        if (!gfs2_is_stuffed(ip))
2516                error = punch_hole(ip, offset, length);
2517
2518out:
2519        if (current->journal_info)
2520                gfs2_trans_end(sdp);
2521        return error;
2522}
2523
2524static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2525                loff_t offset)
2526{
2527        int ret;
2528
2529        if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2530                return -EIO;
2531
2532        if (offset >= wpc->iomap.offset &&
2533            offset < wpc->iomap.offset + wpc->iomap.length)
2534                return 0;
2535
2536        memset(&wpc->iomap, 0, sizeof(wpc->iomap));
2537        ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2538        return ret;
2539}
2540
2541const struct iomap_writeback_ops gfs2_writeback_ops = {
2542        .map_blocks             = gfs2_map_blocks,
2543};
2544