/* linux/fs/xfs/libxfs/xfs_bmap.c */
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_shared.h"
  21#include "xfs_format.h"
  22#include "xfs_log_format.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_da_format.h"
  28#include "xfs_da_btree.h"
  29#include "xfs_dir2.h"
  30#include "xfs_inode.h"
  31#include "xfs_btree.h"
  32#include "xfs_trans.h"
  33#include "xfs_inode_item.h"
  34#include "xfs_extfree_item.h"
  35#include "xfs_alloc.h"
  36#include "xfs_bmap.h"
  37#include "xfs_bmap_util.h"
  38#include "xfs_bmap_btree.h"
  39#include "xfs_rtalloc.h"
  40#include "xfs_error.h"
  41#include "xfs_quota.h"
  42#include "xfs_trans_space.h"
  43#include "xfs_buf_item.h"
  44#include "xfs_trace.h"
  45#include "xfs_symlink.h"
  46#include "xfs_attr_leaf.h"
  47#include "xfs_filestream.h"
  48
  49
/* Zone used to allocate the xfs_bmap_free_item_t entries linked onto free lists. */
kmem_zone_t		*xfs_bmap_free_item_zone;
  51
/*
 * Miscellaneous helper functions
 */
  55
  56/*
  57 * Compute and fill in the value of the maximum depth of a bmap btree
  58 * in this filesystem.  Done once, during mount.
  59 */
  60void
  61xfs_bmap_compute_maxlevels(
  62        xfs_mount_t     *mp,            /* file system mount structure */
  63        int             whichfork)      /* data or attr fork */
  64{
  65        int             level;          /* btree level */
  66        uint            maxblocks;      /* max blocks at this level */
  67        uint            maxleafents;    /* max leaf entries possible */
  68        int             maxrootrecs;    /* max records in root block */
  69        int             minleafrecs;    /* min records in leaf block */
  70        int             minnoderecs;    /* min records in node block */
  71        int             sz;             /* root block size */
  72
  73        /*
  74         * The maximum number of extents in a file, hence the maximum
  75         * number of leaf entries, is controlled by the type of di_nextents
  76         * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
  77         * (a signed 16-bit number, xfs_aextnum_t).
  78         *
  79         * Note that we can no longer assume that if we are in ATTR1 that
  80         * the fork offset of all the inodes will be
  81         * (xfs_default_attroffset(ip) >> 3) because we could have mounted
  82         * with ATTR2 and then mounted back with ATTR1, keeping the
  83         * di_forkoff's fixed but probably at various positions. Therefore,
  84         * for both ATTR1 and ATTR2 we have to assume the worst case scenario
  85         * of a minimum size available.
  86         */
  87        if (whichfork == XFS_DATA_FORK) {
  88                maxleafents = MAXEXTNUM;
  89                sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  90        } else {
  91                maxleafents = MAXAEXTNUM;
  92                sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  93        }
  94        maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  95        minleafrecs = mp->m_bmap_dmnr[0];
  96        minnoderecs = mp->m_bmap_dmnr[1];
  97        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  98        for (level = 1; maxblocks > 1; level++) {
  99                if (maxblocks <= maxrootrecs)
 100                        maxblocks = 1;
 101                else
 102                        maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
 103        }
 104        mp->m_bm_maxlevels[whichfork] = level;
 105}
 106
 107STATIC int                              /* error */
 108xfs_bmbt_lookup_eq(
 109        struct xfs_btree_cur    *cur,
 110        xfs_fileoff_t           off,
 111        xfs_fsblock_t           bno,
 112        xfs_filblks_t           len,
 113        int                     *stat)  /* success/failure */
 114{
 115        cur->bc_rec.b.br_startoff = off;
 116        cur->bc_rec.b.br_startblock = bno;
 117        cur->bc_rec.b.br_blockcount = len;
 118        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 119}
 120
 121STATIC int                              /* error */
 122xfs_bmbt_lookup_ge(
 123        struct xfs_btree_cur    *cur,
 124        xfs_fileoff_t           off,
 125        xfs_fsblock_t           bno,
 126        xfs_filblks_t           len,
 127        int                     *stat)  /* success/failure */
 128{
 129        cur->bc_rec.b.br_startoff = off;
 130        cur->bc_rec.b.br_startblock = bno;
 131        cur->bc_rec.b.br_blockcount = len;
 132        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 133}
 134
 135/*
 136 * Check if the inode needs to be converted to btree format.
 137 */
 138static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 139{
 140        return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
 141                XFS_IFORK_NEXTENTS(ip, whichfork) >
 142                        XFS_IFORK_MAXEXT(ip, whichfork);
 143}
 144
 145/*
 146 * Check if the inode should be converted to extent format.
 147 */
 148static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 149{
 150        return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
 151                XFS_IFORK_NEXTENTS(ip, whichfork) <=
 152                        XFS_IFORK_MAXEXT(ip, whichfork);
 153}
 154
 155/*
 156 * Update the record referred to by cur to the value given
 157 * by [off, bno, len, state].
 158 * This either works (return 0) or gets an EFSCORRUPTED error.
 159 */
 160STATIC int
 161xfs_bmbt_update(
 162        struct xfs_btree_cur    *cur,
 163        xfs_fileoff_t           off,
 164        xfs_fsblock_t           bno,
 165        xfs_filblks_t           len,
 166        xfs_exntst_t            state)
 167{
 168        union xfs_btree_rec     rec;
 169
 170        xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
 171        return xfs_btree_update(cur, &rec);
 172}
 173
/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];	/* start with the leaf-level fanout */
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		/*
		 * len becomes the number of blocks needed at this level:
		 * ceil(len / maxrecs).  Note do_div() divides len in place.
		 */
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			/*
			 * Once a level needs only one block, each remaining
			 * level up to the data fork's maximum needs exactly
			 * one block too, so we can finish early.
			 */
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			/* Levels above the leaves use the node fanout. */
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
 204
 205/*
 206 * Calculate the default attribute fork offset for newly created inodes.
 207 */
 208uint
 209xfs_default_attroffset(
 210        struct xfs_inode        *ip)
 211{
 212        struct xfs_mount        *mp = ip->i_mount;
 213        uint                    offset;
 214
 215        if (mp->m_sb.sb_inodesize == 256) {
 216                offset = XFS_LITINO(mp, ip->i_d.di_version) -
 217                                XFS_BMDR_SPACE_CALC(MINABTPTRS);
 218        } else {
 219                offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 220        }
 221
 222        ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
 223        return offset;
 224}
 225
 226/*
 227 * Helper routine to reset inode di_forkoff field when switching
 228 * attribute fork from local to extent format - we reset it where
 229 * possible to make space available for inline data fork extents.
 230 */
 231STATIC void
 232xfs_bmap_forkoff_reset(
 233        xfs_inode_t     *ip,
 234        int             whichfork)
 235{
 236        if (whichfork == XFS_ATTR_FORK &&
 237            ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
 238            ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
 239            ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
 240                uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 241
 242                if (dfl_forkoff > ip->i_d.di_forkoff)
 243                        ip->i_d.di_forkoff = dfl_forkoff;
 244        }
 245}
 246
#ifdef DEBUG
/*
 * Debug-only: return the buffer for the given disk address if it is
 * already held, either by one of the cursor's levels or by a buf log
 * item on the cursor's transaction; NULL if not found (or no cursor).
 */
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	/* First scan the buffers attached to each cursor level. */
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item *bip;
		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}
 277
/*
 * Debug-only sanity check of one bmbt node block: keys must be in
 * strictly increasing startoff order, and no two child pointers may
 * reference the same block.  Panics if a duplicate pointer is found.
 *
 * @root is nonzero for the incore root block, which uses a different
 * pointer layout (@sz is the root block size, used only in that case).
 */
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	/* Only node blocks (level > 0) carry keys/pointers to check. */
	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			/* Keys must strictly increase across the block. */
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		/* O(n^2) pairwise scan -- acceptable for a DEBUG-only check. */
		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}
 325
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such large
 * bmapbt constructs that is going to catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0; /* bp read here, not cursor-held */

	/* Only btree-format forks have leaf blocks to walk. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	/* skip large extent count inodes */
	if (ip->i_d.di_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			/* Not held by the cursor: read it, release it below. */
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			/* Cross-block check against the previous leaf's tail. */
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			/* Adjacent records must be ordered and non-overlapping. */
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}
	if (bp_release) {
		bp_release = 0;
		xfs_trans_brelse(NULL, bp);
	}
	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}
 496
 497/*
 498 * Add bmap trace insert entries for all the contents of the extent records.
 499 */
 500void
 501xfs_bmap_trace_exlist(
 502        xfs_inode_t     *ip,            /* incore inode pointer */
 503        xfs_extnum_t    cnt,            /* count of entries in the list */
 504        int             whichfork,      /* data or attr fork */
 505        unsigned long   caller_ip)
 506{
 507        xfs_extnum_t    idx;            /* extent record index */
 508        xfs_ifork_t     *ifp;           /* inode fork pointer */
 509        int             state = 0;
 510
 511        if (whichfork == XFS_ATTR_FORK)
 512                state |= BMAP_ATTRFORK;
 513
 514        ifp = XFS_IFORK_PTR(ip, whichfork);
 515        ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 516        for (idx = 0; idx < cnt; idx++)
 517                trace_xfs_extlist(ip, idx, whichfork, caller_ip);
 518}
 519
 520/*
 521 * Validate that the bmbt_irecs being returned from bmapi are valid
 522 * given the caller's original parameters.  Specifically check the
 523 * ranges of the returned irecs to ensure that they only extend beyond
 524 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 525 */
 526STATIC void
 527xfs_bmap_validate_ret(
 528        xfs_fileoff_t           bno,
 529        xfs_filblks_t           len,
 530        int                     flags,
 531        xfs_bmbt_irec_t         *mval,
 532        int                     nmap,
 533        int                     ret_nmap)
 534{
 535        int                     i;              /* index to map values */
 536
 537        ASSERT(ret_nmap <= nmap);
 538
 539        for (i = 0; i < ret_nmap; i++) {
 540                ASSERT(mval[i].br_blockcount > 0);
 541                if (!(flags & XFS_BMAPI_ENTIRE)) {
 542                        ASSERT(mval[i].br_startoff >= bno);
 543                        ASSERT(mval[i].br_blockcount <= len);
 544                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 545                               bno + len);
 546                } else {
 547                        ASSERT(mval[i].br_startoff < bno + len);
 548                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 549                               bno);
 550                }
 551                ASSERT(i == 0 ||
 552                       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 553                       mval[i].br_startoff);
 554                ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 555                       mval[i].br_startblock != HOLESTARTBLOCK);
 556                ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 557                       mval[i].br_state == XFS_EXT_UNWRITTEN);
 558        }
 559}
 560
 561#else
 562#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
 563#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
 564#endif /* DEBUG */
 565
/*
 * bmap free list manipulation functions
 */
 569
 570/*
 571 * Add the extent to the list of extents to be free at transaction end.
 572 * The list is maintained sorted (by block number).
 573 */
 574void
 575xfs_bmap_add_free(
 576        xfs_fsblock_t           bno,            /* fs block number of extent */
 577        xfs_filblks_t           len,            /* length of extent */
 578        xfs_bmap_free_t         *flist,         /* list of extents */
 579        xfs_mount_t             *mp)            /* mount point structure */
 580{
 581        xfs_bmap_free_item_t    *cur;           /* current (next) element */
 582        xfs_bmap_free_item_t    *new;           /* new element */
 583        xfs_bmap_free_item_t    *prev;          /* previous element */
 584#ifdef DEBUG
 585        xfs_agnumber_t          agno;
 586        xfs_agblock_t           agbno;
 587
 588        ASSERT(bno != NULLFSBLOCK);
 589        ASSERT(len > 0);
 590        ASSERT(len <= MAXEXTLEN);
 591        ASSERT(!isnullstartblock(bno));
 592        agno = XFS_FSB_TO_AGNO(mp, bno);
 593        agbno = XFS_FSB_TO_AGBNO(mp, bno);
 594        ASSERT(agno < mp->m_sb.sb_agcount);
 595        ASSERT(agbno < mp->m_sb.sb_agblocks);
 596        ASSERT(len < mp->m_sb.sb_agblocks);
 597        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 598#endif
 599        ASSERT(xfs_bmap_free_item_zone != NULL);
 600        new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
 601        new->xbfi_startblock = bno;
 602        new->xbfi_blockcount = (xfs_extlen_t)len;
 603        for (prev = NULL, cur = flist->xbf_first;
 604             cur != NULL;
 605             prev = cur, cur = cur->xbfi_next) {
 606                if (cur->xbfi_startblock >= bno)
 607                        break;
 608        }
 609        if (prev)
 610                prev->xbfi_next = new;
 611        else
 612                flist->xbf_first = new;
 613        new->xbfi_next = cur;
 614        flist->xbf_count++;
 615}
 616
 617/*
 618 * Remove the entry "free" from the free item list.  Prev points to the
 619 * previous entry, unless "free" is the head of the list.
 620 */
 621void
 622xfs_bmap_del_free(
 623        xfs_bmap_free_t         *flist, /* free item list header */
 624        xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
 625        xfs_bmap_free_item_t    *free)  /* list item to be freed */
 626{
 627        if (prev)
 628                prev->xbfi_next = free->xbfi_next;
 629        else
 630                flist->xbf_first = free->xbfi_next;
 631        flist->xbf_count--;
 632        kmem_zone_free(xfs_bmap_free_item_zone, free);
 633}
 634
 635/*
 636 * Free up any items left in the list.
 637 */
 638void
 639xfs_bmap_cancel(
 640        xfs_bmap_free_t         *flist) /* list of bmap_free_items */
 641{
 642        xfs_bmap_free_item_t    *free;  /* free list item */
 643        xfs_bmap_free_item_t    *next;
 644
 645        if (flist->xbf_count == 0)
 646                return;
 647        ASSERT(flist->xbf_first != NULL);
 648        for (free = flist->xbf_first; free; free = next) {
 649                next = free->xbfi_next;
 650                xfs_bmap_del_free(flist, NULL, free);
 651        }
 652        ASSERT(flist->xbf_count == 0);
 653}
 654
/*
 * Inode fork format manipulation functions
 */
 658
/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	/* Conversion is only legal when the root has exactly one child. */
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
		return error;
#endif
	/* Read and validate the sole leaf block. */
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	/*
	 * Queue the leaf block to be freed at transaction end and account
	 * for the block leaving the inode (di_nblocks and quota).
	 */
	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	/* Don't leave the cursor pointing at the now-invalidated buffer. */
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	/* Give back the incore root space and flip the fork format. */
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
 718
 719/*
 720 * Convert an extents-format file into a btree-format file.
 721 * The new file will have a root block (in the inode) and a single child block.
 722 */
 723STATIC int                                      /* error */
 724xfs_bmap_extents_to_btree(
 725        xfs_trans_t             *tp,            /* transaction pointer */
 726        xfs_inode_t             *ip,            /* incore inode pointer */
 727        xfs_fsblock_t           *firstblock,    /* first-block-allocated */
 728        xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
 729        xfs_btree_cur_t         **curp,         /* cursor returned to caller */
 730        int                     wasdel,         /* converting a delayed alloc */
 731        int                     *logflagsp,     /* inode logging flags */
 732        int                     whichfork)      /* data or attr fork */
 733{
 734        struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
 735        xfs_buf_t               *abp;           /* buffer for ablock */
 736        xfs_alloc_arg_t         args;           /* allocation arguments */
 737        xfs_bmbt_rec_t          *arp;           /* child record pointer */
 738        struct xfs_btree_block  *block;         /* btree root block */
 739        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
 740        xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
 741        int                     error;          /* error return value */
 742        xfs_extnum_t            i, cnt;         /* extent record index */
 743        xfs_ifork_t             *ifp;           /* inode fork pointer */
 744        xfs_bmbt_key_t          *kp;            /* root block key pointer */
 745        xfs_mount_t             *mp;            /* mount structure */
 746        xfs_extnum_t            nextents;       /* number of file extents */
 747        xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
 748
 749        mp = ip->i_mount;
 750        ifp = XFS_IFORK_PTR(ip, whichfork);
 751        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
 752
 753        /*
 754         * Make space in the inode incore.
 755         */
 756        xfs_iroot_realloc(ip, 1, whichfork);
 757        ifp->if_flags |= XFS_IFBROOT;
 758
 759        /*
 760         * Fill in the root.
 761         */
 762        block = ifp->if_broot;
 763        if (xfs_sb_version_hascrc(&mp->m_sb))
 764                xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 765                                 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
 766                                 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 767        else
 768                xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 769                                 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
 770                                 XFS_BTREE_LONG_PTRS);
 771
 772        /*
 773         * Need a cursor.  Can't allocate until bb_level is filled in.
 774         */
 775        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 776        cur->bc_private.b.firstblock = *firstblock;
 777        cur->bc_private.b.flist = flist;
 778        cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 779        /*
 780         * Convert to a btree with two levels, one record in root.
 781         */
 782        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
 783        memset(&args, 0, sizeof(args));
 784        args.tp = tp;
 785        args.mp = mp;
 786        args.firstblock = *firstblock;
 787        if (*firstblock == NULLFSBLOCK) {
 788                args.type = XFS_ALLOCTYPE_START_BNO;
 789                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 790        } else if (flist->xbf_low) {
 791                args.type = XFS_ALLOCTYPE_START_BNO;
 792                args.fsbno = *firstblock;
 793        } else {
 794                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 795                args.fsbno = *firstblock;
 796        }
 797        args.minlen = args.maxlen = args.prod = 1;
 798        args.wasdel = wasdel;
 799        *logflagsp = 0;
 800        if ((error = xfs_alloc_vextent(&args))) {
 801                xfs_iroot_realloc(ip, -1, whichfork);
 802                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 803                return error;
 804        }
 805        /*
 806         * Allocation can't fail, the space was reserved.
 807         */
 808        ASSERT(args.fsbno != NULLFSBLOCK);
 809        ASSERT(*firstblock == NULLFSBLOCK ||
 810               args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
 811               (flist->xbf_low &&
 812                args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
 813        *firstblock = cur->bc_private.b.firstblock = args.fsbno;
 814        cur->bc_private.b.allocated++;
 815        ip->i_d.di_nblocks++;
 816        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 817        abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 818        /*
 819         * Fill in the child block.
 820         */
 821        abp->b_ops = &xfs_bmbt_buf_ops;
 822        ablock = XFS_BUF_TO_BLOCK(abp);
 823        if (xfs_sb_version_hascrc(&mp->m_sb))
 824                xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 825                                XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
 826                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 827        else
 828                xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 829                                XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
 830                                XFS_BTREE_LONG_PTRS);
 831
 832        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 833        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 834        for (cnt = i = 0; i < nextents; i++) {
 835                ep = xfs_iext_get_ext(ifp, i);
 836                if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
 837                        arp->l0 = cpu_to_be64(ep->l0);
 838                        arp->l1 = cpu_to_be64(ep->l1);
 839                        arp++; cnt++;
 840                }
 841        }
 842        ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
 843        xfs_btree_set_numrecs(ablock, cnt);
 844
 845        /*
 846         * Fill in the root key and pointer.
 847         */
 848        kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 849        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 850        kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 851        pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 852                                                be16_to_cpu(block->bb_level)));
 853        *pp = cpu_to_be64(args.fsbno);
 854
 855        /*
 856         * Do all this logging at the end so that
 857         * the root is at the right level.
 858         */
 859        xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 860        xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 861        ASSERT(*curp == NULL);
 862        *curp = cur;
 863        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 864        return 0;
 865}
 866
 867/*
 868 * Convert a local file to an extents file.
 869 * This code is out of bounds for data forks of regular files,
 870 * since the file data needs to get logged so things will stay consistent.
 871 * (The bmap-level manipulations are ok, though).
 872 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	/* caller must have already emptied a local format fork */
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	/* flip the fork over to (zero-record) extents format */
	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}
 889
 890
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,	/* data or attr fork */
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	/* an empty fork needs no block at all; just switch the format */
	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
								XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		/* nothing allocated yet in this transaction: aim near inode */
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	/* record the single new extent covering the copied-out data */
	xfs_iext_add(ifp, 0, 1);
	ep = xfs_iext_get_ext(ifp, 0);
	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
	trace_xfs_bmap_post_update(ip, 0,
			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
			_THIS_IP_);
	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
 987
 988/*
 989 * Called from xfs_bmap_add_attrfork to handle btree format files.
 990 */
 991STATIC int                                      /* error */
 992xfs_bmap_add_attrfork_btree(
 993        xfs_trans_t             *tp,            /* transaction pointer */
 994        xfs_inode_t             *ip,            /* incore inode pointer */
 995        xfs_fsblock_t           *firstblock,    /* first block allocated */
 996        xfs_bmap_free_t         *flist,         /* blocks to free at commit */
 997        int                     *flags)         /* inode logging flags */
 998{
 999        xfs_btree_cur_t         *cur;           /* btree cursor */
1000        int                     error;          /* error return value */
1001        xfs_mount_t             *mp;            /* file system mount struct */
1002        int                     stat;           /* newroot status */
1003
1004        mp = ip->i_mount;
1005        if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
1006                *flags |= XFS_ILOG_DBROOT;
1007        else {
1008                cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
1009                cur->bc_private.b.flist = flist;
1010                cur->bc_private.b.firstblock = *firstblock;
1011                if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
1012                        goto error0;
1013                /* must be at least one entry */
1014                XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
1015                if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
1016                        goto error0;
1017                if (stat == 0) {
1018                        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1019                        return -ENOSPC;
1020                }
1021                *firstblock = cur->bc_private.b.firstblock;
1022                cur->bc_private.b.allocated = 0;
1023                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1024        }
1025        return 0;
1026error0:
1027        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1028        return error;
1029}
1030
1031/*
1032 * Called from xfs_bmap_add_attrfork to handle extents format files.
1033 */
1034STATIC int                                      /* error */
1035xfs_bmap_add_attrfork_extents(
1036        xfs_trans_t             *tp,            /* transaction pointer */
1037        xfs_inode_t             *ip,            /* incore inode pointer */
1038        xfs_fsblock_t           *firstblock,    /* first block allocated */
1039        xfs_bmap_free_t         *flist,         /* blocks to free at commit */
1040        int                     *flags)         /* inode logging flags */
1041{
1042        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
1043        int                     error;          /* error return value */
1044
1045        if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1046                return 0;
1047        cur = NULL;
1048        error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
1049                flags, XFS_DATA_FORK);
1050        if (cur) {
1051                cur->bc_private.b.allocated = 0;
1052                xfs_btree_del_cursor(cur,
1053                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1054        }
1055        return error;
1056}
1057
1058/*
1059 * Called from xfs_bmap_add_attrfork to handle local format files. Each
1060 * different data fork content type needs a different callout to do the
1061 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised they
1063 * handle everything themselves.
1064 *
1065 * XXX (dgc): investigate whether directory conversion can use the generic
1066 * formatting callout. It should be possible - it's just a very complex
1067 * formatter.
1068 */
1069STATIC int                                      /* error */
1070xfs_bmap_add_attrfork_local(
1071        xfs_trans_t             *tp,            /* transaction pointer */
1072        xfs_inode_t             *ip,            /* incore inode pointer */
1073        xfs_fsblock_t           *firstblock,    /* first block allocated */
1074        xfs_bmap_free_t         *flist,         /* blocks to free at commit */
1075        int                     *flags)         /* inode logging flags */
1076{
1077        xfs_da_args_t           dargs;          /* args for dir/attr code */
1078
1079        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1080                return 0;
1081
1082        if (S_ISDIR(ip->i_d.di_mode)) {
1083                memset(&dargs, 0, sizeof(dargs));
1084                dargs.geo = ip->i_mount->m_dir_geo;
1085                dargs.dp = ip;
1086                dargs.firstblock = firstblock;
1087                dargs.flist = flist;
1088                dargs.total = dargs.geo->fsbcount;
1089                dargs.whichfork = XFS_DATA_FORK;
1090                dargs.trans = tp;
1091                return xfs_dir2_sf_to_block(&dargs);
1092        }
1093
1094        if (S_ISLNK(ip->i_d.di_mode))
1095                return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
1096                                                 flags, XFS_DATA_FORK,
1097                                                 xfs_symlink_local_to_remote);
1098
1099        /* should only be called for types that support local format data */
1100        ASSERT(0);
1101        return -EFSCORRUPTED;
1102}
1103
1104/*
1105 * Convert inode from non-attributed to attributed.
1106 * Must not be in a transaction, ip must not be locked.
1107 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	/* set up and reserve a transaction for the fork addition */
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* reserve quota for the blocks; FORCE_RES when using reserved pool */
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	/* recheck under the lock in case the fork was added concurrently */
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/*
	 * Pick where the attr fork starts (di_forkoff, in 8-byte units)
	 * based on the current data fork format.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;	/* non-default offset needs attr2 */
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	/*
	 * Shrinking the data fork area may force the data fork out of the
	 * inode literal area; dispatch on its current format to convert it.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	/*
	 * If this is the first attr (or first attr2-style offset) on the
	 * filesystem, update and log the superblock feature bits.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_bmap_finish(&tp, &flist, NULL);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_bmap_cancel(&flist);
trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
1236
1237/*
1238 * Internal and external extent tree search functions.
1239 */
1240
1241/*
1242 * Read in the extents to if_extents.
1243 * All inode fields are set up by caller, we just traverse the btree
1244 * and copy the records in. If the file system cannot contain unwritten
1245 * extents, the records are checked for no "state" flags.
1246 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* non-data forks must not carry extent state flags */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* more records than the fork has room for means corruption */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	/* every record the fork advertises should have been copied */
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return -EFSCORRUPTED;
}
1376
1377
1378/*
1379 * Search the extent records for the entry containing block bno.
1380 * If bno lies in a hole, point to the next entry.  If bno lies
1381 * past eof, *eofp will be set, and *prevp will contain the last
1382 * entry (null if none).  Else, *lastxp will be set to the index
1383 * of the found entry; *gotp will contain the entry.
1384 */
1385STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
1386xfs_bmap_search_multi_extents(
1387        xfs_ifork_t     *ifp,           /* inode fork pointer */
1388        xfs_fileoff_t   bno,            /* block number searched for */
1389        int             *eofp,          /* out: end of file found */
1390        xfs_extnum_t    *lastxp,        /* out: last extent index */
1391        xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1392        xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1393{
1394        xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
1395        xfs_extnum_t    lastx;          /* last extent index */
1396
1397        /*
1398         * Initialize the extent entry structure to catch access to
1399         * uninitialized br_startblock field.
1400         */
1401        gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1402        gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1403        gotp->br_state = XFS_EXT_INVALID;
1404        gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1405        prevp->br_startoff = NULLFILEOFF;
1406
1407        ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1408        if (lastx > 0) {
1409                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1410        }
1411        if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1412                xfs_bmbt_get_all(ep, gotp);
1413                *eofp = 0;
1414        } else {
1415                if (lastx > 0) {
1416                        *gotp = *prevp;
1417                }
1418                *eofp = 1;
1419                ep = NULL;
1420        }
1421        *lastxp = lastx;
1422        return ep;
1423}
1424
1425/*
1426 * Search the extents list for the inode, for the extent containing bno.
1427 * If bno lies in a hole, point to the next entry.  If bno lies past eof,
1428 * *eofp will be set, and *prevp will contain the last entry (null if none).
1429 * Else, *lastxp will be set to the index of the found
1430 * entry; *gotp will contain the entry.
1431 */
STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		fork,		/* data or attr fork */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t  *ep;	     /* extent record pointer */

	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	/*
	 * A mapping at block zero is only acceptable for the realtime data
	 * fork; anywhere else it looks like corruption, so complain loudly
	 * and report EOF rather than handing the bogus extent to the caller.
	 */
	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}
1467
1468/*
1469 * Returns the file-relative block number of the first unused block(s)
1470 * in the file with at least "len" logically contiguous blocks free.
1471 * This is the lowest-address hole if the file has holes, else the first block
1472 * past the end of file.
1473 * Return 0 if the file is currently local (in-inode).
1474 */
1475int                                             /* error */
1476xfs_bmap_first_unused(
1477        xfs_trans_t     *tp,                    /* transaction pointer */
1478        xfs_inode_t     *ip,                    /* incore inode */
1479        xfs_extlen_t    len,                    /* size of hole to find */
1480        xfs_fileoff_t   *first_unused,          /* unused block */
1481        int             whichfork)              /* data or attr fork */
1482{
1483        int             error;                  /* error return value */
1484        int             idx;                    /* extent record index */
1485        xfs_ifork_t     *ifp;                   /* inode fork pointer */
1486        xfs_fileoff_t   lastaddr;               /* last block number seen */
1487        xfs_fileoff_t   lowest;                 /* lowest useful block */
1488        xfs_fileoff_t   max;                    /* starting useful block */
1489        xfs_fileoff_t   off;                    /* offset for this block */
1490        xfs_extnum_t    nextents;               /* number of extent entries */
1491
1492        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1493               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1494               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1495        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1496                *first_unused = 0;
1497                return 0;
1498        }
1499        ifp = XFS_IFORK_PTR(ip, whichfork);
1500        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1501            (error = xfs_iread_extents(tp, ip, whichfork)))
1502                return error;
1503        lowest = *first_unused;
1504        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1505        for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1506                xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1507                off = xfs_bmbt_get_startoff(ep);
1508                /*
1509                 * See if the hole before this extent will work.
1510                 */
1511                if (off >= lowest + len && off - max >= len) {
1512                        *first_unused = max;
1513                        return 0;
1514                }
1515                lastaddr = off + xfs_bmbt_get_blockcount(ep);
1516                max = XFS_FILEOFF_MAX(lastaddr, lowest);
1517        }
1518        *first_unused = max;
1519        return 0;
1520}
1521
1522/*
1523 * Returns the file-relative block number of the last block - 1 before
1524 * last_block (input value) in the file.
1525 * This is not based on i_size, it is based on the extent records.
1526 * Returns 0 for local files, as they do not have extent records.
1527 */
1528int                                             /* error */
1529xfs_bmap_last_before(
1530        xfs_trans_t     *tp,                    /* transaction pointer */
1531        xfs_inode_t     *ip,                    /* incore inode */
1532        xfs_fileoff_t   *last_block,            /* last block */
1533        int             whichfork)              /* data or attr fork */
1534{
1535        xfs_fileoff_t   bno;                    /* input file offset */
1536        int             eof;                    /* hit end of file */
1537        xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
1538        int             error;                  /* error return value */
1539        xfs_bmbt_irec_t got;                    /* current extent value */
1540        xfs_ifork_t     *ifp;                   /* inode fork pointer */
1541        xfs_extnum_t    lastx;                  /* last extent used */
1542        xfs_bmbt_irec_t prev;                   /* previous extent value */
1543
1544        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1545            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1546            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1547               return -EIO;
1548        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1549                *last_block = 0;
1550                return 0;
1551        }
1552        ifp = XFS_IFORK_PTR(ip, whichfork);
1553        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1554            (error = xfs_iread_extents(tp, ip, whichfork)))
1555                return error;
1556        bno = *last_block - 1;
1557        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
1558                &prev);
1559        if (eof || xfs_bmbt_get_startoff(ep) > bno) {
1560                if (prev.br_startoff == NULLFILEOFF)
1561                        *last_block = 0;
1562                else
1563                        *last_block = prev.br_startoff + prev.br_blockcount;
1564        }
1565        /*
1566         * Otherwise *last_block is already the right answer.
1567         */
1568        return 0;
1569}
1570
1571int
1572xfs_bmap_last_extent(
1573        struct xfs_trans        *tp,
1574        struct xfs_inode        *ip,
1575        int                     whichfork,
1576        struct xfs_bmbt_irec    *rec,
1577        int                     *is_empty)
1578{
1579        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1580        int                     error;
1581        int                     nextents;
1582
1583        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1584                error = xfs_iread_extents(tp, ip, whichfork);
1585                if (error)
1586                        return error;
1587        }
1588
1589        nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1590        if (nextents == 0) {
1591                *is_empty = 1;
1592                return 0;
1593        }
1594
1595        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1596        *is_empty = 0;
1597        return 0;
1598}
1599
1600/*
1601 * Check the last inode extent to determine whether this allocation will result
1602 * in blocks being allocated at the end of the file. When we allocate new data
1603 * blocks at the end of the file which do not start at the previous data block,
1604 * we will try to align the new blocks at stripe unit boundaries.
1605 *
1606 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1607 * at, or past the EOF.
1608 */
1609STATIC int
1610xfs_bmap_isaeof(
1611        struct xfs_bmalloca     *bma,
1612        int                     whichfork)
1613{
1614        struct xfs_bmbt_irec    rec;
1615        int                     is_empty;
1616        int                     error;
1617
1618        bma->aeof = 0;
1619        error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1620                                     &is_empty);
1621        if (error)
1622                return error;
1623
1624        if (is_empty) {
1625                bma->aeof = 1;
1626                return 0;
1627        }
1628
1629        /*
1630         * Check if we are allocation or past the last extent, or at least into
1631         * the last delayed allocated extent.
1632         */
1633        bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1634                (bma->offset >= rec.br_startoff &&
1635                 isnullstartblock(rec.br_startblock));
1636        return 0;
1637}
1638
1639/*
1640 * Returns the file-relative block number of the first block past eof in
1641 * the file.  This is not based on i_size, it is based on the extent records.
1642 * Returns 0 for local files, as they do not have extent records.
1643 */
1644int
1645xfs_bmap_last_offset(
1646        struct xfs_inode        *ip,
1647        xfs_fileoff_t           *last_block,
1648        int                     whichfork)
1649{
1650        struct xfs_bmbt_irec    rec;
1651        int                     is_empty;
1652        int                     error;
1653
1654        *last_block = 0;
1655
1656        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1657                return 0;
1658
1659        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1660            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1661               return -EIO;
1662
1663        error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1664        if (error || is_empty)
1665                return error;
1666
1667        *last_block = rec.br_startoff + rec.br_blockcount;
1668        return 0;
1669}
1670
1671/*
1672 * Returns whether the selected fork of the inode has exactly one
1673 * block or not.  For the data fork we check this matches di_size,
1674 * implying the file's range is 0..bsize-1.
1675 */
1676int                                     /* 1=>1 block, 0=>otherwise */
1677xfs_bmap_one_block(
1678        xfs_inode_t     *ip,            /* incore inode */
1679        int             whichfork)      /* data or attr fork */
1680{
1681        xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
1682        xfs_ifork_t     *ifp;           /* inode fork pointer */
1683        int             rval;           /* return value */
1684        xfs_bmbt_irec_t s;              /* internal version of extent */
1685
1686#ifndef DEBUG
1687        if (whichfork == XFS_DATA_FORK)
1688                return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1689#endif  /* !DEBUG */
1690        if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1691                return 0;
1692        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1693                return 0;
1694        ifp = XFS_IFORK_PTR(ip, whichfork);
1695        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1696        ep = xfs_iext_get_ext(ifp, 0);
1697        xfs_bmbt_get_all(ep, &s);
1698        rval = s.br_startoff == 0 && s.br_blockcount == 1;
1699        if (rval && whichfork == XFS_DATA_FORK)
1700                ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1701        return rval;
1702}
1703
1704/*
1705 * Extent tree manipulation functions used during allocation.
1706 */
1707
1708/*
1709 * Convert a delayed allocation to a real allocation.
1710 */
1711STATIC int                              /* error */
1712xfs_bmap_add_extent_delay_real(
1713        struct xfs_bmalloca     *bma)
1714{
1715        struct xfs_bmbt_irec    *new = &bma->got;
1716        int                     diff;   /* temp value */
1717        xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1718        int                     error;  /* error return value */
1719        int                     i;      /* temp state */
1720        xfs_ifork_t             *ifp;   /* inode fork pointer */
1721        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1722        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1723                                        /* left is 0, right is 1, prev is 2 */
1724        int                     rval=0; /* return value (logging flags) */
1725        int                     state = 0;/* state bits, accessed thru macros */
1726        xfs_filblks_t           da_new; /* new count del alloc blocks used */
1727        xfs_filblks_t           da_old; /* old count del alloc blocks used */
1728        xfs_filblks_t           temp=0; /* value for da_new calculations */
1729        xfs_filblks_t           temp2=0;/* value for da_new calculations */
1730        int                     tmp_rval;       /* partial logging flags */
1731        int                     whichfork = XFS_DATA_FORK;
1732        struct xfs_mount        *mp;
1733
1734        mp = bma->ip->i_mount;
1735        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1736
1737        ASSERT(bma->idx >= 0);
1738        ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
1739        ASSERT(!isnullstartblock(new->br_startblock));
1740        ASSERT(!bma->cur ||
1741               (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1742
1743        XFS_STATS_INC(mp, xs_add_exlist);
1744
1745#define LEFT            r[0]
1746#define RIGHT           r[1]
1747#define PREV            r[2]
1748
1749        /*
1750         * Set up a bunch of variables to make the tests simpler.
1751         */
1752        ep = xfs_iext_get_ext(ifp, bma->idx);
1753        xfs_bmbt_get_all(ep, &PREV);
1754        new_endoff = new->br_startoff + new->br_blockcount;
1755        ASSERT(PREV.br_startoff <= new->br_startoff);
1756        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1757
1758        da_old = startblockval(PREV.br_startblock);
1759        da_new = 0;
1760
1761        /*
1762         * Set flags determining what part of the previous delayed allocation
1763         * extent is being replaced by a real allocation.
1764         */
1765        if (PREV.br_startoff == new->br_startoff)
1766                state |= BMAP_LEFT_FILLING;
1767        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1768                state |= BMAP_RIGHT_FILLING;
1769
1770        /*
1771         * Check and set flags if this segment has a left neighbor.
1772         * Don't set contiguous if the combined extent would be too large.
1773         */
1774        if (bma->idx > 0) {
1775                state |= BMAP_LEFT_VALID;
1776                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1777
1778                if (isnullstartblock(LEFT.br_startblock))
1779                        state |= BMAP_LEFT_DELAY;
1780        }
1781
1782        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1783            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1784            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1785            LEFT.br_state == new->br_state &&
1786            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1787                state |= BMAP_LEFT_CONTIG;
1788
1789        /*
1790         * Check and set flags if this segment has a right neighbor.
1791         * Don't set contiguous if the combined extent would be too large.
1792         * Also check for all-three-contiguous being too large.
1793         */
1794        if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1795                state |= BMAP_RIGHT_VALID;
1796                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1797
1798                if (isnullstartblock(RIGHT.br_startblock))
1799                        state |= BMAP_RIGHT_DELAY;
1800        }
1801
1802        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1803            new_endoff == RIGHT.br_startoff &&
1804            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1805            new->br_state == RIGHT.br_state &&
1806            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1807            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1808                       BMAP_RIGHT_FILLING)) !=
1809                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1810                       BMAP_RIGHT_FILLING) ||
1811             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1812                        <= MAXEXTLEN))
1813                state |= BMAP_RIGHT_CONTIG;
1814
1815        error = 0;
1816        /*
1817         * Switch out based on the FILLING and CONTIG state bits.
1818         */
1819        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1820                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1821        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1822             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1823                /*
1824                 * Filling in all of a previously delayed allocation extent.
1825                 * The left and right neighbors are both contiguous with new.
1826                 */
1827                bma->idx--;
1828                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1829                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1830                        LEFT.br_blockcount + PREV.br_blockcount +
1831                        RIGHT.br_blockcount);
1832                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1833
1834                xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1835                bma->ip->i_d.di_nextents--;
1836                if (bma->cur == NULL)
1837                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1838                else {
1839                        rval = XFS_ILOG_CORE;
1840                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1841                                        RIGHT.br_startblock,
1842                                        RIGHT.br_blockcount, &i);
1843                        if (error)
1844                                goto done;
1845                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1846                        error = xfs_btree_delete(bma->cur, &i);
1847                        if (error)
1848                                goto done;
1849                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1850                        error = xfs_btree_decrement(bma->cur, 0, &i);
1851                        if (error)
1852                                goto done;
1853                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1854                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1855                                        LEFT.br_startblock,
1856                                        LEFT.br_blockcount +
1857                                        PREV.br_blockcount +
1858                                        RIGHT.br_blockcount, LEFT.br_state);
1859                        if (error)
1860                                goto done;
1861                }
1862                break;
1863
1864        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1865                /*
1866                 * Filling in all of a previously delayed allocation extent.
1867                 * The left neighbor is contiguous, the right is not.
1868                 */
1869                bma->idx--;
1870
1871                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1872                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1873                        LEFT.br_blockcount + PREV.br_blockcount);
1874                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1875
1876                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1877                if (bma->cur == NULL)
1878                        rval = XFS_ILOG_DEXT;
1879                else {
1880                        rval = 0;
1881                        error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1882                                        LEFT.br_startblock, LEFT.br_blockcount,
1883                                        &i);
1884                        if (error)
1885                                goto done;
1886                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1887                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1888                                        LEFT.br_startblock,
1889                                        LEFT.br_blockcount +
1890                                        PREV.br_blockcount, LEFT.br_state);
1891                        if (error)
1892                                goto done;
1893                }
1894                break;
1895
1896        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1897                /*
1898                 * Filling in all of a previously delayed allocation extent.
1899                 * The right neighbor is contiguous, the left is not.
1900                 */
1901                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1902                xfs_bmbt_set_startblock(ep, new->br_startblock);
1903                xfs_bmbt_set_blockcount(ep,
1904                        PREV.br_blockcount + RIGHT.br_blockcount);
1905                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1906
1907                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1908                if (bma->cur == NULL)
1909                        rval = XFS_ILOG_DEXT;
1910                else {
1911                        rval = 0;
1912                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1913                                        RIGHT.br_startblock,
1914                                        RIGHT.br_blockcount, &i);
1915                        if (error)
1916                                goto done;
1917                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1918                        error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
1919                                        new->br_startblock,
1920                                        PREV.br_blockcount +
1921                                        RIGHT.br_blockcount, PREV.br_state);
1922                        if (error)
1923                                goto done;
1924                }
1925                break;
1926
1927        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1928                /*
1929                 * Filling in all of a previously delayed allocation extent.
1930                 * Neither the left nor right neighbors are contiguous with
1931                 * the new one.
1932                 */
1933                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1934                xfs_bmbt_set_startblock(ep, new->br_startblock);
1935                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1936
1937                bma->ip->i_d.di_nextents++;
1938                if (bma->cur == NULL)
1939                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1940                else {
1941                        rval = XFS_ILOG_CORE;
1942                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1943                                        new->br_startblock, new->br_blockcount,
1944                                        &i);
1945                        if (error)
1946                                goto done;
1947                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1948                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1949                        error = xfs_btree_insert(bma->cur, &i);
1950                        if (error)
1951                                goto done;
1952                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1953                }
1954                break;
1955
1956        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1957                /*
1958                 * Filling in the first part of a previous delayed allocation.
1959                 * The left neighbor is contiguous.
1960                 */
1961                trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1962                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1963                        LEFT.br_blockcount + new->br_blockcount);
1964                xfs_bmbt_set_startoff(ep,
1965                        PREV.br_startoff + new->br_blockcount);
1966                trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1967
1968                temp = PREV.br_blockcount - new->br_blockcount;
1969                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1970                xfs_bmbt_set_blockcount(ep, temp);
1971                if (bma->cur == NULL)
1972                        rval = XFS_ILOG_DEXT;
1973                else {
1974                        rval = 0;
1975                        error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1976                                        LEFT.br_startblock, LEFT.br_blockcount,
1977                                        &i);
1978                        if (error)
1979                                goto done;
1980                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1981                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1982                                        LEFT.br_startblock,
1983                                        LEFT.br_blockcount +
1984                                        new->br_blockcount,
1985                                        LEFT.br_state);
1986                        if (error)
1987                                goto done;
1988                }
1989                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1990                        startblockval(PREV.br_startblock));
1991                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1992                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1993
1994                bma->idx--;
1995                break;
1996
1997        case BMAP_LEFT_FILLING:
1998                /*
1999                 * Filling in the first part of a previous delayed allocation.
2000                 * The left neighbor is not contiguous.
2001                 */
2002                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2003                xfs_bmbt_set_startoff(ep, new_endoff);
2004                temp = PREV.br_blockcount - new->br_blockcount;
2005                xfs_bmbt_set_blockcount(ep, temp);
2006                xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
2007                bma->ip->i_d.di_nextents++;
2008                if (bma->cur == NULL)
2009                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2010                else {
2011                        rval = XFS_ILOG_CORE;
2012                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2013                                        new->br_startblock, new->br_blockcount,
2014                                        &i);
2015                        if (error)
2016                                goto done;
2017                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2018                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2019                        error = xfs_btree_insert(bma->cur, &i);
2020                        if (error)
2021                                goto done;
2022                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2023                }
2024
2025                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2026                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2027                                        bma->firstblock, bma->flist,
2028                                        &bma->cur, 1, &tmp_rval, whichfork);
2029                        rval |= tmp_rval;
2030                        if (error)
2031                                goto done;
2032                }
2033                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2034                        startblockval(PREV.br_startblock) -
2035                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2036                ep = xfs_iext_get_ext(ifp, bma->idx + 1);
2037                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2038                trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2039                break;
2040
2041        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2042                /*
2043                 * Filling in the last part of a previous delayed allocation.
2044                 * The right neighbor is contiguous with the new allocation.
2045                 */
2046                temp = PREV.br_blockcount - new->br_blockcount;
2047                trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2048                xfs_bmbt_set_blockcount(ep, temp);
2049                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
2050                        new->br_startoff, new->br_startblock,
2051                        new->br_blockcount + RIGHT.br_blockcount,
2052                        RIGHT.br_state);
2053                trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2054                if (bma->cur == NULL)
2055                        rval = XFS_ILOG_DEXT;
2056                else {
2057                        rval = 0;
2058                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
2059                                        RIGHT.br_startblock,
2060                                        RIGHT.br_blockcount, &i);
2061                        if (error)
2062                                goto done;
2063                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2064                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
2065                                        new->br_startblock,
2066                                        new->br_blockcount +
2067                                        RIGHT.br_blockcount,
2068                                        RIGHT.br_state);
2069                        if (error)
2070                                goto done;
2071                }
2072
2073                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2074                        startblockval(PREV.br_startblock));
2075                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2076                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2077                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2078
2079                bma->idx++;
2080                break;
2081
2082        case BMAP_RIGHT_FILLING:
2083                /*
2084                 * Filling in the last part of a previous delayed allocation.
2085                 * The right neighbor is not contiguous.
2086                 */
2087                temp = PREV.br_blockcount - new->br_blockcount;
2088                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2089                xfs_bmbt_set_blockcount(ep, temp);
2090                xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
2091                bma->ip->i_d.di_nextents++;
2092                if (bma->cur == NULL)
2093                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2094                else {
2095                        rval = XFS_ILOG_CORE;
2096                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2097                                        new->br_startblock, new->br_blockcount,
2098                                        &i);
2099                        if (error)
2100                                goto done;
2101                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2102                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2103                        error = xfs_btree_insert(bma->cur, &i);
2104                        if (error)
2105                                goto done;
2106                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2107                }
2108
2109                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2110                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2111                                bma->firstblock, bma->flist, &bma->cur, 1,
2112                                &tmp_rval, whichfork);
2113                        rval |= tmp_rval;
2114                        if (error)
2115                                goto done;
2116                }
2117                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2118                        startblockval(PREV.br_startblock) -
2119                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2120                ep = xfs_iext_get_ext(ifp, bma->idx);
2121                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2122                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2123
2124                bma->idx++;
2125                break;
2126
2127        case 0:
2128                /*
2129                 * Filling in the middle part of a previous delayed allocation.
2130                 * Contiguity is impossible here.
2131                 * This case is avoided almost all the time.
2132                 *
2133                 * We start with a delayed allocation:
2134                 *
2135                 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
2136                 *  PREV @ idx
2137                 *
2138                 * and we are allocating:
2139                 *                     +rrrrrrrrrrrrrrrrr+
2140                 *                            new
2141                 *
2142                 * and we set it up for insertion as:
2143                 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
2144                 *                            new
2145                 *  PREV @ idx          LEFT              RIGHT
2146                 *                      inserted at idx + 1
2147                 */
2148                temp = new->br_startoff - PREV.br_startoff;
2149                temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
2150                trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
2151                xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
2152                LEFT = *new;
2153                RIGHT.br_state = PREV.br_state;
2154                RIGHT.br_startblock = nullstartblock(
2155                                (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2156                RIGHT.br_startoff = new_endoff;
2157                RIGHT.br_blockcount = temp2;
2158                /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
2159                xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
2160                bma->ip->i_d.di_nextents++;
2161                if (bma->cur == NULL)
2162                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2163                else {
2164                        rval = XFS_ILOG_CORE;
2165                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2166                                        new->br_startblock, new->br_blockcount,
2167                                        &i);
2168                        if (error)
2169                                goto done;
2170                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2171                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2172                        error = xfs_btree_insert(bma->cur, &i);
2173                        if (error)
2174                                goto done;
2175                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2176                }
2177
2178                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2179                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2180                                        bma->firstblock, bma->flist, &bma->cur,
2181                                        1, &tmp_rval, whichfork);
2182                        rval |= tmp_rval;
2183                        if (error)
2184                                goto done;
2185                }
2186                temp = xfs_bmap_worst_indlen(bma->ip, temp);
2187                temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2188                diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2189                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2190                if (diff > 0) {
2191                        error = xfs_mod_fdblocks(bma->ip->i_mount,
2192                                                 -((int64_t)diff), false);
2193                        ASSERT(!error);
2194                        if (error)
2195                                goto done;
2196                }
2197
2198                ep = xfs_iext_get_ext(ifp, bma->idx);
2199                xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2200                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2201                trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2202                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2203                        nullstartblock((int)temp2));
2204                trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2205
2206                bma->idx++;
2207                da_new = temp + temp2;
2208                break;
2209
2210        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2211        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2212        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2213        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2214        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2215        case BMAP_LEFT_CONTIG:
2216        case BMAP_RIGHT_CONTIG:
2217                /*
2218                 * These cases are all impossible.
2219                 */
2220                ASSERT(0);
2221        }
2222
2223        /* convert to a btree if necessary */
2224        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2225                int     tmp_logflags;   /* partial log flag return val */
2226
2227                ASSERT(bma->cur == NULL);
2228                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2229                                bma->firstblock, bma->flist, &bma->cur,
2230                                da_old > 0, &tmp_logflags, whichfork);
2231                bma->logflags |= tmp_logflags;
2232                if (error)
2233                        goto done;
2234        }
2235
2236        /* adjust for changes in reserved delayed indirect blocks */
2237        if (da_old || da_new) {
2238                temp = da_new;
2239                if (bma->cur)
2240                        temp += bma->cur->bc_private.b.allocated;
2241                ASSERT(temp <= da_old);
2242                if (temp < da_old)
2243                        xfs_mod_fdblocks(bma->ip->i_mount,
2244                                        (int64_t)(da_old - temp), false);
2245        }
2246
2247        /* clear out the allocated field, done with it now in any case. */
2248        if (bma->cur)
2249                bma->cur->bc_private.b.allocated = 0;
2250
2251        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2252done:
2253        bma->logflags |= rval;
2254        return error;
2255#undef  LEFT
2256#undef  RIGHT
2257#undef  PREV
2258}
2259
2260/*
2261 * Convert an unwritten allocation to a real allocation or vice versa.
2262 */
2263STATIC int                              /* error */
2264xfs_bmap_add_extent_unwritten_real(
2265        struct xfs_trans        *tp,
2266        xfs_inode_t             *ip,    /* incore inode pointer */
2267        xfs_extnum_t            *idx,   /* extent number to update/insert */
2268        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2269        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2270        xfs_fsblock_t           *first, /* pointer to firstblock variable */
2271        xfs_bmap_free_t         *flist, /* list of extents to be freed */
2272        int                     *logflagsp) /* inode logging flags */
2273{
2274        xfs_btree_cur_t         *cur;   /* btree cursor */
2275        xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
2276        int                     error;  /* error return value */
2277        int                     i;      /* temp state */
2278        xfs_ifork_t             *ifp;   /* inode fork pointer */
2279        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2280        xfs_exntst_t            newext; /* new extent state */
2281        xfs_exntst_t            oldext; /* old extent state */
2282        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2283                                        /* left is 0, right is 1, prev is 2 */
2284        int                     rval=0; /* return value (logging flags) */
2285        int                     state = 0;/* state bits, accessed thru macros */
2286        struct xfs_mount        *mp = tp->t_mountp;
2287
2288        *logflagsp = 0;
2289
2290        cur = *curp;
2291        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2292
2293        ASSERT(*idx >= 0);
2294        ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2295        ASSERT(!isnullstartblock(new->br_startblock));
2296
2297        XFS_STATS_INC(mp, xs_add_exlist);
2298
2299#define LEFT            r[0]
2300#define RIGHT           r[1]
2301#define PREV            r[2]
2302
2303        /*
2304         * Set up a bunch of variables to make the tests simpler.
2305         */
2306        error = 0;
2307        ep = xfs_iext_get_ext(ifp, *idx);
2308        xfs_bmbt_get_all(ep, &PREV);
2309        newext = new->br_state;
2310        oldext = (newext == XFS_EXT_UNWRITTEN) ?
2311                XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2312        ASSERT(PREV.br_state == oldext);
2313        new_endoff = new->br_startoff + new->br_blockcount;
2314        ASSERT(PREV.br_startoff <= new->br_startoff);
2315        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2316
2317        /*
2318         * Set flags determining what part of the previous oldext allocation
2319         * extent is being replaced by a newext allocation.
2320         */
2321        if (PREV.br_startoff == new->br_startoff)
2322                state |= BMAP_LEFT_FILLING;
2323        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2324                state |= BMAP_RIGHT_FILLING;
2325
2326        /*
2327         * Check and set flags if this segment has a left neighbor.
2328         * Don't set contiguous if the combined extent would be too large.
2329         */
2330        if (*idx > 0) {
2331                state |= BMAP_LEFT_VALID;
2332                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2333
2334                if (isnullstartblock(LEFT.br_startblock))
2335                        state |= BMAP_LEFT_DELAY;
2336        }
2337
2338        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2339            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2340            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2341            LEFT.br_state == newext &&
2342            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2343                state |= BMAP_LEFT_CONTIG;
2344
2345        /*
2346         * Check and set flags if this segment has a right neighbor.
2347         * Don't set contiguous if the combined extent would be too large.
2348         * Also check for all-three-contiguous being too large.
2349         */
2350        if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
2351                state |= BMAP_RIGHT_VALID;
2352                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2353                if (isnullstartblock(RIGHT.br_startblock))
2354                        state |= BMAP_RIGHT_DELAY;
2355        }
2356
2357        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2358            new_endoff == RIGHT.br_startoff &&
2359            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2360            newext == RIGHT.br_state &&
2361            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2362            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2363                       BMAP_RIGHT_FILLING)) !=
2364                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2365                       BMAP_RIGHT_FILLING) ||
2366             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2367                        <= MAXEXTLEN))
2368                state |= BMAP_RIGHT_CONTIG;
2369
2370        /*
2371         * Switch out based on the FILLING and CONTIG state bits.
2372         */
2373        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2374                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2375        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2376             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377                /*
2378                 * Setting all of a previous oldext extent to newext.
2379                 * The left and right neighbors are both contiguous with new.
2380                 */
2381                --*idx;
2382
2383                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2384                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2385                        LEFT.br_blockcount + PREV.br_blockcount +
2386                        RIGHT.br_blockcount);
2387                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2388
2389                xfs_iext_remove(ip, *idx + 1, 2, state);
2390                ip->i_d.di_nextents -= 2;
2391                if (cur == NULL)
2392                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2393                else {
2394                        rval = XFS_ILOG_CORE;
2395                        if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2396                                        RIGHT.br_startblock,
2397                                        RIGHT.br_blockcount, &i)))
2398                                goto done;
2399                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2400                        if ((error = xfs_btree_delete(cur, &i)))
2401                                goto done;
2402                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2403                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2404                                goto done;
2405                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2406                        if ((error = xfs_btree_delete(cur, &i)))
2407                                goto done;
2408                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2409                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2410                                goto done;
2411                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2412                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2413                                LEFT.br_startblock,
2414                                LEFT.br_blockcount + PREV.br_blockcount +
2415                                RIGHT.br_blockcount, LEFT.br_state)))
2416                                goto done;
2417                }
2418                break;
2419
2420        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2421                /*
2422                 * Setting all of a previous oldext extent to newext.
2423                 * The left neighbor is contiguous, the right is not.
2424                 */
2425                --*idx;
2426
2427                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2428                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2429                        LEFT.br_blockcount + PREV.br_blockcount);
2430                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2431
2432                xfs_iext_remove(ip, *idx + 1, 1, state);
2433                ip->i_d.di_nextents--;
2434                if (cur == NULL)
2435                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2436                else {
2437                        rval = XFS_ILOG_CORE;
2438                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2439                                        PREV.br_startblock, PREV.br_blockcount,
2440                                        &i)))
2441                                goto done;
2442                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2443                        if ((error = xfs_btree_delete(cur, &i)))
2444                                goto done;
2445                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2446                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2447                                goto done;
2448                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2449                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2450                                LEFT.br_startblock,
2451                                LEFT.br_blockcount + PREV.br_blockcount,
2452                                LEFT.br_state)))
2453                                goto done;
2454                }
2455                break;
2456
2457        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2458                /*
2459                 * Setting all of a previous oldext extent to newext.
2460                 * The right neighbor is contiguous, the left is not.
2461                 */
2462                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2463                xfs_bmbt_set_blockcount(ep,
2464                        PREV.br_blockcount + RIGHT.br_blockcount);
2465                xfs_bmbt_set_state(ep, newext);
2466                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2467                xfs_iext_remove(ip, *idx + 1, 1, state);
2468                ip->i_d.di_nextents--;
2469                if (cur == NULL)
2470                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2471                else {
2472                        rval = XFS_ILOG_CORE;
2473                        if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2474                                        RIGHT.br_startblock,
2475                                        RIGHT.br_blockcount, &i)))
2476                                goto done;
2477                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2478                        if ((error = xfs_btree_delete(cur, &i)))
2479                                goto done;
2480                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2481                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2482                                goto done;
2483                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2484                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2485                                new->br_startblock,
2486                                new->br_blockcount + RIGHT.br_blockcount,
2487                                newext)))
2488                                goto done;
2489                }
2490                break;
2491
2492        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2493                /*
2494                 * Setting all of a previous oldext extent to newext.
2495                 * Neither the left nor right neighbors are contiguous with
2496                 * the new one.
2497                 */
2498                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2499                xfs_bmbt_set_state(ep, newext);
2500                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2501
2502                if (cur == NULL)
2503                        rval = XFS_ILOG_DEXT;
2504                else {
2505                        rval = 0;
2506                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2507                                        new->br_startblock, new->br_blockcount,
2508                                        &i)))
2509                                goto done;
2510                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2511                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2512                                new->br_startblock, new->br_blockcount,
2513                                newext)))
2514                                goto done;
2515                }
2516                break;
2517
2518        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2519                /*
2520                 * Setting the first part of a previous oldext extent to newext.
2521                 * The left neighbor is contiguous.
2522                 */
2523                trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
2524                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
2525                        LEFT.br_blockcount + new->br_blockcount);
2526                xfs_bmbt_set_startoff(ep,
2527                        PREV.br_startoff + new->br_blockcount);
2528                trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
2529
2530                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2531                xfs_bmbt_set_startblock(ep,
2532                        new->br_startblock + new->br_blockcount);
2533                xfs_bmbt_set_blockcount(ep,
2534                        PREV.br_blockcount - new->br_blockcount);
2535                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2536
2537                --*idx;
2538
2539                if (cur == NULL)
2540                        rval = XFS_ILOG_DEXT;
2541                else {
2542                        rval = 0;
2543                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2544                                        PREV.br_startblock, PREV.br_blockcount,
2545                                        &i)))
2546                                goto done;
2547                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2548                        if ((error = xfs_bmbt_update(cur,
2549                                PREV.br_startoff + new->br_blockcount,
2550                                PREV.br_startblock + new->br_blockcount,
2551                                PREV.br_blockcount - new->br_blockcount,
2552                                oldext)))
2553                                goto done;
2554                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2555                                goto done;
2556                        error = xfs_bmbt_update(cur, LEFT.br_startoff,
2557                                LEFT.br_startblock,
2558                                LEFT.br_blockcount + new->br_blockcount,
2559                                LEFT.br_state);
2560                        if (error)
2561                                goto done;
2562                }
2563                break;
2564
2565        case BMAP_LEFT_FILLING:
2566                /*
2567                 * Setting the first part of a previous oldext extent to newext.
2568                 * The left neighbor is not contiguous.
2569                 */
2570                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2571                ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
2572                xfs_bmbt_set_startoff(ep, new_endoff);
2573                xfs_bmbt_set_blockcount(ep,
2574                        PREV.br_blockcount - new->br_blockcount);
2575                xfs_bmbt_set_startblock(ep,
2576                        new->br_startblock + new->br_blockcount);
2577                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2578
2579                xfs_iext_insert(ip, *idx, 1, new, state);
2580                ip->i_d.di_nextents++;
2581                if (cur == NULL)
2582                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2583                else {
2584                        rval = XFS_ILOG_CORE;
2585                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2586                                        PREV.br_startblock, PREV.br_blockcount,
2587                                        &i)))
2588                                goto done;
2589                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2590                        if ((error = xfs_bmbt_update(cur,
2591                                PREV.br_startoff + new->br_blockcount,
2592                                PREV.br_startblock + new->br_blockcount,
2593                                PREV.br_blockcount - new->br_blockcount,
2594                                oldext)))
2595                                goto done;
2596                        cur->bc_rec.b = *new;
2597                        if ((error = xfs_btree_insert(cur, &i)))
2598                                goto done;
2599                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2600                }
2601                break;
2602
2603        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2604                /*
2605                 * Setting the last part of a previous oldext extent to newext.
2606                 * The right neighbor is contiguous with the new allocation.
2607                 */
2608                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2609                xfs_bmbt_set_blockcount(ep,
2610                        PREV.br_blockcount - new->br_blockcount);
2611                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2612
2613                ++*idx;
2614
2615                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2616                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2617                        new->br_startoff, new->br_startblock,
2618                        new->br_blockcount + RIGHT.br_blockcount, newext);
2619                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2620
2621                if (cur == NULL)
2622                        rval = XFS_ILOG_DEXT;
2623                else {
2624                        rval = 0;
2625                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2626                                        PREV.br_startblock,
2627                                        PREV.br_blockcount, &i)))
2628                                goto done;
2629                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2630                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2631                                PREV.br_startblock,
2632                                PREV.br_blockcount - new->br_blockcount,
2633                                oldext)))
2634                                goto done;
2635                        if ((error = xfs_btree_increment(cur, 0, &i)))
2636                                goto done;
2637                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2638                                new->br_startblock,
2639                                new->br_blockcount + RIGHT.br_blockcount,
2640                                newext)))
2641                                goto done;
2642                }
2643                break;
2644
2645        case BMAP_RIGHT_FILLING:
2646                /*
2647                 * Setting the last part of a previous oldext extent to newext.
2648                 * The right neighbor is not contiguous.
2649                 */
2650                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2651                xfs_bmbt_set_blockcount(ep,
2652                        PREV.br_blockcount - new->br_blockcount);
2653                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2654
2655                ++*idx;
2656                xfs_iext_insert(ip, *idx, 1, new, state);
2657
2658                ip->i_d.di_nextents++;
2659                if (cur == NULL)
2660                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2661                else {
2662                        rval = XFS_ILOG_CORE;
2663                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2664                                        PREV.br_startblock, PREV.br_blockcount,
2665                                        &i)))
2666                                goto done;
2667                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2668                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2669                                PREV.br_startblock,
2670                                PREV.br_blockcount - new->br_blockcount,
2671                                oldext)))
2672                                goto done;
2673                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2674                                        new->br_startblock, new->br_blockcount,
2675                                        &i)))
2676                                goto done;
2677                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2678                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
2679                        if ((error = xfs_btree_insert(cur, &i)))
2680                                goto done;
2681                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2682                }
2683                break;
2684
2685        case 0:
2686                /*
2687                 * Setting the middle part of a previous oldext extent to
2688                 * newext.  Contiguity is impossible here.
2689                 * One extent becomes three extents.
2690                 */
2691                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2692                xfs_bmbt_set_blockcount(ep,
2693                        new->br_startoff - PREV.br_startoff);
2694                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2695
2696                r[0] = *new;
2697                r[1].br_startoff = new_endoff;
2698                r[1].br_blockcount =
2699                        PREV.br_startoff + PREV.br_blockcount - new_endoff;
2700                r[1].br_startblock = new->br_startblock + new->br_blockcount;
2701                r[1].br_state = oldext;
2702
2703                ++*idx;
2704                xfs_iext_insert(ip, *idx, 2, &r[0], state);
2705
2706                ip->i_d.di_nextents += 2;
2707                if (cur == NULL)
2708                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2709                else {
2710                        rval = XFS_ILOG_CORE;
2711                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2712                                        PREV.br_startblock, PREV.br_blockcount,
2713                                        &i)))
2714                                goto done;
2715                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2716                        /* new right extent - oldext */
2717                        if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2718                                r[1].br_startblock, r[1].br_blockcount,
2719                                r[1].br_state)))
2720                                goto done;
2721                        /* new left extent - oldext */
2722                        cur->bc_rec.b = PREV;
2723                        cur->bc_rec.b.br_blockcount =
2724                                new->br_startoff - PREV.br_startoff;
2725                        if ((error = xfs_btree_insert(cur, &i)))
2726                                goto done;
2727                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2728                        /*
2729                         * Reset the cursor to the position of the new extent
2730                         * we are about to insert as we can't trust it after
2731                         * the previous insert.
2732                         */
2733                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2734                                        new->br_startblock, new->br_blockcount,
2735                                        &i)))
2736                                goto done;
2737                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2738                        /* new middle extent - newext */
2739                        cur->bc_rec.b.br_state = new->br_state;
2740                        if ((error = xfs_btree_insert(cur, &i)))
2741                                goto done;
2742                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2743                }
2744                break;
2745
2746        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2747        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2748        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2749        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2750        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2751        case BMAP_LEFT_CONTIG:
2752        case BMAP_RIGHT_CONTIG:
2753                /*
2754                 * These cases are all impossible.
2755                 */
2756                ASSERT(0);
2757        }
2758
2759        /* convert to a btree if necessary */
2760        if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2761                int     tmp_logflags;   /* partial log flag return val */
2762
2763                ASSERT(cur == NULL);
2764                error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
2765                                0, &tmp_logflags, XFS_DATA_FORK);
2766                *logflagsp |= tmp_logflags;
2767                if (error)
2768                        goto done;
2769        }
2770
2771        /* clear out the allocated field, done with it now in any case. */
2772        if (cur) {
2773                cur->bc_private.b.allocated = 0;
2774                *curp = cur;
2775        }
2776
2777        xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2778done:
2779        *logflagsp |= rval;
2780        return error;
2781#undef  LEFT
2782#undef  RIGHT
2783#undef  PREV
2784}
2785
2786/*
2787 * Convert a hole to a delayed allocation.
2788 */
STATIC void
xfs_bmap_add_extent_hole_delay(
        xfs_inode_t             *ip,    /* incore inode pointer */
        xfs_extnum_t            *idx,   /* extent number to update/insert */
        xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
{
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
        xfs_filblks_t           newlen=0;       /* new indirect size */
        xfs_filblks_t           oldlen=0;       /* old indirect size */
        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
        int                     state;  /* state bits, accessed thru macros */
        xfs_filblks_t           temp=0; /* temp for indirect calculations */

        /*
         * Delayed allocations exist only in the in-core data fork extent
         * list; there is no on-disk btree to update here and hence no
         * error to return.
         */
        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
        state = 0;
        ASSERT(isnullstartblock(new->br_startblock));

        /*
         * Check and set flags if this segment has a left neighbor
         */
        if (*idx > 0) {
                state |= BMAP_LEFT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);

                if (isnullstartblock(left.br_startblock))
                        state |= BMAP_LEFT_DELAY;
        }

        /*
         * Check and set flags if the current (right) segment exists.
         * If it doesn't exist, we're converting the hole at end-of-file.
         */
        if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);

                if (isnullstartblock(right.br_startblock))
                        state |= BMAP_RIGHT_DELAY;
        }

        /*
         * Set contiguity flags on the left and right neighbors.
         * Only delalloc neighbors can be merged with a delalloc extent.
         * Don't let extents get too large, even if the pieces are contiguous.
         */
        if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
            left.br_startoff + left.br_blockcount == new->br_startoff &&
            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
                state |= BMAP_LEFT_CONTIG;

        if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
            new->br_startoff + new->br_blockcount == right.br_startoff &&
            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
            (!(state & BMAP_LEFT_CONTIG) ||
             (left.br_blockcount + new->br_blockcount +
              right.br_blockcount <= MAXEXTLEN)))
                state |= BMAP_RIGHT_CONTIG;

        /*
         * Switch out based on the contiguity flags.  For each merge case
         * the worst-case indirect block reservation (encoded in the null
         * startblock) is recomputed for the combined length.
         */
        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
                /*
                 * New allocation is contiguous with delayed allocations
                 * on the left and on the right.
                 * Merge all three into a single extent record.
                 */
                --*idx;
                temp = left.br_blockcount + new->br_blockcount +
                        right.br_blockcount;

                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
                oldlen = startblockval(left.br_startblock) +
                        startblockval(new->br_startblock) +
                        startblockval(right.br_startblock);
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                        nullstartblock((int)newlen));
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

                /* right neighbor was absorbed into the left record */
                xfs_iext_remove(ip, *idx + 1, 1, state);
                break;

        case BMAP_LEFT_CONTIG:
                /*
                 * New allocation is contiguous with a delayed allocation
                 * on the left.
                 * Merge the new allocation with the left neighbor.
                 */
                --*idx;
                temp = left.br_blockcount + new->br_blockcount;

                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
                oldlen = startblockval(left.br_startblock) +
                        startblockval(new->br_startblock);
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                        nullstartblock((int)newlen));
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                break;

        case BMAP_RIGHT_CONTIG:
                /*
                 * New allocation is contiguous with a delayed allocation
                 * on the right.
                 * Merge the new allocation with the right neighbor.
                 */
                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                temp = new->br_blockcount + right.br_blockcount;
                oldlen = startblockval(new->br_startblock) +
                        startblockval(right.br_startblock);
                newlen = xfs_bmap_worst_indlen(ip, temp);
                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
                        new->br_startoff,
                        nullstartblock((int)newlen), temp, right.br_state);
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                break;

        case 0:
                /*
                 * New allocation is not contiguous with another
                 * delayed allocation.
                 * Insert a new entry.
                 */
                oldlen = newlen = 0;
                xfs_iext_insert(ip, *idx, 1, new, state);
                break;
        }
        if (oldlen != newlen) {
                /*
                 * Merging can only shrink the combined worst-case indlen
                 * reservation; return the excess to the free block count.
                 */
                ASSERT(oldlen > newlen);
                xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
                                 false);
                /*
                 * Nothing to do for disk quota accounting here.
                 */
        }
}
2929
2930/*
2931 * Convert a hole to a real allocation.
2932 */
STATIC int                              /* error */
xfs_bmap_add_extent_hole_real(
        struct xfs_bmalloca     *bma,
        int                     whichfork)
{
        struct xfs_bmbt_irec    *new = &bma->got;
        int                     error;  /* error return value */
        int                     i;      /* temp state */
        xfs_ifork_t             *ifp;   /* inode fork pointer */
        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
        int                     rval=0; /* return value (logging flags) */
        int                     state;  /* state bits, accessed thru macros */
        struct xfs_mount        *mp;

        /*
         * Insert the real allocation in bma->got into the hole at bma->idx
         * in the in-core extent list, merging with contiguous real
         * neighbors where possible.  If a bmap btree exists (bma->cur),
         * mirror the change there.  Logging flags for the caller are
         * accumulated into bma->logflags.
         */
        mp = bma->ip->i_mount;
        ifp = XFS_IFORK_PTR(bma->ip, whichfork);

        ASSERT(bma->idx >= 0);
        ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
        ASSERT(!isnullstartblock(new->br_startblock));
        ASSERT(!bma->cur ||
               !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

        XFS_STATS_INC(mp, xs_add_exlist);

        state = 0;
        if (whichfork == XFS_ATTR_FORK)
                state |= BMAP_ATTRFORK;

        /*
         * Check and set flags if this segment has a left neighbor.
         */
        if (bma->idx > 0) {
                state |= BMAP_LEFT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
                if (isnullstartblock(left.br_startblock))
                        state |= BMAP_LEFT_DELAY;
        }

        /*
         * Check and set flags if this segment has a current value.
         * Not true if we're inserting into the "hole" at eof.
         */
        if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
                if (isnullstartblock(right.br_startblock))
                        state |= BMAP_RIGHT_DELAY;
        }

        /*
         * We're inserting a real allocation between "left" and "right".
         * Set the contiguity flags.  Don't let extents get too large.
         * Merging requires adjacency in both file offset and disk block,
         * matching unwritten/normal state, and no delalloc neighbor.
         */
        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
            left.br_startoff + left.br_blockcount == new->br_startoff &&
            left.br_startblock + left.br_blockcount == new->br_startblock &&
            left.br_state == new->br_state &&
            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
                state |= BMAP_LEFT_CONTIG;

        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
            new->br_startoff + new->br_blockcount == right.br_startoff &&
            new->br_startblock + new->br_blockcount == right.br_startblock &&
            new->br_state == right.br_state &&
            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
            (!(state & BMAP_LEFT_CONTIG) ||
             left.br_blockcount + new->br_blockcount +
             right.br_blockcount <= MAXEXTLEN))
                state |= BMAP_RIGHT_CONTIG;

        error = 0;
        /*
         * Select which case we're in here, and implement it.
         */
        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
                /*
                 * New allocation is contiguous with real allocations on the
                 * left and on the right.
                 * Merge all three into a single extent record.
                 */
                --bma->idx;
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                        left.br_blockcount + new->br_blockcount +
                        right.br_blockcount);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);

                /* three records became one: extent count drops by one */
                XFS_IFORK_NEXT_SET(bma->ip, whichfork,
                        XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
                if (bma->cur == NULL) {
                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
                } else {
                        /*
                         * In the btree: delete the right record, then
                         * grow the left record to span all three.
                         */
                        rval = XFS_ILOG_CORE;
                        error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
                                        right.br_startblock, right.br_blockcount,
                                        &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_delete(bma->cur, &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_decrement(bma->cur, 0, &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
                                                new->br_blockcount +
                                                right.br_blockcount,
                                        left.br_state);
                        if (error)
                                goto done;
                }
                break;

        case BMAP_LEFT_CONTIG:
                /*
                 * New allocation is contiguous with a real allocation
                 * on the left.
                 * Merge the new allocation with the left neighbor.
                 */
                --bma->idx;
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
                        left.br_blockcount + new->br_blockcount);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

                if (bma->cur == NULL) {
                        rval = xfs_ilog_fext(whichfork);
                } else {
                        /* extent count unchanged: no core logging needed */
                        rval = 0;
                        error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
                                        left.br_startblock, left.br_blockcount,
                                        &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
                                                new->br_blockcount,
                                        left.br_state);
                        if (error)
                                goto done;
                }
                break;

        case BMAP_RIGHT_CONTIG:
                /*
                 * New allocation is contiguous with a real allocation
                 * on the right.
                 * Merge the new allocation with the right neighbor.
                 */
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
                        new->br_startoff, new->br_startblock,
                        new->br_blockcount + right.br_blockcount,
                        right.br_state);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

                if (bma->cur == NULL) {
                        rval = xfs_ilog_fext(whichfork);
                } else {
                        /* extent count unchanged: no core logging needed */
                        rval = 0;
                        error = xfs_bmbt_lookup_eq(bma->cur,
                                        right.br_startoff,
                                        right.br_startblock,
                                        right.br_blockcount, &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount +
                                                right.br_blockcount,
                                        right.br_state);
                        if (error)
                                goto done;
                }
                break;

        case 0:
                /*
                 * New allocation is not contiguous with another
                 * real allocation.
                 * Insert a new entry.
                 */
                xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
                XFS_IFORK_NEXT_SET(bma->ip, whichfork,
                        XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
                if (bma->cur == NULL) {
                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
                } else {
                        rval = XFS_ILOG_CORE;
                        error = xfs_bmbt_lookup_eq(bma->cur,
                                        new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount, &i);
                        if (error)
                                goto done;
                        /* lookup must NOT find it: we are inserting */
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = new->br_state;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
        }

        /* convert to a btree if necessary */
        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
                int     tmp_logflags;   /* partial log flag return val */

                ASSERT(bma->cur == NULL);
                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
                                bma->firstblock, bma->flist, &bma->cur,
                                0, &tmp_logflags, whichfork);
                /* record the flags even on error so the caller logs them */
                bma->logflags |= tmp_logflags;
                if (error)
                        goto done;
        }

        /* clear out the allocated field, done with it now in any case. */
        if (bma->cur)
                bma->cur->bc_private.b.allocated = 0;

        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
        bma->logflags |= rval;
        return error;
}
3173
3174/*
3175 * Functions used in the extent read, allocate and remove paths
3176 */
3177
3178/*
3179 * Adjust the size of the new extent based on di_extsize and rt extsize.
3180 */
int
xfs_bmap_extsize_align(
        xfs_mount_t     *mp,
        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
        xfs_extlen_t    extsz,          /* align to this extent size */
        int             rt,             /* is this a realtime inode? */
        int             eof,            /* is extent at end-of-file? */
        int             delay,          /* creating delalloc extent? */
        int             convert,        /* overwriting unwritten extent? */
        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
        xfs_extlen_t    *lenp)          /* in/out: aligned length */
{
        xfs_fileoff_t   orig_off;       /* original offset */
        xfs_extlen_t    orig_alen;      /* original length */
        xfs_fileoff_t   orig_end;       /* original off+len */
        xfs_fileoff_t   nexto;          /* next file offset */
        xfs_fileoff_t   prevo;          /* previous file offset */
        xfs_fileoff_t   align_off;      /* temp for offset */
        xfs_extlen_t    align_alen;     /* temp for length */
        xfs_extlen_t    temp;           /* temp for calculations */

        /*
         * Grow the requested (offset, length) region so that it is
         * aligned to extsz, without overlapping the previous (prevp)
         * or next (gotp) extents.  The result always covers the
         * original request, or we fail with -EINVAL (rt case only).
         * Returns 0 with *offp/*lenp updated on success.
         */
        if (convert)
                return 0;

        orig_off = align_off = *offp;
        orig_alen = align_alen = *lenp;
        orig_end = orig_off + orig_alen;

        /*
         * If this request overlaps an existing extent, then don't
         * attempt to perform any additional alignment.
         */
        if (!delay && !eof &&
            (orig_off >= gotp->br_startoff) &&
            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
                return 0;
        }

        /*
         * If the file offset is unaligned vs. the extent size
         * we need to align it.  This will be possible unless
         * the file was previously written with a kernel that didn't
         * perform this alignment, or if a truncate shot us in the
         * foot.
         */
        temp = do_mod(orig_off, extsz);
        if (temp) {
                /* move the start down to the alignment boundary */
                align_alen += temp;
                align_off -= temp;
        }

        /* Same adjustment for the end of the requested area. */
        temp = (align_alen % extsz);
        if (temp)
                align_alen += extsz - temp;

        /*
         * For large extent hint sizes, the aligned extent might be larger than
         * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
         * the length back under MAXEXTLEN. The outer allocation loops handle
         * short allocation just fine, so it is safe to do this. We only want to
         * do it when we are forced to, though, because it means more allocation
         * operations are required.
         */
        while (align_alen > MAXEXTLEN)
                align_alen -= extsz;
        ASSERT(align_alen <= MAXEXTLEN);

        /*
         * If the previous block overlaps with this proposed allocation
         * then move the start forward without adjusting the length.
         * A hole (HOLESTARTBLOCK) only blocks us from its start offset.
         */
        if (prevp->br_startoff != NULLFILEOFF) {
                if (prevp->br_startblock == HOLESTARTBLOCK)
                        prevo = prevp->br_startoff;
                else
                        prevo = prevp->br_startoff + prevp->br_blockcount;
        } else
                prevo = 0;
        if (align_off != orig_off && align_off < prevo)
                align_off = prevo;
        /*
         * If the next block overlaps with this proposed allocation
         * then move the start back without adjusting the length,
         * but not before offset 0.
         * This may of course make the start overlap previous block,
         * and if we hit the offset 0 limit then the next block
         * can still overlap too.
         */
        if (!eof && gotp->br_startoff != NULLFILEOFF) {
                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
                        nexto = gotp->br_startoff + gotp->br_blockcount;
                else
                        nexto = gotp->br_startoff;
        } else
                nexto = NULLFILEOFF;
        if (!eof &&
            align_off + align_alen != orig_end &&
            align_off + align_alen > nexto)
                align_off = nexto > align_alen ? nexto - align_alen : 0;
        /*
         * If we're now overlapping the next or previous extent that
         * means we can't fit an extsz piece in this hole.  Just move
         * the start forward to the first valid spot and set
         * the length so we hit the end.
         */
        if (align_off != orig_off && align_off < prevo)
                align_off = prevo;
        if (align_off + align_alen != orig_end &&
            align_off + align_alen > nexto &&
            nexto != NULLFILEOFF) {
                ASSERT(nexto > prevo);
                align_alen = nexto - align_off;
        }

        /*
         * If realtime, and the result isn't a multiple of the realtime
         * extent size we need to remove blocks until it is.
         */
        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
                /*
                 * We're not covering the original request, or
                 * we won't be able to once we fix the length.
                 */
                if (orig_off < align_off ||
                    orig_end > align_off + align_alen ||
                    align_alen - temp < orig_alen)
                        return -EINVAL;
                /*
                 * Try to fix it by moving the start up.
                 */
                if (align_off + temp <= orig_off) {
                        align_alen -= temp;
                        align_off += temp;
                }
                /*
                 * Try to fix it by moving the end in.
                 */
                else if (align_off + align_alen - temp >= orig_end)
                        align_alen -= temp;
                /*
                 * Set the start to the minimum then trim the length.
                 */
                else {
                        align_alen -= orig_off - align_off;
                        align_off = orig_off;
                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
                }
                /*
                 * Result doesn't cover the request, fail it.
                 */
                if (orig_off < align_off || orig_end > align_off + align_alen)
                        return -EINVAL;
        } else {
                ASSERT(orig_off >= align_off);
                /* see MAXEXTLEN handling above */
                ASSERT(orig_end <= align_off + align_alen ||
                       align_alen + extsz > MAXEXTLEN);
        }

#ifdef DEBUG
        if (!eof && gotp->br_startoff != NULLFILEOFF)
                ASSERT(align_off + align_alen <= gotp->br_startoff);
        if (prevp->br_startoff != NULLFILEOFF)
                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif

        *lenp = align_alen;
        *offp = align_off;
        return 0;
}
3354
3355#define XFS_ALLOC_GAP_UNITS     4
3356
3357void
3358xfs_bmap_adjacent(
3359        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3360{
3361        xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3362        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3363        xfs_mount_t     *mp;            /* mount point structure */
3364        int             nullfb;         /* true if ap->firstblock isn't set */
3365        int             rt;             /* true if inode is realtime */
3366
3367#define ISVALID(x,y)    \
3368        (rt ? \
3369                (x) < mp->m_sb.sb_rblocks : \
3370                XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3371                XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3372                XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3373
3374        mp = ap->ip->i_mount;
3375        nullfb = *ap->firstblock == NULLFSBLOCK;
3376        rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
3377        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3378        /*
3379         * If allocating at eof, and there's a previous real block,
3380         * try to use its last block as our starting point.
3381         */
3382        if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3383            !isnullstartblock(ap->prev.br_startblock) &&
3384            ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3385                    ap->prev.br_startblock)) {
3386                ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3387                /*
3388                 * Adjust for the gap between prevp and us.
3389                 */
3390                adjust = ap->offset -
3391                        (ap->prev.br_startoff + ap->prev.br_blockcount);
3392                if (adjust &&
3393                    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3394                        ap->blkno += adjust;
3395        }
3396        /*
3397         * If not at eof, then compare the two neighbor blocks.
3398         * Figure out whether either one gives us a good starting point,
3399         * and pick the better one.
3400         */
3401        else if (!ap->eof) {
3402                xfs_fsblock_t   gotbno;         /* right side block number */
3403                xfs_fsblock_t   gotdiff=0;      /* right side difference */
3404                xfs_fsblock_t   prevbno;        /* left side block number */
3405                xfs_fsblock_t   prevdiff=0;     /* left side difference */
3406
3407                /*
3408                 * If there's a previous (left) block, select a requested
3409                 * start block based on it.
3410                 */
3411                if (ap->prev.br_startoff != NULLFILEOFF &&
3412                    !isnullstartblock(ap->prev.br_startblock) &&
3413                    (prevbno = ap->prev.br_startblock +
3414                               ap->prev.br_blockcount) &&
3415                    ISVALID(prevbno, ap->prev.br_startblock)) {
3416                        /*
3417                         * Calculate gap to end of previous block.
3418                         */
3419                        adjust = prevdiff = ap->offset -
3420                                (ap->prev.br_startoff +
3421                                 ap->prev.br_blockcount);
3422                        /*
3423                         * Figure the startblock based on the previous block's
3424                         * end and the gap size.
3425                         * Heuristic!
3426                         * If the gap is large relative to the piece we're
3427                         * allocating, or using it gives us an invalid block
3428                         * number, then just use the end of the previous block.
3429                         */
3430                        if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3431                            ISVALID(prevbno + prevdiff,
3432                                    ap->prev.br_startblock))
3433                                prevbno += adjust;
3434                        else
3435                                prevdiff += adjust;
3436                        /*
3437                         * If the firstblock forbids it, can't use it,
3438                         * must use default.
3439                         */
3440                        if (!rt && !nullfb &&
3441                            XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3442                                prevbno = NULLFSBLOCK;
3443                }
3444                /*
3445                 * No previous block or can't follow it, just default.
3446                 */
3447                else
3448                        prevbno = NULLFSBLOCK;
3449                /*
3450                 * If there's a following (right) block, select a requested
3451                 * start block based on it.
3452                 */
3453                if (!isnullstartblock(ap->got.br_startblock)) {
3454                        /*
3455                         * Calculate gap to start of next block.
3456                         */
3457                        adjust = gotdiff = ap->got.br_startoff - ap->offset;
3458                        /*
3459                         * Figure the startblock based on the next block's
3460                         * start and the gap size.
3461                         */
3462                        gotbno = ap->got.br_startblock;
3463                        /*
3464                         * Heuristic!
3465                         * If the gap is large relative to the piece we're
3466                         * allocating, or using it gives us an invalid block
3467                         * number, then just use the start of the next block
3468                         * offset by our length.
3469                         */
3470                        if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3471                            ISVALID(gotbno - gotdiff, gotbno))
3472                                gotbno -= adjust;
3473                        else if (ISVALID(gotbno - ap->length, gotbno)) {
3474                                gotbno -= ap->length;
3475                                gotdiff += adjust - ap->length;
3476                        } else
3477                                gotdiff += adjust;
3478                        /*
3479                         * If the firstblock forbids it, can't use it,
3480                         * must use default.
3481                         */
3482                        if (!rt && !nullfb &&
3483                            XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3484                                gotbno = NULLFSBLOCK;
3485                }
3486                /*
3487                 * No next block, just default.
3488                 */
3489                else
3490                        gotbno = NULLFSBLOCK;
3491                /*
3492                 * If both valid, pick the better one, else the only good
3493                 * one, else ap->blkno is already set (to 0 or the inode block).
3494                 */
3495                if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3496                        ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3497                else if (prevbno != NULLFSBLOCK)
3498                        ap->blkno = prevbno;
3499                else if (gotbno != NULLFSBLOCK)
3500                        ap->blkno = gotbno;
3501        }
3502#undef ISVALID
3503}
3504
3505static int
3506xfs_bmap_longest_free_extent(
3507        struct xfs_trans        *tp,
3508        xfs_agnumber_t          ag,
3509        xfs_extlen_t            *blen,
3510        int                     *notinit)
3511{
3512        struct xfs_mount        *mp = tp->t_mountp;
3513        struct xfs_perag        *pag;
3514        xfs_extlen_t            longest;
3515        int                     error = 0;
3516
3517        pag = xfs_perag_get(mp, ag);
3518        if (!pag->pagf_init) {
3519                error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3520                if (error)
3521                        goto out;
3522
3523                if (!pag->pagf_init) {
3524                        *notinit = 1;
3525                        goto out;
3526                }
3527        }
3528
3529        longest = xfs_alloc_longest_free_extent(mp, pag,
3530                                        xfs_alloc_min_freelist(mp, pag));
3531        if (*blen < longest)
3532                *blen = longest;
3533
3534out:
3535        xfs_perag_put(pag);
3536        return error;
3537}
3538
3539static void
3540xfs_bmap_select_minlen(
3541        struct xfs_bmalloca     *ap,
3542        struct xfs_alloc_arg    *args,
3543        xfs_extlen_t            *blen,
3544        int                     notinit)
3545{
3546        if (notinit || *blen < ap->minlen) {
3547                /*
3548                 * Since we did a BUF_TRYLOCK above, it is possible that
3549                 * there is space for this request.
3550                 */
3551                args->minlen = ap->minlen;
3552        } else if (*blen < args->maxlen) {
3553                /*
3554                 * If the best seen length is less than the request length,
3555                 * use the best as the minimum.
3556                 */
3557                args->minlen = *blen;
3558        } else {
3559                /*
3560                 * Otherwise we've seen an extent as big as maxlen, use that
3561                 * as the minimum.
3562                 */
3563                args->minlen = args->maxlen;
3564        }
3565}
3566
3567STATIC int
3568xfs_bmap_btalloc_nullfb(
3569        struct xfs_bmalloca     *ap,
3570        struct xfs_alloc_arg    *args,
3571        xfs_extlen_t            *blen)
3572{
3573        struct xfs_mount        *mp = ap->ip->i_mount;
3574        xfs_agnumber_t          ag, startag;
3575        int                     notinit = 0;
3576        int                     error;
3577
3578        args->type = XFS_ALLOCTYPE_START_BNO;
3579        args->total = ap->total;
3580
3581        startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3582        if (startag == NULLAGNUMBER)
3583                startag = ag = 0;
3584
3585        while (*blen < args->maxlen) {
3586                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3587                                                     &notinit);
3588                if (error)
3589                        return error;
3590
3591                if (++ag == mp->m_sb.sb_agcount)
3592                        ag = 0;
3593                if (ag == startag)
3594                        break;
3595        }
3596
3597        xfs_bmap_select_minlen(ap, args, blen, notinit);
3598        return 0;
3599}
3600
3601STATIC int
3602xfs_bmap_btalloc_filestreams(
3603        struct xfs_bmalloca     *ap,
3604        struct xfs_alloc_arg    *args,
3605        xfs_extlen_t            *blen)
3606{
3607        struct xfs_mount        *mp = ap->ip->i_mount;
3608        xfs_agnumber_t          ag;
3609        int                     notinit = 0;
3610        int                     error;
3611
3612        args->type = XFS_ALLOCTYPE_NEAR_BNO;
3613        args->total = ap->total;
3614
3615        ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3616        if (ag == NULLAGNUMBER)
3617                ag = 0;
3618
3619        error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3620        if (error)
3621                return error;
3622
3623        if (*blen < args->maxlen) {
3624                error = xfs_filestream_new_ag(ap, &ag);
3625                if (error)
3626                        return error;
3627
3628                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3629                                                     &notinit);
3630                if (error)
3631                        return error;
3632
3633        }
3634
3635        xfs_bmap_select_minlen(ap, args, blen, notinit);
3636
3637        /*
3638         * Set the failure fallback case to look in the selected AG as stream
3639         * may have moved.
3640         */
3641        ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3642        return 0;
3643}
3644
3645STATIC int
3646xfs_bmap_btalloc(
3647        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3648{
3649        xfs_mount_t     *mp;            /* mount point structure */
3650        xfs_alloctype_t atype = 0;      /* type for allocation routines */
3651        xfs_extlen_t    align;          /* minimum allocation alignment */
3652        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3653        xfs_agnumber_t  ag;
3654        xfs_alloc_arg_t args;
3655        xfs_extlen_t    blen;
3656        xfs_extlen_t    nextminlen = 0;
3657        int             nullfb;         /* true if ap->firstblock isn't set */
3658        int             isaligned;
3659        int             tryagain;
3660        int             error;
3661        int             stripe_align;
3662
3663        ASSERT(ap->length);
3664
3665        mp = ap->ip->i_mount;
3666
3667        /* stripe alignment for allocation is determined by mount parameters */
3668        stripe_align = 0;
3669        if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3670                stripe_align = mp->m_swidth;
3671        else if (mp->m_dalign)
3672                stripe_align = mp->m_dalign;
3673
3674        align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
3675        if (unlikely(align)) {
3676                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3677                                                align, 0, ap->eof, 0, ap->conv,
3678                                                &ap->offset, &ap->length);
3679                ASSERT(!error);
3680                ASSERT(ap->length);
3681        }
3682
3683
3684        nullfb = *ap->firstblock == NULLFSBLOCK;
3685        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3686        if (nullfb) {
3687                if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
3688                        ag = xfs_filestream_lookup_ag(ap->ip);
3689                        ag = (ag != NULLAGNUMBER) ? ag : 0;
3690                        ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3691                } else {
3692                        ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3693                }
3694        } else
3695                ap->blkno = *ap->firstblock;
3696
3697        xfs_bmap_adjacent(ap);
3698
3699        /*
3700         * If allowed, use ap->blkno; otherwise must use firstblock since
3701         * it's in the right allocation group.
3702         */
3703        if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3704                ;
3705        else
3706                ap->blkno = *ap->firstblock;
3707        /*
3708         * Normal allocation, done through xfs_alloc_vextent.
3709         */
3710        tryagain = isaligned = 0;
3711        memset(&args, 0, sizeof(args));
3712        args.tp = ap->tp;
3713        args.mp = mp;
3714        args.fsbno = ap->blkno;
3715
3716        /* Trim the allocation back to the maximum an AG can fit. */
3717        args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
3718        args.firstblock = *ap->firstblock;
3719        blen = 0;
3720        if (nullfb) {
3721                /*
3722                 * Search for an allocation group with a single extent large
3723                 * enough for the request.  If one isn't found, then adjust
3724                 * the minimum allocation size to the largest space found.
3725                 */
3726                if (ap->userdata && xfs_inode_is_filestream(ap->ip))
3727                        error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3728                else
3729                        error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3730                if (error)
3731                        return error;
3732        } else if (ap->flist->xbf_low) {
3733                if (xfs_inode_is_filestream(ap->ip))
3734                        args.type = XFS_ALLOCTYPE_FIRST_AG;
3735                else
3736                        args.type = XFS_ALLOCTYPE_START_BNO;
3737                args.total = args.minlen = ap->minlen;
3738        } else {
3739                args.type = XFS_ALLOCTYPE_NEAR_BNO;
3740                args.total = ap->total;
3741                args.minlen = ap->minlen;
3742        }
3743        /* apply extent size hints if obtained earlier */
3744        if (unlikely(align)) {
3745                args.prod = align;
3746                if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3747                        args.mod = (xfs_extlen_t)(args.prod - args.mod);
3748        } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
3749                args.prod = 1;
3750                args.mod = 0;
3751        } else {
3752                args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
3753                if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
3754                        args.mod = (xfs_extlen_t)(args.prod - args.mod);
3755        }
3756        /*
3757         * If we are not low on available data blocks, and the
3758         * underlying logical volume manager is a stripe, and
3759         * the file offset is zero then try to allocate data
3760         * blocks on stripe unit boundary.
3761         * NOTE: ap->aeof is only set if the allocation length
3762         * is >= the stripe unit and the allocation offset is
3763         * at the end of file.
3764         */
3765        if (!ap->flist->xbf_low && ap->aeof) {
3766                if (!ap->offset) {
3767                        args.alignment = stripe_align;
3768                        atype = args.type;
3769                        isaligned = 1;
3770                        /*
3771                         * Adjust for alignment
3772                         */
3773                        if (blen > args.alignment && blen <= args.maxlen)
3774                                args.minlen = blen - args.alignment;
3775                        args.minalignslop = 0;
3776                } else {
3777                        /*
3778                         * First try an exact bno allocation.
3779                         * If it fails then do a near or start bno
3780                         * allocation with alignment turned on.
3781                         */
3782                        atype = args.type;
3783                        tryagain = 1;
3784                        args.type = XFS_ALLOCTYPE_THIS_BNO;
3785                        args.alignment = 1;
3786                        /*
3787                         * Compute the minlen+alignment for the
3788                         * next case.  Set slop so that the value
3789                         * of minlen+alignment+slop doesn't go up
3790                         * between the calls.
3791                         */
3792                        if (blen > stripe_align && blen <= args.maxlen)
3793                                nextminlen = blen - stripe_align;
3794                        else
3795                                nextminlen = args.minlen;
3796                        if (nextminlen + stripe_align > args.minlen + 1)
3797                                args.minalignslop =
3798                                        nextminlen + stripe_align -
3799                                        args.minlen - 1;
3800                        else
3801                                args.minalignslop = 0;
3802                }
3803        } else {
3804                args.alignment = 1;
3805                args.minalignslop = 0;
3806        }
3807        args.minleft = ap->minleft;
3808        args.wasdel = ap->wasdel;
3809        args.isfl = 0;
3810        args.userdata = ap->userdata;
3811        if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
3812                args.ip = ap->ip;
3813
3814        error = xfs_alloc_vextent(&args);
3815        if (error)
3816                return error;
3817
3818        if (tryagain && args.fsbno == NULLFSBLOCK) {
3819                /*
3820                 * Exact allocation failed. Now try with alignment
3821                 * turned on.
3822                 */
3823                args.type = atype;
3824                args.fsbno = ap->blkno;
3825                args.alignment = stripe_align;
3826                args.minlen = nextminlen;
3827                args.minalignslop = 0;
3828                isaligned = 1;
3829                if ((error = xfs_alloc_vextent(&args)))
3830                        return error;
3831        }
3832        if (isaligned && args.fsbno == NULLFSBLOCK) {
3833                /*
3834                 * allocation failed, so turn off alignment and
3835                 * try again.
3836                 */
3837                args.type = atype;
3838                args.fsbno = ap->blkno;
3839                args.alignment = 0;
3840                if ((error = xfs_alloc_vextent(&args)))
3841                        return error;
3842        }
3843        if (args.fsbno == NULLFSBLOCK && nullfb &&
3844            args.minlen > ap->minlen) {
3845                args.minlen = ap->minlen;
3846                args.type = XFS_ALLOCTYPE_START_BNO;
3847                args.fsbno = ap->blkno;
3848                if ((error = xfs_alloc_vextent(&args)))
3849                        return error;
3850        }
3851        if (args.fsbno == NULLFSBLOCK && nullfb) {
3852                args.fsbno = 0;
3853                args.type = XFS_ALLOCTYPE_FIRST_AG;
3854                args.total = ap->minlen;
3855                args.minleft = 0;
3856                if ((error = xfs_alloc_vextent(&args)))
3857                        return error;
3858                ap->flist->xbf_low = 1;
3859        }
3860        if (args.fsbno != NULLFSBLOCK) {
3861                /*
3862                 * check the allocation happened at the same or higher AG than
3863                 * the first block that was allocated.
3864                 */
3865                ASSERT(*ap->firstblock == NULLFSBLOCK ||
3866                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
3867                       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
3868                       (ap->flist->xbf_low &&
3869                        XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
3870                        XFS_FSB_TO_AGNO(mp, args.fsbno)));
3871
3872                ap->blkno = args.fsbno;
3873                if (*ap->firstblock == NULLFSBLOCK)
3874                        *ap->firstblock = args.fsbno;
3875                ASSERT(nullfb || fb_agno == args.agno ||
3876                       (ap->flist->xbf_low && fb_agno < args.agno));
3877                ap->length = args.len;
3878                ap->ip->i_d.di_nblocks += args.len;
3879                xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3880                if (ap->wasdel)
3881                        ap->ip->i_delayed_blks -= args.len;
3882                /*
3883                 * Adjust the disk quota also. This was reserved
3884                 * earlier.
3885                 */
3886                xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3887                        ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3888                                        XFS_TRANS_DQ_BCOUNT,
3889                        (long) args.len);
3890        } else {
3891                ap->blkno = NULLFSBLOCK;
3892                ap->length = 0;
3893        }
3894        return 0;
3895}
3896
3897/*
3898 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3899 * It figures out where to ask the underlying allocator to put the new extent.
3900 */
3901STATIC int
3902xfs_bmap_alloc(
3903        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3904{
3905        if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
3906                return xfs_bmap_rtalloc(ap);
3907        return xfs_bmap_btalloc(ap);
3908}
3909
3910/*
3911 * Trim the returned map to the required bounds
3912 */
3913STATIC void
3914xfs_bmapi_trim_map(
3915        struct xfs_bmbt_irec    *mval,
3916        struct xfs_bmbt_irec    *got,
3917        xfs_fileoff_t           *bno,
3918        xfs_filblks_t           len,
3919        xfs_fileoff_t           obno,
3920        xfs_fileoff_t           end,
3921        int                     n,
3922        int                     flags)
3923{
3924        if ((flags & XFS_BMAPI_ENTIRE) ||
3925            got->br_startoff + got->br_blockcount <= obno) {
3926                *mval = *got;
3927                if (isnullstartblock(got->br_startblock))
3928                        mval->br_startblock = DELAYSTARTBLOCK;
3929                return;
3930        }
3931
3932        if (obno > *bno)
3933                *bno = obno;
3934        ASSERT((*bno >= obno) || (n == 0));
3935        ASSERT(*bno < end);
3936        mval->br_startoff = *bno;
3937        if (isnullstartblock(got->br_startblock))
3938                mval->br_startblock = DELAYSTARTBLOCK;
3939        else
3940                mval->br_startblock = got->br_startblock +
3941                                        (*bno - got->br_startoff);
3942        /*
3943         * Return the minimum of what we got and what we asked for for
3944         * the length.  We can use the len variable here because it is
3945         * modified below and we could have been there before coming
3946         * here if the first part of the allocation didn't overlap what
3947         * was asked for.
3948         */
3949        mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3950                        got->br_blockcount - (*bno - got->br_startoff));
3951        mval->br_state = got->br_state;
3952        ASSERT(mval->br_blockcount <= len);
3953        return;
3954}
3955
3956/*
3957 * Update and validate the extent map to return
3958 */
3959STATIC void
3960xfs_bmapi_update_map(
3961        struct xfs_bmbt_irec    **map,
3962        xfs_fileoff_t           *bno,
3963        xfs_filblks_t           *len,
3964        xfs_fileoff_t           obno,
3965        xfs_fileoff_t           end,
3966        int                     *n,
3967        int                     flags)
3968{
3969        xfs_bmbt_irec_t *mval = *map;
3970
3971        ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3972               ((mval->br_startoff + mval->br_blockcount) <= end));
3973        ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3974               (mval->br_startoff < obno));
3975
3976        *bno = mval->br_startoff + mval->br_blockcount;
3977        *len = end - *bno;
3978        if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3979                /* update previous map with new information */
3980                ASSERT(mval->br_startblock == mval[-1].br_startblock);
3981                ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3982                ASSERT(mval->br_state == mval[-1].br_state);
3983                mval[-1].br_blockcount = mval->br_blockcount;
3984                mval[-1].br_state = mval->br_state;
3985        } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3986                   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3987                   mval[-1].br_startblock != HOLESTARTBLOCK &&
3988                   mval->br_startblock == mval[-1].br_startblock +
3989                                          mval[-1].br_blockcount &&
3990                   ((flags & XFS_BMAPI_IGSTATE) ||
3991                        mval[-1].br_state == mval->br_state)) {
3992                ASSERT(mval->br_startoff ==
3993                       mval[-1].br_startoff + mval[-1].br_blockcount);
3994                mval[-1].br_blockcount += mval->br_blockcount;
3995        } else if (*n > 0 &&
3996                   mval->br_startblock == DELAYSTARTBLOCK &&
3997                   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3998                   mval->br_startoff ==
3999                   mval[-1].br_startoff + mval[-1].br_blockcount) {
4000                mval[-1].br_blockcount += mval->br_blockcount;
4001                mval[-1].br_state = mval->br_state;
4002        } else if (!((*n == 0) &&
4003                     ((mval->br_startoff + mval->br_blockcount) <=
4004                      obno))) {
4005                mval++;
4006                (*n)++;
4007        }
4008        *map = mval;
4009}
4010
4011/*
4012 * Map file blocks to filesystem blocks without allocation.
4013 */
4014int
4015xfs_bmapi_read(
4016        struct xfs_inode        *ip,
4017        xfs_fileoff_t           bno,
4018        xfs_filblks_t           len,
4019        struct xfs_bmbt_irec    *mval,
4020        int                     *nmap,
4021        int                     flags)
4022{
4023        struct xfs_mount        *mp = ip->i_mount;
4024        struct xfs_ifork        *ifp;
4025        struct xfs_bmbt_irec    got;
4026        struct xfs_bmbt_irec    prev;
4027        xfs_fileoff_t           obno;
4028        xfs_fileoff_t           end;
4029        xfs_extnum_t            lastx;
4030        int                     error;
4031        int                     eof;
4032        int                     n = 0;
4033        int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4034                                                XFS_ATTR_FORK : XFS_DATA_FORK;
4035
4036        ASSERT(*nmap >= 1);
4037        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
4038                           XFS_BMAPI_IGSTATE)));
4039        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
4040
4041        if (unlikely(XFS_TEST_ERROR(
4042            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4043             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4044             mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4045                XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
4046                return -EFSCORRUPTED;
4047        }
4048
4049        if (XFS_FORCED_SHUTDOWN(mp))
4050                return -EIO;
4051
4052        XFS_STATS_INC(mp, xs_blk_mapr);
4053
4054        ifp = XFS_IFORK_PTR(ip, whichfork);
4055
4056        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4057                error = xfs_iread_extents(NULL, ip, whichfork);
4058                if (error)
4059                        return error;
4060        }
4061
4062        xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
4063        end = bno + len;
4064        obno = bno;
4065
4066        while (bno < end && n < *nmap) {
4067                /* Reading past eof, act as though there's a hole up to end. */
4068                if (eof)
4069                        got.br_startoff = end;
4070                if (got.br_startoff > bno) {
4071                        /* Reading in a hole.  */
4072                        mval->br_startoff = bno;
4073                        mval->br_startblock = HOLESTARTBLOCK;
4074                        mval->br_blockcount =
4075                                XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4076                        mval->br_state = XFS_EXT_NORM;
4077                        bno += mval->br_blockcount;
4078                        len -= mval->br_blockcount;
4079                        mval++;
4080                        n++;
4081                        continue;
4082                }
4083
4084                /* set up the extent map to return. */
4085                xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4086                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4087
4088                /* If we're done, stop now. */
4089                if (bno >= end || n >= *nmap)
4090                        break;
4091
4092                /* Else go on to the next record. */
4093                if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4094                        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4095                else
4096                        eof = 1;
4097        }
4098        *nmap = n;
4099        return 0;
4100}
4101
/*
 * Reserve space for a delayed allocation of @len blocks at file offset
 * @aoff in the data fork of @ip and insert the resulting delalloc extent
 * into the incore extent list.
 *
 * Reservations are taken in order: quota, then realtime extents or data
 * blocks, then worst-case indirect (bmbt) blocks; on failure the earlier
 * reservations are released in reverse order.  On success *got describes
 * the inserted (possibly merged) delalloc extent.  Returns 0 or a
 * negative errno.
 */
STATIC int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	xfs_fileoff_t		aoff,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*got,	/* in: next extent; out: new delalloc */
	struct xfs_bmbt_irec	*prev,	/* extent before the hole */
	xfs_extnum_t		*lastx,	/* extent index, updated on insert */
	int			eof)	/* no extent at or after aoff */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	xfs_extlen_t		alen;	/* actual length to reserve */
	xfs_extlen_t		indlen;	/* worst-case indirect blocks */
	char			rt = XFS_IS_REALTIME_INODE(ip);
	xfs_extlen_t		extsz;
	int			error;

	/* Cap at the maximum extent length, and don't overlap the next extent. */
	alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);

	/* Figure out the extent size, adjust alen */
	extsz = xfs_get_extsz_hint(ip);
	if (extsz) {
		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	/* For realtime files, extsz is reused as the rt extent count. */
	if (rt)
		extsz = alen / mp->m_sb.sb_rextsize;

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  We return if we haven't
	 * allocated blocks already inside this loop.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
			rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/*
	 * Split changing sb for alen and indlen since they could be coming
	 * from different places.
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	/* Reserve the data blocks (rt extents for realtime files). */
	if (rt) {
		error = xfs_mod_frextents(mp, -((int64_t)extsz));
	} else {
		error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
	}

	if (error)
		goto out_unreserve_quota;

	/* Indirect blocks always come from the regular data pool. */
	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
	if (error)
		goto out_unreserve_blocks;


	ip->i_delayed_blks += alen;

	/* Insert the new delalloc extent; startblock encodes indlen. */
	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;
	xfs_bmap_add_extent_hole_delay(ip, lastx, got);

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
	 * might have merged it into one of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);

	ASSERT(got->br_startoff <= aoff);
	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
	ASSERT(isnullstartblock(got->br_startblock));
	ASSERT(got->br_state == XFS_EXT_NORM);
	return 0;

out_unreserve_blocks:
	if (rt)
		xfs_mod_frextents(mp, extsz);
	else
		xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	return error;
}
4197
/*
 * Map file blocks to filesystem blocks, adding delayed allocations as needed.
 *
 * Walks the data fork from @bno for @len blocks, filling @mval with up to
 * *@nmap mappings.  Holes encountered in the range are filled with delayed
 * (in-core only) allocations via xfs_bmapi_reserve_delalloc(); no
 * transaction is used.  On return *@nmap is the number of mappings
 * produced.  Caller must hold the inode ilock exclusively.
 */
int
xfs_bmapi_delay(
	struct xfs_inode	*ip,	/* incore inode */
	xfs_fileoff_t		bno,	/* starting file offs. mapped */
	xfs_filblks_t		len,	/* length to map in file */
	struct xfs_bmbt_irec	*mval,	/* output: map values */
	int			*nmap,	/* i/o: mval size/count */
	int			flags)	/* XFS_BMAPI_... */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	struct xfs_bmbt_irec	got;	/* current file extent record */
	struct xfs_bmbt_irec	prev;	/* previous file extent record */
	xfs_fileoff_t		obno;	/* old block number (offset) */
	xfs_fileoff_t		end;	/* end of mapped file region */
	xfs_extnum_t		lastx;	/* last useful extent number */
	int			eof;	/* we've hit the end of extents */
	int			n = 0;	/* current extent index */
	int			error = 0;

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	/* delalloc mapping supports only the ENTIRE flag */
	ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/* data fork must be in extents or btree format (or injected error) */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(mp, xs_blk_mapw);

	/* pull the in-core extent list in if it hasn't been read yet */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
	end = bno + len;
	obno = bno;

	while (bno < end && n < *nmap) {
		/* hole at bno (or past the last extent): reserve delalloc */
		if (eof || got.br_startoff > bno) {
			error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
							   &prev, &lastx, eof);
			if (error) {
				/* only report the error if nothing mapped */
				if (n == 0) {
					*nmap = 0;
					return error;
				}
				break;
			}
		}

		/* set up the extent map to return. */
		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/* If we're done, stop now. */
		if (bno >= end || n >= *nmap)
			break;

		/* Else go on to the next record. */
		prev = got;
		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
		else
			eof = 1;
	}

	*nmap = n;
	return 0;
}
4281
4282
/*
 * Perform the real block allocation for one mapping on behalf of
 * xfs_bmapi_write().
 *
 * Sizes the request (converting the whole delayed extent when bma->wasdel
 * is set), calls the allocator, and on success inserts the new extent into
 * the fork, re-reading bma->got in case the insert merged it with a
 * neighbour.  Returns 0 on success with bma->blkno == NULLFSBLOCK when no
 * space could be allocated, or a negative errno on failure.
 */
static int
xfs_bmapi_allocate(
	struct xfs_bmalloca	*bma)
{
	struct xfs_mount	*mp = bma->ip->i_mount;
	int			whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	ASSERT(bma->length > 0);

	/*
	 * For the wasdelay case, we could also just allocate the stuff asked
	 * for in this bmap call but that wouldn't be as good.
	 */
	if (bma->wasdel) {
		/* allocate the entire delayed extent, not just what was asked */
		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
		bma->offset = bma->got.br_startoff;
		if (bma->idx != NULLEXTNUM && bma->idx) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
					 &bma->prev);
		}
	} else {
		/* cap at MAXEXTLEN and don't overlap the following extent */
		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
		if (!bma->eof)
			bma->length = XFS_FILBLKS_MIN(bma->length,
					bma->got.br_startoff - bma->offset);
	}

	/*
	 * Indicate if this is the first user data in the file, or just any
	 * user data. And if it is userdata, indicate whether it needs to
	 * be initialised to zero during allocation.
	 */
	if (!(bma->flags & XFS_BMAPI_METADATA)) {
		bma->userdata = (bma->offset == 0) ?
			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
		if (bma->flags & XFS_BMAPI_ZERO)
			bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
	}

	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;

	/*
	 * Only want to do the alignment at the eof if it is userdata and
	 * allocation length is larger than a stripe unit.
	 */
	if (mp->m_dalign && bma->length >= mp->m_dalign &&
	    !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
		error = xfs_bmap_isaeof(bma, whichfork);
		if (error)
			return error;
	}

	error = xfs_bmap_alloc(bma);
	if (error)
		return error;

	/* allocator fell back to low-space mode; drop the minleft demand */
	if (bma->flist->xbf_low)
		bma->minleft = 0;
	if (bma->cur)
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
	/* no space found: not an error, caller checks bma->blkno */
	if (bma->blkno == NULLFSBLOCK)
		return 0;
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.flist = bma->flist;
	}
	/*
	 * Bump the number of extents we've allocated
	 * in this call.
	 */
	bma->nallocs++;

	if (bma->cur)
		bma->cur->bc_private.b.flags =
			bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;

	bma->got.br_startoff = bma->offset;
	bma->got.br_startblock = bma->blkno;
	bma->got.br_blockcount = bma->length;
	bma->got.br_state = XFS_EXT_NORM;

	/*
	 * A wasdelay extent has been initialized, so shouldn't be flagged
	 * as unwritten.
	 */
	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
	    xfs_sb_version_hasextflgbit(&mp->m_sb))
		bma->got.br_state = XFS_EXT_UNWRITTEN;

	if (bma->wasdel)
		error = xfs_bmap_add_extent_delay_real(bma);
	else
		error = xfs_bmap_add_extent_hole_real(bma, whichfork);

	bma->logflags |= tmp_logflags;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
	 * the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	ASSERT(bma->got.br_startoff <= bma->offset);
	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
	       bma->offset + bma->length);
	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
	       bma->got.br_state == XFS_EXT_UNWRITTEN);
	return 0;
}
4400
/*
 * Convert the extent state of a single mapping between written and
 * unwritten when the XFS_BMAPI_PREALLOC/XFS_BMAPI_CONVERT flags ask for it.
 *
 * Returns 0 when no conversion was needed or it completed, -EAGAIN when
 * the converted extent was merged with adjacent written space and the
 * caller must regenerate the mapping, or a negative errno on failure.
 */
STATIC int
xfs_bmapi_convert_unwritten(
	struct xfs_bmalloca	*bma,
	struct xfs_bmbt_irec	*mval,
	xfs_filblks_t		len,
	int			flags)
{
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	/* check if we need to do unwritten->real conversion */
	if (mval->br_state == XFS_EXT_UNWRITTEN &&
	    (flags & XFS_BMAPI_PREALLOC))
		return 0;

	/* check if we need to do real->unwritten conversion */
	if (mval->br_state == XFS_EXT_NORM &&
	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
		return 0;

	/*
	 * Modify (by adding) the state flag, if writing.
	 */
	ASSERT(mval->br_blockcount <= len);
	/* btree-format fork needs a cursor for the record update below */
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
					bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.flist = bma->flist;
	}
	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;

	/*
	 * Before insertion into the bmbt, zero the range being converted
	 * if required.
	 */
	if (flags & XFS_BMAPI_ZERO) {
		error = xfs_zero_extent(bma->ip, mval->br_startblock,
					mval->br_blockcount);
		if (error)
			return error;
	}

	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
			&bma->cur, mval, bma->firstblock, bma->flist,
			&tmp_logflags);
	/*
	 * Log the inode core unconditionally in the unwritten extent conversion
	 * path because the conversion might not have done so (e.g., if the
	 * extent count hasn't changed). We need to make sure the inode is dirty
	 * in the transaction for the sake of fsync(), even if nothing has
	 * changed, because fsync() will not force the log for this transaction
	 * unless it sees the inode pinned.
	 */
	bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that
	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
	 * of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	/*
	 * We may have combined previously unwritten space with written space,
	 * so generate another request.
	 */
	if (mval->br_blockcount < len)
		return -EAGAIN;
	return 0;
}
4479
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 *
 * The returned value in "firstblock" from the first call in a transaction
 * must be remembered and presented to subsequent calls in "firstblock".
 * An upper bound for the number of blocks to be allocated is supplied to
 * the first call in "total"; if no allocation group has that many free
 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
 *
 * On return *nmap is the number of mappings written to mval.
 */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap,		/* i/o: mval size/count */
	struct xfs_bmap_free	*flist)		/* i/o: list extents to free */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmalloca	bma = { NULL };	/* args for xfs_bmap_alloc */
	xfs_fileoff_t		end;		/* end of mapped file region */
	int			eof;		/* after the end of extents */
	int			error;		/* error return */
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */
	int			whichfork;	/* data or attr fork */
	char			inhole;		/* current location is hole in file */
	char			wasdelay;	/* old extent was delayed */

#ifdef DEBUG
	/* saved arguments for xfs_bmap_validate_ret() on the way out */
	xfs_fileoff_t		orig_bno;	/* original block number value */
	int			orig_flags;	/* original flags arg value */
	xfs_filblks_t		orig_len;	/* original value of len arg */
	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
	int			orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/* zeroing is currently supported only for data extents, not metadata */
	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
	/*
	 * we can allocate unwritten extents or pre-zero allocated blocks,
	 * but it makes no sense to do both at once. This would result in
	 * zeroing the unwritten extent twice, but it still being an
	 * unwritten extent....
	 */
	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));

	/* the fork must be in extents or btree format (or injected error) */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	XFS_STATS_INC(mp, xs_blk_mapw);

	if (*firstblock == NULLFSBLOCK) {
		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
		else
			bma.minleft = 1;
	} else {
		bma.minleft = 0;
	}

	/* pull the in-core extent list in if it hasn't been read yet */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
				&bma.prev);
	n = 0;
	end = bno + len;
	obno = bno;

	bma.tp = tp;
	bma.ip = ip;
	bma.total = total;
	bma.userdata = 0;
	bma.flist = flist;
	bma.firstblock = firstblock;

	while (bno < end && n < *nmap) {
		inhole = eof || bma.got.br_startoff > bno;
		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (inhole || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			if (len > (xfs_filblks_t)MAXEXTLEN)
				bma.length = MAXEXTLEN;
			else
				bma.length = len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			/* no space left; return what we've mapped so far */
			if (bma.blkno == NULLFSBLOCK)
				break;
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
							end, n, flags);

		/* Execute unwritten extent conversion if necessary */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		/* -EAGAIN: the conversion merged extents; remap this range */
		if (error == -EAGAIN)
			continue;
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
					 &bma.got);
		} else
			eof = 1;
	}
	*nmap = n;

	/*
	 * Transform from btree to extents, give it cur.
	 */
	if (xfs_bmap_wants_extents(ip, whichfork)) {
		int		tmp_logflags = 0;

		ASSERT(bma.cur);
		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
			&tmp_logflags, whichfork);
		bma.logflags |= tmp_logflags;
		if (error)
			goto error0;
	}

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) >
		XFS_IFORK_MAXEXT(ip, whichfork));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		bma.logflags &= ~xfs_ilog_fext(whichfork);
	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log whatever the flags say, even if error.  Otherwise we might miss
	 * detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shutdown when we should.
	 */
	if (bma.logflags)
		xfs_trans_log_inode(tp, ip, bma.logflags);

	if (bma.cur) {
		if (!error) {
			/* firstblock must stay in (or before) cursor's AG */
			ASSERT(*firstblock == NULLFSBLOCK ||
			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
			       XFS_FSB_TO_AGNO(mp,
				       bma.cur->bc_private.b.firstblock) ||
			       (flist->xbf_low &&
				XFS_FSB_TO_AGNO(mp, *firstblock) <
				XFS_FSB_TO_AGNO(mp,
					bma.cur->bc_private.b.firstblock)));
			*firstblock = bma.cur->bc_private.b.firstblock;
		}
		xfs_btree_del_cursor(bma.cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	if (!error)
		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
			orig_nmap, *nmap);
	return error;
}
4722
4723/*
4724 * Called by xfs_bmapi to update file extent records and the btree
4725 * after removing space (or undoing a delayed allocation).
4726 */
4727STATIC int                              /* error */
4728xfs_bmap_del_extent(
4729        xfs_inode_t             *ip,    /* incore inode pointer */
4730        xfs_trans_t             *tp,    /* current transaction pointer */
4731        xfs_extnum_t            *idx,   /* extent number to update/delete */
4732        xfs_bmap_free_t         *flist, /* list of extents to be freed */
4733        xfs_btree_cur_t         *cur,   /* if null, not a btree */
4734        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4735        int                     *logflagsp, /* inode logging flags */
4736        int                     whichfork) /* data or attr fork */
4737{
4738        xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
4739        xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
4740        xfs_fsblock_t           del_endblock=0; /* first block past del */
4741        xfs_fileoff_t           del_endoff;     /* first offset past del */
4742        int                     delay;  /* current block is delayed allocated */
4743        int                     do_fx;  /* free extent at end of routine */
4744        xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
4745        int                     error;  /* error return value */
4746        int                     flags;  /* inode logging flags */
4747        xfs_bmbt_irec_t         got;    /* current extent entry */
4748        xfs_fileoff_t           got_endoff;     /* first offset past got */
4749        int                     i;      /* temp state */
4750        xfs_ifork_t             *ifp;   /* inode fork pointer */
4751        xfs_mount_t             *mp;    /* mount structure */
4752        xfs_filblks_t           nblks;  /* quota/sb block count */
4753        xfs_bmbt_irec_t         new;    /* new record to be inserted */
4754        /* REFERENCED */
4755        uint                    qfield; /* quota field to update */
4756        xfs_filblks_t           temp;   /* for indirect length calculations */
4757        xfs_filblks_t           temp2;  /* for indirect length calculations */
4758        int                     state = 0;
4759
4760        mp = ip->i_mount;
4761        XFS_STATS_INC(mp, xs_del_exlist);
4762
4763        if (whichfork == XFS_ATTR_FORK)
4764                state |= BMAP_ATTRFORK;
4765
4766        ifp = XFS_IFORK_PTR(ip, whichfork);
4767        ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4768                (uint)sizeof(xfs_bmbt_rec_t)));
4769        ASSERT(del->br_blockcount > 0);
4770        ep = xfs_iext_get_ext(ifp, *idx);
4771        xfs_bmbt_get_all(ep, &got);
4772        ASSERT(got.br_startoff <= del->br_startoff);
4773        del_endoff = del->br_startoff + del->br_blockcount;
4774        got_endoff = got.br_startoff + got.br_blockcount;
4775        ASSERT(got_endoff >= del_endoff);
4776        delay = isnullstartblock(got.br_startblock);
4777        ASSERT(isnullstartblock(del->br_startblock) == delay);
4778        flags = 0;
4779        qfield = 0;
4780        error = 0;
4781        /*
4782         * If deleting a real allocation, must free up the disk space.
4783         */
4784        if (!delay) {
4785                flags = XFS_ILOG_CORE;
4786                /*
4787                 * Realtime allocation.  Free it and record di_nblocks update.
4788                 */
4789                if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4790                        xfs_fsblock_t   bno;
4791                        xfs_filblks_t   len;
4792
4793                        ASSERT(do_mod(del->br_blockcount,
4794                                      mp->m_sb.sb_rextsize) == 0);
4795                        ASSERT(do_mod(del->br_startblock,
4796                                      mp->m_sb.sb_rextsize) == 0);
4797                        bno = del->br_startblock;
4798                        len = del->br_blockcount;
4799                        do_div(bno, mp->m_sb.sb_rextsize);
4800                        do_div(len, mp->m_sb.sb_rextsize);
4801                        error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4802                        if (error)
4803                                goto done;
4804                        do_fx = 0;
4805                        nblks = len * mp->m_sb.sb_rextsize;
4806                        qfield = XFS_TRANS_DQ_RTBCOUNT;
4807                }
4808                /*
4809                 * Ordinary allocation.
4810                 */
4811                else {
4812                        do_fx = 1;
4813                        nblks = del->br_blockcount;
4814                        qfield = XFS_TRANS_DQ_BCOUNT;
4815                }
4816                /*
4817                 * Set up del_endblock and cur for later.
4818                 */
4819                del_endblock = del->br_startblock + del->br_blockcount;
4820                if (cur) {
4821                        if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
4822                                        got.br_startblock, got.br_blockcount,
4823                                        &i)))
4824                                goto done;
4825                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4826                }
4827                da_old = da_new = 0;
4828        } else {
4829                da_old = startblockval(got.br_startblock);
4830                da_new = 0;
4831                nblks = 0;
4832                do_fx = 0;
4833        }
4834        /*
4835         * Set flag value to use in switch statement.
4836         * Left-contig is 2, right-contig is 1.
4837         */
4838        switch (((got.br_startoff == del->br_startoff) << 1) |
4839                (got_endoff == del_endoff)) {
4840        case 3:
4841                /*
4842                 * Matches the whole extent.  Delete the entry.
4843                 */
4844                xfs_iext_remove(ip, *idx, 1,
4845                                whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
4846                --*idx;
4847                if (delay)
4848                        break;
4849
4850                XFS_IFORK_NEXT_SET(ip, whichfork,
4851                        XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4852                flags |= XFS_ILOG_CORE;
4853                if (!cur) {
4854                        flags |= xfs_ilog_fext(whichfork);
4855                        break;
4856                }
4857                if ((error = xfs_btree_delete(cur, &i)))
4858                        goto done;
4859                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4860                break;
4861
4862        case 2:
4863                /*
4864                 * Deleting the first part of the extent.
4865                 */
4866                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4867                xfs_bmbt_set_startoff(ep, del_endoff);
4868                temp = got.br_blockcount - del->br_blockcount;
4869                xfs_bmbt_set_blockcount(ep, temp);
4870                if (delay) {
4871                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4872                                da_old);
4873                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4874                        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4875                        da_new = temp;
4876                        break;
4877                }
4878                xfs_bmbt_set_startblock(ep, del_endblock);
4879                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4880                if (!cur) {
4881                        flags |= xfs_ilog_fext(whichfork);
4882                        break;
4883                }
4884                if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
4885                                got.br_blockcount - del->br_blockcount,
4886                                got.br_state)))
4887                        goto done;
4888                break;
4889
4890        case 1:
4891                /*
4892                 * Deleting the last part of the extent.
4893                 */
4894                temp = got.br_blockcount - del->br_blockcount;
4895                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4896                xfs_bmbt_set_blockcount(ep, temp);
4897                if (delay) {
4898                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4899                                da_old);
4900                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4901                        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4902                        da_new = temp;
4903                        break;
4904                }
4905                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4906                if (!cur) {
4907                        flags |= xfs_ilog_fext(whichfork);
4908                        break;
4909                }
4910                if ((error = xfs_bmbt_update(cur, got.br_startoff,
4911                                got.br_startblock,
4912                                got.br_blockcount - del->br_blockcount,
4913                                got.br_state)))
4914                        goto done;
4915                break;
4916
4917        case 0:
4918                /*
4919                 * Deleting the middle of the extent.
4920                 */
4921                temp = del->br_startoff - got.br_startoff;
4922                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4923                xfs_bmbt_set_blockcount(ep, temp);
4924                new.br_startoff = del_endoff;
4925                temp2 = got_endoff - del_endoff;
4926                new.br_blockcount = temp2;
4927                new.br_state = got.br_state;
4928                if (!delay) {
4929                        new.br_startblock = del_endblock;
4930                        flags |= XFS_ILOG_CORE;
4931                        if (cur) {
4932                                if ((error = xfs_bmbt_update(cur,
4933                                                got.br_startoff,
4934                                                got.br_startblock, temp,
4935                                                got.br_state)))
4936                                        goto done;
4937                                if ((error = xfs_btree_increment(cur, 0, &i)))
4938                                        goto done;
4939                                cur->bc_rec.b = new;
4940                                error = xfs_btree_insert(cur, &i);
4941                                if (error && error != -ENOSPC)
4942                                        goto done;
4943                                /*
4944                                 * If get no-space back from btree insert,
4945                                 * it tried a split, and we have a zero
4946                                 * block reservation.
4947                                 * Fix up our state and return the error.
4948                                 */
4949                                if (error == -ENOSPC) {
4950                                        /*
4951                                         * Reset the cursor, don't trust
4952                                         * it after any insert operation.
4953                                         */
4954                                        if ((error = xfs_bmbt_lookup_eq(cur,
4955                                                        got.br_startoff,
4956                                                        got.br_startblock,
4957                                                        temp, &i)))
4958                                                goto done;
4959                                        XFS_WANT_CORRUPTED_GOTO(mp,
4960                                                                i == 1, done);
4961                                        /*
4962                                         * Update the btree record back
4963                                         * to the original value.
4964                                         */
4965                                        if ((error = xfs_bmbt_update(cur,
4966                                                        got.br_startoff,
4967                                                        got.br_startblock,
4968                                                        got.br_blockcount,
4969                                                        got.br_state)))
4970                                                goto done;
4971                                        /*
4972                                         * Reset the extent record back
4973                                         * to the original value.
4974                                         */
4975                                        xfs_bmbt_set_blockcount(ep,
4976                                                got.br_blockcount);
4977                                        flags = 0;
4978                                        error = -ENOSPC;
4979                                        goto done;
4980                                }
4981                                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4982                        } else
4983                                flags |= xfs_ilog_fext(whichfork);
4984                        XFS_IFORK_NEXT_SET(ip, whichfork,
4985                                XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
4986                } else {
4987                        ASSERT(whichfork == XFS_DATA_FORK);
4988                        temp = xfs_bmap_worst_indlen(ip, temp);
4989                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4990                        temp2 = xfs_bmap_worst_indlen(ip, temp2);
4991                        new.br_startblock = nullstartblock((int)temp2);
4992                        da_new = temp + temp2;
4993                        while (da_new > da_old) {
4994                                if (temp) {
4995                                        temp--;
4996                                        da_new--;
4997                                        xfs_bmbt_set_startblock(ep,
4998                                                nullstartblock((int)temp));
4999                                }
5000                                if (da_new == da_old)
5001                                        break;
5002                                if (temp2) {
5003                                        temp2--;
5004                                        da_new--;
5005                                        new.br_startblock =
5006                                                nullstartblock((int)temp2);
5007                                }
5008                        }
5009                }
5010                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5011                xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5012                ++*idx;
5013                break;
5014        }
5015        /*
5016         * If we need to, add to list of extents to delete.
5017         */
5018        if (do_fx)
5019                xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
5020                        mp);
5021        /*
5022         * Adjust inode # blocks in the file.
5023         */
5024        if (nblks)
5025                ip->i_d.di_nblocks -= nblks;
5026        /*
5027         * Adjust quota data.
5028         */
5029        if (qfield)
5030                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5031
5032        /*
5033         * Account for change in delayed indirect blocks.
5034         * Nothing to do for disk quota accounting here.
5035         */
5036        ASSERT(da_old >= da_new);
5037        if (da_old > da_new)
5038                xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5039done:
5040        *logflagsp = flags;
5041        return error;
5042}
5043
5044/*
5045 * Unmap (remove) blocks from a file.
5046 * If nexts is nonzero then the number of extents to remove is limited to
5047 * that value.  If not all extents in the block range can be removed then
5048 * *done is set.
5049 */
5050int                                             /* error */
5051xfs_bunmapi(
5052        xfs_trans_t             *tp,            /* transaction pointer */
5053        struct xfs_inode        *ip,            /* incore inode */
5054        xfs_fileoff_t           bno,            /* starting offset to unmap */
5055        xfs_filblks_t           len,            /* length to unmap in file */
5056        int                     flags,          /* misc flags */
5057        xfs_extnum_t            nexts,          /* number of extents max */
5058        xfs_fsblock_t           *firstblock,    /* first allocated block
5059                                                   controls a.g. for allocs */
5060        xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
5061        int                     *done)          /* set if not done yet */
5062{
5063        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
5064        xfs_bmbt_irec_t         del;            /* extent being deleted */
5065        int                     eof;            /* is deleting at eof */
5066        xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
5067        int                     error;          /* error return value */
5068        xfs_extnum_t            extno;          /* extent number in list */
5069        xfs_bmbt_irec_t         got;            /* current extent record */
5070        xfs_ifork_t             *ifp;           /* inode fork pointer */
5071        int                     isrt;           /* freeing in rt area */
5072        xfs_extnum_t            lastx;          /* last extent index used */
5073        int                     logflags;       /* transaction logging flags */
5074        xfs_extlen_t            mod;            /* rt extent offset */
5075        xfs_mount_t             *mp;            /* mount structure */
5076        xfs_extnum_t            nextents;       /* number of file extents */
5077        xfs_bmbt_irec_t         prev;           /* previous extent record */
5078        xfs_fileoff_t           start;          /* first file offset deleted */
5079        int                     tmp_logflags;   /* partial logging flags */
5080        int                     wasdel;         /* was a delayed alloc extent */
5081        int                     whichfork;      /* data or attribute fork */
5082        xfs_fsblock_t           sum;
5083
5084        trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5085
5086        whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5087                XFS_ATTR_FORK : XFS_DATA_FORK;
5088        ifp = XFS_IFORK_PTR(ip, whichfork);
5089        if (unlikely(
5090            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5091            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5092                XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5093                                 ip->i_mount);
5094                return -EFSCORRUPTED;
5095        }
5096        mp = ip->i_mount;
5097        if (XFS_FORCED_SHUTDOWN(mp))
5098                return -EIO;
5099
5100        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5101        ASSERT(len > 0);
5102        ASSERT(nexts >= 0);
5103
5104        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5105            (error = xfs_iread_extents(tp, ip, whichfork)))
5106                return error;
5107        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5108        if (nextents == 0) {
5109                *done = 1;
5110                return 0;
5111        }
5112        XFS_STATS_INC(mp, xs_blk_unmap);
5113        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5114        start = bno;
5115        bno = start + len - 1;
5116        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5117                &prev);
5118
5119        /*
5120         * Check to see if the given block number is past the end of the
5121         * file, back up to the last block if so...
5122         */
5123        if (eof) {
5124                ep = xfs_iext_get_ext(ifp, --lastx);
5125                xfs_bmbt_get_all(ep, &got);
5126                bno = got.br_startoff + got.br_blockcount - 1;
5127        }
5128        logflags = 0;
5129        if (ifp->if_flags & XFS_IFBROOT) {
5130                ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5131                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5132                cur->bc_private.b.firstblock = *firstblock;
5133                cur->bc_private.b.flist = flist;
5134                cur->bc_private.b.flags = 0;
5135        } else
5136                cur = NULL;
5137
5138        if (isrt) {
5139                /*
5140                 * Synchronize by locking the bitmap inode.
5141                 */
5142                xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
5143                xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5144        }
5145
5146        extno = 0;
5147        while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5148               (nexts == 0 || extno < nexts)) {
5149                /*
5150                 * Is the found extent after a hole in which bno lives?
5151                 * Just back up to the previous extent, if so.
5152                 */
5153                if (got.br_startoff > bno) {
5154                        if (--lastx < 0)
5155                                break;
5156                        ep = xfs_iext_get_ext(ifp, lastx);
5157                        xfs_bmbt_get_all(ep, &got);
5158                }
5159                /*
5160                 * Is the last block of this extent before the range
5161                 * we're supposed to delete?  If so, we're done.
5162                 */
5163                bno = XFS_FILEOFF_MIN(bno,
5164                        got.br_startoff + got.br_blockcount - 1);
5165                if (bno < start)
5166                        break;
5167                /*
5168                 * Then deal with the (possibly delayed) allocated space
5169                 * we found.
5170                 */
5171                ASSERT(ep != NULL);
5172                del = got;
5173                wasdel = isnullstartblock(del.br_startblock);
5174                if (got.br_startoff < start) {
5175                        del.br_startoff = start;
5176                        del.br_blockcount -= start - got.br_startoff;
5177                        if (!wasdel)
5178                                del.br_startblock += start - got.br_startoff;
5179                }
5180                if (del.br_startoff + del.br_blockcount > bno + 1)
5181                        del.br_blockcount = bno + 1 - del.br_startoff;
5182                sum = del.br_startblock + del.br_blockcount;
5183                if (isrt &&
5184                    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5185                        /*
5186                         * Realtime extent not lined up at the end.
5187                         * The extent could have been split into written
5188                         * and unwritten pieces, or we could just be
5189                         * unmapping part of it.  But we can't really
5190                         * get rid of part of a realtime extent.
5191                         */
5192                        if (del.br_state == XFS_EXT_UNWRITTEN ||
5193                            !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5194                                /*
5195                                 * This piece is unwritten, or we're not
5196                                 * using unwritten extents.  Skip over it.
5197                                 */
5198                                ASSERT(bno >= mod);
5199                                bno -= mod > del.br_blockcount ?
5200                                        del.br_blockcount : mod;
5201                                if (bno < got.br_startoff) {
5202                                        if (--lastx >= 0)
5203                                                xfs_bmbt_get_all(xfs_iext_get_ext(
5204                                                        ifp, lastx), &got);
5205                                }
5206                                continue;
5207                        }
5208                        /*
5209                         * It's written, turn it unwritten.
5210                         * This is better than zeroing it.
5211                         */
5212                        ASSERT(del.br_state == XFS_EXT_NORM);
5213                        ASSERT(xfs_trans_get_block_res(tp) > 0);
5214                        /*
5215                         * If this spans a realtime extent boundary,
5216                         * chop it back to the start of the one we end at.
5217                         */
5218                        if (del.br_blockcount > mod) {
5219                                del.br_startoff += del.br_blockcount - mod;
5220                                del.br_startblock += del.br_blockcount - mod;
5221                                del.br_blockcount = mod;
5222                        }
5223                        del.br_state = XFS_EXT_UNWRITTEN;
5224                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5225                                        &lastx, &cur, &del, firstblock, flist,
5226                                        &logflags);
5227                        if (error)
5228                                goto error0;
5229                        goto nodelete;
5230                }
5231                if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5232                        /*
5233                         * Realtime extent is lined up at the end but not
5234                         * at the front.  We'll get rid of full extents if
5235                         * we can.
5236                         */
5237                        mod = mp->m_sb.sb_rextsize - mod;
5238                        if (del.br_blockcount > mod) {
5239                                del.br_blockcount -= mod;
5240                                del.br_startoff += mod;
5241                                del.br_startblock += mod;
5242                        } else if ((del.br_startoff == start &&
5243                                    (del.br_state == XFS_EXT_UNWRITTEN ||
5244                                     xfs_trans_get_block_res(tp) == 0)) ||
5245                                   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5246                                /*
5247                                 * Can't make it unwritten.  There isn't
5248                                 * a full extent here so just skip it.
5249                                 */
5250                                ASSERT(bno >= del.br_blockcount);
5251                                bno -= del.br_blockcount;
5252                                if (got.br_startoff > bno) {
5253                                        if (--lastx >= 0) {
5254                                                ep = xfs_iext_get_ext(ifp,
5255                                                                      lastx);
5256                                                xfs_bmbt_get_all(ep, &got);
5257                                        }
5258                                }
5259                                continue;
5260                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5261                                /*
5262                                 * This one is already unwritten.
5263                                 * It must have a written left neighbor.
5264                                 * Unwrite the killed part of that one and
5265                                 * try again.
5266                                 */
5267                                ASSERT(lastx > 0);
5268                                xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5269                                                lastx - 1), &prev);
5270                                ASSERT(prev.br_state == XFS_EXT_NORM);
5271                                ASSERT(!isnullstartblock(prev.br_startblock));
5272                                ASSERT(del.br_startblock ==
5273                                       prev.br_startblock + prev.br_blockcount);
5274                                if (prev.br_startoff < start) {
5275                                        mod = start - prev.br_startoff;
5276                                        prev.br_blockcount -= mod;
5277                                        prev.br_startblock += mod;
5278                                        prev.br_startoff = start;
5279                                }
5280                                prev.br_state = XFS_EXT_UNWRITTEN;
5281                                lastx--;
5282                                error = xfs_bmap_add_extent_unwritten_real(tp,
5283                                                ip, &lastx, &cur, &prev,
5284                                                firstblock, flist, &logflags);
5285                                if (error)
5286                                        goto error0;
5287                                goto nodelete;
5288                        } else {
5289                                ASSERT(del.br_state == XFS_EXT_NORM);
5290                                del.br_state = XFS_EXT_UNWRITTEN;
5291                                error = xfs_bmap_add_extent_unwritten_real(tp,
5292                                                ip, &lastx, &cur, &del,
5293                                                firstblock, flist, &logflags);
5294                                if (error)
5295                                        goto error0;
5296                                goto nodelete;
5297                        }
5298                }
5299                if (wasdel) {
5300                        ASSERT(startblockval(del.br_startblock) > 0);
5301                        /* Update realtime/data freespace, unreserve quota */
5302                        if (isrt) {
5303                                xfs_filblks_t rtexts;
5304
5305                                rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5306                                do_div(rtexts, mp->m_sb.sb_rextsize);
5307                                xfs_mod_frextents(mp, (int64_t)rtexts);
5308                                (void)xfs_trans_reserve_quota_nblks(NULL,
5309                                        ip, -((long)del.br_blockcount), 0,
5310                                        XFS_QMOPT_RES_RTBLKS);
5311                        } else {
5312                                xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
5313                                                 false);
5314                                (void)xfs_trans_reserve_quota_nblks(NULL,
5315                                        ip, -((long)del.br_blockcount), 0,
5316                                        XFS_QMOPT_RES_REGBLKS);
5317                        }
5318                        ip->i_delayed_blks -= del.br_blockcount;
5319                        if (cur)
5320                                cur->bc_private.b.flags |=
5321                                        XFS_BTCUR_BPRV_WASDEL;
5322                } else if (cur)
5323                        cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5324                /*
5325                 * If it's the case where the directory code is running
5326                 * with no block reservation, and the deleted block is in
5327                 * the middle of its extent, and the resulting insert
5328                 * of an extent would cause transformation to btree format,
5329                 * then reject it.  The calling code will then swap
5330                 * blocks around instead.
5331                 * We have to do this now, rather than waiting for the
5332                 * conversion to btree format, since the transaction
5333                 * will be dirty.
5334                 */
5335                if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
5336                    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5337                    XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5338                        XFS_IFORK_MAXEXT(ip, whichfork) &&
5339                    del.br_startoff > got.br_startoff &&
5340                    del.br_startoff + del.br_blockcount <
5341                    got.br_startoff + got.br_blockcount) {
5342                        error = -ENOSPC;
5343                        goto error0;
5344                }
5345                error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
5346                                &tmp_logflags, whichfork);
5347                logflags |= tmp_logflags;
5348                if (error)
5349                        goto error0;
5350                bno = del.br_startoff - 1;
5351nodelete:
5352                /*
5353                 * If not done go on to the next (previous) record.
5354                 */
5355                if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5356                        if (lastx >= 0) {
5357                                ep = xfs_iext_get_ext(ifp, lastx);
5358                                if (xfs_bmbt_get_startoff(ep) > bno) {
5359                                        if (--lastx >= 0)
5360                                                ep = xfs_iext_get_ext(ifp,
5361                                                                      lastx);
5362                                }
5363                                xfs_bmbt_get_all(ep, &got);
5364                        }
5365                        extno++;
5366                }
5367        }
5368        *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
5369
5370        /*
5371         * Convert to a btree if necessary.
5372         */
5373        if (xfs_bmap_needs_btree(ip, whichfork)) {
5374                ASSERT(cur == NULL);
5375                error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
5376                        &cur, 0, &tmp_logflags, whichfork);
5377                logflags |= tmp_logflags;
5378                if (error)
5379                        goto error0;
5380        }
5381        /*
5382         * transform from btree to extents, give it cur
5383         */
5384        else if (xfs_bmap_wants_extents(ip, whichfork)) {
5385                ASSERT(cur != NULL);
5386                error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5387                        whichfork);
5388                logflags |= tmp_logflags;
5389                if (error)
5390                        goto error0;
5391        }
5392        /*
5393         * transform from extents to local?
5394         */
5395        error = 0;
5396error0:
5397        /*
5398         * Log everything.  Do this after conversion, there's no point in
5399         * logging the extent records if we've converted to btree format.
5400         */
5401        if ((logflags & xfs_ilog_fext(whichfork)) &&
5402            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5403                logflags &= ~xfs_ilog_fext(whichfork);
5404        else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5405                 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5406                logflags &= ~xfs_ilog_fbroot(whichfork);
5407        /*
5408         * Log inode even in the error case, if the transaction
5409         * is dirty we'll need to shut down the filesystem.
5410         */
5411        if (logflags)
5412                xfs_trans_log_inode(tp, ip, logflags);
5413        if (cur) {
5414                if (!error) {
5415                        *firstblock = cur->bc_private.b.firstblock;
5416                        cur->bc_private.b.allocated = 0;
5417                }
5418                xfs_btree_del_cursor(cur,
5419                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5420        }
5421        return error;
5422}
5423
5424/*
5425 * Determine whether an extent shift can be accomplished by a merge with the
5426 * extent that precedes the target hole of the shift.
5427 */
5428STATIC bool
5429xfs_bmse_can_merge(
5430        struct xfs_bmbt_irec    *left,  /* preceding extent */
5431        struct xfs_bmbt_irec    *got,   /* current extent to shift */
5432        xfs_fileoff_t           shift)  /* shift fsb */
5433{
5434        xfs_fileoff_t           startoff;
5435
5436        startoff = got->br_startoff - shift;
5437
5438        /*
5439         * The extent, once shifted, must be adjacent in-file and on-disk with
5440         * the preceding extent.
5441         */
5442        if ((left->br_startoff + left->br_blockcount != startoff) ||
5443            (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5444            (left->br_state != got->br_state) ||
5445            (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5446                return false;
5447
5448        return true;
5449}
5450
/*
 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
 * hole in the file. If an extent shift would result in the extent being fully
 * adjacent to the extent that currently precedes the hole, we can merge with
 * the preceding extent rather than do the shift.
 *
 * This function assumes the caller has verified a shift-by-merge is possible
 * with the provided extents via xfs_bmse_can_merge().
 *
 * Returns 0 on success or a negative errno from the btree operations; a
 * failed corruption check returns -EFSCORRUPTED via XFS_WANT_CORRUPTED_RETURN.
 * On success *logflags is updated with the inode fields that must be logged.
 */
STATIC int
xfs_bmse_merge(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			shift,		/* shift fsb */
	int				current_ext,	/* idx of gotp */
	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
	struct xfs_btree_cur		*cur,
	int				*logflags)	/* output */
{
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		left;
	xfs_filblks_t			blockcount;
	int				error, i;
	struct xfs_mount		*mp = ip->i_mount;

	/* Decode both host records into in-core form before mutating. */
	xfs_bmbt_get_all(gotp, &got);
	xfs_bmbt_get_all(leftp, &left);
	blockcount = left.br_blockcount + got.br_blockcount;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(xfs_bmse_can_merge(&left, &got, shift));

	/*
	 * Merge the in-core extents. Note that the host record pointers and
	 * current_ext index are invalid once the extent has been removed via
	 * xfs_iext_remove().
	 */
	xfs_bmbt_set_blockcount(leftp, blockcount);
	xfs_iext_remove(ip, current_ext, 1, 0);

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		/* Extents-format fork: logging the extent records suffices. */
		*logflags |= XFS_ILOG_DEXT;
		return 0;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);	/* the record must exist */

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
				   left.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	left.br_blockcount = blockcount;

	/* Write the combined length back into the left extent's record. */
	return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
			       left.br_blockcount, left.br_state);
}
5529
/*
 * Shift a single extent.
 *
 * Move the extent referenced by @gotp by @offset_shift_fsb blocks in the
 * given @direction, merging it into the adjacent extent when possible.  On
 * success the in-core record (and the bmap btree record, if @cur is
 * non-NULL) has been updated, *current_ext has been advanced to the next
 * extent to process and the inode log flags have been OR'd into *logflags.
 *
 * Returns -EINVAL if there is not enough room to shift into, or a negative
 * errno from the btree operations.
 */
STATIC int
xfs_bmse_shift_one(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			offset_shift_fsb,
	int				*current_ext,
	struct xfs_bmbt_rec_host	*gotp,
	struct xfs_btree_cur		*cur,
	int				*logflags,
	enum shift_direction		direction)
{
	struct xfs_ifork		*ifp;
	struct xfs_mount		*mp;
	xfs_fileoff_t			startoff;	/* post-shift offset */
	struct xfs_bmbt_rec_host	*adj_irecp;	/* neighbouring extent */
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		adj_irec;
	int				error;
	int				i;
	int				total_extents;	/* incore record count */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);

	xfs_bmbt_get_all(gotp, &got);

	/* delalloc extents should be prevented by caller */
	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));

	if (direction == SHIFT_LEFT) {
		startoff = got.br_startoff - offset_shift_fsb;

		/*
		 * Check for merge if we've got an extent to the left,
		 * otherwise make sure there's enough room at the start
		 * of the file for the shift.
		 */
		if (!*current_ext) {
			if (got.br_startoff < offset_shift_fsb)
				return -EINVAL;
			goto update_current_ext;
		}
		/*
		 * grab the left extent and check for a large
		 * enough hole.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);

		if (startoff <
		    adj_irec.br_startoff + adj_irec.br_blockcount)
			return -EINVAL;

		/* check whether to merge the extent or shift it down */
		if (xfs_bmse_can_merge(&adj_irec, &got,
				       offset_shift_fsb)) {
			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
					      *current_ext, gotp, adj_irecp,
					      cur, logflags);
		}
	} else {
		startoff = got.br_startoff + offset_shift_fsb;
		/* nothing to move if this is the last extent */
		if (*current_ext >= (total_extents - 1))
			goto update_current_ext;
		/*
		 * If this is not the last extent in the file, make sure there
		 * is enough room between current extent and next extent for
		 * accommodating the shift.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);
		if (startoff + got.br_blockcount > adj_irec.br_startoff)
			return -EINVAL;
		/*
		 * Unlike a left shift (which involves a hole punch),
		 * a right shift does not modify extent neighbors
		 * in any way. We should never find mergeable extents
		 * in this scenario. Check anyways and warn if we
		 * encounter two extents that could be one.
		 */
		if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
			WARN_ON_ONCE(1);
	}
	/*
	 * Advance the extent index for the next iteration (forwards for a
	 * left shift, backwards for a right shift), update the start offset
	 * of the in-core extent and update the btree if applicable.
	 */
update_current_ext:
	if (direction == SHIFT_LEFT)
		(*current_ext)++;
	else
		(*current_ext)--;
	xfs_bmbt_set_startoff(gotp, startoff);
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		/* extents format: record lives in the inode fork, log that */
		*logflags |= XFS_ILOG_DEXT;
		return 0;
	}

	/* look up the pre-shift btree record and rewrite its start offset */
	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	got.br_startoff = startoff;
	return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
			       got.br_blockcount, got.br_state);
}
5644
/*
 * Shift extent records to the left/right to cover/create a hole.
 *
 * The maximum number of extents to be shifted in a single operation is
 * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
 * is the length by which each extent is shifted. If there is no hole to shift
 * the extents into, this is considered an invalid operation and we abort
 * immediately.
 *
 * *done is set once the shift has reached @stop_fsb (or run out of extents);
 * the dirtied inode/btree state is logged to @tp before returning.
 */
int
xfs_bmap_shift_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	int			*done,
	xfs_fileoff_t		stop_fsb,
	xfs_fsblock_t		*firstblock,
	struct xfs_bmap_free	*flist,
	enum shift_direction	direction,
	int			num_exts)
{
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec            got;
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_extnum_t			nexts = 0;	/* extents shifted so far */
	xfs_extnum_t			current_ext;
	xfs_extnum_t			total_extents;
	xfs_extnum_t			stop_extent;	/* exclusive end index */
	int				error = 0;
	int				whichfork = XFS_DATA_FORK;
	int				logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
	/* a right shift starts from the last extent, so next_fsb may be unset */
	ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.flags = 0;
	}

	/*
	 * There may be delalloc extents in the data fork before the range we
	 * are collapsing out, so we cannot use the count of real extents here.
	 * Instead we have to calculate it from the incore fork.
	 */
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
	if (total_extents == 0) {
		*done = 1;
		goto del_cursor;
	}

	/*
	 * In case of first right shift, we need to initialize next_fsb
	 */
	if (*next_fsb == NULLFSBLOCK) {
		gotp = xfs_iext_get_ext(ifp, total_extents - 1);
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
		if (stop_fsb > *next_fsb) {
			*done = 1;
			goto del_cursor;
		}
	}

	/* Lookup the extent index at which we have to stop */
	if (direction == SHIFT_RIGHT) {
		gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
		/* Make stop_extent exclusive of shift range */
		stop_extent--;
	} else
		stop_extent = total_extents;

	/*
	 * Look up the extent index for the fsb where we start shifting. We can
	 * henceforth iterate with current_ext as extent list changes are locked
	 * out via ilock.
	 *
	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
	 * *next_fsb lies in a hole beyond which there are no extents. Either
	 * way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
	if (!gotp) {
		*done = 1;
		goto del_cursor;
	}

	/* some sanity checking before we finally start shifting extents */
	if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
	     (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
		error = -EIO;
		goto del_cursor;
	}

	while (nexts++ < num_exts) {
		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
					   &current_ext, gotp, cur, &logflags,
					   direction);
		if (error)
			goto del_cursor;
		/*
		 * If there was an extent merge during the shift, the extent
		 * count can change. Update the total and grab the next record.
		 */
		if (direction == SHIFT_LEFT) {
			total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
			stop_extent = total_extents;
		}

		if (current_ext == stop_extent) {
			*done = 1;
			*next_fsb = NULLFSBLOCK;
			break;
		}
		gotp = xfs_iext_get_ext(ifp, current_ext);
	}

	/* not finished: report where to resume in *next_fsb */
	if (!*done) {
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
	}

del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);

	return error;
}
5805
/*
 * Split an extent into two extents at @split_fsb so that @split_fsb becomes
 * the first block of the new (second) extent.  If @split_fsb lies in a hole
 * or at the first block of an extent, there is nothing to split and we
 * return 0.
 *
 * New blocks allocated by a possible extents-to-btree conversion come from
 * @firstfsb and freed blocks are deferred to @free_list; any dirtied inode
 * state is logged to @tp.
 */
STATIC int
xfs_bmap_split_extent_at(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb,
	xfs_fsblock_t		*firstfsb,
	struct xfs_bmap_free	*free_list)
{
	int				whichfork = XFS_DATA_FORK;
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		new; /* split extent */
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_fsblock_t			gotblkcnt; /* new block count for got */
	xfs_extnum_t			current_ext;
	int				error = 0;
	int				logflags = 0;
	int				i = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	/*
	 * gotp can be null in 2 cases: 1) if there are no extents
	 * or 2) split_fsb lies in a hole beyond which there are
	 * no extents. Either way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
	if (!gotp)
		return 0;

	xfs_bmbt_get_all(gotp, &got);

	/*
	 * Check split_fsb lies in a hole or the start boundary offset
	 * of the extent.
	 */
	if (got.br_startoff >= split_fsb)
		return 0;

	/* carve the tail of "got" off into "new", starting at split_fsb */
	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstfsb;
		cur->bc_private.b.flist = free_list;
		cur->bc_private.b.flags = 0;
		/* position the cursor at the record being split */
		error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/* shorten the original extent, both incore and (below) in the btree */
	xfs_bmbt_set_blockcount(gotp, gotblkcnt);
	got.br_blockcount = gotblkcnt;

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				got.br_state);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	current_ext++;
	xfs_iext_insert(ip, current_ext, 1, &new, 0);
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) + 1);

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
				new.br_startblock, new.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		/* the new record must not exist yet (i == 0) before insert */
		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
		cur->bc_rec.b.br_state = new.br_state;

		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags; /* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
				&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
5949
5950int
5951xfs_bmap_split_extent(
5952        struct xfs_inode        *ip,
5953        xfs_fileoff_t           split_fsb)
5954{
5955        struct xfs_mount        *mp = ip->i_mount;
5956        struct xfs_trans        *tp;
5957        struct xfs_bmap_free    free_list;
5958        xfs_fsblock_t           firstfsb;
5959        int                     error;
5960
5961        tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5962        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5963                        XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5964        if (error) {
5965                xfs_trans_cancel(tp);
5966                return error;
5967        }
5968
5969        xfs_ilock(ip, XFS_ILOCK_EXCL);
5970        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5971
5972        xfs_bmap_init(&free_list, &firstfsb);
5973
5974        error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5975                        &firstfsb, &free_list);
5976        if (error)
5977                goto out;
5978
5979        error = xfs_bmap_finish(&tp, &free_list, NULL);
5980        if (error)
5981                goto out;
5982
5983        return xfs_trans_commit(tp);
5984
5985out:
5986        xfs_bmap_cancel(&free_list);
5987        xfs_trans_cancel(tp);
5988        return error;
5989}
5990