linux/fs/xfs/libxfs/xfs_bmap.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtalloc.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag.h"
  35#include "xfs_ag_resv.h"
  36#include "xfs_refcount.h"
  37#include "xfs_icache.h"
  38#include "xfs_iomap.h"
  39
  40
  /*
   * Cache that __xfs_bmap_add_free() in this file allocates its
   * struct xfs_extent_free_item entries from.
   */
  41kmem_zone_t             *xfs_bmap_free_item_zone;
  42
  43/*
  44 * Miscellaneous helper functions
  45 */
  46
  47/*
  48 * Compute and fill in the value of the maximum depth of a bmap btree
  49 * in this filesystem.  Done once, during mount.
  50 */
  51void
  52xfs_bmap_compute_maxlevels(
  53        xfs_mount_t     *mp,            /* file system mount structure */
  54        int             whichfork)      /* data or attr fork */
  55{
  56        int             level;          /* btree level */
  57        uint            maxblocks;      /* max blocks at this level */
  58        uint            maxleafents;    /* max leaf entries possible */
  59        int             maxrootrecs;    /* max records in root block */
  60        int             minleafrecs;    /* min records in leaf block */
  61        int             minnoderecs;    /* min records in node block */
  62        int             sz;             /* root block size */
  63
  64        /*
  65         * The maximum number of extents in a file, hence the maximum number of
  66         * leaf entries, is controlled by the size of the on-disk extent count,
  67         * either a signed 32-bit number for the data fork, or a signed 16-bit
  68         * number for the attr fork.
  69         *
  70         * Note that we can no longer assume that if we are in ATTR1 that the
  71         * fork offset of all the inodes will be
  72         * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
  73         * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
  74         * but probably at various positions. Therefore, for both ATTR1 and
  75         * ATTR2 we have to assume the worst case scenario of a minimum size
  76         * available.
  77         */
  78        if (whichfork == XFS_DATA_FORK) {
  79                maxleafents = MAXEXTNUM;
  80                sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  81        } else {
  82                maxleafents = MAXAEXTNUM;
  83                sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  84        }
  85        maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  86        minleafrecs = mp->m_bmap_dmnr[0];
  87        minnoderecs = mp->m_bmap_dmnr[1];
  88        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  89        for (level = 1; maxblocks > 1; level++) {
  90                if (maxblocks <= maxrootrecs)
  91                        maxblocks = 1;
  92                else
  93                        maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  94        }
  95        mp->m_bm_maxlevels[whichfork] = level;
  96}
  97
  98unsigned int
  99xfs_bmap_compute_attr_offset(
 100        struct xfs_mount        *mp)
 101{
 102        if (mp->m_sb.sb_inodesize == 256)
 103                return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 104        return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 105}
 106
 107STATIC int                              /* error */
 108xfs_bmbt_lookup_eq(
 109        struct xfs_btree_cur    *cur,
 110        struct xfs_bmbt_irec    *irec,
 111        int                     *stat)  /* success/failure */
 112{
 113        cur->bc_rec.b = *irec;
 114        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 115}
 116
 117STATIC int                              /* error */
 118xfs_bmbt_lookup_first(
 119        struct xfs_btree_cur    *cur,
 120        int                     *stat)  /* success/failure */
 121{
 122        cur->bc_rec.b.br_startoff = 0;
 123        cur->bc_rec.b.br_startblock = 0;
 124        cur->bc_rec.b.br_blockcount = 0;
 125        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 126}
 127
 128/*
 129 * Check if the inode needs to be converted to btree format.
 130 */
 131static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 132{
 133        struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 134
 135        return whichfork != XFS_COW_FORK &&
 136                ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 137                ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 138}
 139
 140/*
 141 * Check if the inode should be converted to extent format.
 142 */
 143static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 144{
 145        struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 146
 147        return whichfork != XFS_COW_FORK &&
 148                ifp->if_format == XFS_DINODE_FMT_BTREE &&
 149                ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 150}
 151
 152/*
 153 * Update the record referred to by cur to the value given by irec
 154 * This either works (return 0) or gets an EFSCORRUPTED error.
 155 */
 156STATIC int
 157xfs_bmbt_update(
 158        struct xfs_btree_cur    *cur,
 159        struct xfs_bmbt_irec    *irec)
 160{
 161        union xfs_btree_rec     rec;
 162
 163        xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 164        return xfs_btree_update(cur, &rec);
 165}
 166
 167/*
 168 * Compute the worst-case number of indirect blocks that will be used
 169 * for ip's delayed extent of length "len".
 170 */
 171STATIC xfs_filblks_t
 172xfs_bmap_worst_indlen(
 173        xfs_inode_t     *ip,            /* incore inode pointer */
 174        xfs_filblks_t   len)            /* delayed extent length */
 175{
 176        int             level;          /* btree level number */
 177        int             maxrecs;        /* maximum record count at this level */
 178        xfs_mount_t     *mp;            /* mount structure */
 179        xfs_filblks_t   rval;           /* return value */
 180
 181        mp = ip->i_mount;
 182        maxrecs = mp->m_bmap_dmxr[0];
 183        for (level = 0, rval = 0;
 184             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 185             level++) {
 186                len += maxrecs - 1;
 187                do_div(len, maxrecs);
 188                rval += len;
 189                if (len == 1)
 190                        return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 191                                level - 1;
 192                if (level == 0)
 193                        maxrecs = mp->m_bmap_dmxr[1];
 194        }
 195        return rval;
 196}
 197
 198/*
 199 * Calculate the default attribute fork offset for newly created inodes.
 200 */
 201uint
 202xfs_default_attroffset(
 203        struct xfs_inode        *ip)
 204{
 205        if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
 206                return roundup(sizeof(xfs_dev_t), 8);
 207        return M_IGEO(ip->i_mount)->attr_fork_offset;
 208}
 209
 210/*
 211 * Helper routine to reset inode i_forkoff field when switching attribute fork
 212 * from local to extent format - we reset it where possible to make space
 213 * available for inline data fork extents.
 214 */
 215STATIC void
 216xfs_bmap_forkoff_reset(
 217        xfs_inode_t     *ip,
 218        int             whichfork)
 219{
 220        if (whichfork == XFS_ATTR_FORK &&
 221            ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 222            ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 223                uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 224
 225                if (dfl_forkoff > ip->i_forkoff)
 226                        ip->i_forkoff = dfl_forkoff;
 227        }
 228}
 229
 230#ifdef DEBUG
 231STATIC struct xfs_buf *
 232xfs_bmap_get_bp(
 233        struct xfs_btree_cur    *cur,
 234        xfs_fsblock_t           bno)
 235{
 236        struct xfs_log_item     *lip;
 237        int                     i;
 238
 239        if (!cur)
 240                return NULL;
 241
 242        for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 243                if (!cur->bc_bufs[i])
 244                        break;
 245                if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 246                        return cur->bc_bufs[i];
 247        }
 248
 249        /* Chase down all the log items to see if the bp is there */
 250        list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 251                struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip;
 252
 253                if (bip->bli_item.li_type == XFS_LI_BUF &&
 254                    XFS_BUF_ADDR(bip->bli_buf) == bno)
 255                        return bip->bli_buf;
 256        }
 257
 258        return NULL;
 259}
 260
 261STATIC void
 262xfs_check_block(
 263        struct xfs_btree_block  *block,
 264        xfs_mount_t             *mp,
 265        int                     root,
 266        short                   sz)
 267{
 268        int                     i, j, dmxr;
 269        __be64                  *pp, *thispa;   /* pointer to block address */
 270        xfs_bmbt_key_t          *prevp, *keyp;
 271
 272        ASSERT(be16_to_cpu(block->bb_level) > 0);
 273
 274        prevp = NULL;
 275        for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 276                dmxr = mp->m_bmap_dmxr[0];
 277                keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 278
 279                if (prevp) {
 280                        ASSERT(be64_to_cpu(prevp->br_startoff) <
 281                               be64_to_cpu(keyp->br_startoff));
 282                }
 283                prevp = keyp;
 284
 285                /*
 286                 * Compare the block numbers to see if there are dups.
 287                 */
 288                if (root)
 289                        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 290                else
 291                        pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 292
 293                for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 294                        if (root)
 295                                thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 296                        else
 297                                thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 298                        if (*thispa == *pp) {
 299                                xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 300                                        __func__, j, i,
 301                                        (unsigned long long)be64_to_cpu(*thispa));
 302                                xfs_err(mp, "%s: ptrs are equal in node\n",
 303                                        __func__);
 304                                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 305                        }
 306                }
 307        }
 308}
 309
 310/*
 311 * Check that the extents for the inode ip are in the right order in all
 312 * btree leaves. This becomes prohibitively expensive for large extent count
 313 * files, so don't bother with inodes that have more than 10,000 extents in
 314 * them. The btree record ordering checks will still be done, so for such large
 315 * bmapbt constructs that is going to catch most corruptions.
     *
     * Does nothing unless the fork is in btree format.  Ordering violations
     * trip an ASSERT; a failed block read or an invalid child block number
     * is logged and the filesystem is shut down with SHUTDOWN_CORRUPT_INCORE.
 316 */
 317STATIC void
 318xfs_bmap_check_leaf_extents(
 319        xfs_btree_cur_t         *cur,   /* btree cursor or null */
 320        xfs_inode_t             *ip,            /* incore inode pointer */
 321        int                     whichfork)      /* data or attr fork */
 322{
 323        struct xfs_mount        *mp = ip->i_mount;
 324        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 325        struct xfs_btree_block  *block; /* current btree block */
 326        xfs_fsblock_t           bno;    /* block # of "block" */
 327        struct xfs_buf          *bp;    /* buffer for "block" */
 328        int                     error;  /* error return value */
 329        xfs_extnum_t            i=0, j; /* index into the extents list */
 330        int                     level;  /* btree level, for checking */
 331        __be64                  *pp;    /* pointer to block address */
 332        xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
 333        xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
 334        xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
 335        int                     bp_release = 0;
 336
            /* Only a btree-format fork has leaf blocks to walk. */
 337        if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 338                return;
 339
 340        /* skip large extent count inodes */
            /*
             * NOTE(review): this tests the data fork's extent count
             * (ip->i_df) even when whichfork selects another fork --
             * confirm that is intended rather than ifp->if_nextents.
             */
 341        if (ip->i_df.if_nextents > 10000)
 342                return;
 343
 344        bno = NULLFSBLOCK;
 345        block = ifp->if_broot;
 346        /*
 347         * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 348         */
 349        level = be16_to_cpu(block->bb_level);
 350        ASSERT(level > 0);
 351        xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 352        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 353        bno = be64_to_cpu(*pp);
 354
 355        ASSERT(bno != NULLFSBLOCK);
 356        ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 357        ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 358
 359        /*
 360         * Go down the tree until leaf level is reached, following the first
 361         * pointer (leftmost) at each level.
             *
             * bp_release records whether bp was read here (and so must be
             * released here) or was found via xfs_bmap_get_bp() and is
             * therefore left alone.
 362         */
 363        while (level-- > 0) {
 364                /* See if buf is in cur first */
 365                bp_release = 0;
 366                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 367                if (!bp) {
 368                        bp_release = 1;
 369                        error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 370                                                XFS_BMAP_BTREE_REF,
 371                                                &xfs_bmbt_buf_ops);
 372                        if (error)
 373                                goto error_norelse;
 374                }
 375                block = XFS_BUF_TO_BLOCK(bp);
                    /* Leaf reached: leave the loop still holding bp. */
 376                if (level == 0)
 377                        break;
 378
 379                /*
 380                 * Check this block for basic sanity (increasing keys and
 381                 * no duplicate blocks).
 382                 */
 383
 384                xfs_check_block(block, mp, 0, 0);
 385                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 386                bno = be64_to_cpu(*pp);
 387                if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 388                        error = -EFSCORRUPTED;
 389                        goto error0;
 390                }
 391                if (bp_release) {
 392                        bp_release = 0;
 393                        xfs_trans_brelse(NULL, bp);
 394                }
 395        }
 396
 397        /*
 398         * Here with bp and block set to the leftmost leaf node in the tree.
 399         */
 400        i = 0;
 401
 402        /*
 403         * Loop over all leaf nodes checking that all extents are in the right order.
 404         */
 405        for (;;) {
 406                xfs_fsblock_t   nextbno;
 407                xfs_extnum_t    num_recs;
 408
 409
 410                num_recs = xfs_btree_get_numrecs(block);
 411
 412                /*
 413                 * Remember the right sibling now; it is read further down,
                     * after this block has been checked and released.
 414                 */
 415
 416                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 417
 418                /*
 419                 * Check all the extents to make sure they are OK.
 420                 * If we had a previous block, the last entry should
 421                 * conform with the first entry in this one.
 422                 */
 423
 424                ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 425                if (i) {
 426                        ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 427                               xfs_bmbt_disk_get_blockcount(&last) <=
 428                               xfs_bmbt_disk_get_startoff(ep));
 429                }
                    /* Each record must end at or before the start of the next. */
 430                for (j = 1; j < num_recs; j++) {
 431                        nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 432                        ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 433                               xfs_bmbt_disk_get_blockcount(ep) <=
 434                               xfs_bmbt_disk_get_startoff(nextp));
 435                        ep = nextp;
 436                }
 437
                    /* Copy the last record before the buffer may be released. */
 438                last = *ep;
 439                i += num_recs;
 440                if (bp_release) {
 441                        bp_release = 0;
 442                        xfs_trans_brelse(NULL, bp);
 443                }
 444                bno = nextbno;
 445                /*
 446                 * If we've reached the end, stop.
 447                 */
 448                if (bno == NULLFSBLOCK)
 449                        break;
 450
 451                bp_release = 0;
 452                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 453                if (!bp) {
 454                        bp_release = 1;
 455                        error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 456                                                XFS_BMAP_BTREE_REF,
 457                                                &xfs_bmbt_buf_ops);
 458                        if (error)
 459                                goto error_norelse;
 460                }
 461                block = XFS_BUF_TO_BLOCK(bp);
 462        }
 463
 464        return;
 465
    /* error0: a buffer may still be held; release it if it was read here. */
 466error0:
 467        xfs_warn(mp, "%s: at error0", __func__);
 468        if (bp_release)
 469                xfs_trans_brelse(NULL, bp);
    /* error_norelse: no buffer to release; report and shut down. */
 470error_norelse:
 471        xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 472                __func__, i);
 473        xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 474        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 475        return;
 476}
 477
 478/*
 479 * Validate that the bmbt_irecs being returned from bmapi are valid
 480 * given the caller's original parameters.  Specifically check the
 481 * ranges of the returned irecs to ensure that they only extend beyond
 482 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 483 */
 484STATIC void
 485xfs_bmap_validate_ret(
 486        xfs_fileoff_t           bno,
 487        xfs_filblks_t           len,
 488        int                     flags,
 489        xfs_bmbt_irec_t         *mval,
 490        int                     nmap,
 491        int                     ret_nmap)
 492{
 493        int                     i;              /* index to map values */
 494
 495        ASSERT(ret_nmap <= nmap);
 496
 497        for (i = 0; i < ret_nmap; i++) {
 498                ASSERT(mval[i].br_blockcount > 0);
 499                if (!(flags & XFS_BMAPI_ENTIRE)) {
 500                        ASSERT(mval[i].br_startoff >= bno);
 501                        ASSERT(mval[i].br_blockcount <= len);
 502                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 503                               bno + len);
 504                } else {
 505                        ASSERT(mval[i].br_startoff < bno + len);
 506                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 507                               bno);
 508                }
 509                ASSERT(i == 0 ||
 510                       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 511                       mval[i].br_startoff);
 512                ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 513                       mval[i].br_startblock != HOLESTARTBLOCK);
 514                ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 515                       mval[i].br_state == XFS_EXT_UNWRITTEN);
 516        }
 517}
 518
 519#else
 /*
  * DEBUG is not defined: the two checkers expand to empty statements, so
  * call sites need no #ifdef of their own.
  */
 520#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
 521#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)    do { } while (0)
 522#endif /* DEBUG */
 523
 524/*
 525 * bmap free list manipulation functions
 526 */
 527
 528/*
 529 * Add the extent to the list of extents to be free at transaction end.
 530 * The list is maintained sorted (by block number).
 531 */
 532void
 533__xfs_bmap_add_free(
 534        struct xfs_trans                *tp,
 535        xfs_fsblock_t                   bno,
 536        xfs_filblks_t                   len,
 537        const struct xfs_owner_info     *oinfo,
 538        bool                            skip_discard)
 539{
 540        struct xfs_extent_free_item     *new;           /* new element */
 541#ifdef DEBUG
 542        struct xfs_mount                *mp = tp->t_mountp;
 543        xfs_agnumber_t                  agno;
 544        xfs_agblock_t                   agbno;
 545
 546        ASSERT(bno != NULLFSBLOCK);
 547        ASSERT(len > 0);
 548        ASSERT(len <= MAXEXTLEN);
 549        ASSERT(!isnullstartblock(bno));
 550        agno = XFS_FSB_TO_AGNO(mp, bno);
 551        agbno = XFS_FSB_TO_AGBNO(mp, bno);
 552        ASSERT(agno < mp->m_sb.sb_agcount);
 553        ASSERT(agbno < mp->m_sb.sb_agblocks);
 554        ASSERT(len < mp->m_sb.sb_agblocks);
 555        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 556#endif
 557        ASSERT(xfs_bmap_free_item_zone != NULL);
 558
 559        new = kmem_cache_alloc(xfs_bmap_free_item_zone,
 560                               GFP_KERNEL | __GFP_NOFAIL);
 561        new->xefi_startblock = bno;
 562        new->xefi_blockcount = (xfs_extlen_t)len;
 563        if (oinfo)
 564                new->xefi_oinfo = *oinfo;
 565        else
 566                new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 567        new->xefi_skip_discard = skip_discard;
 568        trace_xfs_bmap_free_defer(tp->t_mountp,
 569                        XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
 570                        XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 571        xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 572}
 573
 574/*
 575 * Inode fork format manipulation functions
 576 */
 577
 578/*
 579 * Convert the inode format to extent format if it currently is in btree format,
 580 * but the extent list is small enough that it fits into the extent format.
 581 *
 582 * Since the extents are already in-core, all we have to do is give up the space
 583 * for the btree root and pitch the leaf block.
 584 */
 585STATIC int                              /* error */
 586xfs_bmap_btree_to_extents(
 587        struct xfs_trans        *tp,    /* transaction pointer */
 588        struct xfs_inode        *ip,    /* incore inode pointer */
 589        struct xfs_btree_cur    *cur,   /* btree cursor */
 590        int                     *logflagsp, /* inode logging flags */
 591        int                     whichfork)  /* data or attr fork */
 592{
 593        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 594        struct xfs_mount        *mp = ip->i_mount;
 595        struct xfs_btree_block  *rblock = ifp->if_broot;
 596        struct xfs_btree_block  *cblock;/* child btree block */
 597        xfs_fsblock_t           cbno;   /* child block number */
 598        struct xfs_buf          *cbp;   /* child block's buffer */
 599        int                     error;  /* error return value */
 600        __be64                  *pp;    /* ptr to block address */
 601        struct xfs_owner_info   oinfo;
 602
 603        /* check if we actually need the extent format first: */
 604        if (!xfs_bmap_wants_extents(ip, whichfork))
 605                return 0;
 606
 607        ASSERT(cur);
 608        ASSERT(whichfork != XFS_COW_FORK);
 609        ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 610        ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 611        ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 612        ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 613
 614        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 615        cbno = be64_to_cpu(*pp);
 616#ifdef DEBUG
 617        if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 618                return -EFSCORRUPTED;
 619#endif
 620        error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 621                                &xfs_bmbt_buf_ops);
 622        if (error)
 623                return error;
 624        cblock = XFS_BUF_TO_BLOCK(cbp);
 625        if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 626                return error;
 627        xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 628        xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 629        ip->i_nblocks--;
 630        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 631        xfs_trans_binval(tp, cbp);
 632        if (cur->bc_bufs[0] == cbp)
 633                cur->bc_bufs[0] = NULL;
 634        xfs_iroot_realloc(ip, -1, whichfork);
 635        ASSERT(ifp->if_broot == NULL);
 636        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 637        *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 638        return 0;
 639}
 640
 641/*
 642 * Convert an extents-format file into a btree-format file.
 643 * The new file will have a root block (in the inode) and a single child block.
     *
     * On success returns 0, stores the new cursor in *curp (which must be NULL
     * on entry) and sets *logflagsp to the inode log flags the caller needs.
     * On failure the incore root is removed again, the fork is put back into
     * extents format, the cursor is deleted and the error is returned;
     * *logflagsp is then 0.
 644 */
 645STATIC int                                      /* error */
 646xfs_bmap_extents_to_btree(
 647        struct xfs_trans        *tp,            /* transaction pointer */
 648        struct xfs_inode        *ip,            /* incore inode pointer */
 649        struct xfs_btree_cur    **curp,         /* cursor returned to caller */
 650        int                     wasdel,         /* converting a delayed alloc */
 651        int                     *logflagsp,     /* inode logging flags */
 652        int                     whichfork)      /* data or attr fork */
 653{
 654        struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
 655        struct xfs_buf          *abp;           /* buffer for ablock */
 656        struct xfs_alloc_arg    args;           /* allocation arguments */
 657        struct xfs_bmbt_rec     *arp;           /* child record pointer */
 658        struct xfs_btree_block  *block;         /* btree root block */
 659        struct xfs_btree_cur    *cur;           /* bmap btree cursor */
 660        int                     error;          /* error return value */
 661        struct xfs_ifork        *ifp;           /* inode fork pointer */
 662        struct xfs_bmbt_key     *kp;            /* root block key pointer */
 663        struct xfs_mount        *mp;            /* mount structure */
 664        xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
 665        struct xfs_iext_cursor  icur;
 666        struct xfs_bmbt_irec    rec;
 667        xfs_extnum_t            cnt = 0;        /* records copied into ablock */
 668
 669        mp = ip->i_mount;
 670        ASSERT(whichfork != XFS_COW_FORK);
 671        ifp = XFS_IFORK_PTR(ip, whichfork);
 672        ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 673
 674        /*
 675         * Make space in the inode incore. This needs to be undone if we fail
 676         * to expand the root.
 677         */
 678        xfs_iroot_realloc(ip, 1, whichfork);
 679
 680        /*
 681         * Fill in the root.
 682         */
 683        block = ifp->if_broot;
 684        xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 685                                 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 686                                 XFS_BTREE_LONG_PTRS);
 687        /*
 688         * Need a cursor.  Can't allocate until bb_level is filled in.
 689         */
 690        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 691        cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 692        /*
 693         * Convert to a btree with two levels, one record in root.
 694         */
 695        ifp->if_format = XFS_DINODE_FMT_BTREE;
 696        memset(&args, 0, sizeof(args));
 697        args.tp = tp;
 698        args.mp = mp;
 699        xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
            /*
             * Pick where to allocate the child block: start from the inode's
             * own block if this transaction has not allocated anything yet,
             * otherwise start from (low-space mode) or stay near
             * tp->t_firstblock.
             */
 700        if (tp->t_firstblock == NULLFSBLOCK) {
 701                args.type = XFS_ALLOCTYPE_START_BNO;
 702                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 703        } else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 704                args.type = XFS_ALLOCTYPE_START_BNO;
 705                args.fsbno = tp->t_firstblock;
 706        } else {
 707                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 708                args.fsbno = tp->t_firstblock;
 709        }
            /* Exactly one block is wanted. */
 710        args.minlen = args.maxlen = args.prod = 1;
 711        args.wasdel = wasdel;
 712        *logflagsp = 0;
 713        error = xfs_alloc_vextent(&args);
 714        if (error)
 715                goto out_root_realloc;
 716
            /* No error but no block either: warn once and fail with ENOSPC. */
 717        if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 718                error = -ENOSPC;
 719                goto out_root_realloc;
 720        }
 721
 722        /*
 723         * Allocation can't fail, the space was reserved.
 724         */
 725        ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 726               args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
 727        tp->t_firstblock = args.fsbno;
 728        cur->bc_ino.allocated++;
 729        ip->i_nblocks++;
 730        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 731        error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 732                        XFS_FSB_TO_DADDR(mp, args.fsbno),
 733                        mp->m_bsize, 0, &abp);
 734        if (error)
 735                goto out_unreserve_dquot;
 736
 737        /*
 738         * Fill in the child block.
 739         */
 740        abp->b_ops = &xfs_bmbt_buf_ops;
 741        ablock = XFS_BUF_TO_BLOCK(abp);
 742        xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 743                                XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 744                                XFS_BTREE_LONG_PTRS);
 745
            /*
             * Copy every incore extent into the child block, skipping those
             * for which isnullstartblock() is true (presumably delayed
             * allocations, which have no on-disk block yet -- TODO confirm).
             */
 746        for_each_xfs_iext(ifp, &icur, &rec) {
 747                if (isnullstartblock(rec.br_startblock))
 748                        continue;
 749                arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 750                xfs_bmbt_disk_set_all(arp, &rec);
 751                cnt++;
 752        }
 753        ASSERT(cnt == ifp->if_nextents);
 754        xfs_btree_set_numrecs(ablock, cnt);
 755
 756        /*
 757         * Fill in the root key and pointer.
             * The key is the start offset of the child's first record and the
             * pointer is the newly allocated block.
 758         */
 759        kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 760        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 761        kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 762        pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 763                                                be16_to_cpu(block->bb_level)));
 764        *pp = cpu_to_be64(args.fsbno);
 765
 766        /*
 767         * Do all this logging at the end so that
 768         * the root is at the right level.
 769         */
 770        xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 771        xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 772        ASSERT(*curp == NULL);
 773        *curp = cur;
 774        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 775        return 0;
 776
    /*
     * Error unwinding.
     * NOTE(review): the dquot block count is backed out here, but the
     * ip->i_nblocks++ and cur->bc_ino.allocated++ done above are not --
     * confirm callers cancel the transaction on this path.
     */
 777out_unreserve_dquot:
 778        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 779out_root_realloc:
 780        xfs_iroot_realloc(ip, -1, whichfork);
 781        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 782        ASSERT(ifp->if_broot == NULL);
 783        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 784
 785        return error;
 786}
 787
 788/*
 789 * Convert a local file to an extents file.
 790 * This code is out of bounds for data forks of regular files,
 791 * since the file data needs to get logged so things will stay consistent.
 792 * (The bmap-level manipulations are ok, though).
 793 */
 794void
 795xfs_bmap_local_to_extents_empty(
 796        struct xfs_trans        *tp,
 797        struct xfs_inode        *ip,
 798        int                     whichfork)
 799{
 800        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 801
 802        ASSERT(whichfork != XFS_COW_FORK);
 803        ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 804        ASSERT(ifp->if_bytes == 0);
 805        ASSERT(ifp->if_nextents == 0);
 806
 807        xfs_bmap_forkoff_reset(ip, whichfork);
 808        ifp->if_u1.if_root = NULL;
 809        ifp->if_height = 0;
 810        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 811        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 812}
 813
 814
 815STATIC int                              /* error */
 816xfs_bmap_local_to_extents(
 817        xfs_trans_t     *tp,            /* transaction pointer */
 818        xfs_inode_t     *ip,            /* incore inode pointer */
 819        xfs_extlen_t    total,          /* total blocks needed by transaction */
 820        int             *logflagsp,     /* inode logging flags */
 821        int             whichfork,
 822        void            (*init_fn)(struct xfs_trans *tp,
 823                                   struct xfs_buf *bp,
 824                                   struct xfs_inode *ip,
 825                                   struct xfs_ifork *ifp))
 826{
 827        int             error = 0;
 828        int             flags;          /* logging flags returned */
 829        struct xfs_ifork *ifp;          /* inode fork pointer */
 830        xfs_alloc_arg_t args;           /* allocation arguments */
 831        struct xfs_buf  *bp;            /* buffer for extent block */
 832        struct xfs_bmbt_irec rec;
 833        struct xfs_iext_cursor icur;
 834
 835        /*
 836         * We don't want to deal with the case of keeping inode data inline yet.
 837         * So sending the data fork of a regular inode is invalid.
 838         */
 839        ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 840        ifp = XFS_IFORK_PTR(ip, whichfork);
 841        ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 842
 843        if (!ifp->if_bytes) {
 844                xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 845                flags = XFS_ILOG_CORE;
 846                goto done;
 847        }
 848
 849        flags = 0;
 850        error = 0;
 851        memset(&args, 0, sizeof(args));
 852        args.tp = tp;
 853        args.mp = ip->i_mount;
 854        xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 855        /*
 856         * Allocate a block.  We know we need only one, since the
 857         * file currently fits in an inode.
 858         */
 859        if (tp->t_firstblock == NULLFSBLOCK) {
 860                args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 861                args.type = XFS_ALLOCTYPE_START_BNO;
 862        } else {
 863                args.fsbno = tp->t_firstblock;
 864                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 865        }
 866        args.total = total;
 867        args.minlen = args.maxlen = args.prod = 1;
 868        error = xfs_alloc_vextent(&args);
 869        if (error)
 870                goto done;
 871
 872        /* Can't fail, the space was reserved. */
 873        ASSERT(args.fsbno != NULLFSBLOCK);
 874        ASSERT(args.len == 1);
 875        tp->t_firstblock = args.fsbno;
 876        error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 877                        XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 878                        args.mp->m_bsize, 0, &bp);
 879        if (error)
 880                goto done;
 881
 882        /*
 883         * Initialize the block, copy the data and log the remote buffer.
 884         *
 885         * The callout is responsible for logging because the remote format
 886         * might differ from the local format and thus we don't know how much to
 887         * log here. Note that init_fn must also set the buffer log item type
 888         * correctly.
 889         */
 890        init_fn(tp, bp, ip, ifp);
 891
 892        /* account for the change in fork size */
 893        xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 894        xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 895        flags |= XFS_ILOG_CORE;
 896
 897        ifp->if_u1.if_root = NULL;
 898        ifp->if_height = 0;
 899
 900        rec.br_startoff = 0;
 901        rec.br_startblock = args.fsbno;
 902        rec.br_blockcount = 1;
 903        rec.br_state = XFS_EXT_NORM;
 904        xfs_iext_first(ifp, &icur);
 905        xfs_iext_insert(ip, &icur, &rec, 0);
 906
 907        ifp->if_nextents = 1;
 908        ip->i_nblocks = 1;
 909        xfs_trans_mod_dquot_byino(tp, ip,
 910                XFS_TRANS_DQ_BCOUNT, 1L);
 911        flags |= xfs_ilog_fext(whichfork);
 912
 913done:
 914        *logflagsp = flags;
 915        return error;
 916}
 917
 918/*
 919 * Called from xfs_bmap_add_attrfork to handle btree format files.
 920 */
 921STATIC int                                      /* error */
 922xfs_bmap_add_attrfork_btree(
 923        xfs_trans_t             *tp,            /* transaction pointer */
 924        xfs_inode_t             *ip,            /* incore inode pointer */
 925        int                     *flags)         /* inode logging flags */
 926{
 927        struct xfs_btree_block  *block = ip->i_df.if_broot;
 928        xfs_btree_cur_t         *cur;           /* btree cursor */
 929        int                     error;          /* error return value */
 930        xfs_mount_t             *mp;            /* file system mount struct */
 931        int                     stat;           /* newroot status */
 932
 933        mp = ip->i_mount;
 934
 935        if (XFS_BMAP_BMDR_SPACE(block) <= XFS_IFORK_DSIZE(ip))
 936                *flags |= XFS_ILOG_DBROOT;
 937        else {
 938                cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 939                error = xfs_bmbt_lookup_first(cur, &stat);
 940                if (error)
 941                        goto error0;
 942                /* must be at least one entry */
 943                if (XFS_IS_CORRUPT(mp, stat != 1)) {
 944                        error = -EFSCORRUPTED;
 945                        goto error0;
 946                }
 947                if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 948                        goto error0;
 949                if (stat == 0) {
 950                        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 951                        return -ENOSPC;
 952                }
 953                cur->bc_ino.allocated = 0;
 954                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 955        }
 956        return 0;
 957error0:
 958        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 959        return error;
 960}
 961
 962/*
 963 * Called from xfs_bmap_add_attrfork to handle extents format files.
 964 */
 965STATIC int                                      /* error */
 966xfs_bmap_add_attrfork_extents(
 967        struct xfs_trans        *tp,            /* transaction pointer */
 968        struct xfs_inode        *ip,            /* incore inode pointer */
 969        int                     *flags)         /* inode logging flags */
 970{
 971        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
 972        int                     error;          /* error return value */
 973
 974        if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 975            XFS_IFORK_DSIZE(ip))
 976                return 0;
 977        cur = NULL;
 978        error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 979                                          XFS_DATA_FORK);
 980        if (cur) {
 981                cur->bc_ino.allocated = 0;
 982                xfs_btree_del_cursor(cur, error);
 983        }
 984        return error;
 985}
 986
 987/*
 988 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 989 * different data fork content type needs a different callout to do the
 990 * conversion. Some are basic and only require special block initialisation
 991 * callouts for the data formating, others (directories) are so specialised they
 992 * handle everything themselves.
 993 *
 994 * XXX (dgc): investigate whether directory conversion can use the generic
 995 * formatting callout. It should be possible - it's just a very complex
 996 * formatter.
 997 */
 998STATIC int                                      /* error */
 999xfs_bmap_add_attrfork_local(
1000        struct xfs_trans        *tp,            /* transaction pointer */
1001        struct xfs_inode        *ip,            /* incore inode pointer */
1002        int                     *flags)         /* inode logging flags */
1003{
1004        struct xfs_da_args      dargs;          /* args for dir/attr code */
1005
1006        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1007                return 0;
1008
1009        if (S_ISDIR(VFS_I(ip)->i_mode)) {
1010                memset(&dargs, 0, sizeof(dargs));
1011                dargs.geo = ip->i_mount->m_dir_geo;
1012                dargs.dp = ip;
1013                dargs.total = dargs.geo->fsbcount;
1014                dargs.whichfork = XFS_DATA_FORK;
1015                dargs.trans = tp;
1016                return xfs_dir2_sf_to_block(&dargs);
1017        }
1018
1019        if (S_ISLNK(VFS_I(ip)->i_mode))
1020                return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1021                                                 XFS_DATA_FORK,
1022                                                 xfs_symlink_local_to_remote);
1023
1024        /* should only be called for types that support local format data */
1025        ASSERT(0);
1026        return -EFSCORRUPTED;
1027}
1028
1029/*
1030 * Set an inode attr fork offset based on the format of the data fork.
1031 */
1032static int
1033xfs_bmap_set_attrforkoff(
1034        struct xfs_inode        *ip,
1035        int                     size,
1036        int                     *version)
1037{
1038        int                     default_size = xfs_default_attroffset(ip) >> 3;
1039
1040        switch (ip->i_df.if_format) {
1041        case XFS_DINODE_FMT_DEV:
1042                ip->i_forkoff = default_size;
1043                break;
1044        case XFS_DINODE_FMT_LOCAL:
1045        case XFS_DINODE_FMT_EXTENTS:
1046        case XFS_DINODE_FMT_BTREE:
1047                ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1048                if (!ip->i_forkoff)
1049                        ip->i_forkoff = default_size;
1050                else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1051                        *version = 2;
1052                break;
1053        default:
1054                ASSERT(0);
1055                return -EINVAL;
1056        }
1057
1058        return 0;
1059}
1060
1061/*
1062 * Convert inode from non-attributed to attributed.
1063 * Must not be in a transaction, ip must not be locked.
1064 */
1065int                                             /* error code */
1066xfs_bmap_add_attrfork(
1067        xfs_inode_t             *ip,            /* incore inode pointer */
1068        int                     size,           /* space new attribute needs */
1069        int                     rsvd)           /* xact may use reserved blks */
1070{
1071        xfs_mount_t             *mp;            /* mount structure */
1072        xfs_trans_t             *tp;            /* transaction pointer */
1073        int                     blks;           /* space reservation */
1074        int                     version = 1;    /* superblock attr version */
1075        int                     logflags;       /* logging flags */
1076        int                     error;          /* error return value */
1077
1078        ASSERT(XFS_IFORK_Q(ip) == 0);
1079
1080        mp = ip->i_mount;
1081        ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1082
1083        blks = XFS_ADDAFORK_SPACE_RES(mp);
1084
1085        error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
1086                        rsvd, &tp);
1087        if (error)
1088                return error;
1089        if (XFS_IFORK_Q(ip))
1090                goto trans_cancel;
1091
1092        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1093        error = xfs_bmap_set_attrforkoff(ip, size, &version);
1094        if (error)
1095                goto trans_cancel;
1096        ASSERT(ip->i_afp == NULL);
1097
1098        ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
1099        logflags = 0;
1100        switch (ip->i_df.if_format) {
1101        case XFS_DINODE_FMT_LOCAL:
1102                error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1103                break;
1104        case XFS_DINODE_FMT_EXTENTS:
1105                error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1106                break;
1107        case XFS_DINODE_FMT_BTREE:
1108                error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1109                break;
1110        default:
1111                error = 0;
1112                break;
1113        }
1114        if (logflags)
1115                xfs_trans_log_inode(tp, ip, logflags);
1116        if (error)
1117                goto trans_cancel;
1118        if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1119           (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1120                bool log_sb = false;
1121
1122                spin_lock(&mp->m_sb_lock);
1123                if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1124                        xfs_sb_version_addattr(&mp->m_sb);
1125                        log_sb = true;
1126                }
1127                if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1128                        xfs_sb_version_addattr2(&mp->m_sb);
1129                        log_sb = true;
1130                }
1131                spin_unlock(&mp->m_sb_lock);
1132                if (log_sb)
1133                        xfs_log_sb(tp);
1134        }
1135
1136        error = xfs_trans_commit(tp);
1137        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1138        return error;
1139
1140trans_cancel:
1141        xfs_trans_cancel(tp);
1142        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1143        return error;
1144}
1145
1146/*
1147 * Internal and external extent tree search functions.
1148 */
1149
/*
 * State handed to xfs_iread_bmbt_block() as its private pointer while
 * xfs_iread_extents() walks the bmap btree.
 */
1150struct xfs_iread_state {
1151        struct xfs_iext_cursor  icur;   /* incore position for the next insert */
1152        xfs_extnum_t            loaded; /* records inserted so far */
1153};
1154
1155/* Stuff every bmbt record from this block into the incore extent map. */
1156static int
1157xfs_iread_bmbt_block(
1158        struct xfs_btree_cur    *cur,
1159        int                     level,
1160        void                    *priv)
1161{
1162        struct xfs_iread_state  *ir = priv;
1163        struct xfs_mount        *mp = cur->bc_mp;
1164        struct xfs_inode        *ip = cur->bc_ino.ip;
1165        struct xfs_btree_block  *block;
1166        struct xfs_buf          *bp;
1167        struct xfs_bmbt_rec     *frp;
1168        xfs_extnum_t            num_recs;
1169        xfs_extnum_t            j;
1170        int                     whichfork = cur->bc_ino.whichfork;
1171        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1172
1173        block = xfs_btree_get_block(cur, level, &bp);
1174
1175        /* Abort if we find more records than nextents. */
1176        num_recs = xfs_btree_get_numrecs(block);
1177        if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1178                xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1179                                (unsigned long long)ip->i_ino);
1180                xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1181                                sizeof(*block), __this_address);
1182                return -EFSCORRUPTED;
1183        }
1184
1185        /* Copy records into the incore cache. */
1186        frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1187        for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1188                struct xfs_bmbt_irec    new;
1189                xfs_failaddr_t          fa;
1190
1191                xfs_bmbt_disk_get_all(frp, &new);
1192                fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1193                if (fa) {
1194                        xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1195                                        "xfs_iread_extents(2)", frp,
1196                                        sizeof(*frp), fa);
1197                        return -EFSCORRUPTED;
1198                }
1199                xfs_iext_insert(ip, &ir->icur, &new,
1200                                xfs_bmap_fork_to_state(whichfork));
1201                trace_xfs_read_extent(ip, &ir->icur,
1202                                xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1203                xfs_iext_next(ifp, &ir->icur);
1204        }
1205
1206        return 0;
1207}
1208
1209/*
1210 * Read in extents from a btree-format inode.
1211 */
1212int
1213xfs_iread_extents(
1214        struct xfs_trans        *tp,
1215        struct xfs_inode        *ip,
1216        int                     whichfork)
1217{
1218        struct xfs_iread_state  ir;
1219        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1220        struct xfs_mount        *mp = ip->i_mount;
1221        struct xfs_btree_cur    *cur;
1222        int                     error;
1223
1224        if (!xfs_need_iread_extents(ifp))
1225                return 0;
1226
1227        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1228
1229        ir.loaded = 0;
1230        xfs_iext_first(ifp, &ir.icur);
1231        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1232        error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1233                        XFS_BTREE_VISIT_RECORDS, &ir);
1234        xfs_btree_del_cursor(cur, error);
1235        if (error)
1236                goto out;
1237
1238        if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1239                error = -EFSCORRUPTED;
1240                goto out;
1241        }
1242        ASSERT(ir.loaded == xfs_iext_count(ifp));
1243        return 0;
1244out:
1245        xfs_iext_destroy(ifp);
1246        return error;
1247}
1248
1249/*
1250 * Returns the relative block number of the first unused block(s) in the given
1251 * fork with at least "len" logically contiguous blocks free.  This is the
1252 * lowest-address hole if the fork has holes, else the first block past the end
1253 * of fork.  Return 0 if the fork is currently local (in-inode).
1254 */
1255int                                             /* error */
1256xfs_bmap_first_unused(
1257        struct xfs_trans        *tp,            /* transaction pointer */
1258        struct xfs_inode        *ip,            /* incore inode */
1259        xfs_extlen_t            len,            /* size of hole to find */
1260        xfs_fileoff_t           *first_unused,  /* unused block */
1261        int                     whichfork)      /* data or attr fork */
1262{
1263        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1264        struct xfs_bmbt_irec    got;
1265        struct xfs_iext_cursor  icur;
1266        xfs_fileoff_t           lastaddr = 0;
1267        xfs_fileoff_t           lowest, max;
1268        int                     error;
1269
1270        if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1271                *first_unused = 0;
1272                return 0;
1273        }
1274
1275        ASSERT(xfs_ifork_has_extents(ifp));
1276
1277        error = xfs_iread_extents(tp, ip, whichfork);
1278        if (error)
1279                return error;
1280
1281        lowest = max = *first_unused;
1282        for_each_xfs_iext(ifp, &icur, &got) {
1283                /*
1284                 * See if the hole before this extent will work.
1285                 */
1286                if (got.br_startoff >= lowest + len &&
1287                    got.br_startoff - max >= len)
1288                        break;
1289                lastaddr = got.br_startoff + got.br_blockcount;
1290                max = XFS_FILEOFF_MAX(lastaddr, lowest);
1291        }
1292
1293        *first_unused = max;
1294        return 0;
1295}
1296
1297/*
1298 * Returns the file-relative block number of the last block - 1 before
1299 * last_block (input value) in the file.
1300 * This is not based on i_size, it is based on the extent records.
1301 * Returns 0 for local files, as they do not have extent records.
1302 */
1303int                                             /* error */
1304xfs_bmap_last_before(
1305        struct xfs_trans        *tp,            /* transaction pointer */
1306        struct xfs_inode        *ip,            /* incore inode */
1307        xfs_fileoff_t           *last_block,    /* last block */
1308        int                     whichfork)      /* data or attr fork */
1309{
1310        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1311        struct xfs_bmbt_irec    got;
1312        struct xfs_iext_cursor  icur;
1313        int                     error;
1314
1315        switch (ifp->if_format) {
1316        case XFS_DINODE_FMT_LOCAL:
1317                *last_block = 0;
1318                return 0;
1319        case XFS_DINODE_FMT_BTREE:
1320        case XFS_DINODE_FMT_EXTENTS:
1321                break;
1322        default:
1323                ASSERT(0);
1324                return -EFSCORRUPTED;
1325        }
1326
1327        error = xfs_iread_extents(tp, ip, whichfork);
1328        if (error)
1329                return error;
1330
1331        if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1332                *last_block = 0;
1333        return 0;
1334}
1335
1336int
1337xfs_bmap_last_extent(
1338        struct xfs_trans        *tp,
1339        struct xfs_inode        *ip,
1340        int                     whichfork,
1341        struct xfs_bmbt_irec    *rec,
1342        int                     *is_empty)
1343{
1344        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1345        struct xfs_iext_cursor  icur;
1346        int                     error;
1347
1348        error = xfs_iread_extents(tp, ip, whichfork);
1349        if (error)
1350                return error;
1351
1352        xfs_iext_last(ifp, &icur);
1353        if (!xfs_iext_get_extent(ifp, &icur, rec))
1354                *is_empty = 1;
1355        else
1356                *is_empty = 0;
1357        return 0;
1358}
1359
1360/*
1361 * Check the last inode extent to determine whether this allocation will result
1362 * in blocks being allocated at the end of the file. When we allocate new data
1363 * blocks at the end of the file which do not start at the previous data block,
1364 * we will try to align the new blocks at stripe unit boundaries.
1365 *
1366 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1367 * at, or past the EOF.
1368 */
1369STATIC int
1370xfs_bmap_isaeof(
1371        struct xfs_bmalloca     *bma,
1372        int                     whichfork)
1373{
1374        struct xfs_bmbt_irec    rec;
1375        int                     is_empty;
1376        int                     error;
1377
1378        bma->aeof = false;
1379        error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1380                                     &is_empty);
1381        if (error)
1382                return error;
1383
1384        if (is_empty) {
1385                bma->aeof = true;
1386                return 0;
1387        }
1388
1389        /*
1390         * Check if we are allocation or past the last extent, or at least into
1391         * the last delayed allocated extent.
1392         */
1393        bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1394                (bma->offset >= rec.br_startoff &&
1395                 isnullstartblock(rec.br_startblock));
1396        return 0;
1397}
1398
1399/*
1400 * Returns the file-relative block number of the first block past eof in
1401 * the file.  This is not based on i_size, it is based on the extent records.
1402 * Returns 0 for local files, as they do not have extent records.
1403 */
1404int
1405xfs_bmap_last_offset(
1406        struct xfs_inode        *ip,
1407        xfs_fileoff_t           *last_block,
1408        int                     whichfork)
1409{
1410        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1411        struct xfs_bmbt_irec    rec;
1412        int                     is_empty;
1413        int                     error;
1414
1415        *last_block = 0;
1416
1417        if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1418                return 0;
1419
1420        if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1421                return -EFSCORRUPTED;
1422
1423        error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1424        if (error || is_empty)
1425                return error;
1426
1427        *last_block = rec.br_startoff + rec.br_blockcount;
1428        return 0;
1429}
1430
1431/*
1432 * Extent tree manipulation functions used during allocation.
1433 */
1434
1435/*
1436 * Convert a delayed allocation to a real allocation.
1437 */
1438STATIC int                              /* error */
1439xfs_bmap_add_extent_delay_real(
1440        struct xfs_bmalloca     *bma,
1441        int                     whichfork)
1442{
1443        struct xfs_mount        *mp = bma->ip->i_mount;
1444        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1445        struct xfs_bmbt_irec    *new = &bma->got;
1446        int                     error;  /* error return value */
1447        int                     i;      /* temp state */
1448        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1449        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1450                                        /* left is 0, right is 1, prev is 2 */
1451        int                     rval=0; /* return value (logging flags) */
1452        int                     state = xfs_bmap_fork_to_state(whichfork);
1453        xfs_filblks_t           da_new; /* new count del alloc blocks used */
1454        xfs_filblks_t           da_old; /* old count del alloc blocks used */
1455        xfs_filblks_t           temp=0; /* value for da_new calculations */
1456        int                     tmp_rval;       /* partial logging flags */
1457        struct xfs_bmbt_irec    old;
1458
1459        ASSERT(whichfork != XFS_ATTR_FORK);
1460        ASSERT(!isnullstartblock(new->br_startblock));
1461        ASSERT(!bma->cur ||
1462               (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1463
1464        XFS_STATS_INC(mp, xs_add_exlist);
1465
1466#define LEFT            r[0]
1467#define RIGHT           r[1]
1468#define PREV            r[2]
1469
1470        /*
1471         * Set up a bunch of variables to make the tests simpler.
1472         */
1473        xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1474        new_endoff = new->br_startoff + new->br_blockcount;
1475        ASSERT(isnullstartblock(PREV.br_startblock));
1476        ASSERT(PREV.br_startoff <= new->br_startoff);
1477        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1478
1479        da_old = startblockval(PREV.br_startblock);
1480        da_new = 0;
1481
1482        /*
1483         * Set flags determining what part of the previous delayed allocation
1484         * extent is being replaced by a real allocation.
1485         */
1486        if (PREV.br_startoff == new->br_startoff)
1487                state |= BMAP_LEFT_FILLING;
1488        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1489                state |= BMAP_RIGHT_FILLING;
1490
1491        /*
1492         * Check and set flags if this segment has a left neighbor.
1493         * Don't set contiguous if the combined extent would be too large.
1494         */
1495        if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1496                state |= BMAP_LEFT_VALID;
1497                if (isnullstartblock(LEFT.br_startblock))
1498                        state |= BMAP_LEFT_DELAY;
1499        }
1500
1501        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1502            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1503            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1504            LEFT.br_state == new->br_state &&
1505            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1506                state |= BMAP_LEFT_CONTIG;
1507
1508        /*
1509         * Check and set flags if this segment has a right neighbor.
1510         * Don't set contiguous if the combined extent would be too large.
1511         * Also check for all-three-contiguous being too large.
1512         */
1513        if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1514                state |= BMAP_RIGHT_VALID;
1515                if (isnullstartblock(RIGHT.br_startblock))
1516                        state |= BMAP_RIGHT_DELAY;
1517        }
1518
1519        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1520            new_endoff == RIGHT.br_startoff &&
1521            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1522            new->br_state == RIGHT.br_state &&
1523            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1524            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1525                       BMAP_RIGHT_FILLING)) !=
1526                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1527                       BMAP_RIGHT_FILLING) ||
1528             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1529                        <= MAXEXTLEN))
1530                state |= BMAP_RIGHT_CONTIG;
1531
1532        error = 0;
1533        /*
1534         * Switch out based on the FILLING and CONTIG state bits.
1535         */
1536        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1537                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1538        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1539             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1540                /*
1541                 * Filling in all of a previously delayed allocation extent.
1542                 * The left and right neighbors are both contiguous with new.
1543                 */
1544                LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1545
1546                xfs_iext_remove(bma->ip, &bma->icur, state);
1547                xfs_iext_remove(bma->ip, &bma->icur, state);
1548                xfs_iext_prev(ifp, &bma->icur);
1549                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1550                ifp->if_nextents--;
1551
1552                if (bma->cur == NULL)
1553                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1554                else {
1555                        rval = XFS_ILOG_CORE;
1556                        error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1557                        if (error)
1558                                goto done;
1559                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1560                                error = -EFSCORRUPTED;
1561                                goto done;
1562                        }
1563                        error = xfs_btree_delete(bma->cur, &i);
1564                        if (error)
1565                                goto done;
1566                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1567                                error = -EFSCORRUPTED;
1568                                goto done;
1569                        }
1570                        error = xfs_btree_decrement(bma->cur, 0, &i);
1571                        if (error)
1572                                goto done;
1573                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1574                                error = -EFSCORRUPTED;
1575                                goto done;
1576                        }
1577                        error = xfs_bmbt_update(bma->cur, &LEFT);
1578                        if (error)
1579                                goto done;
1580                }
1581                break;
1582
1583        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1584                /*
1585                 * Filling in all of a previously delayed allocation extent.
1586                 * The left neighbor is contiguous, the right is not.
1587                 */
1588                old = LEFT;
1589                LEFT.br_blockcount += PREV.br_blockcount;
1590
1591                xfs_iext_remove(bma->ip, &bma->icur, state);
1592                xfs_iext_prev(ifp, &bma->icur);
1593                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1594
1595                if (bma->cur == NULL)
1596                        rval = XFS_ILOG_DEXT;
1597                else {
1598                        rval = 0;
1599                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1600                        if (error)
1601                                goto done;
1602                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1603                                error = -EFSCORRUPTED;
1604                                goto done;
1605                        }
1606                        error = xfs_bmbt_update(bma->cur, &LEFT);
1607                        if (error)
1608                                goto done;
1609                }
1610                break;
1611
1612        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1613                /*
1614                 * Filling in all of a previously delayed allocation extent.
1615                 * The right neighbor is contiguous, the left is not. Take care
1616                 * with delay -> unwritten extent allocation here because the
1617                 * delalloc record we are overwriting is always written.
1618                 */
1619                PREV.br_startblock = new->br_startblock;
1620                PREV.br_blockcount += RIGHT.br_blockcount;
1621                PREV.br_state = new->br_state;
1622
1623                xfs_iext_next(ifp, &bma->icur);
1624                xfs_iext_remove(bma->ip, &bma->icur, state);
1625                xfs_iext_prev(ifp, &bma->icur);
1626                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1627
1628                if (bma->cur == NULL)
1629                        rval = XFS_ILOG_DEXT;
1630                else {
1631                        rval = 0;
1632                        error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1633                        if (error)
1634                                goto done;
1635                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1636                                error = -EFSCORRUPTED;
1637                                goto done;
1638                        }
1639                        error = xfs_bmbt_update(bma->cur, &PREV);
1640                        if (error)
1641                                goto done;
1642                }
1643                break;
1644
1645        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1646                /*
1647                 * Filling in all of a previously delayed allocation extent.
1648                 * Neither the left nor right neighbors are contiguous with
1649                 * the new one.
1650                 */
1651                PREV.br_startblock = new->br_startblock;
1652                PREV.br_state = new->br_state;
1653                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1654                ifp->if_nextents++;
1655
1656                if (bma->cur == NULL)
1657                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1658                else {
1659                        rval = XFS_ILOG_CORE;
1660                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1661                        if (error)
1662                                goto done;
1663                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1664                                error = -EFSCORRUPTED;
1665                                goto done;
1666                        }
1667                        error = xfs_btree_insert(bma->cur, &i);
1668                        if (error)
1669                                goto done;
1670                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1671                                error = -EFSCORRUPTED;
1672                                goto done;
1673                        }
1674                }
1675                break;
1676
1677        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1678                /*
1679                 * Filling in the first part of a previous delayed allocation.
1680                 * The left neighbor is contiguous.
1681                 */
1682                old = LEFT;
1683                temp = PREV.br_blockcount - new->br_blockcount;
1684                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1685                                startblockval(PREV.br_startblock));
1686
1687                LEFT.br_blockcount += new->br_blockcount;
1688
1689                PREV.br_blockcount = temp;
1690                PREV.br_startoff += new->br_blockcount;
1691                PREV.br_startblock = nullstartblock(da_new);
1692
1693                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1694                xfs_iext_prev(ifp, &bma->icur);
1695                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1696
1697                if (bma->cur == NULL)
1698                        rval = XFS_ILOG_DEXT;
1699                else {
1700                        rval = 0;
1701                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1702                        if (error)
1703                                goto done;
1704                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1705                                error = -EFSCORRUPTED;
1706                                goto done;
1707                        }
1708                        error = xfs_bmbt_update(bma->cur, &LEFT);
1709                        if (error)
1710                                goto done;
1711                }
1712                break;
1713
1714        case BMAP_LEFT_FILLING:
1715                /*
1716                 * Filling in the first part of a previous delayed allocation.
1717                 * The left neighbor is not contiguous.
1718                 */
1719                xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1720                ifp->if_nextents++;
1721
1722                if (bma->cur == NULL)
1723                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1724                else {
1725                        rval = XFS_ILOG_CORE;
1726                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1727                        if (error)
1728                                goto done;
1729                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1730                                error = -EFSCORRUPTED;
1731                                goto done;
1732                        }
1733                        error = xfs_btree_insert(bma->cur, &i);
1734                        if (error)
1735                                goto done;
1736                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1737                                error = -EFSCORRUPTED;
1738                                goto done;
1739                        }
1740                }
1741
1742                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1743                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1744                                        &bma->cur, 1, &tmp_rval, whichfork);
1745                        rval |= tmp_rval;
1746                        if (error)
1747                                goto done;
1748                }
1749
1750                temp = PREV.br_blockcount - new->br_blockcount;
1751                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1752                        startblockval(PREV.br_startblock) -
1753                        (bma->cur ? bma->cur->bc_ino.allocated : 0));
1754
1755                PREV.br_startoff = new_endoff;
1756                PREV.br_blockcount = temp;
1757                PREV.br_startblock = nullstartblock(da_new);
1758                xfs_iext_next(ifp, &bma->icur);
1759                xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1760                xfs_iext_prev(ifp, &bma->icur);
1761                break;
1762
1763        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1764                /*
1765                 * Filling in the last part of a previous delayed allocation.
1766                 * The right neighbor is contiguous with the new allocation.
1767                 */
1768                old = RIGHT;
1769                RIGHT.br_startoff = new->br_startoff;
1770                RIGHT.br_startblock = new->br_startblock;
1771                RIGHT.br_blockcount += new->br_blockcount;
1772
1773                if (bma->cur == NULL)
1774                        rval = XFS_ILOG_DEXT;
1775                else {
1776                        rval = 0;
1777                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1778                        if (error)
1779                                goto done;
1780                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1781                                error = -EFSCORRUPTED;
1782                                goto done;
1783                        }
1784                        error = xfs_bmbt_update(bma->cur, &RIGHT);
1785                        if (error)
1786                                goto done;
1787                }
1788
1789                temp = PREV.br_blockcount - new->br_blockcount;
1790                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1791                        startblockval(PREV.br_startblock));
1792
1793                PREV.br_blockcount = temp;
1794                PREV.br_startblock = nullstartblock(da_new);
1795
1796                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1797                xfs_iext_next(ifp, &bma->icur);
1798                xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1799                break;
1800
1801        case BMAP_RIGHT_FILLING:
1802                /*
1803                 * Filling in the last part of a previous delayed allocation.
1804                 * The right neighbor is not contiguous.
1805                 */
1806                xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1807                ifp->if_nextents++;
1808
1809                if (bma->cur == NULL)
1810                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1811                else {
1812                        rval = XFS_ILOG_CORE;
1813                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1814                        if (error)
1815                                goto done;
1816                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1817                                error = -EFSCORRUPTED;
1818                                goto done;
1819                        }
1820                        error = xfs_btree_insert(bma->cur, &i);
1821                        if (error)
1822                                goto done;
1823                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1824                                error = -EFSCORRUPTED;
1825                                goto done;
1826                        }
1827                }
1828
1829                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1830                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1831                                &bma->cur, 1, &tmp_rval, whichfork);
1832                        rval |= tmp_rval;
1833                        if (error)
1834                                goto done;
1835                }
1836
1837                temp = PREV.br_blockcount - new->br_blockcount;
1838                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1839                        startblockval(PREV.br_startblock) -
1840                        (bma->cur ? bma->cur->bc_ino.allocated : 0));
1841
1842                PREV.br_startblock = nullstartblock(da_new);
1843                PREV.br_blockcount = temp;
1844                xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1845                xfs_iext_next(ifp, &bma->icur);
1846                break;
1847
1848        case 0:
1849                /*
1850                 * Filling in the middle part of a previous delayed allocation.
1851                 * Contiguity is impossible here.
1852                 * This case is avoided almost all the time.
1853                 *
1854                 * We start with a delayed allocation:
1855                 *
1856                 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1857                 *  PREV @ idx
1858                 *
1859                 * and we are allocating:
1860                 *                     +rrrrrrrrrrrrrrrrr+
1861                 *                            new
1862                 *
1863                 * and we set it up for insertion as:
1864                 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1865                 *                            new
1866                 *  PREV @ idx          LEFT              RIGHT
1867                 *                      inserted at idx + 1
1868                 */
1869                old = PREV;
1870
1871                /* LEFT is the new middle */
1872                LEFT = *new;
1873
1874                /* RIGHT is the new right */
1875                RIGHT.br_state = PREV.br_state;
1876                RIGHT.br_startoff = new_endoff;
1877                RIGHT.br_blockcount =
1878                        PREV.br_startoff + PREV.br_blockcount - new_endoff;
1879                RIGHT.br_startblock =
1880                        nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1881                                        RIGHT.br_blockcount));
1882
1883                /* truncate PREV */
1884                PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1885                PREV.br_startblock =
1886                        nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1887                                        PREV.br_blockcount));
1888                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1889
1890                xfs_iext_next(ifp, &bma->icur);
1891                xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1892                xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1893                ifp->if_nextents++;
1894
1895                if (bma->cur == NULL)
1896                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1897                else {
1898                        rval = XFS_ILOG_CORE;
1899                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1900                        if (error)
1901                                goto done;
1902                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1903                                error = -EFSCORRUPTED;
1904                                goto done;
1905                        }
1906                        error = xfs_btree_insert(bma->cur, &i);
1907                        if (error)
1908                                goto done;
1909                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1910                                error = -EFSCORRUPTED;
1911                                goto done;
1912                        }
1913                }
1914
1915                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1916                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1917                                        &bma->cur, 1, &tmp_rval, whichfork);
1918                        rval |= tmp_rval;
1919                        if (error)
1920                                goto done;
1921                }
1922
1923                da_new = startblockval(PREV.br_startblock) +
1924                         startblockval(RIGHT.br_startblock);
1925                break;
1926
1927        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1928        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1929        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1930        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1931        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1932        case BMAP_LEFT_CONTIG:
1933        case BMAP_RIGHT_CONTIG:
1934                /*
1935                 * These cases are all impossible.
1936                 */
1937                ASSERT(0);
1938        }
1939
1940        /* add reverse mapping unless caller opted out */
1941        if (!(bma->flags & XFS_BMAPI_NORMAP))
1942                xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1943
1944        /* convert to a btree if necessary */
1945        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1946                int     tmp_logflags;   /* partial log flag return val */
1947
1948                ASSERT(bma->cur == NULL);
1949                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1950                                &bma->cur, da_old > 0, &tmp_logflags,
1951                                whichfork);
1952                bma->logflags |= tmp_logflags;
1953                if (error)
1954                        goto done;
1955        }
1956
1957        if (da_new != da_old)
1958                xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
1959
1960        if (bma->cur) {
1961                da_new += bma->cur->bc_ino.allocated;
1962                bma->cur->bc_ino.allocated = 0;
1963        }
1964
1965        /* adjust for changes in reserved delayed indirect blocks */
1966        if (da_new != da_old) {
1967                ASSERT(state == 0 || da_new < da_old);
1968                error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1969                                false);
1970        }
1971
1972        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
1973done:
1974        if (whichfork != XFS_COW_FORK)
1975                bma->logflags |= rval;
1976        return error;
1977#undef  LEFT
1978#undef  RIGHT
1979#undef  PREV
1980}
1981
1982/*
1983 * Convert an unwritten allocation to a real allocation or vice versa.
1984 */
1985int                                     /* error */
1986xfs_bmap_add_extent_unwritten_real(
1987        struct xfs_trans        *tp,
1988        xfs_inode_t             *ip,    /* incore inode pointer */
1989        int                     whichfork,
1990        struct xfs_iext_cursor  *icur,
1991        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
1992        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
1993        int                     *logflagsp) /* inode logging flags */
1994{
1995        xfs_btree_cur_t         *cur;   /* btree cursor */
1996        int                     error;  /* error return value */
1997        int                     i;      /* temp state */
1998        struct xfs_ifork        *ifp;   /* inode fork pointer */
1999        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2000        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2001                                        /* left is 0, right is 1, prev is 2 */
2002        int                     rval=0; /* return value (logging flags) */
2003        int                     state = xfs_bmap_fork_to_state(whichfork);
2004        struct xfs_mount        *mp = ip->i_mount;
2005        struct xfs_bmbt_irec    old;
2006
2007        *logflagsp = 0;
2008
2009        cur = *curp;
2010        ifp = XFS_IFORK_PTR(ip, whichfork);
2011
2012        ASSERT(!isnullstartblock(new->br_startblock));
2013
2014        XFS_STATS_INC(mp, xs_add_exlist);
2015
2016#define LEFT            r[0]
2017#define RIGHT           r[1]
2018#define PREV            r[2]
2019
2020        /*
2021         * Set up a bunch of variables to make the tests simpler.
2022         */
2023        error = 0;
2024        xfs_iext_get_extent(ifp, icur, &PREV);
2025        ASSERT(new->br_state != PREV.br_state);
2026        new_endoff = new->br_startoff + new->br_blockcount;
2027        ASSERT(PREV.br_startoff <= new->br_startoff);
2028        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2029
2030        /*
2031         * Set flags determining what part of the previous oldext allocation
2032         * extent is being replaced by a newext allocation.
2033         */
2034        if (PREV.br_startoff == new->br_startoff)
2035                state |= BMAP_LEFT_FILLING;
2036        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2037                state |= BMAP_RIGHT_FILLING;
2038
2039        /*
2040         * Check and set flags if this segment has a left neighbor.
2041         * Don't set contiguous if the combined extent would be too large.
2042         */
2043        if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2044                state |= BMAP_LEFT_VALID;
2045                if (isnullstartblock(LEFT.br_startblock))
2046                        state |= BMAP_LEFT_DELAY;
2047        }
2048
2049        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2050            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2051            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2052            LEFT.br_state == new->br_state &&
2053            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2054                state |= BMAP_LEFT_CONTIG;
2055
2056        /*
2057         * Check and set flags if this segment has a right neighbor.
2058         * Don't set contiguous if the combined extent would be too large.
2059         * Also check for all-three-contiguous being too large.
2060         */
2061        if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2062                state |= BMAP_RIGHT_VALID;
2063                if (isnullstartblock(RIGHT.br_startblock))
2064                        state |= BMAP_RIGHT_DELAY;
2065        }
2066
2067        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2068            new_endoff == RIGHT.br_startoff &&
2069            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2070            new->br_state == RIGHT.br_state &&
2071            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2072            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2073                       BMAP_RIGHT_FILLING)) !=
2074                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2075                       BMAP_RIGHT_FILLING) ||
2076             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2077                        <= MAXEXTLEN))
2078                state |= BMAP_RIGHT_CONTIG;
2079
2080        /*
2081         * Switch out based on the FILLING and CONTIG state bits.
2082         */
2083        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2084                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2085        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2086             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2087                /*
2088                 * Setting all of a previous oldext extent to newext.
2089                 * The left and right neighbors are both contiguous with new.
2090                 */
2091                LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2092
2093                xfs_iext_remove(ip, icur, state);
2094                xfs_iext_remove(ip, icur, state);
2095                xfs_iext_prev(ifp, icur);
2096                xfs_iext_update_extent(ip, state, icur, &LEFT);
2097                ifp->if_nextents -= 2;
2098                if (cur == NULL)
2099                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2100                else {
2101                        rval = XFS_ILOG_CORE;
2102                        error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2103                        if (error)
2104                                goto done;
2105                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2106                                error = -EFSCORRUPTED;
2107                                goto done;
2108                        }
2109                        if ((error = xfs_btree_delete(cur, &i)))
2110                                goto done;
2111                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2112                                error = -EFSCORRUPTED;
2113                                goto done;
2114                        }
2115                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2116                                goto done;
2117                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2118                                error = -EFSCORRUPTED;
2119                                goto done;
2120                        }
2121                        if ((error = xfs_btree_delete(cur, &i)))
2122                                goto done;
2123                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2124                                error = -EFSCORRUPTED;
2125                                goto done;
2126                        }
2127                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2128                                goto done;
2129                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2130                                error = -EFSCORRUPTED;
2131                                goto done;
2132                        }
2133                        error = xfs_bmbt_update(cur, &LEFT);
2134                        if (error)
2135                                goto done;
2136                }
2137                break;
2138
2139        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2140                /*
2141                 * Setting all of a previous oldext extent to newext.
2142                 * The left neighbor is contiguous, the right is not.
2143                 */
2144                LEFT.br_blockcount += PREV.br_blockcount;
2145
2146                xfs_iext_remove(ip, icur, state);
2147                xfs_iext_prev(ifp, icur);
2148                xfs_iext_update_extent(ip, state, icur, &LEFT);
2149                ifp->if_nextents--;
2150                if (cur == NULL)
2151                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2152                else {
2153                        rval = XFS_ILOG_CORE;
2154                        error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2155                        if (error)
2156                                goto done;
2157                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2158                                error = -EFSCORRUPTED;
2159                                goto done;
2160                        }
2161                        if ((error = xfs_btree_delete(cur, &i)))
2162                                goto done;
2163                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2164                                error = -EFSCORRUPTED;
2165                                goto done;
2166                        }
2167                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2168                                goto done;
2169                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2170                                error = -EFSCORRUPTED;
2171                                goto done;
2172                        }
2173                        error = xfs_bmbt_update(cur, &LEFT);
2174                        if (error)
2175                                goto done;
2176                }
2177                break;
2178
2179        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2180                /*
2181                 * Setting all of a previous oldext extent to newext.
2182                 * The right neighbor is contiguous, the left is not.
2183                 */
2184                PREV.br_blockcount += RIGHT.br_blockcount;
2185                PREV.br_state = new->br_state;
2186
2187                xfs_iext_next(ifp, icur);
2188                xfs_iext_remove(ip, icur, state);
2189                xfs_iext_prev(ifp, icur);
2190                xfs_iext_update_extent(ip, state, icur, &PREV);
2191                ifp->if_nextents--;
2192
2193                if (cur == NULL)
2194                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2195                else {
2196                        rval = XFS_ILOG_CORE;
2197                        error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2198                        if (error)
2199                                goto done;
2200                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2201                                error = -EFSCORRUPTED;
2202                                goto done;
2203                        }
2204                        if ((error = xfs_btree_delete(cur, &i)))
2205                                goto done;
2206                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2207                                error = -EFSCORRUPTED;
2208                                goto done;
2209                        }
2210                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2211                                goto done;
2212                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2213                                error = -EFSCORRUPTED;
2214                                goto done;
2215                        }
2216                        error = xfs_bmbt_update(cur, &PREV);
2217                        if (error)
2218                                goto done;
2219                }
2220                break;
2221
2222        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2223                /*
2224                 * Setting all of a previous oldext extent to newext.
2225                 * Neither the left nor right neighbors are contiguous with
2226                 * the new one.
2227                 */
2228                PREV.br_state = new->br_state;
2229                xfs_iext_update_extent(ip, state, icur, &PREV);
2230
2231                if (cur == NULL)
2232                        rval = XFS_ILOG_DEXT;
2233                else {
2234                        rval = 0;
2235                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2236                        if (error)
2237                                goto done;
2238                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2239                                error = -EFSCORRUPTED;
2240                                goto done;
2241                        }
2242                        error = xfs_bmbt_update(cur, &PREV);
2243                        if (error)
2244                                goto done;
2245                }
2246                break;
2247
2248        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2249                /*
2250                 * Setting the first part of a previous oldext extent to newext.
2251                 * The left neighbor is contiguous.
2252                 */
2253                LEFT.br_blockcount += new->br_blockcount;
2254
2255                old = PREV;
2256                PREV.br_startoff += new->br_blockcount;
2257                PREV.br_startblock += new->br_blockcount;
2258                PREV.br_blockcount -= new->br_blockcount;
2259
2260                xfs_iext_update_extent(ip, state, icur, &PREV);
2261                xfs_iext_prev(ifp, icur);
2262                xfs_iext_update_extent(ip, state, icur, &LEFT);
2263
2264                if (cur == NULL)
2265                        rval = XFS_ILOG_DEXT;
2266                else {
2267                        rval = 0;
2268                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2269                        if (error)
2270                                goto done;
2271                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2272                                error = -EFSCORRUPTED;
2273                                goto done;
2274                        }
2275                        error = xfs_bmbt_update(cur, &PREV);
2276                        if (error)
2277                                goto done;
2278                        error = xfs_btree_decrement(cur, 0, &i);
2279                        if (error)
2280                                goto done;
2281                        error = xfs_bmbt_update(cur, &LEFT);
2282                        if (error)
2283                                goto done;
2284                }
2285                break;
2286
2287        case BMAP_LEFT_FILLING:
2288                /*
2289                 * Setting the first part of a previous oldext extent to newext.
2290                 * The left neighbor is not contiguous.
2291                 */
2292                old = PREV;
2293                PREV.br_startoff += new->br_blockcount;
2294                PREV.br_startblock += new->br_blockcount;
2295                PREV.br_blockcount -= new->br_blockcount;
2296
2297                xfs_iext_update_extent(ip, state, icur, &PREV);
2298                xfs_iext_insert(ip, icur, new, state);
2299                ifp->if_nextents++;
2300
2301                if (cur == NULL)
2302                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2303                else {
2304                        rval = XFS_ILOG_CORE;
2305                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2306                        if (error)
2307                                goto done;
2308                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2309                                error = -EFSCORRUPTED;
2310                                goto done;
2311                        }
2312                        error = xfs_bmbt_update(cur, &PREV);
2313                        if (error)
2314                                goto done;
2315                        cur->bc_rec.b = *new;
2316                        if ((error = xfs_btree_insert(cur, &i)))
2317                                goto done;
2318                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2319                                error = -EFSCORRUPTED;
2320                                goto done;
2321                        }
2322                }
2323                break;
2324
2325        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2326                /*
2327                 * Setting the last part of a previous oldext extent to newext.
2328                 * The right neighbor is contiguous with the new allocation.
2329                 */
2330                old = PREV;
2331                PREV.br_blockcount -= new->br_blockcount;
2332
2333                RIGHT.br_startoff = new->br_startoff;
2334                RIGHT.br_startblock = new->br_startblock;
2335                RIGHT.br_blockcount += new->br_blockcount;
2336
2337                xfs_iext_update_extent(ip, state, icur, &PREV);
2338                xfs_iext_next(ifp, icur);
2339                xfs_iext_update_extent(ip, state, icur, &RIGHT);
2340
2341                if (cur == NULL)
2342                        rval = XFS_ILOG_DEXT;
2343                else {
2344                        rval = 0;
2345                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2346                        if (error)
2347                                goto done;
2348                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2349                                error = -EFSCORRUPTED;
2350                                goto done;
2351                        }
2352                        error = xfs_bmbt_update(cur, &PREV);
2353                        if (error)
2354                                goto done;
2355                        error = xfs_btree_increment(cur, 0, &i);
2356                        if (error)
2357                                goto done;
2358                        error = xfs_bmbt_update(cur, &RIGHT);
2359                        if (error)
2360                                goto done;
2361                }
2362                break;
2363
2364        case BMAP_RIGHT_FILLING:
2365                /*
2366                 * Setting the last part of a previous oldext extent to newext.
2367                 * The right neighbor is not contiguous.
2368                 */
2369                old = PREV;
2370                PREV.br_blockcount -= new->br_blockcount;
2371
2372                xfs_iext_update_extent(ip, state, icur, &PREV);
2373                xfs_iext_next(ifp, icur);
2374                xfs_iext_insert(ip, icur, new, state);
2375                ifp->if_nextents++;
2376
2377                if (cur == NULL)
2378                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2379                else {
2380                        rval = XFS_ILOG_CORE;
2381                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2382                        if (error)
2383                                goto done;
2384                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2385                                error = -EFSCORRUPTED;
2386                                goto done;
2387                        }
2388                        error = xfs_bmbt_update(cur, &PREV);
2389                        if (error)
2390                                goto done;
2391                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2392                        if (error)
2393                                goto done;
2394                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2395                                error = -EFSCORRUPTED;
2396                                goto done;
2397                        }
2398                        if ((error = xfs_btree_insert(cur, &i)))
2399                                goto done;
2400                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2401                                error = -EFSCORRUPTED;
2402                                goto done;
2403                        }
2404                }
2405                break;
2406
2407        case 0:
2408                /*
2409                 * Setting the middle part of a previous oldext extent to
2410                 * newext.  Contiguity is impossible here.
2411                 * One extent becomes three extents.
2412                 */
2413                old = PREV;
2414                PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2415
2416                r[0] = *new;
2417                r[1].br_startoff = new_endoff;
2418                r[1].br_blockcount =
2419                        old.br_startoff + old.br_blockcount - new_endoff;
2420                r[1].br_startblock = new->br_startblock + new->br_blockcount;
2421                r[1].br_state = PREV.br_state;
2422
2423                xfs_iext_update_extent(ip, state, icur, &PREV);
2424                xfs_iext_next(ifp, icur);
2425                xfs_iext_insert(ip, icur, &r[1], state);
2426                xfs_iext_insert(ip, icur, &r[0], state);
2427                ifp->if_nextents += 2;
2428
2429                if (cur == NULL)
2430                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2431                else {
2432                        rval = XFS_ILOG_CORE;
2433                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2434                        if (error)
2435                                goto done;
2436                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2437                                error = -EFSCORRUPTED;
2438                                goto done;
2439                        }
2440                        /* new right extent - oldext */
2441                        error = xfs_bmbt_update(cur, &r[1]);
2442                        if (error)
2443                                goto done;
2444                        /* new left extent - oldext */
2445                        cur->bc_rec.b = PREV;
2446                        if ((error = xfs_btree_insert(cur, &i)))
2447                                goto done;
2448                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2449                                error = -EFSCORRUPTED;
2450                                goto done;
2451                        }
2452                        /*
2453                         * Reset the cursor to the position of the new extent
2454                         * we are about to insert as we can't trust it after
2455                         * the previous insert.
2456                         */
2457                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2458                        if (error)
2459                                goto done;
2460                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2461                                error = -EFSCORRUPTED;
2462                                goto done;
2463                        }
2464                        /* new middle extent - newext */
2465                        if ((error = xfs_btree_insert(cur, &i)))
2466                                goto done;
2467                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2468                                error = -EFSCORRUPTED;
2469                                goto done;
2470                        }
2471                }
2472                break;
2473
2474        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2475        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2476        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2477        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2478        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2479        case BMAP_LEFT_CONTIG:
2480        case BMAP_RIGHT_CONTIG:
2481                /*
2482                 * These cases are all impossible.
2483                 */
2484                ASSERT(0);
2485        }
2486
2487        /* update reverse mappings */
2488        xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2489
2490        /* convert to a btree if necessary */
2491        if (xfs_bmap_needs_btree(ip, whichfork)) {
2492                int     tmp_logflags;   /* partial log flag return val */
2493
2494                ASSERT(cur == NULL);
2495                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2496                                &tmp_logflags, whichfork);
2497                *logflagsp |= tmp_logflags;
2498                if (error)
2499                        goto done;
2500        }
2501
2502        /* clear out the allocated field, done with it now in any case. */
2503        if (cur) {
2504                cur->bc_ino.allocated = 0;
2505                *curp = cur;
2506        }
2507
2508        xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2509done:
2510        *logflagsp |= rval;
2511        return error;
2512#undef  LEFT
2513#undef  RIGHT
2514#undef  PREV
2515}
2516
2517/*
2518 * Convert a hole to a delayed allocation.
2519 */
2520STATIC void
2521xfs_bmap_add_extent_hole_delay(
2522        xfs_inode_t             *ip,    /* incore inode pointer */
2523        int                     whichfork,
2524        struct xfs_iext_cursor  *icur,
2525        xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2526{
2527        struct xfs_ifork        *ifp;   /* inode fork pointer */
2528        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2529        xfs_filblks_t           newlen=0;       /* new indirect size */
2530        xfs_filblks_t           oldlen=0;       /* old indirect size */
2531        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2532        int                     state = xfs_bmap_fork_to_state(whichfork);
2533        xfs_filblks_t           temp;    /* temp for indirect calculations */
2534
2535        ifp = XFS_IFORK_PTR(ip, whichfork);
2536        ASSERT(isnullstartblock(new->br_startblock));
2537
2538        /*
2539         * Check and set flags if this segment has a left neighbor
2540         */
2541        if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2542                state |= BMAP_LEFT_VALID;
2543                if (isnullstartblock(left.br_startblock))
2544                        state |= BMAP_LEFT_DELAY;
2545        }
2546
2547        /*
2548         * Check and set flags if the current (right) segment exists.
2549         * If it doesn't exist, we're converting the hole at end-of-file.
2550         */
2551        if (xfs_iext_get_extent(ifp, icur, &right)) {
2552                state |= BMAP_RIGHT_VALID;
2553                if (isnullstartblock(right.br_startblock))
2554                        state |= BMAP_RIGHT_DELAY;
2555        }
2556
2557        /*
2558         * Set contiguity flags on the left and right neighbors.
2559         * Don't let extents get too large, even if the pieces are contiguous.
2560         */
2561        if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2562            left.br_startoff + left.br_blockcount == new->br_startoff &&
2563            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2564                state |= BMAP_LEFT_CONTIG;
2565
2566        if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2567            new->br_startoff + new->br_blockcount == right.br_startoff &&
2568            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2569            (!(state & BMAP_LEFT_CONTIG) ||
2570             (left.br_blockcount + new->br_blockcount +
2571              right.br_blockcount <= MAXEXTLEN)))
2572                state |= BMAP_RIGHT_CONTIG;
2573
2574        /*
2575         * Switch out based on the contiguity flags.
2576         */
2577        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2578        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2579                /*
2580                 * New allocation is contiguous with delayed allocations
2581                 * on the left and on the right.
2582                 * Merge all three into a single extent record.
2583                 */
2584                temp = left.br_blockcount + new->br_blockcount +
2585                        right.br_blockcount;
2586
2587                oldlen = startblockval(left.br_startblock) +
2588                        startblockval(new->br_startblock) +
2589                        startblockval(right.br_startblock);
2590                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2591                                         oldlen);
2592                left.br_startblock = nullstartblock(newlen);
2593                left.br_blockcount = temp;
2594
2595                xfs_iext_remove(ip, icur, state);
2596                xfs_iext_prev(ifp, icur);
2597                xfs_iext_update_extent(ip, state, icur, &left);
2598                break;
2599
2600        case BMAP_LEFT_CONTIG:
2601                /*
2602                 * New allocation is contiguous with a delayed allocation
2603                 * on the left.
2604                 * Merge the new allocation with the left neighbor.
2605                 */
2606                temp = left.br_blockcount + new->br_blockcount;
2607
2608                oldlen = startblockval(left.br_startblock) +
2609                        startblockval(new->br_startblock);
2610                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2611                                         oldlen);
2612                left.br_blockcount = temp;
2613                left.br_startblock = nullstartblock(newlen);
2614
2615                xfs_iext_prev(ifp, icur);
2616                xfs_iext_update_extent(ip, state, icur, &left);
2617                break;
2618
2619        case BMAP_RIGHT_CONTIG:
2620                /*
2621                 * New allocation is contiguous with a delayed allocation
2622                 * on the right.
2623                 * Merge the new allocation with the right neighbor.
2624                 */
2625                temp = new->br_blockcount + right.br_blockcount;
2626                oldlen = startblockval(new->br_startblock) +
2627                        startblockval(right.br_startblock);
2628                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2629                                         oldlen);
2630                right.br_startoff = new->br_startoff;
2631                right.br_startblock = nullstartblock(newlen);
2632                right.br_blockcount = temp;
2633                xfs_iext_update_extent(ip, state, icur, &right);
2634                break;
2635
2636        case 0:
2637                /*
2638                 * New allocation is not contiguous with another
2639                 * delayed allocation.
2640                 * Insert a new entry.
2641                 */
2642                oldlen = newlen = 0;
2643                xfs_iext_insert(ip, icur, new, state);
2644                break;
2645        }
2646        if (oldlen != newlen) {
2647                ASSERT(oldlen > newlen);
2648                xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2649                                 false);
2650                /*
2651                 * Nothing to do for disk quota accounting here.
2652                 */
2653                xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2654        }
2655}
2656
2657/*
2658 * Convert a hole to a real allocation.
2659 */
2660STATIC int                              /* error */
2661xfs_bmap_add_extent_hole_real(
2662        struct xfs_trans        *tp,
2663        struct xfs_inode        *ip,
2664        int                     whichfork,
2665        struct xfs_iext_cursor  *icur,
2666        struct xfs_btree_cur    **curp,
2667        struct xfs_bmbt_irec    *new,
2668        int                     *logflagsp,
2669        int                     flags)
2670{
2671        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2672        struct xfs_mount        *mp = ip->i_mount;
2673        struct xfs_btree_cur    *cur = *curp;
2674        int                     error;  /* error return value */
2675        int                     i;      /* temp state */
2676        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2677        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2678        int                     rval=0; /* return value (logging flags) */
2679        int                     state = xfs_bmap_fork_to_state(whichfork);
2680        struct xfs_bmbt_irec    old;
2681
2682        ASSERT(!isnullstartblock(new->br_startblock));
2683        ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2684
2685        XFS_STATS_INC(mp, xs_add_exlist);
2686
2687        /*
2688         * Check and set flags if this segment has a left neighbor.
2689         */
2690        if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2691                state |= BMAP_LEFT_VALID;
2692                if (isnullstartblock(left.br_startblock))
2693                        state |= BMAP_LEFT_DELAY;
2694        }
2695
2696        /*
2697         * Check and set flags if this segment has a current value.
2698         * Not true if we're inserting into the "hole" at eof.
2699         */
2700        if (xfs_iext_get_extent(ifp, icur, &right)) {
2701                state |= BMAP_RIGHT_VALID;
2702                if (isnullstartblock(right.br_startblock))
2703                        state |= BMAP_RIGHT_DELAY;
2704        }
2705
2706        /*
2707         * We're inserting a real allocation between "left" and "right".
2708         * Set the contiguity flags.  Don't let extents get too large.
2709         */
2710        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2711            left.br_startoff + left.br_blockcount == new->br_startoff &&
2712            left.br_startblock + left.br_blockcount == new->br_startblock &&
2713            left.br_state == new->br_state &&
2714            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2715                state |= BMAP_LEFT_CONTIG;
2716
2717        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2718            new->br_startoff + new->br_blockcount == right.br_startoff &&
2719            new->br_startblock + new->br_blockcount == right.br_startblock &&
2720            new->br_state == right.br_state &&
2721            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2722            (!(state & BMAP_LEFT_CONTIG) ||
2723             left.br_blockcount + new->br_blockcount +
2724             right.br_blockcount <= MAXEXTLEN))
2725                state |= BMAP_RIGHT_CONTIG;
2726
2727        error = 0;
2728        /*
2729         * Select which case we're in here, and implement it.
2730         */
2731        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2732        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2733                /*
2734                 * New allocation is contiguous with real allocations on the
2735                 * left and on the right.
2736                 * Merge all three into a single extent record.
2737                 */
2738                left.br_blockcount += new->br_blockcount + right.br_blockcount;
2739
2740                xfs_iext_remove(ip, icur, state);
2741                xfs_iext_prev(ifp, icur);
2742                xfs_iext_update_extent(ip, state, icur, &left);
2743                ifp->if_nextents--;
2744
2745                if (cur == NULL) {
2746                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2747                } else {
2748                        rval = XFS_ILOG_CORE;
2749                        error = xfs_bmbt_lookup_eq(cur, &right, &i);
2750                        if (error)
2751                                goto done;
2752                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2753                                error = -EFSCORRUPTED;
2754                                goto done;
2755                        }
2756                        error = xfs_btree_delete(cur, &i);
2757                        if (error)
2758                                goto done;
2759                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2760                                error = -EFSCORRUPTED;
2761                                goto done;
2762                        }
2763                        error = xfs_btree_decrement(cur, 0, &i);
2764                        if (error)
2765                                goto done;
2766                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2767                                error = -EFSCORRUPTED;
2768                                goto done;
2769                        }
2770                        error = xfs_bmbt_update(cur, &left);
2771                        if (error)
2772                                goto done;
2773                }
2774                break;
2775
2776        case BMAP_LEFT_CONTIG:
2777                /*
2778                 * New allocation is contiguous with a real allocation
2779                 * on the left.
2780                 * Merge the new allocation with the left neighbor.
2781                 */
2782                old = left;
2783                left.br_blockcount += new->br_blockcount;
2784
2785                xfs_iext_prev(ifp, icur);
2786                xfs_iext_update_extent(ip, state, icur, &left);
2787
2788                if (cur == NULL) {
2789                        rval = xfs_ilog_fext(whichfork);
2790                } else {
2791                        rval = 0;
2792                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2793                        if (error)
2794                                goto done;
2795                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2796                                error = -EFSCORRUPTED;
2797                                goto done;
2798                        }
2799                        error = xfs_bmbt_update(cur, &left);
2800                        if (error)
2801                                goto done;
2802                }
2803                break;
2804
2805        case BMAP_RIGHT_CONTIG:
2806                /*
2807                 * New allocation is contiguous with a real allocation
2808                 * on the right.
2809                 * Merge the new allocation with the right neighbor.
2810                 */
2811                old = right;
2812
2813                right.br_startoff = new->br_startoff;
2814                right.br_startblock = new->br_startblock;
2815                right.br_blockcount += new->br_blockcount;
2816                xfs_iext_update_extent(ip, state, icur, &right);
2817
2818                if (cur == NULL) {
2819                        rval = xfs_ilog_fext(whichfork);
2820                } else {
2821                        rval = 0;
2822                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2823                        if (error)
2824                                goto done;
2825                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2826                                error = -EFSCORRUPTED;
2827                                goto done;
2828                        }
2829                        error = xfs_bmbt_update(cur, &right);
2830                        if (error)
2831                                goto done;
2832                }
2833                break;
2834
2835        case 0:
2836                /*
2837                 * New allocation is not contiguous with another
2838                 * real allocation.
2839                 * Insert a new entry.
2840                 */
2841                xfs_iext_insert(ip, icur, new, state);
2842                ifp->if_nextents++;
2843
2844                if (cur == NULL) {
2845                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2846                } else {
2847                        rval = XFS_ILOG_CORE;
2848                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2849                        if (error)
2850                                goto done;
2851                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2852                                error = -EFSCORRUPTED;
2853                                goto done;
2854                        }
2855                        error = xfs_btree_insert(cur, &i);
2856                        if (error)
2857                                goto done;
2858                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2859                                error = -EFSCORRUPTED;
2860                                goto done;
2861                        }
2862                }
2863                break;
2864        }
2865
2866        /* add reverse mapping unless caller opted out */
2867        if (!(flags & XFS_BMAPI_NORMAP))
2868                xfs_rmap_map_extent(tp, ip, whichfork, new);
2869
2870        /* convert to a btree if necessary */
2871        if (xfs_bmap_needs_btree(ip, whichfork)) {
2872                int     tmp_logflags;   /* partial log flag return val */
2873
2874                ASSERT(cur == NULL);
2875                error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2876                                &tmp_logflags, whichfork);
2877                *logflagsp |= tmp_logflags;
2878                cur = *curp;
2879                if (error)
2880                        goto done;
2881        }
2882
2883        /* clear out the allocated field, done with it now in any case. */
2884        if (cur)
2885                cur->bc_ino.allocated = 0;
2886
2887        xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2888done:
2889        *logflagsp |= rval;
2890        return error;
2891}
2892
2893/*
2894 * Functions used in the extent read, allocate and remove paths
2895 */
2896
2897/*
2898 * Adjust the size of the new extent based on i_extsize and rt extsize.
2899 */
2900int
2901xfs_bmap_extsize_align(
2902        xfs_mount_t     *mp,
2903        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
2904        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
2905        xfs_extlen_t    extsz,          /* align to this extent size */
2906        int             rt,             /* is this a realtime inode? */
2907        int             eof,            /* is extent at end-of-file? */
2908        int             delay,          /* creating delalloc extent? */
2909        int             convert,        /* overwriting unwritten extent? */
2910        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
2911        xfs_extlen_t    *lenp)          /* in/out: aligned length */
2912{
2913        xfs_fileoff_t   orig_off;       /* original offset */
2914        xfs_extlen_t    orig_alen;      /* original length */
2915        xfs_fileoff_t   orig_end;       /* original off+len */
2916        xfs_fileoff_t   nexto;          /* next file offset */
2917        xfs_fileoff_t   prevo;          /* previous file offset */
2918        xfs_fileoff_t   align_off;      /* temp for offset */
2919        xfs_extlen_t    align_alen;     /* temp for length */
2920        xfs_extlen_t    temp;           /* temp for calculations */
2921
2922        if (convert)
2923                return 0;
2924
2925        orig_off = align_off = *offp;
2926        orig_alen = align_alen = *lenp;
2927        orig_end = orig_off + orig_alen;
2928
2929        /*
2930         * If this request overlaps an existing extent, then don't
2931         * attempt to perform any additional alignment.
2932         */
2933        if (!delay && !eof &&
2934            (orig_off >= gotp->br_startoff) &&
2935            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2936                return 0;
2937        }
2938
2939        /*
2940         * If the file offset is unaligned vs. the extent size
2941         * we need to align it.  This will be possible unless
2942         * the file was previously written with a kernel that didn't
2943         * perform this alignment, or if a truncate shot us in the
2944         * foot.
2945         */
2946        div_u64_rem(orig_off, extsz, &temp);
2947        if (temp) {
2948                align_alen += temp;
2949                align_off -= temp;
2950        }
2951
2952        /* Same adjustment for the end of the requested area. */
2953        temp = (align_alen % extsz);
2954        if (temp)
2955                align_alen += extsz - temp;
2956
2957        /*
2958         * For large extent hint sizes, the aligned extent might be larger than
2959         * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
2960         * the length back under MAXEXTLEN. The outer allocation loops handle
2961         * short allocation just fine, so it is safe to do this. We only want to
2962         * do it when we are forced to, though, because it means more allocation
2963         * operations are required.
2964         */
2965        while (align_alen > MAXEXTLEN)
2966                align_alen -= extsz;
2967        ASSERT(align_alen <= MAXEXTLEN);
2968
2969        /*
2970         * If the previous block overlaps with this proposed allocation
2971         * then move the start forward without adjusting the length.
2972         */
2973        if (prevp->br_startoff != NULLFILEOFF) {
2974                if (prevp->br_startblock == HOLESTARTBLOCK)
2975                        prevo = prevp->br_startoff;
2976                else
2977                        prevo = prevp->br_startoff + prevp->br_blockcount;
2978        } else
2979                prevo = 0;
2980        if (align_off != orig_off && align_off < prevo)
2981                align_off = prevo;
2982        /*
2983         * If the next block overlaps with this proposed allocation
2984         * then move the start back without adjusting the length,
2985         * but not before offset 0.
2986         * This may of course make the start overlap previous block,
2987         * and if we hit the offset 0 limit then the next block
2988         * can still overlap too.
2989         */
2990        if (!eof && gotp->br_startoff != NULLFILEOFF) {
2991                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2992                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2993                        nexto = gotp->br_startoff + gotp->br_blockcount;
2994                else
2995                        nexto = gotp->br_startoff;
2996        } else
2997                nexto = NULLFILEOFF;
2998        if (!eof &&
2999            align_off + align_alen != orig_end &&
3000            align_off + align_alen > nexto)
3001                align_off = nexto > align_alen ? nexto - align_alen : 0;
3002        /*
3003         * If we're now overlapping the next or previous extent that
3004         * means we can't fit an extsz piece in this hole.  Just move
3005         * the start forward to the first valid spot and set
3006         * the length so we hit the end.
3007         */
3008        if (align_off != orig_off && align_off < prevo)
3009                align_off = prevo;
3010        if (align_off + align_alen != orig_end &&
3011            align_off + align_alen > nexto &&
3012            nexto != NULLFILEOFF) {
3013                ASSERT(nexto > prevo);
3014                align_alen = nexto - align_off;
3015        }
3016
3017        /*
3018         * If realtime, and the result isn't a multiple of the realtime
3019         * extent size we need to remove blocks until it is.
3020         */
3021        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3022                /*
3023                 * We're not covering the original request, or
3024                 * we won't be able to once we fix the length.
3025                 */
3026                if (orig_off < align_off ||
3027                    orig_end > align_off + align_alen ||
3028                    align_alen - temp < orig_alen)
3029                        return -EINVAL;
3030                /*
3031                 * Try to fix it by moving the start up.
3032                 */
3033                if (align_off + temp <= orig_off) {
3034                        align_alen -= temp;
3035                        align_off += temp;
3036                }
3037                /*
3038                 * Try to fix it by moving the end in.
3039                 */
3040                else if (align_off + align_alen - temp >= orig_end)
3041                        align_alen -= temp;
3042                /*
3043                 * Set the start to the minimum then trim the length.
3044                 */
3045                else {
3046                        align_alen -= orig_off - align_off;
3047                        align_off = orig_off;
3048                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
3049                }
3050                /*
3051                 * Result doesn't cover the request, fail it.
3052                 */
3053                if (orig_off < align_off || orig_end > align_off + align_alen)
3054                        return -EINVAL;
3055        } else {
3056                ASSERT(orig_off >= align_off);
3057                /* see MAXEXTLEN handling above */
3058                ASSERT(orig_end <= align_off + align_alen ||
3059                       align_alen + extsz > MAXEXTLEN);
3060        }
3061
3062#ifdef DEBUG
3063        if (!eof && gotp->br_startoff != NULLFILEOFF)
3064                ASSERT(align_off + align_alen <= gotp->br_startoff);
3065        if (prevp->br_startoff != NULLFILEOFF)
3066                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3067#endif
3068
3069        *lenp = align_alen;
3070        *offp = align_off;
3071        return 0;
3072}
3073
/*
 * Multiplier for the gap heuristic in xfs_bmap_adjacent(): the file-offset
 * gap to a neighbouring extent is only applied to the target block number
 * when it does not exceed this many times the requested allocation length.
 */
#define XFS_ALLOC_GAP_UNITS     4
3075
3076void
3077xfs_bmap_adjacent(
3078        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3079{
3080        xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3081        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3082        xfs_mount_t     *mp;            /* mount point structure */
3083        int             nullfb;         /* true if ap->firstblock isn't set */
3084        int             rt;             /* true if inode is realtime */
3085
3086#define ISVALID(x,y)    \
3087        (rt ? \
3088                (x) < mp->m_sb.sb_rblocks : \
3089                XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3090                XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3091                XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3092
3093        mp = ap->ip->i_mount;
3094        nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3095        rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3096                (ap->datatype & XFS_ALLOC_USERDATA);
3097        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3098                                                        ap->tp->t_firstblock);
3099        /*
3100         * If allocating at eof, and there's a previous real block,
3101         * try to use its last block as our starting point.
3102         */
3103        if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3104            !isnullstartblock(ap->prev.br_startblock) &&
3105            ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3106                    ap->prev.br_startblock)) {
3107                ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3108                /*
3109                 * Adjust for the gap between prevp and us.
3110                 */
3111                adjust = ap->offset -
3112                        (ap->prev.br_startoff + ap->prev.br_blockcount);
3113                if (adjust &&
3114                    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3115                        ap->blkno += adjust;
3116        }
3117        /*
3118         * If not at eof, then compare the two neighbor blocks.
3119         * Figure out whether either one gives us a good starting point,
3120         * and pick the better one.
3121         */
3122        else if (!ap->eof) {
3123                xfs_fsblock_t   gotbno;         /* right side block number */
3124                xfs_fsblock_t   gotdiff=0;      /* right side difference */
3125                xfs_fsblock_t   prevbno;        /* left side block number */
3126                xfs_fsblock_t   prevdiff=0;     /* left side difference */
3127
3128                /*
3129                 * If there's a previous (left) block, select a requested
3130                 * start block based on it.
3131                 */
3132                if (ap->prev.br_startoff != NULLFILEOFF &&
3133                    !isnullstartblock(ap->prev.br_startblock) &&
3134                    (prevbno = ap->prev.br_startblock +
3135                               ap->prev.br_blockcount) &&
3136                    ISVALID(prevbno, ap->prev.br_startblock)) {
3137                        /*
3138                         * Calculate gap to end of previous block.
3139                         */
3140                        adjust = prevdiff = ap->offset -
3141                                (ap->prev.br_startoff +
3142                                 ap->prev.br_blockcount);
3143                        /*
3144                         * Figure the startblock based on the previous block's
3145                         * end and the gap size.
3146                         * Heuristic!
3147                         * If the gap is large relative to the piece we're
3148                         * allocating, or using it gives us an invalid block
3149                         * number, then just use the end of the previous block.
3150                         */
3151                        if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3152                            ISVALID(prevbno + prevdiff,
3153                                    ap->prev.br_startblock))
3154                                prevbno += adjust;
3155                        else
3156                                prevdiff += adjust;
3157                        /*
3158                         * If the firstblock forbids it, can't use it,
3159                         * must use default.
3160                         */
3161                        if (!rt && !nullfb &&
3162                            XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3163                                prevbno = NULLFSBLOCK;
3164                }
3165                /*
3166                 * No previous block or can't follow it, just default.
3167                 */
3168                else
3169                        prevbno = NULLFSBLOCK;
3170                /*
3171                 * If there's a following (right) block, select a requested
3172                 * start block based on it.
3173                 */
3174                if (!isnullstartblock(ap->got.br_startblock)) {
3175                        /*
3176                         * Calculate gap to start of next block.
3177                         */
3178                        adjust = gotdiff = ap->got.br_startoff - ap->offset;
3179                        /*
3180                         * Figure the startblock based on the next block's
3181                         * start and the gap size.
3182                         */
3183                        gotbno = ap->got.br_startblock;
3184                        /*
3185                         * Heuristic!
3186                         * If the gap is large relative to the piece we're
3187                         * allocating, or using it gives us an invalid block
3188                         * number, then just use the start of the next block
3189                         * offset by our length.
3190                         */
3191                        if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3192                            ISVALID(gotbno - gotdiff, gotbno))
3193                                gotbno -= adjust;
3194                        else if (ISVALID(gotbno - ap->length, gotbno)) {
3195                                gotbno -= ap->length;
3196                                gotdiff += adjust - ap->length;
3197                        } else
3198                                gotdiff += adjust;
3199                        /*
3200                         * If the firstblock forbids it, can't use it,
3201                         * must use default.
3202                         */
3203                        if (!rt && !nullfb &&
3204                            XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3205                                gotbno = NULLFSBLOCK;
3206                }
3207                /*
3208                 * No next block, just default.
3209                 */
3210                else
3211                        gotbno = NULLFSBLOCK;
3212                /*
3213                 * If both valid, pick the better one, else the only good
3214                 * one, else ap->blkno is already set (to 0 or the inode block).
3215                 */
3216                if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3217                        ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3218                else if (prevbno != NULLFSBLOCK)
3219                        ap->blkno = prevbno;
3220                else if (gotbno != NULLFSBLOCK)
3221                        ap->blkno = gotbno;
3222        }
3223#undef ISVALID
3224}
3225
3226static int
3227xfs_bmap_longest_free_extent(
3228        struct xfs_trans        *tp,
3229        xfs_agnumber_t          ag,
3230        xfs_extlen_t            *blen,
3231        int                     *notinit)
3232{
3233        struct xfs_mount        *mp = tp->t_mountp;
3234        struct xfs_perag        *pag;
3235        xfs_extlen_t            longest;
3236        int                     error = 0;
3237
3238        pag = xfs_perag_get(mp, ag);
3239        if (!pag->pagf_init) {
3240                error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3241                if (error) {
3242                        /* Couldn't lock the AGF, so skip this AG. */
3243                        if (error == -EAGAIN) {
3244                                *notinit = 1;
3245                                error = 0;
3246                        }
3247                        goto out;
3248                }
3249        }
3250
3251        longest = xfs_alloc_longest_free_extent(pag,
3252                                xfs_alloc_min_freelist(mp, pag),
3253                                xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3254        if (*blen < longest)
3255                *blen = longest;
3256
3257out:
3258        xfs_perag_put(pag);
3259        return error;
3260}
3261
3262static void
3263xfs_bmap_select_minlen(
3264        struct xfs_bmalloca     *ap,
3265        struct xfs_alloc_arg    *args,
3266        xfs_extlen_t            *blen,
3267        int                     notinit)
3268{
3269        if (notinit || *blen < ap->minlen) {
3270                /*
3271                 * Since we did a BUF_TRYLOCK above, it is possible that
3272                 * there is space for this request.
3273                 */
3274                args->minlen = ap->minlen;
3275        } else if (*blen < args->maxlen) {
3276                /*
3277                 * If the best seen length is less than the request length,
3278                 * use the best as the minimum.
3279                 */
3280                args->minlen = *blen;
3281        } else {
3282                /*
3283                 * Otherwise we've seen an extent as big as maxlen, use that
3284                 * as the minimum.
3285                 */
3286                args->minlen = args->maxlen;
3287        }
3288}
3289
3290STATIC int
3291xfs_bmap_btalloc_nullfb(
3292        struct xfs_bmalloca     *ap,
3293        struct xfs_alloc_arg    *args,
3294        xfs_extlen_t            *blen)
3295{
3296        struct xfs_mount        *mp = ap->ip->i_mount;
3297        xfs_agnumber_t          ag, startag;
3298        int                     notinit = 0;
3299        int                     error;
3300
3301        args->type = XFS_ALLOCTYPE_START_BNO;
3302        args->total = ap->total;
3303
3304        startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3305        if (startag == NULLAGNUMBER)
3306                startag = ag = 0;
3307
3308        while (*blen < args->maxlen) {
3309                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3310                                                     &notinit);
3311                if (error)
3312                        return error;
3313
3314                if (++ag == mp->m_sb.sb_agcount)
3315                        ag = 0;
3316                if (ag == startag)
3317                        break;
3318        }
3319
3320        xfs_bmap_select_minlen(ap, args, blen, notinit);
3321        return 0;
3322}
3323
3324STATIC int
3325xfs_bmap_btalloc_filestreams(
3326        struct xfs_bmalloca     *ap,
3327        struct xfs_alloc_arg    *args,
3328        xfs_extlen_t            *blen)
3329{
3330        struct xfs_mount        *mp = ap->ip->i_mount;
3331        xfs_agnumber_t          ag;
3332        int                     notinit = 0;
3333        int                     error;
3334
3335        args->type = XFS_ALLOCTYPE_NEAR_BNO;
3336        args->total = ap->total;
3337
3338        ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3339        if (ag == NULLAGNUMBER)
3340                ag = 0;
3341
3342        error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3343        if (error)
3344                return error;
3345
3346        if (*blen < args->maxlen) {
3347                error = xfs_filestream_new_ag(ap, &ag);
3348                if (error)
3349                        return error;
3350
3351                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3352                                                     &notinit);
3353                if (error)
3354                        return error;
3355
3356        }
3357
3358        xfs_bmap_select_minlen(ap, args, blen, notinit);
3359
3360        /*
3361         * Set the failure fallback case to look in the selected AG as stream
3362         * may have moved.
3363         */
3364        ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3365        return 0;
3366}
3367
3368/* Update all inode and quota accounting for the allocation we just did. */
3369static void
3370xfs_bmap_btalloc_accounting(
3371        struct xfs_bmalloca     *ap,
3372        struct xfs_alloc_arg    *args)
3373{
3374        if (ap->flags & XFS_BMAPI_COWFORK) {
3375                /*
3376                 * COW fork blocks are in-core only and thus are treated as
3377                 * in-core quota reservation (like delalloc blocks) even when
3378                 * converted to real blocks. The quota reservation is not
3379                 * accounted to disk until blocks are remapped to the data
3380                 * fork. So if these blocks were previously delalloc, we
3381                 * already have quota reservation and there's nothing to do
3382                 * yet.
3383                 */
3384                if (ap->wasdel) {
3385                        xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3386                        return;
3387                }
3388
3389                /*
3390                 * Otherwise, we've allocated blocks in a hole. The transaction
3391                 * has acquired in-core quota reservation for this extent.
3392                 * Rather than account these as real blocks, however, we reduce
3393                 * the transaction quota reservation based on the allocation.
3394                 * This essentially transfers the transaction quota reservation
3395                 * to that of a delalloc extent.
3396                 */
3397                ap->ip->i_delayed_blks += args->len;
3398                xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3399                                -(long)args->len);
3400                return;
3401        }
3402
3403        /* data/attr fork only */
3404        ap->ip->i_nblocks += args->len;
3405        xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3406        if (ap->wasdel) {
3407                ap->ip->i_delayed_blks -= args->len;
3408                xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3409        }
3410        xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3411                ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3412                args->len);
3413}
3414
3415static int
3416xfs_bmap_compute_alignments(
3417        struct xfs_bmalloca     *ap,
3418        struct xfs_alloc_arg    *args)
3419{
3420        struct xfs_mount        *mp = args->mp;
3421        xfs_extlen_t            align = 0; /* minimum allocation alignment */
3422        int                     stripe_align = 0;
3423
3424        /* stripe alignment for allocation is determined by mount parameters */
3425        if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3426                stripe_align = mp->m_swidth;
3427        else if (mp->m_dalign)
3428                stripe_align = mp->m_dalign;
3429
3430        if (ap->flags & XFS_BMAPI_COWFORK)
3431                align = xfs_get_cowextsz_hint(ap->ip);
3432        else if (ap->datatype & XFS_ALLOC_USERDATA)
3433                align = xfs_get_extsz_hint(ap->ip);
3434        if (align) {
3435                if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
3436                                        ap->eof, 0, ap->conv, &ap->offset,
3437                                        &ap->length))
3438                        ASSERT(0);
3439                ASSERT(ap->length);
3440        }
3441
3442        /* apply extent size hints if obtained earlier */
3443        if (align) {
3444                args->prod = align;
3445                div_u64_rem(ap->offset, args->prod, &args->mod);
3446                if (args->mod)
3447                        args->mod = args->prod - args->mod;
3448        } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3449                args->prod = 1;
3450                args->mod = 0;
3451        } else {
3452                args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3453                div_u64_rem(ap->offset, args->prod, &args->mod);
3454                if (args->mod)
3455                        args->mod = args->prod - args->mod;
3456        }
3457
3458        return stripe_align;
3459}
3460
3461static void
3462xfs_bmap_process_allocated_extent(
3463        struct xfs_bmalloca     *ap,
3464        struct xfs_alloc_arg    *args,
3465        xfs_fileoff_t           orig_offset,
3466        xfs_extlen_t            orig_length)
3467{
3468        int                     nullfb;
3469
3470        nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3471
3472        /*
3473         * check the allocation happened at the same or higher AG than
3474         * the first block that was allocated.
3475         */
3476        ASSERT(nullfb ||
3477                XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
3478                XFS_FSB_TO_AGNO(args->mp, args->fsbno));
3479
3480        ap->blkno = args->fsbno;
3481        if (nullfb)
3482                ap->tp->t_firstblock = args->fsbno;
3483        ap->length = args->len;
3484        /*
3485         * If the extent size hint is active, we tried to round the
3486         * caller's allocation request offset down to extsz and the
3487         * length up to another extsz boundary.  If we found a free
3488         * extent we mapped it in starting at this new offset.  If the
3489         * newly mapped space isn't long enough to cover any of the
3490         * range of offsets that was originally requested, move the
3491         * mapping up so that we can fill as much of the caller's
3492         * original request as possible.  Free space is apparently
3493         * very fragmented so we're unlikely to be able to satisfy the
3494         * hints anyway.
3495         */
3496        if (ap->length <= orig_length)
3497                ap->offset = orig_offset;
3498        else if (ap->offset + ap->length < orig_offset + orig_length)
3499                ap->offset = orig_offset + orig_length - ap->length;
3500        xfs_bmap_btalloc_accounting(ap, args);
3501}
3502
#ifdef DEBUG
/*
 * Debug-only allocation path that asks the allocator for an extent of
 * exactly ap->minlen blocks (args.alloc_minlen_only is set).
 *
 * Only requests with ap->minlen == 1 are attempted; for any other minlen,
 * or when the allocator finds nothing, ap->blkno is set to NULLFSBLOCK and
 * ap->length to 0 and 0 is returned.  An error from xfs_alloc_vextent() is
 * returned as is.
 */
static int
xfs_bmap_exact_minlen_extent_alloc(
	struct xfs_bmalloca	*ap)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	struct xfs_alloc_arg	args = {
		.tp			= ap->tp,
		.mp			= mp,
		.type			= XFS_ALLOCTYPE_FIRST_AG,
		.alignment		= 1,
		.minalignslop		= 0,
		.resv			= XFS_AG_RESV_NONE,
		.alloc_minlen_only	= 1,
	};
	xfs_fileoff_t		orig_offset;
	xfs_extlen_t		orig_length;
	int			error;

	ASSERT(ap->length);

	if (ap->minlen != 1)
		goto no_extent;

	/* Remember the request before alignment may rewrite it. */
	orig_offset = ap->offset;
	orig_length = ap->length;

	xfs_bmap_compute_alignments(ap, &args);

	/*
	 * Unlike the longest extent available in an AG, the length of an
	 * AG's shortest extent is not tracked.
	 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob, so when
	 * no first block is set we can afford to start the search at AG 0
	 * without worrying about performance.
	 */
	if (ap->tp->t_firstblock == NULLFSBLOCK)
		ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
	else
		ap->blkno = ap->tp->t_firstblock;

	args.fsbno = ap->blkno;
	args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	args.minlen = args.maxlen = ap->minlen;
	args.total = ap->total;
	args.minleft = ap->minleft;
	args.wasdel = ap->wasdel;
	args.datatype = ap->datatype;

	error = xfs_alloc_vextent(&args);
	if (error)
		return error;

	if (args.fsbno == NULLFSBLOCK)
		goto no_extent;

	xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
		orig_length);
	return 0;

no_extent:
	/* Report "nothing allocated" to the caller. */
	ap->blkno = NULLFSBLOCK;
	ap->length = 0;
	return 0;
}
#else

/* Without DEBUG this path does not exist; callers get -EFSCORRUPTED. */
#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)

#endif
3576
3577STATIC int
3578xfs_bmap_btalloc(
3579        struct xfs_bmalloca     *ap)
3580{
3581        struct xfs_mount        *mp = ap->ip->i_mount;
3582        struct xfs_alloc_arg    args = { .tp = ap->tp, .mp = mp };
3583        xfs_alloctype_t         atype = 0;
3584        xfs_agnumber_t          fb_agno;        /* ag number of ap->firstblock */
3585        xfs_agnumber_t          ag;
3586        xfs_fileoff_t           orig_offset;
3587        xfs_extlen_t            orig_length;
3588        xfs_extlen_t            blen;
3589        xfs_extlen_t            nextminlen = 0;
3590        int                     nullfb; /* true if ap->firstblock isn't set */
3591        int                     isaligned;
3592        int                     tryagain;
3593        int                     error;
3594        int                     stripe_align;
3595
3596        ASSERT(ap->length);
3597        orig_offset = ap->offset;
3598        orig_length = ap->length;
3599
3600        stripe_align = xfs_bmap_compute_alignments(ap, &args);
3601
3602        nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3603        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3604                                                        ap->tp->t_firstblock);
3605        if (nullfb) {
3606                if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3607                    xfs_inode_is_filestream(ap->ip)) {
3608                        ag = xfs_filestream_lookup_ag(ap->ip);
3609                        ag = (ag != NULLAGNUMBER) ? ag : 0;
3610                        ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3611                } else {
3612                        ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3613                }
3614        } else
3615                ap->blkno = ap->tp->t_firstblock;
3616
3617        xfs_bmap_adjacent(ap);
3618
3619        /*
3620         * If allowed, use ap->blkno; otherwise must use firstblock since
3621         * it's in the right allocation group.
3622         */
3623        if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3624                ;
3625        else
3626                ap->blkno = ap->tp->t_firstblock;
3627        /*
3628         * Normal allocation, done through xfs_alloc_vextent.
3629         */
3630        tryagain = isaligned = 0;
3631        args.fsbno = ap->blkno;
3632        args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3633
3634        /* Trim the allocation back to the maximum an AG can fit. */
3635        args.maxlen = min(ap->length, mp->m_ag_max_usable);
3636        blen = 0;
3637        if (nullfb) {
3638                /*
3639                 * Search for an allocation group with a single extent large
3640                 * enough for the request.  If one isn't found, then adjust
3641                 * the minimum allocation size to the largest space found.
3642                 */
3643                if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3644                    xfs_inode_is_filestream(ap->ip))
3645                        error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3646                else
3647                        error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3648                if (error)
3649                        return error;
3650        } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3651                if (xfs_inode_is_filestream(ap->ip))
3652                        args.type = XFS_ALLOCTYPE_FIRST_AG;
3653                else
3654                        args.type = XFS_ALLOCTYPE_START_BNO;
3655                args.total = args.minlen = ap->minlen;
3656        } else {
3657                args.type = XFS_ALLOCTYPE_NEAR_BNO;
3658                args.total = ap->total;
3659                args.minlen = ap->minlen;
3660        }
3661
3662        /*
3663         * If we are not low on available data blocks, and the underlying
3664         * logical volume manager is a stripe, and the file offset is zero then
3665         * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3666         * is only set if the allocation length is >= the stripe unit and the
3667         * allocation offset is at the end of file.
3668         */
3669        if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3670                if (!ap->offset) {
3671                        args.alignment = stripe_align;
3672                        atype = args.type;
3673                        isaligned = 1;
3674                        /*
3675                         * Adjust minlen to try and preserve alignment if we
3676                         * can't guarantee an aligned maxlen extent.
3677                         */
3678                        if (blen > args.alignment &&
3679                            blen <= args.maxlen + args.alignment)
3680                                args.minlen = blen - args.alignment;
3681                        args.minalignslop = 0;
3682                } else {
3683                        /*
3684                         * First try an exact bno allocation.
3685                         * If it fails then do a near or start bno
3686                         * allocation with alignment turned on.
3687                         */
3688                        atype = args.type;
3689                        tryagain = 1;
3690                        args.type = XFS_ALLOCTYPE_THIS_BNO;
3691                        args.alignment = 1;
3692                        /*
3693                         * Compute the minlen+alignment for the
3694                         * next case.  Set slop so that the value
3695                         * of minlen+alignment+slop doesn't go up
3696                         * between the calls.
3697                         */
3698                        if (blen > stripe_align && blen <= args.maxlen)
3699                                nextminlen = blen - stripe_align;
3700                        else
3701                                nextminlen = args.minlen;
3702                        if (nextminlen + stripe_align > args.minlen + 1)
3703                                args.minalignslop =
3704                                        nextminlen + stripe_align -
3705                                        args.minlen - 1;
3706                        else
3707                                args.minalignslop = 0;
3708                }
3709        } else {
3710                args.alignment = 1;
3711                args.minalignslop = 0;
3712        }
3713        args.minleft = ap->minleft;
3714        args.wasdel = ap->wasdel;
3715        args.resv = XFS_AG_RESV_NONE;
3716        args.datatype = ap->datatype;
3717
3718        error = xfs_alloc_vextent(&args);
3719        if (error)
3720                return error;
3721
3722        if (tryagain && args.fsbno == NULLFSBLOCK) {
3723                /*
3724                 * Exact allocation failed. Now try with alignment
3725                 * turned on.
3726                 */
3727                args.type = atype;
3728                args.fsbno = ap->blkno;
3729                args.alignment = stripe_align;
3730                args.minlen = nextminlen;
3731                args.minalignslop = 0;
3732                isaligned = 1;
3733                if ((error = xfs_alloc_vextent(&args)))
3734                        return error;
3735        }
3736        if (isaligned && args.fsbno == NULLFSBLOCK) {
3737                /*
3738                 * allocation failed, so turn off alignment and
3739                 * try again.
3740                 */
3741                args.type = atype;
3742                args.fsbno = ap->blkno;
3743                args.alignment = 0;
3744                if ((error = xfs_alloc_vextent(&args)))
3745                        return error;
3746        }
3747        if (args.fsbno == NULLFSBLOCK && nullfb &&
3748            args.minlen > ap->minlen) {
3749                args.minlen = ap->minlen;
3750                args.type = XFS_ALLOCTYPE_START_BNO;
3751                args.fsbno = ap->blkno;
3752                if ((error = xfs_alloc_vextent(&args)))
3753                        return error;
3754        }
3755        if (args.fsbno == NULLFSBLOCK && nullfb) {
3756                args.fsbno = 0;
3757                args.type = XFS_ALLOCTYPE_FIRST_AG;
3758                args.total = ap->minlen;
3759                if ((error = xfs_alloc_vextent(&args)))
3760                        return error;
3761                ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3762        }
3763
3764        if (args.fsbno != NULLFSBLOCK) {
3765                xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
3766                        orig_length);
3767        } else {
3768                ap->blkno = NULLFSBLOCK;
3769                ap->length = 0;
3770        }
3771        return 0;
3772}
3773
3774/* Trim extent to fit a logical block range. */
3775void
3776xfs_trim_extent(
3777        struct xfs_bmbt_irec    *irec,
3778        xfs_fileoff_t           bno,
3779        xfs_filblks_t           len)
3780{
3781        xfs_fileoff_t           distance;
3782        xfs_fileoff_t           end = bno + len;
3783
3784        if (irec->br_startoff + irec->br_blockcount <= bno ||
3785            irec->br_startoff >= end) {
3786                irec->br_blockcount = 0;
3787                return;
3788        }
3789
3790        if (irec->br_startoff < bno) {
3791                distance = bno - irec->br_startoff;
3792                if (isnullstartblock(irec->br_startblock))
3793                        irec->br_startblock = DELAYSTARTBLOCK;
3794                if (irec->br_startblock != DELAYSTARTBLOCK &&
3795                    irec->br_startblock != HOLESTARTBLOCK)
3796                        irec->br_startblock += distance;
3797                irec->br_startoff += distance;
3798                irec->br_blockcount -= distance;
3799        }
3800
3801        if (end < irec->br_startoff + irec->br_blockcount) {
3802                distance = irec->br_startoff + irec->br_blockcount - end;
3803                irec->br_blockcount -= distance;
3804        }
3805}
3806
3807/*
3808 * Trim the returned map to the required bounds
3809 */
3810STATIC void
3811xfs_bmapi_trim_map(
3812        struct xfs_bmbt_irec    *mval,
3813        struct xfs_bmbt_irec    *got,
3814        xfs_fileoff_t           *bno,
3815        xfs_filblks_t           len,
3816        xfs_fileoff_t           obno,
3817        xfs_fileoff_t           end,
3818        int                     n,
3819        int                     flags)
3820{
3821        if ((flags & XFS_BMAPI_ENTIRE) ||
3822            got->br_startoff + got->br_blockcount <= obno) {
3823                *mval = *got;
3824                if (isnullstartblock(got->br_startblock))
3825                        mval->br_startblock = DELAYSTARTBLOCK;
3826                return;
3827        }
3828
3829        if (obno > *bno)
3830                *bno = obno;
3831        ASSERT((*bno >= obno) || (n == 0));
3832        ASSERT(*bno < end);
3833        mval->br_startoff = *bno;
3834        if (isnullstartblock(got->br_startblock))
3835                mval->br_startblock = DELAYSTARTBLOCK;
3836        else
3837                mval->br_startblock = got->br_startblock +
3838                                        (*bno - got->br_startoff);
3839        /*
3840         * Return the minimum of what we got and what we asked for for
3841         * the length.  We can use the len variable here because it is
3842         * modified below and we could have been there before coming
3843         * here if the first part of the allocation didn't overlap what
3844         * was asked for.
3845         */
3846        mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3847                        got->br_blockcount - (*bno - got->br_startoff));
3848        mval->br_state = got->br_state;
3849        ASSERT(mval->br_blockcount <= len);
3850        return;
3851}
3852
3853/*
3854 * Update and validate the extent map to return
3855 */
3856STATIC void
3857xfs_bmapi_update_map(
3858        struct xfs_bmbt_irec    **map,
3859        xfs_fileoff_t           *bno,
3860        xfs_filblks_t           *len,
3861        xfs_fileoff_t           obno,
3862        xfs_fileoff_t           end,
3863        int                     *n,
3864        int                     flags)
3865{
3866        xfs_bmbt_irec_t *mval = *map;
3867
3868        ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3869               ((mval->br_startoff + mval->br_blockcount) <= end));
3870        ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3871               (mval->br_startoff < obno));
3872
3873        *bno = mval->br_startoff + mval->br_blockcount;
3874        *len = end - *bno;
3875        if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3876                /* update previous map with new information */
3877                ASSERT(mval->br_startblock == mval[-1].br_startblock);
3878                ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3879                ASSERT(mval->br_state == mval[-1].br_state);
3880                mval[-1].br_blockcount = mval->br_blockcount;
3881                mval[-1].br_state = mval->br_state;
3882        } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3883                   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3884                   mval[-1].br_startblock != HOLESTARTBLOCK &&
3885                   mval->br_startblock == mval[-1].br_startblock +
3886                                          mval[-1].br_blockcount &&
3887                   mval[-1].br_state == mval->br_state) {
3888                ASSERT(mval->br_startoff ==
3889                       mval[-1].br_startoff + mval[-1].br_blockcount);
3890                mval[-1].br_blockcount += mval->br_blockcount;
3891        } else if (*n > 0 &&
3892                   mval->br_startblock == DELAYSTARTBLOCK &&
3893                   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3894                   mval->br_startoff ==
3895                   mval[-1].br_startoff + mval[-1].br_blockcount) {
3896                mval[-1].br_blockcount += mval->br_blockcount;
3897                mval[-1].br_state = mval->br_state;
3898        } else if (!((*n == 0) &&
3899                     ((mval->br_startoff + mval->br_blockcount) <=
3900                      obno))) {
3901                mval++;
3902                (*n)++;
3903        }
3904        *map = mval;
3905}
3906
3907/*
3908 * Map file blocks to filesystem blocks without allocation.
3909 */
3910int
3911xfs_bmapi_read(
3912        struct xfs_inode        *ip,
3913        xfs_fileoff_t           bno,
3914        xfs_filblks_t           len,
3915        struct xfs_bmbt_irec    *mval,
3916        int                     *nmap,
3917        int                     flags)
3918{
3919        struct xfs_mount        *mp = ip->i_mount;
3920        int                     whichfork = xfs_bmapi_whichfork(flags);
3921        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3922        struct xfs_bmbt_irec    got;
3923        xfs_fileoff_t           obno;
3924        xfs_fileoff_t           end;
3925        struct xfs_iext_cursor  icur;
3926        int                     error;
3927        bool                    eof = false;
3928        int                     n = 0;
3929
3930        ASSERT(*nmap >= 1);
3931        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3932        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3933
3934        if (WARN_ON_ONCE(!ifp))
3935                return -EFSCORRUPTED;
3936
3937        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3938            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3939                return -EFSCORRUPTED;
3940
3941        if (XFS_FORCED_SHUTDOWN(mp))
3942                return -EIO;
3943
3944        XFS_STATS_INC(mp, xs_blk_mapr);
3945
3946        error = xfs_iread_extents(NULL, ip, whichfork);
3947        if (error)
3948                return error;
3949
3950        if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3951                eof = true;
3952        end = bno + len;
3953        obno = bno;
3954
3955        while (bno < end && n < *nmap) {
3956                /* Reading past eof, act as though there's a hole up to end. */
3957                if (eof)
3958                        got.br_startoff = end;
3959                if (got.br_startoff > bno) {
3960                        /* Reading in a hole.  */
3961                        mval->br_startoff = bno;
3962                        mval->br_startblock = HOLESTARTBLOCK;
3963                        mval->br_blockcount =
3964                                XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3965                        mval->br_state = XFS_EXT_NORM;
3966                        bno += mval->br_blockcount;
3967                        len -= mval->br_blockcount;
3968                        mval++;
3969                        n++;
3970                        continue;
3971                }
3972
3973                /* set up the extent map to return. */
3974                xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3975                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3976
3977                /* If we're done, stop now. */
3978                if (bno >= end || n >= *nmap)
3979                        break;
3980
3981                /* Else go on to the next record. */
3982                if (!xfs_iext_next_extent(ifp, &icur, &got))
3983                        eof = true;
3984        }
3985        *nmap = n;
3986        return 0;
3987}
3988
3989/*
3990 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3991 * global pool and the extent inserted into the inode in-core extent tree.
3992 *
3993 * On entry, got refers to the first extent beyond the offset of the extent to
3994 * allocate or eof is specified if no such extent exists. On return, got refers
3995 * to the extent record that was inserted to the inode fork.
3996 *
3997 * Note that the allocated extent may have been merged with contiguous extents
3998 * during insertion into the inode fork. Thus, got does not reflect the current
3999 * state of the inode fork on return. If necessary, the caller can use lastx to
4000 * look up the updated record in the inode fork.
4001 */
4002int
4003xfs_bmapi_reserve_delalloc(
4004        struct xfs_inode        *ip,
4005        int                     whichfork,
4006        xfs_fileoff_t           off,
4007        xfs_filblks_t           len,
4008        xfs_filblks_t           prealloc,
4009        struct xfs_bmbt_irec    *got,
4010        struct xfs_iext_cursor  *icur,
4011        int                     eof)
4012{
4013        struct xfs_mount        *mp = ip->i_mount;
4014        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4015        xfs_extlen_t            alen;
4016        xfs_extlen_t            indlen;
4017        int                     error;
4018        xfs_fileoff_t           aoff = off;
4019
4020        /*
4021         * Cap the alloc length. Keep track of prealloc so we know whether to
4022         * tag the inode before we return.
4023         */
4024        alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
4025        if (!eof)
4026                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4027        if (prealloc && alen >= len)
4028                prealloc = alen - len;
4029
4030        /* Figure out the extent size, adjust alen */
4031        if (whichfork == XFS_COW_FORK) {
4032                struct xfs_bmbt_irec    prev;
4033                xfs_extlen_t            extsz = xfs_get_cowextsz_hint(ip);
4034
4035                if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4036                        prev.br_startoff = NULLFILEOFF;
4037
4038                error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4039                                               1, 0, &aoff, &alen);
4040                ASSERT(!error);
4041        }
4042
4043        /*
4044         * Make a transaction-less quota reservation for delayed allocation
4045         * blocks.  This number gets adjusted later.  We return if we haven't
4046         * allocated blocks already inside this loop.
4047         */
4048        error = xfs_quota_reserve_blkres(ip, alen);
4049        if (error)
4050                return error;
4051
4052        /*
4053         * Split changing sb for alen and indlen since they could be coming
4054         * from different places.
4055         */
4056        indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4057        ASSERT(indlen > 0);
4058
4059        error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4060        if (error)
4061                goto out_unreserve_quota;
4062
4063        error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4064        if (error)
4065                goto out_unreserve_blocks;
4066
4067
4068        ip->i_delayed_blks += alen;
4069        xfs_mod_delalloc(ip->i_mount, alen + indlen);
4070
4071        got->br_startoff = aoff;
4072        got->br_startblock = nullstartblock(indlen);
4073        got->br_blockcount = alen;
4074        got->br_state = XFS_EXT_NORM;
4075
4076        xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4077
4078        /*
4079         * Tag the inode if blocks were preallocated. Note that COW fork
4080         * preallocation can occur at the start or end of the extent, even when
4081         * prealloc == 0, so we must also check the aligned offset and length.
4082         */
4083        if (whichfork == XFS_DATA_FORK && prealloc)
4084                xfs_inode_set_eofblocks_tag(ip);
4085        if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4086                xfs_inode_set_cowblocks_tag(ip);
4087
4088        return 0;
4089
4090out_unreserve_blocks:
4091        xfs_mod_fdblocks(mp, alen, false);
4092out_unreserve_quota:
4093        if (XFS_IS_QUOTA_ON(mp))
4094                xfs_quota_unreserve_blkres(ip, alen);
4095        return error;
4096}
4097
4098static int
4099xfs_bmap_alloc_userdata(
4100        struct xfs_bmalloca     *bma)
4101{
4102        struct xfs_mount        *mp = bma->ip->i_mount;
4103        int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4104        int                     error;
4105
4106        /*
4107         * Set the data type being allocated. For the data fork, the first data
4108         * in the file is treated differently to all other allocations. For the
4109         * attribute fork, we only need to ensure the allocated range is not on
4110         * the busy list.
4111         */
4112        bma->datatype = XFS_ALLOC_NOBUSY;
4113        if (whichfork == XFS_DATA_FORK) {
4114                bma->datatype |= XFS_ALLOC_USERDATA;
4115                if (bma->offset == 0)
4116                        bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4117
4118                if (mp->m_dalign && bma->length >= mp->m_dalign) {
4119                        error = xfs_bmap_isaeof(bma, whichfork);
4120                        if (error)
4121                                return error;
4122                }
4123
4124                if (XFS_IS_REALTIME_INODE(bma->ip))
4125                        return xfs_bmap_rtalloc(bma);
4126        }
4127
4128        if (unlikely(XFS_TEST_ERROR(false, mp,
4129                        XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4130                return xfs_bmap_exact_minlen_extent_alloc(bma);
4131
4132        return xfs_bmap_btalloc(bma);
4133}
4134
4135static int
4136xfs_bmapi_allocate(
4137        struct xfs_bmalloca     *bma)
4138{
4139        struct xfs_mount        *mp = bma->ip->i_mount;
4140        int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4141        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4142        int                     tmp_logflags = 0;
4143        int                     error;
4144
4145        ASSERT(bma->length > 0);
4146
4147        /*
4148         * For the wasdelay case, we could also just allocate the stuff asked
4149         * for in this bmap call but that wouldn't be as good.
4150         */
4151        if (bma->wasdel) {
4152                bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4153                bma->offset = bma->got.br_startoff;
4154                if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4155                        bma->prev.br_startoff = NULLFILEOFF;
4156        } else {
4157                bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4158                if (!bma->eof)
4159                        bma->length = XFS_FILBLKS_MIN(bma->length,
4160                                        bma->got.br_startoff - bma->offset);
4161        }
4162
4163        if (bma->flags & XFS_BMAPI_CONTIG)
4164                bma->minlen = bma->length;
4165        else
4166                bma->minlen = 1;
4167
4168        if (bma->flags & XFS_BMAPI_METADATA) {
4169                if (unlikely(XFS_TEST_ERROR(false, mp,
4170                                XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
4171                        error = xfs_bmap_exact_minlen_extent_alloc(bma);
4172                else
4173                        error = xfs_bmap_btalloc(bma);
4174        } else {
4175                error = xfs_bmap_alloc_userdata(bma);
4176        }
4177        if (error || bma->blkno == NULLFSBLOCK)
4178                return error;
4179
4180        if (bma->flags & XFS_BMAPI_ZERO) {
4181                error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4182                if (error)
4183                        return error;
4184        }
4185
4186        if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
4187                bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4188        /*
4189         * Bump the number of extents we've allocated
4190         * in this call.
4191         */
4192        bma->nallocs++;
4193
4194        if (bma->cur)
4195                bma->cur->bc_ino.flags =
4196                        bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4197
4198        bma->got.br_startoff = bma->offset;
4199        bma->got.br_startblock = bma->blkno;
4200        bma->got.br_blockcount = bma->length;
4201        bma->got.br_state = XFS_EXT_NORM;
4202
4203        if (bma->flags & XFS_BMAPI_PREALLOC)
4204                bma->got.br_state = XFS_EXT_UNWRITTEN;
4205
4206        if (bma->wasdel)
4207                error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4208        else
4209                error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4210                                whichfork, &bma->icur, &bma->cur, &bma->got,
4211                                &bma->logflags, bma->flags);
4212
4213        bma->logflags |= tmp_logflags;
4214        if (error)
4215                return error;
4216
4217        /*
4218         * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4219         * or xfs_bmap_add_extent_hole_real might have merged it into one of
4220         * the neighbouring ones.
4221         */
4222        xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4223
4224        ASSERT(bma->got.br_startoff <= bma->offset);
4225        ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4226               bma->offset + bma->length);
4227        ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4228               bma->got.br_state == XFS_EXT_UNWRITTEN);
4229        return 0;
4230}
4231
4232STATIC int
4233xfs_bmapi_convert_unwritten(
4234        struct xfs_bmalloca     *bma,
4235        struct xfs_bmbt_irec    *mval,
4236        xfs_filblks_t           len,
4237        int                     flags)
4238{
4239        int                     whichfork = xfs_bmapi_whichfork(flags);
4240        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4241        int                     tmp_logflags = 0;
4242        int                     error;
4243
4244        /* check if we need to do unwritten->real conversion */
4245        if (mval->br_state == XFS_EXT_UNWRITTEN &&
4246            (flags & XFS_BMAPI_PREALLOC))
4247                return 0;
4248
4249        /* check if we need to do real->unwritten conversion */
4250        if (mval->br_state == XFS_EXT_NORM &&
4251            (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4252                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4253                return 0;
4254
4255        /*
4256         * Modify (by adding) the state flag, if writing.
4257         */
4258        ASSERT(mval->br_blockcount <= len);
4259        if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
4260                bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4261                                        bma->ip, whichfork);
4262        }
4263        mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4264                                ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4265
4266        /*
4267         * Before insertion into the bmbt, zero the range being converted
4268         * if required.
4269         */
4270        if (flags & XFS_BMAPI_ZERO) {
4271                error = xfs_zero_extent(bma->ip, mval->br_startblock,
4272                                        mval->br_blockcount);
4273                if (error)
4274                        return error;
4275        }
4276
4277        error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4278                        &bma->icur, &bma->cur, mval, &tmp_logflags);
4279        /*
4280         * Log the inode core unconditionally in the unwritten extent conversion
4281         * path because the conversion might not have done so (e.g., if the
4282         * extent count hasn't changed). We need to make sure the inode is dirty
4283         * in the transaction for the sake of fsync(), even if nothing has
4284         * changed, because fsync() will not force the log for this transaction
4285         * unless it sees the inode pinned.
4286         *
4287         * Note: If we're only converting cow fork extents, there aren't
4288         * any on-disk updates to make, so we don't need to log anything.
4289         */
4290        if (whichfork != XFS_COW_FORK)
4291                bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4292        if (error)
4293                return error;
4294
4295        /*
4296         * Update our extent pointer, given that
4297         * xfs_bmap_add_extent_unwritten_real might have merged it into one
4298         * of the neighbouring ones.
4299         */
4300        xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4301
4302        /*
4303         * We may have combined previously unwritten space with written space,
4304         * so generate another request.
4305         */
4306        if (mval->br_blockcount < len)
4307                return -EAGAIN;
4308        return 0;
4309}
4310
4311static inline xfs_extlen_t
4312xfs_bmapi_minleft(
4313        struct xfs_trans        *tp,
4314        struct xfs_inode        *ip,
4315        int                     fork)
4316{
4317        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, fork);
4318
4319        if (tp && tp->t_firstblock != NULLFSBLOCK)
4320                return 0;
4321        if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4322                return 1;
4323        return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4324}
4325
4326/*
4327 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4328 * a case where the data is changed, there's an error, and it's not logged so we
4329 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4330 * we converted to the other format.
4331 */
4332static void
4333xfs_bmapi_finish(
4334        struct xfs_bmalloca     *bma,
4335        int                     whichfork,
4336        int                     error)
4337{
4338        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4339
4340        if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4341            ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4342                bma->logflags &= ~xfs_ilog_fext(whichfork);
4343        else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4344                 ifp->if_format != XFS_DINODE_FMT_BTREE)
4345                bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4346
4347        if (bma->logflags)
4348                xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4349        if (bma->cur)
4350                xfs_btree_del_cursor(bma->cur, error);
4351}
4352
/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 */
4358 */
4359int
4360xfs_bmapi_write(
4361        struct xfs_trans        *tp,            /* transaction pointer */
4362        struct xfs_inode        *ip,            /* incore inode */
4363        xfs_fileoff_t           bno,            /* starting file offs. mapped */
4364        xfs_filblks_t           len,            /* length to map in file */
4365        int                     flags,          /* XFS_BMAPI_... */
4366        xfs_extlen_t            total,          /* total blocks needed */
4367        struct xfs_bmbt_irec    *mval,          /* output: map values */
4368        int                     *nmap)          /* i/o: mval size/count */
4369{
4370        struct xfs_bmalloca     bma = {
4371                .tp             = tp,
4372                .ip             = ip,
4373                .total          = total,
4374        };
4375        struct xfs_mount        *mp = ip->i_mount;
4376        int                     whichfork = xfs_bmapi_whichfork(flags);
4377        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4378        xfs_fileoff_t           end;            /* end of mapped file region */
4379        bool                    eof = false;    /* after the end of extents */
4380        int                     error;          /* error return */
4381        int                     n;              /* current extent index */
4382        xfs_fileoff_t           obno;           /* old block number (offset) */
4383
4384#ifdef DEBUG
4385        xfs_fileoff_t           orig_bno;       /* original block number value */
4386        int                     orig_flags;     /* original flags arg value */
4387        xfs_filblks_t           orig_len;       /* original value of len arg */
4388        struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4389        int                     orig_nmap;      /* original value of *nmap */
4390
4391        orig_bno = bno;
4392        orig_len = len;
4393        orig_flags = flags;
4394        orig_mval = mval;
4395        orig_nmap = *nmap;
4396#endif
4397
4398        ASSERT(*nmap >= 1);
4399        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4400        ASSERT(tp != NULL);
4401        ASSERT(len > 0);
4402        ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4403        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4404        ASSERT(!(flags & XFS_BMAPI_REMAP));
4405
4406        /* zeroing is for currently only for data extents, not metadata */
4407        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4408                        (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4409        /*
4410         * we can allocate unwritten extents or pre-zero allocated blocks,
4411         * but it makes no sense to do both at once. This would result in
4412         * zeroing the unwritten extent twice, but it still being an
4413         * unwritten extent....
4414         */
4415        ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4416                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4417
4418        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4419            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4420                return -EFSCORRUPTED;
4421        }
4422
4423        if (XFS_FORCED_SHUTDOWN(mp))
4424                return -EIO;
4425
4426        XFS_STATS_INC(mp, xs_blk_mapw);
4427
4428        error = xfs_iread_extents(tp, ip, whichfork);
4429        if (error)
4430                goto error0;
4431
4432        if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4433                eof = true;
4434        if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4435                bma.prev.br_startoff = NULLFILEOFF;
4436        bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4437
4438        n = 0;
4439        end = bno + len;
4440        obno = bno;
4441        while (bno < end && n < *nmap) {
4442                bool                    need_alloc = false, wasdelay = false;
4443
4444                /* in hole or beyond EOF? */
4445                if (eof || bma.got.br_startoff > bno) {
4446                        /*
4447                         * CoW fork conversions should /never/ hit EOF or
4448                         * holes.  There should always be something for us
4449                         * to work on.
4450                         */
4451                        ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4452                                 (flags & XFS_BMAPI_COWFORK)));
4453
4454                        need_alloc = true;
4455                } else if (isnullstartblock(bma.got.br_startblock)) {
4456                        wasdelay = true;
4457                }
4458
4459                /*
4460                 * First, deal with the hole before the allocated space
4461                 * that we found, if any.
4462                 */
4463                if (need_alloc || wasdelay) {
4464                        bma.eof = eof;
4465                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4466                        bma.wasdel = wasdelay;
4467                        bma.offset = bno;
4468                        bma.flags = flags;
4469
4470                        /*
4471                         * There's a 32/64 bit type mismatch between the
4472                         * allocation length request (which can be 64 bits in
4473                         * length) and the bma length request, which is
4474                         * xfs_extlen_t and therefore 32 bits. Hence we have to
4475                         * check for 32-bit overflows and handle them here.
4476                         */
4477                        if (len > (xfs_filblks_t)MAXEXTLEN)
4478                                bma.length = MAXEXTLEN;
4479                        else
4480                                bma.length = len;
4481
4482                        ASSERT(len > 0);
4483                        ASSERT(bma.length > 0);
4484                        error = xfs_bmapi_allocate(&bma);
4485                        if (error)
4486                                goto error0;
4487                        if (bma.blkno == NULLFSBLOCK)
4488                                break;
4489
4490                        /*
4491                         * If this is a CoW allocation, record the data in
4492                         * the refcount btree for orphan recovery.
4493                         */
4494                        if (whichfork == XFS_COW_FORK)
4495                                xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4496                                                bma.length);
4497                }
4498
4499                /* Deal with the allocated space we found.  */
4500                xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4501                                                        end, n, flags);
4502
4503                /* Execute unwritten extent conversion if necessary */
4504                error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4505                if (error == -EAGAIN)
4506                        continue;
4507                if (error)
4508                        goto error0;
4509
4510                /* update the extent map to return */
4511                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4512
4513                /*
4514                 * If we're done, stop now.  Stop when we've allocated
4515                 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4516                 * the transaction may get too big.
4517                 */
4518                if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4519                        break;
4520
4521                /* Else go on to the next record. */
4522                bma.prev = bma.got;
4523                if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4524                        eof = true;
4525        }
4526        *nmap = n;
4527
4528        error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4529                        whichfork);
4530        if (error)
4531                goto error0;
4532
4533        ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4534               ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4535        xfs_bmapi_finish(&bma, whichfork, 0);
4536        xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4537                orig_nmap, *nmap);
4538        return 0;
4539error0:
4540        xfs_bmapi_finish(&bma, whichfork, error);
4541        return error;
4542}
4543
4544/*
4545 * Convert an existing delalloc extent to real blocks based on file offset. This
4546 * attempts to allocate the entire delalloc extent and may require multiple
4547 * invocations to allocate the target offset if a large enough physical extent
4548 * is not available.
4549 */
4550int
4551xfs_bmapi_convert_delalloc(
4552        struct xfs_inode        *ip,
4553        int                     whichfork,
4554        xfs_off_t               offset,
4555        struct iomap            *iomap,
4556        unsigned int            *seq)
4557{
4558        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4559        struct xfs_mount        *mp = ip->i_mount;
4560        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
4561        struct xfs_bmalloca     bma = { NULL };
4562        uint16_t                flags = 0;
4563        struct xfs_trans        *tp;
4564        int                     error;
4565
4566        if (whichfork == XFS_COW_FORK)
4567                flags |= IOMAP_F_SHARED;
4568
4569        /*
4570         * Space for the extent and indirect blocks was reserved when the
4571         * delalloc extent was created so there's no need to do so here.
4572         */
4573        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4574                                XFS_TRANS_RESERVE, &tp);
4575        if (error)
4576                return error;
4577
4578        xfs_ilock(ip, XFS_ILOCK_EXCL);
4579
4580        error = xfs_iext_count_may_overflow(ip, whichfork,
4581                        XFS_IEXT_ADD_NOSPLIT_CNT);
4582        if (error)
4583                goto out_trans_cancel;
4584
4585        xfs_trans_ijoin(tp, ip, 0);
4586
4587        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4588            bma.got.br_startoff > offset_fsb) {
4589                /*
4590                 * No extent found in the range we are trying to convert.  This
4591                 * should only happen for the COW fork, where another thread
4592                 * might have moved the extent to the data fork in the meantime.
4593                 */
4594                WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4595                error = -EAGAIN;
4596                goto out_trans_cancel;
4597        }
4598
4599        /*
4600         * If we find a real extent here we raced with another thread converting
4601         * the extent.  Just return the real extent at this offset.
4602         */
4603        if (!isnullstartblock(bma.got.br_startblock)) {
4604                xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4605                *seq = READ_ONCE(ifp->if_seq);
4606                goto out_trans_cancel;
4607        }
4608
4609        bma.tp = tp;
4610        bma.ip = ip;
4611        bma.wasdel = true;
4612        bma.offset = bma.got.br_startoff;
4613        bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4614        bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4615
4616        /*
4617         * When we're converting the delalloc reservations backing dirty pages
4618         * in the page cache, we must be careful about how we create the new
4619         * extents:
4620         *
4621         * New CoW fork extents are created unwritten, turned into real extents
4622         * when we're about to write the data to disk, and mapped into the data
4623         * fork after the write finishes.  End of story.
4624         *
4625         * New data fork extents must be mapped in as unwritten and converted
4626         * to real extents after the write succeeds to avoid exposing stale
4627         * disk contents if we crash.
4628         */
4629        bma.flags = XFS_BMAPI_PREALLOC;
4630        if (whichfork == XFS_COW_FORK)
4631                bma.flags |= XFS_BMAPI_COWFORK;
4632
4633        if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4634                bma.prev.br_startoff = NULLFILEOFF;
4635
4636        error = xfs_bmapi_allocate(&bma);
4637        if (error)
4638                goto out_finish;
4639
4640        error = -ENOSPC;
4641        if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4642                goto out_finish;
4643        error = -EFSCORRUPTED;
4644        if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4645                goto out_finish;
4646
4647        XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4648        XFS_STATS_INC(mp, xs_xstrat_quick);
4649
4650        ASSERT(!isnullstartblock(bma.got.br_startblock));
4651        xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4652        *seq = READ_ONCE(ifp->if_seq);
4653
4654        if (whichfork == XFS_COW_FORK)
4655                xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4656
4657        error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4658                        whichfork);
4659        if (error)
4660                goto out_finish;
4661
4662        xfs_bmapi_finish(&bma, whichfork, 0);
4663        error = xfs_trans_commit(tp);
4664        xfs_iunlock(ip, XFS_ILOCK_EXCL);
4665        return error;
4666
4667out_finish:
4668        xfs_bmapi_finish(&bma, whichfork, error);
4669out_trans_cancel:
4670        xfs_trans_cancel(tp);
4671        xfs_iunlock(ip, XFS_ILOCK_EXCL);
4672        return error;
4673}
4674
4675int
4676xfs_bmapi_remap(
4677        struct xfs_trans        *tp,
4678        struct xfs_inode        *ip,
4679        xfs_fileoff_t           bno,
4680        xfs_filblks_t           len,
4681        xfs_fsblock_t           startblock,
4682        int                     flags)
4683{
4684        struct xfs_mount        *mp = ip->i_mount;
4685        struct xfs_ifork        *ifp;
4686        struct xfs_btree_cur    *cur = NULL;
4687        struct xfs_bmbt_irec    got;
4688        struct xfs_iext_cursor  icur;
4689        int                     whichfork = xfs_bmapi_whichfork(flags);
4690        int                     logflags = 0, error;
4691
4692        ifp = XFS_IFORK_PTR(ip, whichfork);
4693        ASSERT(len > 0);
4694        ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4695        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4696        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4697                           XFS_BMAPI_NORMAP)));
4698        ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4699                        (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4700
4701        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4702            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4703                return -EFSCORRUPTED;
4704        }
4705
4706        if (XFS_FORCED_SHUTDOWN(mp))
4707                return -EIO;
4708
4709        error = xfs_iread_extents(tp, ip, whichfork);
4710        if (error)
4711                return error;
4712
4713        if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4714                /* make sure we only reflink into a hole. */
4715                ASSERT(got.br_startoff > bno);
4716                ASSERT(got.br_startoff - bno >= len);
4717        }
4718
4719        ip->i_nblocks += len;
4720        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4721
4722        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
4723                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4724                cur->bc_ino.flags = 0;
4725        }
4726
4727        got.br_startoff = bno;
4728        got.br_startblock = startblock;
4729        got.br_blockcount = len;
4730        if (flags & XFS_BMAPI_PREALLOC)
4731                got.br_state = XFS_EXT_UNWRITTEN;
4732        else
4733                got.br_state = XFS_EXT_NORM;
4734
4735        error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4736                        &cur, &got, &logflags, flags);
4737        if (error)
4738                goto error0;
4739
4740        error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4741
4742error0:
4743        if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4744                logflags &= ~XFS_ILOG_DEXT;
4745        else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4746                logflags &= ~XFS_ILOG_DBROOT;
4747
4748        if (logflags)
4749                xfs_trans_log_inode(tp, ip, logflags);
4750        if (cur)
4751                xfs_btree_del_cursor(cur, error);
4752        return error;
4753}
4754
4755/*
4756 * When a delalloc extent is split (e.g., due to a hole punch), the original
4757 * indlen reservation must be shared across the two new extents that are left
4758 * behind.
4759 *
4760 * Given the original reservation and the worst case indlen for the two new
4761 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4762 * reservation fairly across the two new extents. If necessary, steal available
4763 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4764 * ores == 1). The number of stolen blocks is returned. The availability and
4765 * subsequent accounting of stolen blocks is the responsibility of the caller.
4766 */
4767static xfs_filblks_t
4768xfs_bmap_split_indlen(
4769        xfs_filblks_t                   ores,           /* original res. */
4770        xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4771        xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
4772        xfs_filblks_t                   avail)          /* stealable blocks */
4773{
4774        xfs_filblks_t                   len1 = *indlen1;
4775        xfs_filblks_t                   len2 = *indlen2;
4776        xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4777        xfs_filblks_t                   stolen = 0;
4778        xfs_filblks_t                   resfactor;
4779
4780        /*
4781         * Steal as many blocks as we can to try and satisfy the worst case
4782         * indlen for both new extents.
4783         */
4784        if (ores < nres && avail)
4785                stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4786        ores += stolen;
4787
4788         /* nothing else to do if we've satisfied the new reservation */
4789        if (ores >= nres)
4790                return stolen;
4791
4792        /*
4793         * We can't meet the total required reservation for the two extents.
4794         * Calculate the percent of the overall shortage between both extents
4795         * and apply this percentage to each of the requested indlen values.
4796         * This distributes the shortage fairly and reduces the chances that one
4797         * of the two extents is left with nothing when extents are repeatedly
4798         * split.
4799         */
4800        resfactor = (ores * 100);
4801        do_div(resfactor, nres);
4802        len1 *= resfactor;
4803        do_div(len1, 100);
4804        len2 *= resfactor;
4805        do_div(len2, 100);
4806        ASSERT(len1 + len2 <= ores);
4807        ASSERT(len1 < *indlen1 && len2 < *indlen2);
4808
4809        /*
4810         * Hand out the remainder to each extent. If one of the two reservations
4811         * is zero, we want to make sure that one gets a block first. The loop
4812         * below starts with len1, so hand len2 a block right off the bat if it
4813         * is zero.
4814         */
4815        ores -= (len1 + len2);
4816        ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4817        if (ores && !len2 && *indlen2) {
4818                len2++;
4819                ores--;
4820        }
4821        while (ores) {
4822                if (len1 < *indlen1) {
4823                        len1++;
4824                        ores--;
4825                }
4826                if (!ores)
4827                        break;
4828                if (len2 < *indlen2) {
4829                        len2++;
4830                        ores--;
4831                }
4832        }
4833
4834        *indlen1 = len1;
4835        *indlen2 = len2;
4836
4837        return stolen;
4838}
4839
4840int
4841xfs_bmap_del_extent_delay(
4842        struct xfs_inode        *ip,
4843        int                     whichfork,
4844        struct xfs_iext_cursor  *icur,
4845        struct xfs_bmbt_irec    *got,
4846        struct xfs_bmbt_irec    *del)
4847{
4848        struct xfs_mount        *mp = ip->i_mount;
4849        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4850        struct xfs_bmbt_irec    new;
4851        int64_t                 da_old, da_new, da_diff = 0;
4852        xfs_fileoff_t           del_endoff, got_endoff;
4853        xfs_filblks_t           got_indlen, new_indlen, stolen;
4854        int                     state = xfs_bmap_fork_to_state(whichfork);
4855        int                     error = 0;
4856        bool                    isrt;
4857
4858        XFS_STATS_INC(mp, xs_del_exlist);
4859
4860        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4861        del_endoff = del->br_startoff + del->br_blockcount;
4862        got_endoff = got->br_startoff + got->br_blockcount;
4863        da_old = startblockval(got->br_startblock);
4864        da_new = 0;
4865
4866        ASSERT(del->br_blockcount > 0);
4867        ASSERT(got->br_startoff <= del->br_startoff);
4868        ASSERT(got_endoff >= del_endoff);
4869
4870        if (isrt) {
4871                uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4872
4873                do_div(rtexts, mp->m_sb.sb_rextsize);
4874                xfs_mod_frextents(mp, rtexts);
4875        }
4876
4877        /*
4878         * Update the inode delalloc counter now and wait to update the
4879         * sb counters as we might have to borrow some blocks for the
4880         * indirect block accounting.
4881         */
4882        ASSERT(!isrt);
4883        error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
4884        if (error)
4885                return error;
4886        ip->i_delayed_blks -= del->br_blockcount;
4887
4888        if (got->br_startoff == del->br_startoff)
4889                state |= BMAP_LEFT_FILLING;
4890        if (got_endoff == del_endoff)
4891                state |= BMAP_RIGHT_FILLING;
4892
4893        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4894        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4895                /*
4896                 * Matches the whole extent.  Delete the entry.
4897                 */
4898                xfs_iext_remove(ip, icur, state);
4899                xfs_iext_prev(ifp, icur);
4900                break;
4901        case BMAP_LEFT_FILLING:
4902                /*
4903                 * Deleting the first part of the extent.
4904                 */
4905                got->br_startoff = del_endoff;
4906                got->br_blockcount -= del->br_blockcount;
4907                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4908                                got->br_blockcount), da_old);
4909                got->br_startblock = nullstartblock((int)da_new);
4910                xfs_iext_update_extent(ip, state, icur, got);
4911                break;
4912        case BMAP_RIGHT_FILLING:
4913                /*
4914                 * Deleting the last part of the extent.
4915                 */
4916                got->br_blockcount = got->br_blockcount - del->br_blockcount;
4917                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4918                                got->br_blockcount), da_old);
4919                got->br_startblock = nullstartblock((int)da_new);
4920                xfs_iext_update_extent(ip, state, icur, got);
4921                break;
4922        case 0:
4923                /*
4924                 * Deleting the middle of the extent.
4925                 *
4926                 * Distribute the original indlen reservation across the two new
4927                 * extents.  Steal blocks from the deleted extent if necessary.
4928                 * Stealing blocks simply fudges the fdblocks accounting below.
4929                 * Warn if either of the new indlen reservations is zero as this
4930                 * can lead to delalloc problems.
4931                 */
4932                got->br_blockcount = del->br_startoff - got->br_startoff;
4933                got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4934
4935                new.br_blockcount = got_endoff - del_endoff;
4936                new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4937
4938                WARN_ON_ONCE(!got_indlen || !new_indlen);
4939                stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4940                                                       del->br_blockcount);
4941
4942                got->br_startblock = nullstartblock((int)got_indlen);
4943
4944                new.br_startoff = del_endoff;
4945                new.br_state = got->br_state;
4946                new.br_startblock = nullstartblock((int)new_indlen);
4947
4948                xfs_iext_update_extent(ip, state, icur, got);
4949                xfs_iext_next(ifp, icur);
4950                xfs_iext_insert(ip, icur, &new, state);
4951
4952                da_new = got_indlen + new_indlen - stolen;
4953                del->br_blockcount -= stolen;
4954                break;
4955        }
4956
4957        ASSERT(da_old >= da_new);
4958        da_diff = da_old - da_new;
4959        if (!isrt)
4960                da_diff += del->br_blockcount;
4961        if (da_diff) {
4962                xfs_mod_fdblocks(mp, da_diff, false);
4963                xfs_mod_delalloc(mp, -da_diff);
4964        }
4965        return error;
4966}
4967
4968void
4969xfs_bmap_del_extent_cow(
4970        struct xfs_inode        *ip,
4971        struct xfs_iext_cursor  *icur,
4972        struct xfs_bmbt_irec    *got,
4973        struct xfs_bmbt_irec    *del)
4974{
4975        struct xfs_mount        *mp = ip->i_mount;
4976        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4977        struct xfs_bmbt_irec    new;
4978        xfs_fileoff_t           del_endoff, got_endoff;
4979        int                     state = BMAP_COWFORK;
4980
4981        XFS_STATS_INC(mp, xs_del_exlist);
4982
4983        del_endoff = del->br_startoff + del->br_blockcount;
4984        got_endoff = got->br_startoff + got->br_blockcount;
4985
4986        ASSERT(del->br_blockcount > 0);
4987        ASSERT(got->br_startoff <= del->br_startoff);
4988        ASSERT(got_endoff >= del_endoff);
4989        ASSERT(!isnullstartblock(got->br_startblock));
4990
4991        if (got->br_startoff == del->br_startoff)
4992                state |= BMAP_LEFT_FILLING;
4993        if (got_endoff == del_endoff)
4994                state |= BMAP_RIGHT_FILLING;
4995
4996        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4997        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4998                /*
4999                 * Matches the whole extent.  Delete the entry.
5000                 */
5001                xfs_iext_remove(ip, icur, state);
5002                xfs_iext_prev(ifp, icur);
5003                break;
5004        case BMAP_LEFT_FILLING:
5005                /*
5006                 * Deleting the first part of the extent.
5007                 */
5008                got->br_startoff = del_endoff;
5009                got->br_blockcount -= del->br_blockcount;
5010                got->br_startblock = del->br_startblock + del->br_blockcount;
5011                xfs_iext_update_extent(ip, state, icur, got);
5012                break;
5013        case BMAP_RIGHT_FILLING:
5014                /*
5015                 * Deleting the last part of the extent.
5016                 */
5017                got->br_blockcount -= del->br_blockcount;
5018                xfs_iext_update_extent(ip, state, icur, got);
5019                break;
5020        case 0:
5021                /*
5022                 * Deleting the middle of the extent.
5023                 */
5024                got->br_blockcount = del->br_startoff - got->br_startoff;
5025
5026                new.br_startoff = del_endoff;
5027                new.br_blockcount = got_endoff - del_endoff;
5028                new.br_state = got->br_state;
5029                new.br_startblock = del->br_startblock + del->br_blockcount;
5030
5031                xfs_iext_update_extent(ip, state, icur, got);
5032                xfs_iext_next(ifp, icur);
5033                xfs_iext_insert(ip, icur, &new, state);
5034                break;
5035        }
5036        ip->i_delayed_blks -= del->br_blockcount;
5037}
5038
5039/*
5040 * Called by xfs_bmapi to update file extent records and the btree
5041 * after removing space.
5042 */
5043STATIC int                              /* error */
5044xfs_bmap_del_extent_real(
5045        xfs_inode_t             *ip,    /* incore inode pointer */
5046        xfs_trans_t             *tp,    /* current transaction pointer */
5047        struct xfs_iext_cursor  *icur,
5048        xfs_btree_cur_t         *cur,   /* if null, not a btree */
5049        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
5050        int                     *logflagsp, /* inode logging flags */
5051        int                     whichfork, /* data or attr fork */
5052        int                     bflags) /* bmapi flags */
5053{
5054        xfs_fsblock_t           del_endblock=0; /* first block past del */
5055        xfs_fileoff_t           del_endoff;     /* first offset past del */
5056        int                     do_fx;  /* free extent at end of routine */
5057        int                     error;  /* error return value */
5058        int                     flags = 0;/* inode logging flags */
5059        struct xfs_bmbt_irec    got;    /* current extent entry */
5060        xfs_fileoff_t           got_endoff;     /* first offset past got */
5061        int                     i;      /* temp state */
5062        struct xfs_ifork        *ifp;   /* inode fork pointer */
5063        xfs_mount_t             *mp;    /* mount structure */
5064        xfs_filblks_t           nblks;  /* quota/sb block count */
5065        xfs_bmbt_irec_t         new;    /* new record to be inserted */
5066        /* REFERENCED */
5067        uint                    qfield; /* quota field to update */
5068        int                     state = xfs_bmap_fork_to_state(whichfork);
5069        struct xfs_bmbt_irec    old;
5070
5071        mp = ip->i_mount;
5072        XFS_STATS_INC(mp, xs_del_exlist);
5073
5074        ifp = XFS_IFORK_PTR(ip, whichfork);
5075        ASSERT(del->br_blockcount > 0);
5076        xfs_iext_get_extent(ifp, icur, &got);
5077        ASSERT(got.br_startoff <= del->br_startoff);
5078        del_endoff = del->br_startoff + del->br_blockcount;
5079        got_endoff = got.br_startoff + got.br_blockcount;
5080        ASSERT(got_endoff >= del_endoff);
5081        ASSERT(!isnullstartblock(got.br_startblock));
5082        qfield = 0;
5083        error = 0;
5084
5085        /*
5086         * If it's the case where the directory code is running with no block
5087         * reservation, and the deleted block is in the middle of its extent,
5088         * and the resulting insert of an extent would cause transformation to
5089         * btree format, then reject it.  The calling code will then swap blocks
5090         * around instead.  We have to do this now, rather than waiting for the
5091         * conversion to btree format, since the transaction will be dirty then.
5092         */
5093        if (tp->t_blk_res == 0 &&
5094            ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5095            ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5096            del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5097                return -ENOSPC;
5098
5099        flags = XFS_ILOG_CORE;
5100        if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5101                xfs_filblks_t   len;
5102                xfs_extlen_t    mod;
5103
5104                len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5105                                  &mod);
5106                ASSERT(mod == 0);
5107
5108                if (!(bflags & XFS_BMAPI_REMAP)) {
5109                        xfs_fsblock_t   bno;
5110
5111                        bno = div_u64_rem(del->br_startblock,
5112                                        mp->m_sb.sb_rextsize, &mod);
5113                        ASSERT(mod == 0);
5114
5115                        error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5116                        if (error)
5117                                goto done;
5118                }
5119
5120                do_fx = 0;
5121                nblks = len * mp->m_sb.sb_rextsize;
5122                qfield = XFS_TRANS_DQ_RTBCOUNT;
5123        } else {
5124                do_fx = 1;
5125                nblks = del->br_blockcount;
5126                qfield = XFS_TRANS_DQ_BCOUNT;
5127        }
5128
5129        del_endblock = del->br_startblock + del->br_blockcount;
5130        if (cur) {
5131                error = xfs_bmbt_lookup_eq(cur, &got, &i);
5132                if (error)
5133                        goto done;
5134                if (XFS_IS_CORRUPT(mp, i != 1)) {
5135                        error = -EFSCORRUPTED;
5136                        goto done;
5137                }
5138        }
5139
5140        if (got.br_startoff == del->br_startoff)
5141                state |= BMAP_LEFT_FILLING;
5142        if (got_endoff == del_endoff)
5143                state |= BMAP_RIGHT_FILLING;
5144
5145        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5146        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5147                /*
5148                 * Matches the whole extent.  Delete the entry.
5149                 */
5150                xfs_iext_remove(ip, icur, state);
5151                xfs_iext_prev(ifp, icur);
5152                ifp->if_nextents--;
5153
5154                flags |= XFS_ILOG_CORE;
5155                if (!cur) {
5156                        flags |= xfs_ilog_fext(whichfork);
5157                        break;
5158                }
5159                if ((error = xfs_btree_delete(cur, &i)))
5160                        goto done;
5161                if (XFS_IS_CORRUPT(mp, i != 1)) {
5162                        error = -EFSCORRUPTED;
5163                        goto done;
5164                }
5165                break;
5166        case BMAP_LEFT_FILLING:
5167                /*
5168                 * Deleting the first part of the extent.
5169                 */
5170                got.br_startoff = del_endoff;
5171                got.br_startblock = del_endblock;
5172                got.br_blockcount -= del->br_blockcount;
5173                xfs_iext_update_extent(ip, state, icur, &got);
5174                if (!cur) {
5175                        flags |= xfs_ilog_fext(whichfork);
5176                        break;
5177                }
5178                error = xfs_bmbt_update(cur, &got);
5179                if (error)
5180                        goto done;
5181                break;
5182        case BMAP_RIGHT_FILLING:
5183                /*
5184                 * Deleting the last part of the extent.
5185                 */
5186                got.br_blockcount -= del->br_blockcount;
5187                xfs_iext_update_extent(ip, state, icur, &got);
5188                if (!cur) {
5189                        flags |= xfs_ilog_fext(whichfork);
5190                        break;
5191                }
5192                error = xfs_bmbt_update(cur, &got);
5193                if (error)
5194                        goto done;
5195                break;
5196        case 0:
5197                /*
5198                 * Deleting the middle of the extent.
5199                 */
5200
5201                /*
5202                 * For directories, -ENOSPC is returned since a directory entry
5203                 * remove operation must not fail due to low extent count
5204                 * availability. -ENOSPC will be handled by higher layers of XFS
5205                 * by letting the corresponding empty Data/Free blocks to linger
5206                 * until a future remove operation. Dabtree blocks would be
5207                 * swapped with the last block in the leaf space and then the
5208                 * new last block will be unmapped.
5209                 *
5210                 * The above logic also applies to the source directory entry of
5211                 * a rename operation.
5212                 */
5213                error = xfs_iext_count_may_overflow(ip, whichfork, 1);
5214                if (error) {
5215                        ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
5216                                whichfork == XFS_DATA_FORK);
5217                        error = -ENOSPC;
5218                        goto done;
5219                }
5220
5221                old = got;
5222
5223                got.br_blockcount = del->br_startoff - got.br_startoff;
5224                xfs_iext_update_extent(ip, state, icur, &got);
5225
5226                new.br_startoff = del_endoff;
5227                new.br_blockcount = got_endoff - del_endoff;
5228                new.br_state = got.br_state;
5229                new.br_startblock = del_endblock;
5230
5231                flags |= XFS_ILOG_CORE;
5232                if (cur) {
5233                        error = xfs_bmbt_update(cur, &got);
5234                        if (error)
5235                                goto done;
5236                        error = xfs_btree_increment(cur, 0, &i);
5237                        if (error)
5238                                goto done;
5239                        cur->bc_rec.b = new;
5240                        error = xfs_btree_insert(cur, &i);
5241                        if (error && error != -ENOSPC)
5242                                goto done;
5243                        /*
5244                         * If get no-space back from btree insert, it tried a
5245                         * split, and we have a zero block reservation.  Fix up
5246                         * our state and return the error.
5247                         */
5248                        if (error == -ENOSPC) {
5249                                /*
5250                                 * Reset the cursor, don't trust it after any
5251                                 * insert operation.
5252                                 */
5253                                error = xfs_bmbt_lookup_eq(cur, &got, &i);
5254                                if (error)
5255                                        goto done;
5256                                if (XFS_IS_CORRUPT(mp, i != 1)) {
5257                                        error = -EFSCORRUPTED;
5258                                        goto done;
5259                                }
5260                                /*
5261                                 * Update the btree record back
5262                                 * to the original value.
5263                                 */
5264                                error = xfs_bmbt_update(cur, &old);
5265                                if (error)
5266                                        goto done;
5267                                /*
5268                                 * Reset the extent record back
5269                                 * to the original value.
5270                                 */
5271                                xfs_iext_update_extent(ip, state, icur, &old);
5272                                flags = 0;
5273                                error = -ENOSPC;
5274                                goto done;
5275                        }
5276                        if (XFS_IS_CORRUPT(mp, i != 1)) {
5277                                error = -EFSCORRUPTED;
5278                                goto done;
5279                        }
5280                } else
5281                        flags |= xfs_ilog_fext(whichfork);
5282
5283                ifp->if_nextents++;
5284                xfs_iext_next(ifp, icur);
5285                xfs_iext_insert(ip, icur, &new, state);
5286                break;
5287        }
5288
5289        /* remove reverse mapping */
5290        xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5291
5292        /*
5293         * If we need to, add to list of extents to delete.
5294         */
5295        if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5296                if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5297                        xfs_refcount_decrease_extent(tp, del);
5298                } else {
5299                        __xfs_bmap_add_free(tp, del->br_startblock,
5300                                        del->br_blockcount, NULL,
5301                                        (bflags & XFS_BMAPI_NODISCARD) ||
5302                                        del->br_state == XFS_EXT_UNWRITTEN);
5303                }
5304        }
5305
5306        /*
5307         * Adjust inode # blocks in the file.
5308         */
5309        if (nblks)
5310                ip->i_nblocks -= nblks;
5311        /*
5312         * Adjust quota data.
5313         */
5314        if (qfield && !(bflags & XFS_BMAPI_REMAP))
5315                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5316
5317done:
5318        *logflagsp = flags;
5319        return error;
5320}
5321
5322/*
5323 * Unmap (remove) blocks from a file.
     *
     * The range [start, start + *rlen) of the fork selected by flags is walked
     * from its highest offset down towards start, one extent at a time.
     *
5324 * If nexts is nonzero then the number of extents to remove is limited to
5325 * that value.  If not all extents in the block range can be removed then
5326 * *rlen is set to the number of blocks left at the front of the range;
     * otherwise it is set to 0.  (The xfs_bunmapi() wrapper derives its *done
     * flag from that value.)
     *
     * The caller must hold the inode's ILOCK exclusively and the COW fork is not
     * accepted (both are asserted).  Returns 0 on success or a negative errno:
     * -EFSCORRUPTED if xfs_ifork_has_extents() rejects the fork, -EIO after a
     * forced shutdown, or whatever the extent/btree helpers report.
5327 */
5328int                                             /* error */
5329__xfs_bunmapi(
5330        struct xfs_trans        *tp,            /* transaction pointer */
5331        struct xfs_inode        *ip,            /* incore inode */
5332        xfs_fileoff_t           start,          /* first file offset deleted */
5333        xfs_filblks_t           *rlen,          /* i/o: amount remaining */
5334        int                     flags,          /* misc flags */
5335        xfs_extnum_t            nexts)          /* number of extents max */
5336{
5337        struct xfs_btree_cur    *cur;           /* bmap btree cursor */
5338        struct xfs_bmbt_irec    del;            /* extent being deleted */
5339        int                     error;          /* error return value */
5340        xfs_extnum_t            extno;          /* extent number in list */
5341        struct xfs_bmbt_irec    got;            /* current extent record */
5342        struct xfs_ifork        *ifp;           /* inode fork pointer */
5343        int                     isrt;           /* freeing in rt area */
5344        int                     logflags;       /* transaction logging flags */
5345        xfs_extlen_t            mod;            /* rt extent offset */
5346        struct xfs_mount        *mp = ip->i_mount;
5347        int                     tmp_logflags;   /* partial logging flags */
5348        int                     wasdel;         /* was a delayed alloc extent */
5349        int                     whichfork;      /* data or attribute fork */
5350        xfs_fsblock_t           sum;            /* block just past del (rt only) */
5351        xfs_filblks_t           len = *rlen;    /* length to unmap in file */
5352        xfs_fileoff_t           max_len;        /* blocks we may still unmap */
5353        xfs_fileoff_t           end;            /* last offset left to process */
5354        struct xfs_iext_cursor  icur;           /* incore extent list position */
5355        bool                    done = false;   /* ran out of extents to visit */
5356
5357        trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5358
5359        whichfork = xfs_bmapi_whichfork(flags);
5360        ASSERT(whichfork != XFS_COW_FORK);
5361        ifp = XFS_IFORK_PTR(ip, whichfork);
5362        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5363                return -EFSCORRUPTED;
5364        if (XFS_FORCED_SHUTDOWN(mp))
5365                return -EIO;
5366
5367        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5368        ASSERT(len > 0);
5369        ASSERT(nexts >= 0);
5370
5371        /*
5372         * Guesstimate how many blocks we can unmap without running the risk of
5373         * blowing out the transaction with a mix of EFIs and reflink
5374         * adjustments.
             *
             * NOTE(review): tp is tested for NULL here, but other paths below
             * (e.g. the realtime branch) use tp without a check -- confirm which
             * callers may pass a NULL transaction.
5375         */
5376        if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5377                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5378        else
5379                max_len = len;
5380
5381        error = xfs_iread_extents(tp, ip, whichfork);
5382        if (error)
5383                return error;
5384
            /* Nothing mapped at all: report the whole range as done. */
5385        if (xfs_iext_count(ifp) == 0) {
5386                *rlen = 0;
5387                return 0;
5388        }
5389        XFS_STATS_INC(mp, xs_blk_unmap);
5390        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5391        end = start + len;
5392
5393        if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5394                *rlen = 0;
5395                return 0;
5396        }
            /*
             * end was handed to the lookup by reference; from here on it is used
             * as the inclusive last offset that still has to be unmapped.
             */
5397        end--;
5398
5399        logflags = 0;
5400        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
                    /* NOTE(review): this assertion restates the enclosing condition. */
5401                ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5402                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5403                cur->bc_ino.flags = 0;
5404        } else
5405                cur = NULL;
5406
5407        if (isrt) {
5408                /*
5409                 * Synchronize by locking the bitmap inode.
                     * The summary inode is locked and joined to the transaction
                     * the same way.
5410                 */
5411                xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5412                xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5413                xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5414                xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5415        }
5416
5417        extno = 0;
            /*
             * Walk the extent list backwards.  The loop stops once end has dropped
             * below start (or wrapped to -1 after offset 0 was handled), once
             * nexts extents have been processed, or once the max_len budget has
             * been used up.
             */
5418        while (end != (xfs_fileoff_t)-1 && end >= start &&
5419               (nexts == 0 || extno < nexts) && max_len > 0) {
5420                /*
5421                 * Is the found extent after a hole in which end lives?
5422                 * Just back up to the previous extent, if so.
5423                 */
5424                if (got.br_startoff > end &&
5425                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5426                        done = true;
5427                        break;
5428                }
5429                /*
5430                 * Is the last block of this extent before the range
5431                 * we're supposed to delete?  If so, we're done.
5432                 */
5433                end = XFS_FILEOFF_MIN(end,
5434                        got.br_startoff + got.br_blockcount - 1);
5435                if (end < start)
5436                        break;
5437                /*
5438                 * Then deal with the (possibly delayed) allocated space
5439                 * we found.
5440                 */
5441                del = got;
5442                wasdel = isnullstartblock(del.br_startblock);
5443
                    /*
                     * Clip del to the part of got that lies inside [start, end].
                     * The start block is only advanced for real (non-delalloc)
                     * extents.
                     */
5444                if (got.br_startoff < start) {
5445                        del.br_startoff = start;
5446                        del.br_blockcount -= start - got.br_startoff;
5447                        if (!wasdel)
5448                                del.br_startblock += start - got.br_startoff;
5449                }
5450                if (del.br_startoff + del.br_blockcount > end + 1)
5451                        del.br_blockcount = end + 1 - del.br_startoff;
5452
5453                /* How much can we safely unmap? */
                    /* If del exceeds the budget, keep only its last max_len blocks. */
5454                if (max_len < del.br_blockcount) {
5455                        del.br_startoff += del.br_blockcount - max_len;
5456                        if (!wasdel)
5457                                del.br_startblock += del.br_blockcount - max_len;
5458                        del.br_blockcount = max_len;
5459                }
5460
                    /* Only realtime files need the rt extent alignment checks below. */
5461                if (!isrt)
5462                        goto delete;
5463
                    /* Is the end of del aligned to an rt extent (sb_rextsize blocks)? */
5464                sum = del.br_startblock + del.br_blockcount;
5465                div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5466                if (mod) {
5467                        /*
5468                         * Realtime extent not lined up at the end.
5469                         * The extent could have been split into written
5470                         * and unwritten pieces, or we could just be
5471                         * unmapping part of it.  But we can't really
5472                         * get rid of part of a realtime extent.
5473                         */
5474                        if (del.br_state == XFS_EXT_UNWRITTEN) {
5475                                /*
5476                                 * This piece is unwritten, or we're not
5477                                 * using unwritten extents.  Skip over it.
5478                                 */
5479                                ASSERT(end >= mod);
5480                                end -= mod > del.br_blockcount ?
5481                                        del.br_blockcount : mod;
5482                                if (end < got.br_startoff &&
5483                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5484                                        done = true;
5485                                        break;
5486                                }
5487                                continue;
5488                        }
5489                        /*
5490                         * It's written, turn it unwritten.
5491                         * This is better than zeroing it.
5492                         */
5493                        ASSERT(del.br_state == XFS_EXT_NORM);
5494                        ASSERT(tp->t_blk_res > 0);
5495                        /*
5496                         * If this spans a realtime extent boundary,
5497                         * chop it back to the start of the one we end at.
5498                         */
5499                        if (del.br_blockcount > mod) {
5500                                del.br_startoff += del.br_blockcount - mod;
5501                                del.br_startblock += del.br_blockcount - mod;
5502                                del.br_blockcount = mod;
5503                        }
5504                        del.br_state = XFS_EXT_UNWRITTEN;
5505                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5506                                        whichfork, &icur, &cur, &del,
5507                                        &logflags);
5508                        if (error)
5509                                goto error0;
                            /* Converted rather than removed: skip the delete step. */
5510                        goto nodelete;
5511                }
                    /* The end is aligned; now check the start of del. */
5512                div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5513                if (mod) {
                            /* Blocks from the start of del up to the next rt extent boundary. */
5514                        xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5515
5516                        /*
5517                         * Realtime extent is lined up at the end but not
5518                         * at the front.  We'll get rid of full extents if
5519                         * we can.
5520                         */
5521                        if (del.br_blockcount > off) {
5522                                del.br_blockcount -= off;
5523                                del.br_startoff += off;
5524                                del.br_startblock += off;
5525                        } else if (del.br_startoff == start &&
5526                                   (del.br_state == XFS_EXT_UNWRITTEN ||
5527                                    tp->t_blk_res == 0)) {
5528                                /*
5529                                 * Can't make it unwritten.  There isn't
5530                                 * a full extent here so just skip it.
5531                                 */
5532                                ASSERT(end >= del.br_blockcount);
5533                                end -= del.br_blockcount;
5534                                if (got.br_startoff > end &&
5535                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5536                                        done = true;
5537                                        break;
5538                                }
5539                                continue;
5540                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5541                                struct xfs_bmbt_irec    prev;
5542                                xfs_fileoff_t           unwrite_start;
5543
5544                                /*
5545                                 * This one is already unwritten.
5546                                 * It must have a written left neighbor.
5547                                 * Unwrite the killed part of that one and
5548                                 * try again.
5549                                 */
5550                                if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5551                                        ASSERT(0);
5552                                ASSERT(prev.br_state == XFS_EXT_NORM);
5553                                ASSERT(!isnullstartblock(prev.br_startblock));
5554                                ASSERT(del.br_startblock ==
5555                                       prev.br_startblock + prev.br_blockcount);
                                    /*
                                     * Convert only the tail of prev, starting at the
                                     * largest of start, del.br_startoff - mod and
                                     * prev's own start.  mod is reused below as the
                                     * number of blocks of prev left untouched.
                                     */
5556                                unwrite_start = max3(start,
5557                                                     del.br_startoff - mod,
5558                                                     prev.br_startoff);
5559                                mod = unwrite_start - prev.br_startoff;
5560                                prev.br_startoff = unwrite_start;
5561                                prev.br_startblock += mod;
5562                                prev.br_blockcount -= mod;
5563                                prev.br_state = XFS_EXT_UNWRITTEN;
5564                                error = xfs_bmap_add_extent_unwritten_real(tp,
5565                                                ip, whichfork, &icur, &cur,
5566                                                &prev, &logflags);
5567                                if (error)
5568                                        goto error0;
5569                                goto nodelete;
5570                        } else {
                                    /* Written and shorter than off: convert all of del. */
5571                                ASSERT(del.br_state == XFS_EXT_NORM);
5572                                del.br_state = XFS_EXT_UNWRITTEN;
5573                                error = xfs_bmap_add_extent_unwritten_real(tp,
5574                                                ip, whichfork, &icur, &cur,
5575                                                &del, &logflags);
5576                                if (error)
5577                                        goto error0;
5578                                goto nodelete;
5579                        }
5580                }
5581
5582delete:
                    /*
                     * Remove del.  Delayed allocation extents go through
                     * xfs_bmap_del_extent_delay(), which is given neither the
                     * transaction nor the btree cursor; real extents go through
                     * xfs_bmap_del_extent_real() and contribute logging flags.
                     */
5583                if (wasdel) {
5584                        error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5585                                        &got, &del);
5586                } else {
5587                        error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5588                                        &del, &tmp_logflags, whichfork,
5589                                        flags);
5590                        logflags |= tmp_logflags;
5591                }
5592
5593                if (error)
5594                        goto error0;
5595
                    /* Charge the removed blocks to the budget and continue below del. */
5596                max_len -= del.br_blockcount;
5597                end = del.br_startoff - 1;
5598nodelete:
5599                /*
5600                 * If not done go on to the next (previous) record.
5601                 */
5602                if (end != (xfs_fileoff_t)-1 && end >= start) {
5603                        if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5604                            (got.br_startoff > end &&
5605                             !xfs_iext_prev_extent(ifp, &icur, &got))) {
5606                                done = true;
5607                                break;
5608                        }
5609                        extno++;
5610                }
5611        }
            /* Report how many blocks at the front of the range are still left. */
5612        if (done || end == (xfs_fileoff_t)-1 || end < start)
5613                *rlen = 0;
5614        else
5615                *rlen = end - start + 1;
5616
5617        /*
5618         * Convert to a btree if necessary.
             * Otherwise give xfs_bmap_btree_to_extents() the chance to go the
             * other way.
5619         */
5620        if (xfs_bmap_needs_btree(ip, whichfork)) {
5621                ASSERT(cur == NULL);
5622                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5623                                &tmp_logflags, whichfork);
5624                logflags |= tmp_logflags;
5625        } else {
5626                error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5627                        whichfork);
5628        }
5629
5630error0:
5631        /*
5632         * Log everything.  Do this after conversion, there's no point in
5633         * logging the extent records if we've converted to btree format.
5634         */
5635        if ((logflags & xfs_ilog_fext(whichfork)) &&
5636            ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5637                logflags &= ~xfs_ilog_fext(whichfork);
5638        else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5639                 ifp->if_format != XFS_DINODE_FMT_BTREE)
5640                logflags &= ~xfs_ilog_fbroot(whichfork);
5641        /*
5642         * Log inode even in the error case, if the transaction
5643         * is dirty we'll need to shut down the filesystem.
5644         */
5645        if (logflags)
5646                xfs_trans_log_inode(tp, ip, logflags);
5647        if (cur) {
5648                if (!error)
5649                        cur->bc_ino.allocated = 0;
5650                xfs_btree_del_cursor(cur, error);
5651        }
5652        return error;
5653}
5654
5655/* Unmap a range of a file. */
5656int
5657xfs_bunmapi(
5658        xfs_trans_t             *tp,
5659        struct xfs_inode        *ip,
5660        xfs_fileoff_t           bno,
5661        xfs_filblks_t           len,
5662        int                     flags,
5663        xfs_extnum_t            nexts,
5664        int                     *done)
5665{
5666        int                     error;
5667
5668        error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5669        *done = (len == 0);
5670        return error;
5671}
5672
5673/*
5674 * Determine whether an extent shift can be accomplished by a merge with the
5675 * extent that precedes the target hole of the shift.
5676 */
5677STATIC bool
5678xfs_bmse_can_merge(
5679        struct xfs_bmbt_irec    *left,  /* preceding extent */
5680        struct xfs_bmbt_irec    *got,   /* current extent to shift */
5681        xfs_fileoff_t           shift)  /* shift fsb */
5682{
5683        xfs_fileoff_t           startoff;
5684
5685        startoff = got->br_startoff - shift;
5686
5687        /*
5688         * The extent, once shifted, must be adjacent in-file and on-disk with
5689         * the preceding extent.
5690         */
5691        if ((left->br_startoff + left->br_blockcount != startoff) ||
5692            (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5693            (left->br_state != got->br_state) ||
5694            (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5695                return false;
5696
5697        return true;
5698}
5699
5700/*
5701 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5702 * hole in the file. If an extent shift would result in the extent being fully
5703 * adjacent to the extent that currently precedes the hole, we can merge with
5704 * the preceding extent rather than do the shift.
5705 *
5706 * This function assumes the caller has verified a shift-by-merge is possible
5707 * with the provided extents via xfs_bmse_can_merge().
     *
     * icur is expected to point at got in the incore extent list.  On success
     * that entry has been removed, the entry before it has been rewritten as
     * left grown by got's length (icur is left pointing there), and the fork's
     * extent count has dropped by one.  Bits describing what must be logged are
     * OR-ed into *logflags.
     *
     * Returns 0, an error from the btree helpers, or -EFSCORRUPTED when a btree
     * lookup does not find exactly the expected record.
     *
     * NOTE(review): shift is only consumed by the ASSERT below, and the
     * no-cursor path always sets XFS_ILOG_DEXT regardless of whichfork; the
     * caller visible in this file passes XFS_DATA_FORK.
5708 */
5709STATIC int
5710xfs_bmse_merge(
5711        struct xfs_trans                *tp,
5712        struct xfs_inode                *ip,
5713        int                             whichfork,
5714        xfs_fileoff_t                   shift,          /* shift fsb */
5715        struct xfs_iext_cursor          *icur,
5716        struct xfs_bmbt_irec            *got,           /* extent to shift */
5717        struct xfs_bmbt_irec            *left,          /* preceding extent */
5718        struct xfs_btree_cur            *cur,
5719        int                             *logflags)      /* output */
5720{
5721        struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
5722        struct xfs_bmbt_irec            new;
5723        xfs_filblks_t                   blockcount;
5724        int                             error, i;
5725        struct xfs_mount                *mp = ip->i_mount;
5726
5727        blockcount = left->br_blockcount + got->br_blockcount;
5728
5729        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5730        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5731        ASSERT(xfs_bmse_can_merge(left, got, shift));
5732
            /* The merged record is left, extended to cover got as well. */
5733        new = *left;
5734        new.br_blockcount = blockcount;
5735
5736        /*
5737         * Update the on-disk extent count, the btree if necessary and log the
5738         * inode.
5739         */
5740        ifp->if_nextents--;
5741        *logflags |= XFS_ILOG_CORE;
            /* No btree cursor: only the incore list and rmap need updating. */
5742        if (!cur) {
5743                *logflags |= XFS_ILOG_DEXT;
5744                goto done;
5745        }
5746
5747        /* lookup and remove the extent to merge */
5748        error = xfs_bmbt_lookup_eq(cur, got, &i);
5749        if (error)
5750                return error;
5751        if (XFS_IS_CORRUPT(mp, i != 1))
5752                return -EFSCORRUPTED;
5753
5754        error = xfs_btree_delete(cur, &i);
5755        if (error)
5756                return error;
5757        if (XFS_IS_CORRUPT(mp, i != 1))
5758                return -EFSCORRUPTED;
5759
5760        /* lookup and update size of the previous extent */
5761        error = xfs_bmbt_lookup_eq(cur, left, &i);
5762        if (error)
5763                return error;
5764        if (XFS_IS_CORRUPT(mp, i != 1))
5765                return -EFSCORRUPTED;
5766
5767        error = xfs_bmbt_update(cur, &new);
5768        if (error)
5769                return error;
5770
5771        /* change to extent format if required after extent removal */
5772        error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5773        if (error)
5774                return error;
5775
5776done:
            /*
             * Mirror the change in the incore list: drop the entry at icur, step
             * back one entry and overwrite it with the merged record.
             */
5777        xfs_iext_remove(ip, icur, 0);
5778        xfs_iext_prev(ifp, icur);
5779        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5780                        &new);
5781
5782        /* update reverse mapping. rmap functions merge the rmaps for us */
5783        xfs_rmap_unmap_extent(tp, ip, whichfork, got);
            /* new is reused here: got's blocks, re-mapped right after left's end. */
5784        memcpy(&new, got, sizeof(new));
5785        new.br_startoff = left->br_startoff + left->br_blockcount;
5786        xfs_rmap_map_extent(tp, ip, whichfork, &new);
5787        return 0;
5788}
5789
5790static int
5791xfs_bmap_shift_update_extent(
5792        struct xfs_trans        *tp,
5793        struct xfs_inode        *ip,
5794        int                     whichfork,
5795        struct xfs_iext_cursor  *icur,
5796        struct xfs_bmbt_irec    *got,
5797        struct xfs_btree_cur    *cur,
5798        int                     *logflags,
5799        xfs_fileoff_t           startoff)
5800{
5801        struct xfs_mount        *mp = ip->i_mount;
5802        struct xfs_bmbt_irec    prev = *got;
5803        int                     error, i;
5804
5805        *logflags |= XFS_ILOG_CORE;
5806
5807        got->br_startoff = startoff;
5808
5809        if (cur) {
5810                error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5811                if (error)
5812                        return error;
5813                if (XFS_IS_CORRUPT(mp, i != 1))
5814                        return -EFSCORRUPTED;
5815
5816                error = xfs_bmbt_update(cur, got);
5817                if (error)
5818                        return error;
5819        } else {
5820                *logflags |= XFS_ILOG_DEXT;
5821        }
5822
5823        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5824                        got);
5825
5826        /* update reverse mapping */
5827        xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5828        xfs_rmap_map_extent(tp, ip, whichfork, got);
5829        return 0;
5830}
5831
/*
 * Shift one data fork extent towards the start of the file.
 *
 * The extent that xfs_iext_lookup_extent() returns for *next_fsb is moved left
 * by offset_shift_fsb blocks.  If xfs_bmse_can_merge() says the shifted extent
 * can be joined to the extent before it, the two are merged instead.  Each
 * call handles a single extent: on return *next_fsb is the start offset of the
 * following extent, or *done is set to true when there is none (or when the
 * initial lookup found nothing).  *done is never cleared here.
 *
 * Both the IOLOCK and the ILOCK must be held exclusively (asserted).
 *
 * Returns 0 on success, -EINVAL if the shifted extent would overlap the
 * previous extent or start before file offset 0, -EFSCORRUPTED if
 * xfs_ifork_has_extents() rejects the fork, the XFS_ERRTAG_BMAPIFORMAT error
 * tag fires, or the extent is a delayed allocation, -EIO after a forced
 * shutdown, or an error from the extent read, merge or shift helpers.
 */
5832int
5833xfs_bmap_collapse_extents(
5834        struct xfs_trans        *tp,
5835        struct xfs_inode        *ip,
5836        xfs_fileoff_t           *next_fsb,
5837        xfs_fileoff_t           offset_shift_fsb,
5838        bool                    *done)
5839{
5840        int                     whichfork = XFS_DATA_FORK;
5841        struct xfs_mount        *mp = ip->i_mount;
5842        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5843        struct xfs_btree_cur    *cur = NULL;
5844        struct xfs_bmbt_irec    got, prev;
5845        struct xfs_iext_cursor  icur;
5846        xfs_fileoff_t           new_startoff;
5847        int                     error = 0;
5848        int                     logflags = 0;
5849
5850        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5851            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5852                return -EFSCORRUPTED;
5853        }
5854
5855        if (XFS_FORCED_SHUTDOWN(mp))
5856                return -EIO;
5857
5858        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5859
5860        error = xfs_iread_extents(tp, ip, whichfork);
5861        if (error)
5862                return error;
5863
            /* A btree cursor is only needed when the fork is in btree format. */
5864        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5865                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5866                cur->bc_ino.flags = 0;
5867        }
5868
5869        if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5870                *done = true;
5871                goto del_cursor;
5872        }
            /* A null start block marks a delayed allocation; not expected here. */
5873        if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5874                error = -EFSCORRUPTED;
5875                goto del_cursor;
5876        }
5877
5878        new_startoff = got.br_startoff - offset_shift_fsb;
5879        if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
                    /* The shifted extent may touch prev but must not overlap it. */
5880                if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5881                        error = -EINVAL;
5882                        goto del_cursor;
5883                }
5884
5885                if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5886                        error = xfs_bmse_merge(tp, ip, whichfork,
5887                                        offset_shift_fsb, &icur, &got, &prev,
5888                                        cur, &logflags);
5889                        if (error)
5890                                goto del_cursor;
5891                        goto done;
5892                }
5893        } else {
                    /* First extent in the fork: it must not be shifted below offset 0. */
5894                if (got.br_startoff < offset_shift_fsb) {
5895                        error = -EINVAL;
5896                        goto del_cursor;
5897                }
5898        }
5899
5900        error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5901                        cur, &logflags, new_startoff);
5902        if (error)
5903                goto del_cursor;
5904
5905done:
            /* Tell the caller where the next extent to shift starts, if any. */
5906        if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5907                *done = true;
5908                goto del_cursor;
5909        }
5910
5911        *next_fsb = got.br_startoff;
5912del_cursor:
            /* Common exit: release the cursor and log whatever was flagged. */
5913        if (cur)
5914                xfs_btree_del_cursor(cur, error);
5915        if (logflags)
5916                xfs_trans_log_inode(tp, ip, logflags);
5917        return error;
5918}
5919
5920/* Make sure we won't be right-shifting an extent past the maximum bound. */
5921int
5922xfs_bmap_can_insert_extents(
5923        struct xfs_inode        *ip,
5924        xfs_fileoff_t           off,
5925        xfs_fileoff_t           shift)
5926{
5927        struct xfs_bmbt_irec    got;
5928        int                     is_empty;
5929        int                     error = 0;
5930
5931        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5932
5933        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5934                return -EIO;
5935
5936        xfs_ilock(ip, XFS_ILOCK_EXCL);
5937        error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5938        if (!error && !is_empty && got.br_startoff >= off &&
5939            ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5940                error = -EINVAL;
5941        xfs_iunlock(ip, XFS_ILOCK_EXCL);
5942
5943        return error;
5944}
5945
5946int
5947xfs_bmap_insert_extents(
5948        struct xfs_trans        *tp,
5949        struct xfs_inode        *ip,
5950        xfs_fileoff_t           *next_fsb,
5951        xfs_fileoff_t           offset_shift_fsb,
5952        bool                    *done,
5953        xfs_fileoff_t           stop_fsb)
5954{
5955        int                     whichfork = XFS_DATA_FORK;
5956        struct xfs_mount        *mp = ip->i_mount;
5957        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5958        struct xfs_btree_cur    *cur = NULL;
5959        struct xfs_bmbt_irec    got, next;
5960        struct xfs_iext_cursor  icur;
5961        xfs_fileoff_t           new_startoff;
5962        int                     error = 0;
5963        int                     logflags = 0;
5964
5965        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5966            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5967                return -EFSCORRUPTED;
5968        }
5969
5970        if (XFS_FORCED_SHUTDOWN(mp))
5971                return -EIO;
5972
5973        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5974
5975        error = xfs_iread_extents(tp, ip, whichfork);
5976        if (error)
5977                return error;
5978
5979        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
5980                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5981                cur->bc_ino.flags = 0;
5982        }
5983
5984        if (*next_fsb == NULLFSBLOCK) {
5985                xfs_iext_last(ifp, &icur);
5986                if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5987                    stop_fsb > got.br_startoff) {
5988                        *done = true;
5989                        goto del_cursor;
5990                }
5991        } else {
5992                if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5993                        *done = true;
5994                        goto del_cursor;
5995                }
5996        }
5997        if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5998                error = -EFSCORRUPTED;
5999                goto del_cursor;
6000        }
6001
6002        if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
6003                error = -EFSCORRUPTED;
6004                goto del_cursor;
6005        }
6006
6007        new_startoff = got.br_startoff + offset_shift_fsb;
6008        if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
6009                if (new_startoff + got.br_blockcount > next.br_startoff) {
6010                        error = -EINVAL;
6011                        goto del_cursor;
6012                }
6013
6014                /*
6015                 * Unlike a left shift (which involves a hole punch), a right
6016                 * shift does not modify extent neighbors in any way.  We should
6017                 * never find mergeable extents in this scenario.  Check anyways
6018                 * and warn if we encounter two extents that could be one.
6019                 */
6020                if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
6021                        WARN_ON_ONCE(1);
6022        }
6023
6024        error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
6025                        cur, &logflags, new_startoff);
6026        if (error)
6027                goto del_cursor;
6028
6029        if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
6030            stop_fsb >= got.br_startoff + got.br_blockcount) {
6031                *done = true;
6032                goto del_cursor;
6033        }
6034
6035        *next_fsb = got.br_startoff;
6036del_cursor:
6037        if (cur)
6038                xfs_btree_del_cursor(cur, error);
6039        if (logflags)
6040                xfs_trans_log_inode(tp, ip, logflags);
6041        return error;
6042}
6043
6044/*
6045 * Splits an extent into two extents at split_fsb block such that it is the
6046 * first block of the current_ext. @ext is a target extent to be split.
6047 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
6048 * hole or the first block of extents, just return 0.
6049 */
6050int
6051xfs_bmap_split_extent(
6052        struct xfs_trans        *tp,
6053        struct xfs_inode        *ip,
6054        xfs_fileoff_t           split_fsb)
6055{
6056        int                             whichfork = XFS_DATA_FORK;
6057        struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
6058        struct xfs_btree_cur            *cur = NULL;
6059        struct xfs_bmbt_irec            got;
6060        struct xfs_bmbt_irec            new; /* split extent */
6061        struct xfs_mount                *mp = ip->i_mount;
6062        xfs_fsblock_t                   gotblkcnt; /* new block count for got */
6063        struct xfs_iext_cursor          icur;
6064        int                             error = 0;
6065        int                             logflags = 0;
6066        int                             i = 0;
6067
6068        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6069            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6070                return -EFSCORRUPTED;
6071        }
6072
6073        if (XFS_FORCED_SHUTDOWN(mp))
6074                return -EIO;
6075
6076        /* Read in all the extents */
6077        error = xfs_iread_extents(tp, ip, whichfork);
6078        if (error)
6079                return error;
6080
6081        /*
6082         * If there are not extents, or split_fsb lies in a hole we are done.
6083         */
6084        if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6085            got.br_startoff >= split_fsb)
6086                return 0;
6087
6088        gotblkcnt = split_fsb - got.br_startoff;
6089        new.br_startoff = split_fsb;
6090        new.br_startblock = got.br_startblock + gotblkcnt;
6091        new.br_blockcount = got.br_blockcount - gotblkcnt;
6092        new.br_state = got.br_state;
6093
6094        if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
6095                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6096                cur->bc_ino.flags = 0;
6097                error = xfs_bmbt_lookup_eq(cur, &got, &i);
6098                if (error)
6099                        goto del_cursor;
6100                if (XFS_IS_CORRUPT(mp, i != 1)) {
6101                        error = -EFSCORRUPTED;
6102                        goto del_cursor;
6103                }
6104        }
6105
6106        got.br_blockcount = gotblkcnt;
6107        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6108                        &got);
6109
6110        logflags = XFS_ILOG_CORE;
6111        if (cur) {
6112                error = xfs_bmbt_update(cur, &got);
6113                if (error)
6114                        goto del_cursor;
6115        } else
6116                logflags |= XFS_ILOG_DEXT;
6117
6118        /* Add new extent */
6119        xfs_iext_next(ifp, &icur);
6120        xfs_iext_insert(ip, &icur, &new, 0);
6121        ifp->if_nextents++;
6122
6123        if (cur) {
6124                error = xfs_bmbt_lookup_eq(cur, &new, &i);
6125                if (error)
6126                        goto del_cursor;
6127                if (XFS_IS_CORRUPT(mp, i != 0)) {
6128                        error = -EFSCORRUPTED;
6129                        goto del_cursor;
6130                }
6131                error = xfs_btree_insert(cur, &i);
6132                if (error)
6133                        goto del_cursor;
6134                if (XFS_IS_CORRUPT(mp, i != 1)) {
6135                        error = -EFSCORRUPTED;
6136                        goto del_cursor;
6137                }
6138        }
6139
6140        /*
6141         * Convert to a btree if necessary.
6142         */
6143        if (xfs_bmap_needs_btree(ip, whichfork)) {
6144                int tmp_logflags; /* partial log flag return val */
6145
6146                ASSERT(cur == NULL);
6147                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6148                                &tmp_logflags, whichfork);
6149                logflags |= tmp_logflags;
6150        }
6151
6152del_cursor:
6153        if (cur) {
6154                cur->bc_ino.allocated = 0;
6155                xfs_btree_del_cursor(cur, error);
6156        }
6157
6158        if (logflags)
6159                xfs_trans_log_inode(tp, ip, logflags);
6160        return error;
6161}
6162
6163/* Deferred mapping is only for real extents in the data fork. */
6164static bool
6165xfs_bmap_is_update_needed(
6166        struct xfs_bmbt_irec    *bmap)
6167{
6168        return  bmap->br_startblock != HOLESTARTBLOCK &&
6169                bmap->br_startblock != DELAYSTARTBLOCK;
6170}
6171
6172/* Record a bmap intent. */
6173static int
6174__xfs_bmap_add(
6175        struct xfs_trans                *tp,
6176        enum xfs_bmap_intent_type       type,
6177        struct xfs_inode                *ip,
6178        int                             whichfork,
6179        struct xfs_bmbt_irec            *bmap)
6180{
6181        struct xfs_bmap_intent          *bi;
6182
6183        trace_xfs_bmap_defer(tp->t_mountp,
6184                        XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6185                        type,
6186                        XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6187                        ip->i_ino, whichfork,
6188                        bmap->br_startoff,
6189                        bmap->br_blockcount,
6190                        bmap->br_state);
6191
6192        bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6193        INIT_LIST_HEAD(&bi->bi_list);
6194        bi->bi_type = type;
6195        bi->bi_owner = ip;
6196        bi->bi_whichfork = whichfork;
6197        bi->bi_bmap = *bmap;
6198
6199        xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6200        return 0;
6201}
6202
6203/* Map an extent into a file. */
6204void
6205xfs_bmap_map_extent(
6206        struct xfs_trans        *tp,
6207        struct xfs_inode        *ip,
6208        struct xfs_bmbt_irec    *PREV)
6209{
6210        if (!xfs_bmap_is_update_needed(PREV))
6211                return;
6212
6213        __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6214}
6215
6216/* Unmap an extent out of a file. */
6217void
6218xfs_bmap_unmap_extent(
6219        struct xfs_trans        *tp,
6220        struct xfs_inode        *ip,
6221        struct xfs_bmbt_irec    *PREV)
6222{
6223        if (!xfs_bmap_is_update_needed(PREV))
6224                return;
6225
6226        __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6227}
6228
6229/*
6230 * Process one of the deferred bmap operations.  We pass back the
6231 * btree cursor to maintain our lock on the bmapbt between calls.
6232 */
6233int
6234xfs_bmap_finish_one(
6235        struct xfs_trans                *tp,
6236        struct xfs_inode                *ip,
6237        enum xfs_bmap_intent_type       type,
6238        int                             whichfork,
6239        xfs_fileoff_t                   startoff,
6240        xfs_fsblock_t                   startblock,
6241        xfs_filblks_t                   *blockcount,
6242        xfs_exntst_t                    state)
6243{
6244        int                             error = 0;
6245
6246        ASSERT(tp->t_firstblock == NULLFSBLOCK);
6247
6248        trace_xfs_bmap_deferred(tp->t_mountp,
6249                        XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6250                        XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6251                        ip->i_ino, whichfork, startoff, *blockcount, state);
6252
6253        if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6254                return -EFSCORRUPTED;
6255
6256        if (XFS_TEST_ERROR(false, tp->t_mountp,
6257                        XFS_ERRTAG_BMAP_FINISH_ONE))
6258                return -EIO;
6259
6260        switch (type) {
6261        case XFS_BMAP_MAP:
6262                error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6263                                startblock, 0);
6264                *blockcount = 0;
6265                break;
6266        case XFS_BMAP_UNMAP:
6267                error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6268                                XFS_BMAPI_REMAP, 1);
6269                break;
6270        default:
6271                ASSERT(0);
6272                error = -EFSCORRUPTED;
6273        }
6274
6275        return error;
6276}
6277
6278/* Check that an inode's extent does not have invalid flags or bad ranges. */
6279xfs_failaddr_t
6280xfs_bmap_validate_extent(
6281        struct xfs_inode        *ip,
6282        int                     whichfork,
6283        struct xfs_bmbt_irec    *irec)
6284{
6285        struct xfs_mount        *mp = ip->i_mount;
6286
6287        if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
6288                return __this_address;
6289
6290        if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
6291                if (!xfs_verify_rtext(mp, irec->br_startblock,
6292                                          irec->br_blockcount))
6293                        return __this_address;
6294        } else {
6295                if (!xfs_verify_fsbext(mp, irec->br_startblock,
6296                                           irec->br_blockcount))
6297                        return __this_address;
6298        }
6299        if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6300                return __this_address;
6301        return NULL;
6302}
6303