linux/fs/xfs/libxfs/xfs_bmap.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_bit.h"
  13#include "xfs_sb.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_dir2.h"
  17#include "xfs_inode.h"
  18#include "xfs_btree.h"
  19#include "xfs_trans.h"
  20#include "xfs_alloc.h"
  21#include "xfs_bmap.h"
  22#include "xfs_bmap_util.h"
  23#include "xfs_bmap_btree.h"
  24#include "xfs_rtalloc.h"
  25#include "xfs_errortag.h"
  26#include "xfs_error.h"
  27#include "xfs_quota.h"
  28#include "xfs_trans_space.h"
  29#include "xfs_buf_item.h"
  30#include "xfs_trace.h"
  31#include "xfs_attr_leaf.h"
  32#include "xfs_filestream.h"
  33#include "xfs_rmap.h"
  34#include "xfs_ag_resv.h"
  35#include "xfs_refcount.h"
  36#include "xfs_icache.h"
  37#include "xfs_iomap.h"
  38
  39
/*
 * Cache from which __xfs_bmap_add_free() allocates the
 * struct xfs_extent_free_item entries it queues.  That function asserts
 * this pointer is non-NULL before allocating from it.
 */
kmem_zone_t             *xfs_bmap_free_item_zone;
  41
  42/*
  43 * Miscellaneous helper functions
  44 */
  45
  46/*
  47 * Compute and fill in the value of the maximum depth of a bmap btree
  48 * in this filesystem.  Done once, during mount.
  49 */
  50void
  51xfs_bmap_compute_maxlevels(
  52        xfs_mount_t     *mp,            /* file system mount structure */
  53        int             whichfork)      /* data or attr fork */
  54{
  55        int             level;          /* btree level */
  56        uint            maxblocks;      /* max blocks at this level */
  57        uint            maxleafents;    /* max leaf entries possible */
  58        int             maxrootrecs;    /* max records in root block */
  59        int             minleafrecs;    /* min records in leaf block */
  60        int             minnoderecs;    /* min records in node block */
  61        int             sz;             /* root block size */
  62
  63        /*
  64         * The maximum number of extents in a file, hence the maximum number of
  65         * leaf entries, is controlled by the size of the on-disk extent count,
  66         * either a signed 32-bit number for the data fork, or a signed 16-bit
  67         * number for the attr fork.
  68         *
  69         * Note that we can no longer assume that if we are in ATTR1 that
  70         * the fork offset of all the inodes will be
  71         * (xfs_default_attroffset(ip) >> 3) because we could have mounted
  72         * with ATTR2 and then mounted back with ATTR1, keeping the
  73         * di_forkoff's fixed but probably at various positions. Therefore,
  74         * for both ATTR1 and ATTR2 we have to assume the worst case scenario
  75         * of a minimum size available.
  76         */
  77        if (whichfork == XFS_DATA_FORK) {
  78                maxleafents = MAXEXTNUM;
  79                sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  80        } else {
  81                maxleafents = MAXAEXTNUM;
  82                sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  83        }
  84        maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  85        minleafrecs = mp->m_bmap_dmnr[0];
  86        minnoderecs = mp->m_bmap_dmnr[1];
  87        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  88        for (level = 1; maxblocks > 1; level++) {
  89                if (maxblocks <= maxrootrecs)
  90                        maxblocks = 1;
  91                else
  92                        maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  93        }
  94        mp->m_bm_maxlevels[whichfork] = level;
  95}
  96
  97STATIC int                              /* error */
  98xfs_bmbt_lookup_eq(
  99        struct xfs_btree_cur    *cur,
 100        struct xfs_bmbt_irec    *irec,
 101        int                     *stat)  /* success/failure */
 102{
 103        cur->bc_rec.b = *irec;
 104        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 105}
 106
 107STATIC int                              /* error */
 108xfs_bmbt_lookup_first(
 109        struct xfs_btree_cur    *cur,
 110        int                     *stat)  /* success/failure */
 111{
 112        cur->bc_rec.b.br_startoff = 0;
 113        cur->bc_rec.b.br_startblock = 0;
 114        cur->bc_rec.b.br_blockcount = 0;
 115        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 116}
 117
 118/*
 119 * Check if the inode needs to be converted to btree format.
 120 */
 121static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 122{
 123        struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 124
 125        return whichfork != XFS_COW_FORK &&
 126                ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 127                ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
 128}
 129
 130/*
 131 * Check if the inode should be converted to extent format.
 132 */
 133static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 134{
 135        struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 136
 137        return whichfork != XFS_COW_FORK &&
 138                ifp->if_format == XFS_DINODE_FMT_BTREE &&
 139                ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
 140}
 141
 142/*
 143 * Update the record referred to by cur to the value given by irec
 144 * This either works (return 0) or gets an EFSCORRUPTED error.
 145 */
 146STATIC int
 147xfs_bmbt_update(
 148        struct xfs_btree_cur    *cur,
 149        struct xfs_bmbt_irec    *irec)
 150{
 151        union xfs_btree_rec     rec;
 152
 153        xfs_bmbt_disk_set_all(&rec.bmbt, irec);
 154        return xfs_btree_update(cur, &rec);
 155}
 156
 157/*
 158 * Compute the worst-case number of indirect blocks that will be used
 159 * for ip's delayed extent of length "len".
 160 */
 161STATIC xfs_filblks_t
 162xfs_bmap_worst_indlen(
 163        xfs_inode_t     *ip,            /* incore inode pointer */
 164        xfs_filblks_t   len)            /* delayed extent length */
 165{
 166        int             level;          /* btree level number */
 167        int             maxrecs;        /* maximum record count at this level */
 168        xfs_mount_t     *mp;            /* mount structure */
 169        xfs_filblks_t   rval;           /* return value */
 170
 171        mp = ip->i_mount;
 172        maxrecs = mp->m_bmap_dmxr[0];
 173        for (level = 0, rval = 0;
 174             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 175             level++) {
 176                len += maxrecs - 1;
 177                do_div(len, maxrecs);
 178                rval += len;
 179                if (len == 1)
 180                        return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 181                                level - 1;
 182                if (level == 0)
 183                        maxrecs = mp->m_bmap_dmxr[1];
 184        }
 185        return rval;
 186}
 187
 188/*
 189 * Calculate the default attribute fork offset for newly created inodes.
 190 */
 191uint
 192xfs_default_attroffset(
 193        struct xfs_inode        *ip)
 194{
 195        struct xfs_mount        *mp = ip->i_mount;
 196        uint                    offset;
 197
 198        if (mp->m_sb.sb_inodesize == 256)
 199                offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
 200        else
 201                offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 202
 203        ASSERT(offset < XFS_LITINO(mp));
 204        return offset;
 205}
 206
 207/*
 208 * Helper routine to reset inode di_forkoff field when switching
 209 * attribute fork from local to extent format - we reset it where
 210 * possible to make space available for inline data fork extents.
 211 */
 212STATIC void
 213xfs_bmap_forkoff_reset(
 214        xfs_inode_t     *ip,
 215        int             whichfork)
 216{
 217        if (whichfork == XFS_ATTR_FORK &&
 218            ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
 219            ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
 220                uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 221
 222                if (dfl_forkoff > ip->i_d.di_forkoff)
 223                        ip->i_d.di_forkoff = dfl_forkoff;
 224        }
 225}
 226
 227#ifdef DEBUG
 228STATIC struct xfs_buf *
 229xfs_bmap_get_bp(
 230        struct xfs_btree_cur    *cur,
 231        xfs_fsblock_t           bno)
 232{
 233        struct xfs_log_item     *lip;
 234        int                     i;
 235
 236        if (!cur)
 237                return NULL;
 238
 239        for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 240                if (!cur->bc_bufs[i])
 241                        break;
 242                if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 243                        return cur->bc_bufs[i];
 244        }
 245
 246        /* Chase down all the log items to see if the bp is there */
 247        list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
 248                struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip;
 249
 250                if (bip->bli_item.li_type == XFS_LI_BUF &&
 251                    XFS_BUF_ADDR(bip->bli_buf) == bno)
 252                        return bip->bli_buf;
 253        }
 254
 255        return NULL;
 256}
 257
 258STATIC void
 259xfs_check_block(
 260        struct xfs_btree_block  *block,
 261        xfs_mount_t             *mp,
 262        int                     root,
 263        short                   sz)
 264{
 265        int                     i, j, dmxr;
 266        __be64                  *pp, *thispa;   /* pointer to block address */
 267        xfs_bmbt_key_t          *prevp, *keyp;
 268
 269        ASSERT(be16_to_cpu(block->bb_level) > 0);
 270
 271        prevp = NULL;
 272        for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 273                dmxr = mp->m_bmap_dmxr[0];
 274                keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 275
 276                if (prevp) {
 277                        ASSERT(be64_to_cpu(prevp->br_startoff) <
 278                               be64_to_cpu(keyp->br_startoff));
 279                }
 280                prevp = keyp;
 281
 282                /*
 283                 * Compare the block numbers to see if there are dups.
 284                 */
 285                if (root)
 286                        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 287                else
 288                        pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 289
 290                for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 291                        if (root)
 292                                thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 293                        else
 294                                thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 295                        if (*thispa == *pp) {
 296                                xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 297                                        __func__, j, i,
 298                                        (unsigned long long)be64_to_cpu(*thispa));
 299                                xfs_err(mp, "%s: ptrs are equal in node\n",
 300                                        __func__);
 301                                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 302                        }
 303                }
 304        }
 305}
 306
 307/*
 308 * Check that the extents for the inode ip are in the right order in all
 309 * btree leaves. THis becomes prohibitively expensive for large extent count
 310 * files, so don't bother with inodes that have more than 10,000 extents in
 311 * them. The btree record ordering checks will still be done, so for such large
 312 * bmapbt constructs that is going to catch most corruptions.
 313 */
 314STATIC void
 315xfs_bmap_check_leaf_extents(
 316        xfs_btree_cur_t         *cur,   /* btree cursor or null */
 317        xfs_inode_t             *ip,            /* incore inode pointer */
 318        int                     whichfork)      /* data or attr fork */
 319{
 320        struct xfs_mount        *mp = ip->i_mount;
 321        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 322        struct xfs_btree_block  *block; /* current btree block */
 323        xfs_fsblock_t           bno;    /* block # of "block" */
 324        xfs_buf_t               *bp;    /* buffer for "block" */
 325        int                     error;  /* error return value */
 326        xfs_extnum_t            i=0, j; /* index into the extents list */
 327        int                     level;  /* btree level, for checking */
 328        __be64                  *pp;    /* pointer to block address */
 329        xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
 330        xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
 331        xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
 332        int                     bp_release = 0;
 333
 334        if (ifp->if_format != XFS_DINODE_FMT_BTREE)
 335                return;
 336
 337        /* skip large extent count inodes */
 338        if (ip->i_df.if_nextents > 10000)
 339                return;
 340
 341        bno = NULLFSBLOCK;
 342        block = ifp->if_broot;
 343        /*
 344         * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 345         */
 346        level = be16_to_cpu(block->bb_level);
 347        ASSERT(level > 0);
 348        xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 349        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 350        bno = be64_to_cpu(*pp);
 351
 352        ASSERT(bno != NULLFSBLOCK);
 353        ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 354        ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 355
 356        /*
 357         * Go down the tree until leaf level is reached, following the first
 358         * pointer (leftmost) at each level.
 359         */
 360        while (level-- > 0) {
 361                /* See if buf is in cur first */
 362                bp_release = 0;
 363                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 364                if (!bp) {
 365                        bp_release = 1;
 366                        error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 367                                                XFS_BMAP_BTREE_REF,
 368                                                &xfs_bmbt_buf_ops);
 369                        if (error)
 370                                goto error_norelse;
 371                }
 372                block = XFS_BUF_TO_BLOCK(bp);
 373                if (level == 0)
 374                        break;
 375
 376                /*
 377                 * Check this block for basic sanity (increasing keys and
 378                 * no duplicate blocks).
 379                 */
 380
 381                xfs_check_block(block, mp, 0, 0);
 382                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 383                bno = be64_to_cpu(*pp);
 384                if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
 385                        error = -EFSCORRUPTED;
 386                        goto error0;
 387                }
 388                if (bp_release) {
 389                        bp_release = 0;
 390                        xfs_trans_brelse(NULL, bp);
 391                }
 392        }
 393
 394        /*
 395         * Here with bp and block set to the leftmost leaf node in the tree.
 396         */
 397        i = 0;
 398
 399        /*
 400         * Loop over all leaf nodes checking that all extents are in the right order.
 401         */
 402        for (;;) {
 403                xfs_fsblock_t   nextbno;
 404                xfs_extnum_t    num_recs;
 405
 406
 407                num_recs = xfs_btree_get_numrecs(block);
 408
 409                /*
 410                 * Read-ahead the next leaf block, if any.
 411                 */
 412
 413                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 414
 415                /*
 416                 * Check all the extents to make sure they are OK.
 417                 * If we had a previous block, the last entry should
 418                 * conform with the first entry in this one.
 419                 */
 420
 421                ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 422                if (i) {
 423                        ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 424                               xfs_bmbt_disk_get_blockcount(&last) <=
 425                               xfs_bmbt_disk_get_startoff(ep));
 426                }
 427                for (j = 1; j < num_recs; j++) {
 428                        nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 429                        ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 430                               xfs_bmbt_disk_get_blockcount(ep) <=
 431                               xfs_bmbt_disk_get_startoff(nextp));
 432                        ep = nextp;
 433                }
 434
 435                last = *ep;
 436                i += num_recs;
 437                if (bp_release) {
 438                        bp_release = 0;
 439                        xfs_trans_brelse(NULL, bp);
 440                }
 441                bno = nextbno;
 442                /*
 443                 * If we've reached the end, stop.
 444                 */
 445                if (bno == NULLFSBLOCK)
 446                        break;
 447
 448                bp_release = 0;
 449                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 450                if (!bp) {
 451                        bp_release = 1;
 452                        error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
 453                                                XFS_BMAP_BTREE_REF,
 454                                                &xfs_bmbt_buf_ops);
 455                        if (error)
 456                                goto error_norelse;
 457                }
 458                block = XFS_BUF_TO_BLOCK(bp);
 459        }
 460
 461        return;
 462
 463error0:
 464        xfs_warn(mp, "%s: at error0", __func__);
 465        if (bp_release)
 466                xfs_trans_brelse(NULL, bp);
 467error_norelse:
 468        xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 469                __func__, i);
 470        xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
 471        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 472        return;
 473}
 474
 475/*
 476 * Validate that the bmbt_irecs being returned from bmapi are valid
 477 * given the caller's original parameters.  Specifically check the
 478 * ranges of the returned irecs to ensure that they only extend beyond
 479 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 480 */
 481STATIC void
 482xfs_bmap_validate_ret(
 483        xfs_fileoff_t           bno,
 484        xfs_filblks_t           len,
 485        int                     flags,
 486        xfs_bmbt_irec_t         *mval,
 487        int                     nmap,
 488        int                     ret_nmap)
 489{
 490        int                     i;              /* index to map values */
 491
 492        ASSERT(ret_nmap <= nmap);
 493
 494        for (i = 0; i < ret_nmap; i++) {
 495                ASSERT(mval[i].br_blockcount > 0);
 496                if (!(flags & XFS_BMAPI_ENTIRE)) {
 497                        ASSERT(mval[i].br_startoff >= bno);
 498                        ASSERT(mval[i].br_blockcount <= len);
 499                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 500                               bno + len);
 501                } else {
 502                        ASSERT(mval[i].br_startoff < bno + len);
 503                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 504                               bno);
 505                }
 506                ASSERT(i == 0 ||
 507                       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 508                       mval[i].br_startoff);
 509                ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 510                       mval[i].br_startblock != HOLESTARTBLOCK);
 511                ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 512                       mval[i].br_state == XFS_EXT_UNWRITTEN);
 513        }
 514}
 515
 516#else
 517#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
 518#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)    do { } while (0)
 519#endif /* DEBUG */
 520
 521/*
 522 * bmap free list manipulation functions
 523 */
 524
 525/*
 526 * Add the extent to the list of extents to be free at transaction end.
 527 * The list is maintained sorted (by block number).
 528 */
 529void
 530__xfs_bmap_add_free(
 531        struct xfs_trans                *tp,
 532        xfs_fsblock_t                   bno,
 533        xfs_filblks_t                   len,
 534        const struct xfs_owner_info     *oinfo,
 535        bool                            skip_discard)
 536{
 537        struct xfs_extent_free_item     *new;           /* new element */
 538#ifdef DEBUG
 539        struct xfs_mount                *mp = tp->t_mountp;
 540        xfs_agnumber_t                  agno;
 541        xfs_agblock_t                   agbno;
 542
 543        ASSERT(bno != NULLFSBLOCK);
 544        ASSERT(len > 0);
 545        ASSERT(len <= MAXEXTLEN);
 546        ASSERT(!isnullstartblock(bno));
 547        agno = XFS_FSB_TO_AGNO(mp, bno);
 548        agbno = XFS_FSB_TO_AGBNO(mp, bno);
 549        ASSERT(agno < mp->m_sb.sb_agcount);
 550        ASSERT(agbno < mp->m_sb.sb_agblocks);
 551        ASSERT(len < mp->m_sb.sb_agblocks);
 552        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 553#endif
 554        ASSERT(xfs_bmap_free_item_zone != NULL);
 555
 556        new = kmem_cache_alloc(xfs_bmap_free_item_zone,
 557                               GFP_KERNEL | __GFP_NOFAIL);
 558        new->xefi_startblock = bno;
 559        new->xefi_blockcount = (xfs_extlen_t)len;
 560        if (oinfo)
 561                new->xefi_oinfo = *oinfo;
 562        else
 563                new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
 564        new->xefi_skip_discard = skip_discard;
 565        trace_xfs_bmap_free_defer(tp->t_mountp,
 566                        XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
 567                        XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
 568        xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 569}
 570
 571/*
 572 * Inode fork format manipulation functions
 573 */
 574
 575/*
 576 * Convert the inode format to extent format if it currently is in btree format,
 577 * but the extent list is small enough that it fits into the extent format.
 578 *
 579 * Since the extents are already in-core, all we have to do is give up the space
 580 * for the btree root and pitch the leaf block.
 581 */
 582STATIC int                              /* error */
 583xfs_bmap_btree_to_extents(
 584        struct xfs_trans        *tp,    /* transaction pointer */
 585        struct xfs_inode        *ip,    /* incore inode pointer */
 586        struct xfs_btree_cur    *cur,   /* btree cursor */
 587        int                     *logflagsp, /* inode logging flags */
 588        int                     whichfork)  /* data or attr fork */
 589{
 590        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 591        struct xfs_mount        *mp = ip->i_mount;
 592        struct xfs_btree_block  *rblock = ifp->if_broot;
 593        struct xfs_btree_block  *cblock;/* child btree block */
 594        xfs_fsblock_t           cbno;   /* child block number */
 595        xfs_buf_t               *cbp;   /* child block's buffer */
 596        int                     error;  /* error return value */
 597        __be64                  *pp;    /* ptr to block address */
 598        struct xfs_owner_info   oinfo;
 599
 600        /* check if we actually need the extent format first: */
 601        if (!xfs_bmap_wants_extents(ip, whichfork))
 602                return 0;
 603
 604        ASSERT(cur);
 605        ASSERT(whichfork != XFS_COW_FORK);
 606        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 607        ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 608        ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 609        ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 610        ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 611
 612        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 613        cbno = be64_to_cpu(*pp);
 614#ifdef DEBUG
 615        if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
 616                return -EFSCORRUPTED;
 617#endif
 618        error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
 619                                &xfs_bmbt_buf_ops);
 620        if (error)
 621                return error;
 622        cblock = XFS_BUF_TO_BLOCK(cbp);
 623        if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 624                return error;
 625        xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 626        xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 627        ip->i_d.di_nblocks--;
 628        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 629        xfs_trans_binval(tp, cbp);
 630        if (cur->bc_bufs[0] == cbp)
 631                cur->bc_bufs[0] = NULL;
 632        xfs_iroot_realloc(ip, -1, whichfork);
 633        ASSERT(ifp->if_broot == NULL);
 634        ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 635        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 636        *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 637        return 0;
 638}
 639
 640/*
 641 * Convert an extents-format file into a btree-format file.
 642 * The new file will have a root block (in the inode) and a single child block.
 643 */
 644STATIC int                                      /* error */
 645xfs_bmap_extents_to_btree(
 646        struct xfs_trans        *tp,            /* transaction pointer */
 647        struct xfs_inode        *ip,            /* incore inode pointer */
 648        struct xfs_btree_cur    **curp,         /* cursor returned to caller */
 649        int                     wasdel,         /* converting a delayed alloc */
 650        int                     *logflagsp,     /* inode logging flags */
 651        int                     whichfork)      /* data or attr fork */
 652{
 653        struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
 654        struct xfs_buf          *abp;           /* buffer for ablock */
 655        struct xfs_alloc_arg    args;           /* allocation arguments */
 656        struct xfs_bmbt_rec     *arp;           /* child record pointer */
 657        struct xfs_btree_block  *block;         /* btree root block */
 658        struct xfs_btree_cur    *cur;           /* bmap btree cursor */
 659        int                     error;          /* error return value */
 660        struct xfs_ifork        *ifp;           /* inode fork pointer */
 661        struct xfs_bmbt_key     *kp;            /* root block key pointer */
 662        struct xfs_mount        *mp;            /* mount structure */
 663        xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
 664        struct xfs_iext_cursor  icur;
 665        struct xfs_bmbt_irec    rec;
 666        xfs_extnum_t            cnt = 0;
 667
 668        mp = ip->i_mount;
 669        ASSERT(whichfork != XFS_COW_FORK);
 670        ifp = XFS_IFORK_PTR(ip, whichfork);
 671        ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
 672
 673        /*
 674         * Make space in the inode incore. This needs to be undone if we fail
 675         * to expand the root.
 676         */
 677        xfs_iroot_realloc(ip, 1, whichfork);
 678        ifp->if_flags |= XFS_IFBROOT;
 679
 680        /*
 681         * Fill in the root.
 682         */
 683        block = ifp->if_broot;
 684        xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 685                                 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
 686                                 XFS_BTREE_LONG_PTRS);
 687        /*
 688         * Need a cursor.  Can't allocate until bb_level is filled in.
 689         */
 690        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 691        cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
 692        /*
 693         * Convert to a btree with two levels, one record in root.
 694         */
 695        ifp->if_format = XFS_DINODE_FMT_BTREE;
 696        memset(&args, 0, sizeof(args));
 697        args.tp = tp;
 698        args.mp = mp;
 699        xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 700        if (tp->t_firstblock == NULLFSBLOCK) {
 701                args.type = XFS_ALLOCTYPE_START_BNO;
 702                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 703        } else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 704                args.type = XFS_ALLOCTYPE_START_BNO;
 705                args.fsbno = tp->t_firstblock;
 706        } else {
 707                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 708                args.fsbno = tp->t_firstblock;
 709        }
 710        args.minlen = args.maxlen = args.prod = 1;
 711        args.wasdel = wasdel;
 712        *logflagsp = 0;
 713        error = xfs_alloc_vextent(&args);
 714        if (error)
 715                goto out_root_realloc;
 716
 717        if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 718                error = -ENOSPC;
 719                goto out_root_realloc;
 720        }
 721
 722        /*
 723         * Allocation can't fail, the space was reserved.
 724         */
 725        ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 726               args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
 727        tp->t_firstblock = args.fsbno;
 728        cur->bc_ino.allocated++;
 729        ip->i_d.di_nblocks++;
 730        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 731        error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 732                        XFS_FSB_TO_DADDR(mp, args.fsbno),
 733                        mp->m_bsize, 0, &abp);
 734        if (error)
 735                goto out_unreserve_dquot;
 736
 737        /*
 738         * Fill in the child block.
 739         */
 740        abp->b_ops = &xfs_bmbt_buf_ops;
 741        ablock = XFS_BUF_TO_BLOCK(abp);
 742        xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 743                                XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
 744                                XFS_BTREE_LONG_PTRS);
 745
 746        for_each_xfs_iext(ifp, &icur, &rec) {
 747                if (isnullstartblock(rec.br_startblock))
 748                        continue;
 749                arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
 750                xfs_bmbt_disk_set_all(arp, &rec);
 751                cnt++;
 752        }
 753        ASSERT(cnt == ifp->if_nextents);
 754        xfs_btree_set_numrecs(ablock, cnt);
 755
 756        /*
 757         * Fill in the root key and pointer.
 758         */
 759        kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 760        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 761        kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 762        pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 763                                                be16_to_cpu(block->bb_level)));
 764        *pp = cpu_to_be64(args.fsbno);
 765
 766        /*
 767         * Do all this logging at the end so that
 768         * the root is at the right level.
 769         */
 770        xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 771        xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 772        ASSERT(*curp == NULL);
 773        *curp = cur;
 774        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 775        return 0;
 776
 777out_unreserve_dquot:
 778        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 779out_root_realloc:
 780        xfs_iroot_realloc(ip, -1, whichfork);
 781        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 782        ASSERT(ifp->if_broot == NULL);
 783        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 784
 785        return error;
 786}
 787
 788/*
 789 * Convert a local file to an extents file.
 790 * This code is out of bounds for data forks of regular files,
 791 * since the file data needs to get logged so things will stay consistent.
 792 * (The bmap-level manipulations are ok, though).
 793 */
 794void
 795xfs_bmap_local_to_extents_empty(
 796        struct xfs_trans        *tp,
 797        struct xfs_inode        *ip,
 798        int                     whichfork)
 799{
 800        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 801
 802        ASSERT(whichfork != XFS_COW_FORK);
 803        ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 804        ASSERT(ifp->if_bytes == 0);
 805        ASSERT(ifp->if_nextents == 0);
 806
 807        xfs_bmap_forkoff_reset(ip, whichfork);
 808        ifp->if_flags &= ~XFS_IFINLINE;
 809        ifp->if_flags |= XFS_IFEXTENTS;
 810        ifp->if_u1.if_root = NULL;
 811        ifp->if_height = 0;
 812        ifp->if_format = XFS_DINODE_FMT_EXTENTS;
 813        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 814}
 815
 816
 817STATIC int                              /* error */
 818xfs_bmap_local_to_extents(
 819        xfs_trans_t     *tp,            /* transaction pointer */
 820        xfs_inode_t     *ip,            /* incore inode pointer */
 821        xfs_extlen_t    total,          /* total blocks needed by transaction */
 822        int             *logflagsp,     /* inode logging flags */
 823        int             whichfork,
 824        void            (*init_fn)(struct xfs_trans *tp,
 825                                   struct xfs_buf *bp,
 826                                   struct xfs_inode *ip,
 827                                   struct xfs_ifork *ifp))
 828{
 829        int             error = 0;
 830        int             flags;          /* logging flags returned */
 831        struct xfs_ifork *ifp;          /* inode fork pointer */
 832        xfs_alloc_arg_t args;           /* allocation arguments */
 833        xfs_buf_t       *bp;            /* buffer for extent block */
 834        struct xfs_bmbt_irec rec;
 835        struct xfs_iext_cursor icur;
 836
 837        /*
 838         * We don't want to deal with the case of keeping inode data inline yet.
 839         * So sending the data fork of a regular inode is invalid.
 840         */
 841        ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 842        ifp = XFS_IFORK_PTR(ip, whichfork);
 843        ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
 844
 845        if (!ifp->if_bytes) {
 846                xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 847                flags = XFS_ILOG_CORE;
 848                goto done;
 849        }
 850
 851        flags = 0;
 852        error = 0;
 853        ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
 854        memset(&args, 0, sizeof(args));
 855        args.tp = tp;
 856        args.mp = ip->i_mount;
 857        xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 858        /*
 859         * Allocate a block.  We know we need only one, since the
 860         * file currently fits in an inode.
 861         */
 862        if (tp->t_firstblock == NULLFSBLOCK) {
 863                args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 864                args.type = XFS_ALLOCTYPE_START_BNO;
 865        } else {
 866                args.fsbno = tp->t_firstblock;
 867                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 868        }
 869        args.total = total;
 870        args.minlen = args.maxlen = args.prod = 1;
 871        error = xfs_alloc_vextent(&args);
 872        if (error)
 873                goto done;
 874
 875        /* Can't fail, the space was reserved. */
 876        ASSERT(args.fsbno != NULLFSBLOCK);
 877        ASSERT(args.len == 1);
 878        tp->t_firstblock = args.fsbno;
 879        error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
 880                        XFS_FSB_TO_DADDR(args.mp, args.fsbno),
 881                        args.mp->m_bsize, 0, &bp);
 882        if (error)
 883                goto done;
 884
 885        /*
 886         * Initialize the block, copy the data and log the remote buffer.
 887         *
 888         * The callout is responsible for logging because the remote format
 889         * might differ from the local format and thus we don't know how much to
 890         * log here. Note that init_fn must also set the buffer log item type
 891         * correctly.
 892         */
 893        init_fn(tp, bp, ip, ifp);
 894
 895        /* account for the change in fork size */
 896        xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 897        xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
 898        flags |= XFS_ILOG_CORE;
 899
 900        ifp->if_u1.if_root = NULL;
 901        ifp->if_height = 0;
 902
 903        rec.br_startoff = 0;
 904        rec.br_startblock = args.fsbno;
 905        rec.br_blockcount = 1;
 906        rec.br_state = XFS_EXT_NORM;
 907        xfs_iext_first(ifp, &icur);
 908        xfs_iext_insert(ip, &icur, &rec, 0);
 909
 910        ifp->if_nextents = 1;
 911        ip->i_d.di_nblocks = 1;
 912        xfs_trans_mod_dquot_byino(tp, ip,
 913                XFS_TRANS_DQ_BCOUNT, 1L);
 914        flags |= xfs_ilog_fext(whichfork);
 915
 916done:
 917        *logflagsp = flags;
 918        return error;
 919}
 920
 921/*
 922 * Called from xfs_bmap_add_attrfork to handle btree format files.
 923 */
 924STATIC int                                      /* error */
 925xfs_bmap_add_attrfork_btree(
 926        xfs_trans_t             *tp,            /* transaction pointer */
 927        xfs_inode_t             *ip,            /* incore inode pointer */
 928        int                     *flags)         /* inode logging flags */
 929{
 930        xfs_btree_cur_t         *cur;           /* btree cursor */
 931        int                     error;          /* error return value */
 932        xfs_mount_t             *mp;            /* file system mount struct */
 933        int                     stat;           /* newroot status */
 934
 935        mp = ip->i_mount;
 936        if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
 937                *flags |= XFS_ILOG_DBROOT;
 938        else {
 939                cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 940                error = xfs_bmbt_lookup_first(cur, &stat);
 941                if (error)
 942                        goto error0;
 943                /* must be at least one entry */
 944                if (XFS_IS_CORRUPT(mp, stat != 1)) {
 945                        error = -EFSCORRUPTED;
 946                        goto error0;
 947                }
 948                if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 949                        goto error0;
 950                if (stat == 0) {
 951                        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 952                        return -ENOSPC;
 953                }
 954                cur->bc_ino.allocated = 0;
 955                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 956        }
 957        return 0;
 958error0:
 959        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 960        return error;
 961}
 962
 963/*
 964 * Called from xfs_bmap_add_attrfork to handle extents format files.
 965 */
 966STATIC int                                      /* error */
 967xfs_bmap_add_attrfork_extents(
 968        struct xfs_trans        *tp,            /* transaction pointer */
 969        struct xfs_inode        *ip,            /* incore inode pointer */
 970        int                     *flags)         /* inode logging flags */
 971{
 972        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
 973        int                     error;          /* error return value */
 974
 975        if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
 976            XFS_IFORK_DSIZE(ip))
 977                return 0;
 978        cur = NULL;
 979        error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
 980                                          XFS_DATA_FORK);
 981        if (cur) {
 982                cur->bc_ino.allocated = 0;
 983                xfs_btree_del_cursor(cur, error);
 984        }
 985        return error;
 986}
 987
 988/*
 989 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 990 * different data fork content type needs a different callout to do the
 991 * conversion. Some are basic and only require special block initialisation
 992 * callouts for the data formating, others (directories) are so specialised they
 993 * handle everything themselves.
 994 *
 995 * XXX (dgc): investigate whether directory conversion can use the generic
 996 * formatting callout. It should be possible - it's just a very complex
 997 * formatter.
 998 */
 999STATIC int                                      /* error */
1000xfs_bmap_add_attrfork_local(
1001        struct xfs_trans        *tp,            /* transaction pointer */
1002        struct xfs_inode        *ip,            /* incore inode pointer */
1003        int                     *flags)         /* inode logging flags */
1004{
1005        struct xfs_da_args      dargs;          /* args for dir/attr code */
1006
1007        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1008                return 0;
1009
1010        if (S_ISDIR(VFS_I(ip)->i_mode)) {
1011                memset(&dargs, 0, sizeof(dargs));
1012                dargs.geo = ip->i_mount->m_dir_geo;
1013                dargs.dp = ip;
1014                dargs.total = dargs.geo->fsbcount;
1015                dargs.whichfork = XFS_DATA_FORK;
1016                dargs.trans = tp;
1017                return xfs_dir2_sf_to_block(&dargs);
1018        }
1019
1020        if (S_ISLNK(VFS_I(ip)->i_mode))
1021                return xfs_bmap_local_to_extents(tp, ip, 1, flags,
1022                                                 XFS_DATA_FORK,
1023                                                 xfs_symlink_local_to_remote);
1024
1025        /* should only be called for types that support local format data */
1026        ASSERT(0);
1027        return -EFSCORRUPTED;
1028}
1029
1030/* Set an inode attr fork off based on the format */
1031int
1032xfs_bmap_set_attrforkoff(
1033        struct xfs_inode        *ip,
1034        int                     size,
1035        int                     *version)
1036{
1037        switch (ip->i_df.if_format) {
1038        case XFS_DINODE_FMT_DEV:
1039                ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1040                break;
1041        case XFS_DINODE_FMT_LOCAL:
1042        case XFS_DINODE_FMT_EXTENTS:
1043        case XFS_DINODE_FMT_BTREE:
1044                ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1045                if (!ip->i_d.di_forkoff)
1046                        ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1047                else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
1048                        *version = 2;
1049                break;
1050        default:
1051                ASSERT(0);
1052                return -EINVAL;
1053        }
1054
1055        return 0;
1056}
1057
1058/*
1059 * Convert inode from non-attributed to attributed.
1060 * Must not be in a transaction, ip must not be locked.
1061 */
1062int                                             /* error code */
1063xfs_bmap_add_attrfork(
1064        xfs_inode_t             *ip,            /* incore inode pointer */
1065        int                     size,           /* space new attribute needs */
1066        int                     rsvd)           /* xact may use reserved blks */
1067{
1068        xfs_mount_t             *mp;            /* mount structure */
1069        xfs_trans_t             *tp;            /* transaction pointer */
1070        int                     blks;           /* space reservation */
1071        int                     version = 1;    /* superblock attr version */
1072        int                     logflags;       /* logging flags */
1073        int                     error;          /* error return value */
1074
1075        ASSERT(XFS_IFORK_Q(ip) == 0);
1076
1077        mp = ip->i_mount;
1078        ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1079
1080        blks = XFS_ADDAFORK_SPACE_RES(mp);
1081
1082        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1083                        rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1084        if (error)
1085                return error;
1086
1087        xfs_ilock(ip, XFS_ILOCK_EXCL);
1088        error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1089                        XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1090                        XFS_QMOPT_RES_REGBLKS);
1091        if (error)
1092                goto trans_cancel;
1093        if (XFS_IFORK_Q(ip))
1094                goto trans_cancel;
1095
1096        xfs_trans_ijoin(tp, ip, 0);
1097        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1098        error = xfs_bmap_set_attrforkoff(ip, size, &version);
1099        if (error)
1100                goto trans_cancel;
1101        ASSERT(ip->i_afp == NULL);
1102
1103        ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone,
1104                                      GFP_KERNEL | __GFP_NOFAIL);
1105
1106        ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
1107        ip->i_afp->if_flags = XFS_IFEXTENTS;
1108        logflags = 0;
1109        switch (ip->i_df.if_format) {
1110        case XFS_DINODE_FMT_LOCAL:
1111                error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
1112                break;
1113        case XFS_DINODE_FMT_EXTENTS:
1114                error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
1115                break;
1116        case XFS_DINODE_FMT_BTREE:
1117                error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
1118                break;
1119        default:
1120                error = 0;
1121                break;
1122        }
1123        if (logflags)
1124                xfs_trans_log_inode(tp, ip, logflags);
1125        if (error)
1126                goto trans_cancel;
1127        if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1128           (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1129                bool log_sb = false;
1130
1131                spin_lock(&mp->m_sb_lock);
1132                if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1133                        xfs_sb_version_addattr(&mp->m_sb);
1134                        log_sb = true;
1135                }
1136                if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1137                        xfs_sb_version_addattr2(&mp->m_sb);
1138                        log_sb = true;
1139                }
1140                spin_unlock(&mp->m_sb_lock);
1141                if (log_sb)
1142                        xfs_log_sb(tp);
1143        }
1144
1145        error = xfs_trans_commit(tp);
1146        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147        return error;
1148
1149trans_cancel:
1150        xfs_trans_cancel(tp);
1151        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1152        return error;
1153}
1154
1155/*
1156 * Internal and external extent tree search functions.
1157 */
1158
1159struct xfs_iread_state {
1160        struct xfs_iext_cursor  icur;
1161        xfs_extnum_t            loaded;
1162};
1163
1164/* Stuff every bmbt record from this block into the incore extent map. */
1165static int
1166xfs_iread_bmbt_block(
1167        struct xfs_btree_cur    *cur,
1168        int                     level,
1169        void                    *priv)
1170{
1171        struct xfs_iread_state  *ir = priv;
1172        struct xfs_mount        *mp = cur->bc_mp;
1173        struct xfs_inode        *ip = cur->bc_ino.ip;
1174        struct xfs_btree_block  *block;
1175        struct xfs_buf          *bp;
1176        struct xfs_bmbt_rec     *frp;
1177        xfs_extnum_t            num_recs;
1178        xfs_extnum_t            j;
1179        int                     whichfork = cur->bc_ino.whichfork;
1180        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1181
1182        block = xfs_btree_get_block(cur, level, &bp);
1183
1184        /* Abort if we find more records than nextents. */
1185        num_recs = xfs_btree_get_numrecs(block);
1186        if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
1187                xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
1188                                (unsigned long long)ip->i_ino);
1189                xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
1190                                sizeof(*block), __this_address);
1191                return -EFSCORRUPTED;
1192        }
1193
1194        /* Copy records into the incore cache. */
1195        frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1196        for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
1197                struct xfs_bmbt_irec    new;
1198                xfs_failaddr_t          fa;
1199
1200                xfs_bmbt_disk_get_all(frp, &new);
1201                fa = xfs_bmap_validate_extent(ip, whichfork, &new);
1202                if (fa) {
1203                        xfs_inode_verifier_error(ip, -EFSCORRUPTED,
1204                                        "xfs_iread_extents(2)", frp,
1205                                        sizeof(*frp), fa);
1206                        return -EFSCORRUPTED;
1207                }
1208                xfs_iext_insert(ip, &ir->icur, &new,
1209                                xfs_bmap_fork_to_state(whichfork));
1210                trace_xfs_read_extent(ip, &ir->icur,
1211                                xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
1212                xfs_iext_next(ifp, &ir->icur);
1213        }
1214
1215        return 0;
1216}
1217
1218/*
1219 * Read in extents from a btree-format inode.
1220 */
1221int
1222xfs_iread_extents(
1223        struct xfs_trans        *tp,
1224        struct xfs_inode        *ip,
1225        int                     whichfork)
1226{
1227        struct xfs_iread_state  ir;
1228        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1229        struct xfs_mount        *mp = ip->i_mount;
1230        struct xfs_btree_cur    *cur;
1231        int                     error;
1232
1233        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1234
1235        if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) {
1236                error = -EFSCORRUPTED;
1237                goto out;
1238        }
1239
1240        ir.loaded = 0;
1241        xfs_iext_first(ifp, &ir.icur);
1242        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1243        error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
1244                        XFS_BTREE_VISIT_RECORDS, &ir);
1245        xfs_btree_del_cursor(cur, error);
1246        if (error)
1247                goto out;
1248
1249        if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
1250                error = -EFSCORRUPTED;
1251                goto out;
1252        }
1253        ASSERT(ir.loaded == xfs_iext_count(ifp));
1254
1255        ifp->if_flags |= XFS_IFEXTENTS;
1256        return 0;
1257out:
1258        xfs_iext_destroy(ifp);
1259        return error;
1260}
1261
1262/*
1263 * Returns the relative block number of the first unused block(s) in the given
1264 * fork with at least "len" logically contiguous blocks free.  This is the
1265 * lowest-address hole if the fork has holes, else the first block past the end
1266 * of fork.  Return 0 if the fork is currently local (in-inode).
1267 */
1268int                                             /* error */
1269xfs_bmap_first_unused(
1270        struct xfs_trans        *tp,            /* transaction pointer */
1271        struct xfs_inode        *ip,            /* incore inode */
1272        xfs_extlen_t            len,            /* size of hole to find */
1273        xfs_fileoff_t           *first_unused,  /* unused block */
1274        int                     whichfork)      /* data or attr fork */
1275{
1276        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1277        struct xfs_bmbt_irec    got;
1278        struct xfs_iext_cursor  icur;
1279        xfs_fileoff_t           lastaddr = 0;
1280        xfs_fileoff_t           lowest, max;
1281        int                     error;
1282
1283        if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
1284                *first_unused = 0;
1285                return 0;
1286        }
1287
1288        ASSERT(xfs_ifork_has_extents(ifp));
1289
1290        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1291                error = xfs_iread_extents(tp, ip, whichfork);
1292                if (error)
1293                        return error;
1294        }
1295
1296        lowest = max = *first_unused;
1297        for_each_xfs_iext(ifp, &icur, &got) {
1298                /*
1299                 * See if the hole before this extent will work.
1300                 */
1301                if (got.br_startoff >= lowest + len &&
1302                    got.br_startoff - max >= len)
1303                        break;
1304                lastaddr = got.br_startoff + got.br_blockcount;
1305                max = XFS_FILEOFF_MAX(lastaddr, lowest);
1306        }
1307
1308        *first_unused = max;
1309        return 0;
1310}
1311
1312/*
1313 * Returns the file-relative block number of the last block - 1 before
1314 * last_block (input value) in the file.
1315 * This is not based on i_size, it is based on the extent records.
1316 * Returns 0 for local files, as they do not have extent records.
1317 */
1318int                                             /* error */
1319xfs_bmap_last_before(
1320        struct xfs_trans        *tp,            /* transaction pointer */
1321        struct xfs_inode        *ip,            /* incore inode */
1322        xfs_fileoff_t           *last_block,    /* last block */
1323        int                     whichfork)      /* data or attr fork */
1324{
1325        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1326        struct xfs_bmbt_irec    got;
1327        struct xfs_iext_cursor  icur;
1328        int                     error;
1329
1330        switch (ifp->if_format) {
1331        case XFS_DINODE_FMT_LOCAL:
1332                *last_block = 0;
1333                return 0;
1334        case XFS_DINODE_FMT_BTREE:
1335        case XFS_DINODE_FMT_EXTENTS:
1336                break;
1337        default:
1338                ASSERT(0);
1339                return -EFSCORRUPTED;
1340        }
1341
1342        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1343                error = xfs_iread_extents(tp, ip, whichfork);
1344                if (error)
1345                        return error;
1346        }
1347
1348        if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1349                *last_block = 0;
1350        return 0;
1351}
1352
1353int
1354xfs_bmap_last_extent(
1355        struct xfs_trans        *tp,
1356        struct xfs_inode        *ip,
1357        int                     whichfork,
1358        struct xfs_bmbt_irec    *rec,
1359        int                     *is_empty)
1360{
1361        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1362        struct xfs_iext_cursor  icur;
1363        int                     error;
1364
1365        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1366                error = xfs_iread_extents(tp, ip, whichfork);
1367                if (error)
1368                        return error;
1369        }
1370
1371        xfs_iext_last(ifp, &icur);
1372        if (!xfs_iext_get_extent(ifp, &icur, rec))
1373                *is_empty = 1;
1374        else
1375                *is_empty = 0;
1376        return 0;
1377}
1378
1379/*
1380 * Check the last inode extent to determine whether this allocation will result
1381 * in blocks being allocated at the end of the file. When we allocate new data
1382 * blocks at the end of the file which do not start at the previous data block,
1383 * we will try to align the new blocks at stripe unit boundaries.
1384 *
1385 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1386 * at, or past the EOF.
1387 */
1388STATIC int
1389xfs_bmap_isaeof(
1390        struct xfs_bmalloca     *bma,
1391        int                     whichfork)
1392{
1393        struct xfs_bmbt_irec    rec;
1394        int                     is_empty;
1395        int                     error;
1396
1397        bma->aeof = false;
1398        error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1399                                     &is_empty);
1400        if (error)
1401                return error;
1402
1403        if (is_empty) {
1404                bma->aeof = true;
1405                return 0;
1406        }
1407
1408        /*
1409         * Check if we are allocation or past the last extent, or at least into
1410         * the last delayed allocated extent.
1411         */
1412        bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1413                (bma->offset >= rec.br_startoff &&
1414                 isnullstartblock(rec.br_startblock));
1415        return 0;
1416}
1417
1418/*
1419 * Returns the file-relative block number of the first block past eof in
1420 * the file.  This is not based on i_size, it is based on the extent records.
1421 * Returns 0 for local files, as they do not have extent records.
1422 */
1423int
1424xfs_bmap_last_offset(
1425        struct xfs_inode        *ip,
1426        xfs_fileoff_t           *last_block,
1427        int                     whichfork)
1428{
1429        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1430        struct xfs_bmbt_irec    rec;
1431        int                     is_empty;
1432        int                     error;
1433
1434        *last_block = 0;
1435
1436        if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
1437                return 0;
1438
1439        if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
1440                return -EFSCORRUPTED;
1441
1442        error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1443        if (error || is_empty)
1444                return error;
1445
1446        *last_block = rec.br_startoff + rec.br_blockcount;
1447        return 0;
1448}
1449
1450/*
1451 * Returns whether the selected fork of the inode has exactly one
1452 * block or not.  For the data fork we check this matches di_size,
1453 * implying the file's range is 0..bsize-1.
1454 */
1455int                                     /* 1=>1 block, 0=>otherwise */
1456xfs_bmap_one_block(
1457        struct xfs_inode        *ip,            /* incore inode */
1458        int                     whichfork)      /* data or attr fork */
1459{
1460        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1461        int                     rval;           /* return value */
1462        struct xfs_bmbt_irec    s;              /* internal version of extent */
1463        struct xfs_iext_cursor icur;
1464
1465#ifndef DEBUG
1466        if (whichfork == XFS_DATA_FORK)
1467                return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1468#endif  /* !DEBUG */
1469        if (ifp->if_nextents != 1)
1470                return 0;
1471        if (ifp->if_format != XFS_DINODE_FMT_EXTENTS)
1472                return 0;
1473        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1474        xfs_iext_first(ifp, &icur);
1475        xfs_iext_get_extent(ifp, &icur, &s);
1476        rval = s.br_startoff == 0 && s.br_blockcount == 1;
1477        if (rval && whichfork == XFS_DATA_FORK)
1478                ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1479        return rval;
1480}
1481
1482/*
1483 * Extent tree manipulation functions used during allocation.
1484 */
1485
1486/*
1487 * Convert a delayed allocation to a real allocation.
1488 */
1489STATIC int                              /* error */
1490xfs_bmap_add_extent_delay_real(
1491        struct xfs_bmalloca     *bma,
1492        int                     whichfork)
1493{
1494        struct xfs_mount        *mp = bma->ip->i_mount;
1495        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1496        struct xfs_bmbt_irec    *new = &bma->got;
1497        int                     error;  /* error return value */
1498        int                     i;      /* temp state */
1499        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1500        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1501                                        /* left is 0, right is 1, prev is 2 */
1502        int                     rval=0; /* return value (logging flags) */
1503        int                     state = xfs_bmap_fork_to_state(whichfork);
1504        xfs_filblks_t           da_new; /* new count del alloc blocks used */
1505        xfs_filblks_t           da_old; /* old count del alloc blocks used */
1506        xfs_filblks_t           temp=0; /* value for da_new calculations */
1507        int                     tmp_rval;       /* partial logging flags */
1508        struct xfs_bmbt_irec    old;
1509
1510        ASSERT(whichfork != XFS_ATTR_FORK);
1511        ASSERT(!isnullstartblock(new->br_startblock));
1512        ASSERT(!bma->cur ||
1513               (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
1514
1515        XFS_STATS_INC(mp, xs_add_exlist);
1516
1517#define LEFT            r[0]
1518#define RIGHT           r[1]
1519#define PREV            r[2]
1520
1521        /*
1522         * Set up a bunch of variables to make the tests simpler.
1523         */
1524        xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1525        new_endoff = new->br_startoff + new->br_blockcount;
1526        ASSERT(isnullstartblock(PREV.br_startblock));
1527        ASSERT(PREV.br_startoff <= new->br_startoff);
1528        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1529
1530        da_old = startblockval(PREV.br_startblock);
1531        da_new = 0;
1532
1533        /*
1534         * Set flags determining what part of the previous delayed allocation
1535         * extent is being replaced by a real allocation.
1536         */
1537        if (PREV.br_startoff == new->br_startoff)
1538                state |= BMAP_LEFT_FILLING;
1539        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1540                state |= BMAP_RIGHT_FILLING;
1541
1542        /*
1543         * Check and set flags if this segment has a left neighbor.
1544         * Don't set contiguous if the combined extent would be too large.
1545         */
1546        if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1547                state |= BMAP_LEFT_VALID;
1548                if (isnullstartblock(LEFT.br_startblock))
1549                        state |= BMAP_LEFT_DELAY;
1550        }
1551
1552        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1553            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1554            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1555            LEFT.br_state == new->br_state &&
1556            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1557                state |= BMAP_LEFT_CONTIG;
1558
1559        /*
1560         * Check and set flags if this segment has a right neighbor.
1561         * Don't set contiguous if the combined extent would be too large.
1562         * Also check for all-three-contiguous being too large.
1563         */
1564        if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1565                state |= BMAP_RIGHT_VALID;
1566                if (isnullstartblock(RIGHT.br_startblock))
1567                        state |= BMAP_RIGHT_DELAY;
1568        }
1569
1570        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1571            new_endoff == RIGHT.br_startoff &&
1572            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1573            new->br_state == RIGHT.br_state &&
1574            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1575            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1576                       BMAP_RIGHT_FILLING)) !=
1577                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1578                       BMAP_RIGHT_FILLING) ||
1579             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1580                        <= MAXEXTLEN))
1581                state |= BMAP_RIGHT_CONTIG;
1582
1583        error = 0;
1584        /*
1585         * Switch out based on the FILLING and CONTIG state bits.
1586         */
1587        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1588                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1589        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1590             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1591                /*
1592                 * Filling in all of a previously delayed allocation extent.
1593                 * The left and right neighbors are both contiguous with new.
1594                 */
1595                LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1596
1597                xfs_iext_remove(bma->ip, &bma->icur, state);
1598                xfs_iext_remove(bma->ip, &bma->icur, state);
1599                xfs_iext_prev(ifp, &bma->icur);
1600                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1601                ifp->if_nextents--;
1602
1603                if (bma->cur == NULL)
1604                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1605                else {
1606                        rval = XFS_ILOG_CORE;
1607                        error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1608                        if (error)
1609                                goto done;
1610                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1611                                error = -EFSCORRUPTED;
1612                                goto done;
1613                        }
1614                        error = xfs_btree_delete(bma->cur, &i);
1615                        if (error)
1616                                goto done;
1617                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1618                                error = -EFSCORRUPTED;
1619                                goto done;
1620                        }
1621                        error = xfs_btree_decrement(bma->cur, 0, &i);
1622                        if (error)
1623                                goto done;
1624                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1625                                error = -EFSCORRUPTED;
1626                                goto done;
1627                        }
1628                        error = xfs_bmbt_update(bma->cur, &LEFT);
1629                        if (error)
1630                                goto done;
1631                }
1632                break;
1633
1634        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1635                /*
1636                 * Filling in all of a previously delayed allocation extent.
1637                 * The left neighbor is contiguous, the right is not.
1638                 */
1639                old = LEFT;
1640                LEFT.br_blockcount += PREV.br_blockcount;
1641
1642                xfs_iext_remove(bma->ip, &bma->icur, state);
1643                xfs_iext_prev(ifp, &bma->icur);
1644                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1645
1646                if (bma->cur == NULL)
1647                        rval = XFS_ILOG_DEXT;
1648                else {
1649                        rval = 0;
1650                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1651                        if (error)
1652                                goto done;
1653                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1654                                error = -EFSCORRUPTED;
1655                                goto done;
1656                        }
1657                        error = xfs_bmbt_update(bma->cur, &LEFT);
1658                        if (error)
1659                                goto done;
1660                }
1661                break;
1662
1663        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664                /*
1665                 * Filling in all of a previously delayed allocation extent.
1666                 * The right neighbor is contiguous, the left is not. Take care
1667                 * with delay -> unwritten extent allocation here because the
1668                 * delalloc record we are overwriting is always written.
1669                 */
1670                PREV.br_startblock = new->br_startblock;
1671                PREV.br_blockcount += RIGHT.br_blockcount;
1672                PREV.br_state = new->br_state;
1673
1674                xfs_iext_next(ifp, &bma->icur);
1675                xfs_iext_remove(bma->ip, &bma->icur, state);
1676                xfs_iext_prev(ifp, &bma->icur);
1677                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1678
1679                if (bma->cur == NULL)
1680                        rval = XFS_ILOG_DEXT;
1681                else {
1682                        rval = 0;
1683                        error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1684                        if (error)
1685                                goto done;
1686                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1687                                error = -EFSCORRUPTED;
1688                                goto done;
1689                        }
1690                        error = xfs_bmbt_update(bma->cur, &PREV);
1691                        if (error)
1692                                goto done;
1693                }
1694                break;
1695
1696        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1697                /*
1698                 * Filling in all of a previously delayed allocation extent.
1699                 * Neither the left nor right neighbors are contiguous with
1700                 * the new one.
1701                 */
1702                PREV.br_startblock = new->br_startblock;
1703                PREV.br_state = new->br_state;
1704                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1705                ifp->if_nextents++;
1706
1707                if (bma->cur == NULL)
1708                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1709                else {
1710                        rval = XFS_ILOG_CORE;
1711                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1712                        if (error)
1713                                goto done;
1714                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1715                                error = -EFSCORRUPTED;
1716                                goto done;
1717                        }
1718                        error = xfs_btree_insert(bma->cur, &i);
1719                        if (error)
1720                                goto done;
1721                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1722                                error = -EFSCORRUPTED;
1723                                goto done;
1724                        }
1725                }
1726                break;
1727
1728        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1729                /*
1730                 * Filling in the first part of a previous delayed allocation.
1731                 * The left neighbor is contiguous.
1732                 */
1733                old = LEFT;
1734                temp = PREV.br_blockcount - new->br_blockcount;
1735                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1736                                startblockval(PREV.br_startblock));
1737
1738                LEFT.br_blockcount += new->br_blockcount;
1739
1740                PREV.br_blockcount = temp;
1741                PREV.br_startoff += new->br_blockcount;
1742                PREV.br_startblock = nullstartblock(da_new);
1743
1744                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1745                xfs_iext_prev(ifp, &bma->icur);
1746                xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1747
1748                if (bma->cur == NULL)
1749                        rval = XFS_ILOG_DEXT;
1750                else {
1751                        rval = 0;
1752                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1753                        if (error)
1754                                goto done;
1755                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1756                                error = -EFSCORRUPTED;
1757                                goto done;
1758                        }
1759                        error = xfs_bmbt_update(bma->cur, &LEFT);
1760                        if (error)
1761                                goto done;
1762                }
1763                break;
1764
1765        case BMAP_LEFT_FILLING:
1766                /*
1767                 * Filling in the first part of a previous delayed allocation.
1768                 * The left neighbor is not contiguous.
1769                 */
1770                xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1771                ifp->if_nextents++;
1772
1773                if (bma->cur == NULL)
1774                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1775                else {
1776                        rval = XFS_ILOG_CORE;
1777                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1778                        if (error)
1779                                goto done;
1780                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1781                                error = -EFSCORRUPTED;
1782                                goto done;
1783                        }
1784                        error = xfs_btree_insert(bma->cur, &i);
1785                        if (error)
1786                                goto done;
1787                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1788                                error = -EFSCORRUPTED;
1789                                goto done;
1790                        }
1791                }
1792
1793                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1794                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1795                                        &bma->cur, 1, &tmp_rval, whichfork);
1796                        rval |= tmp_rval;
1797                        if (error)
1798                                goto done;
1799                }
1800
1801                temp = PREV.br_blockcount - new->br_blockcount;
1802                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1803                        startblockval(PREV.br_startblock) -
1804                        (bma->cur ? bma->cur->bc_ino.allocated : 0));
1805
1806                PREV.br_startoff = new_endoff;
1807                PREV.br_blockcount = temp;
1808                PREV.br_startblock = nullstartblock(da_new);
1809                xfs_iext_next(ifp, &bma->icur);
1810                xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1811                xfs_iext_prev(ifp, &bma->icur);
1812                break;
1813
1814        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1815                /*
1816                 * Filling in the last part of a previous delayed allocation.
1817                 * The right neighbor is contiguous with the new allocation.
1818                 */
1819                old = RIGHT;
1820                RIGHT.br_startoff = new->br_startoff;
1821                RIGHT.br_startblock = new->br_startblock;
1822                RIGHT.br_blockcount += new->br_blockcount;
1823
1824                if (bma->cur == NULL)
1825                        rval = XFS_ILOG_DEXT;
1826                else {
1827                        rval = 0;
1828                        error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1829                        if (error)
1830                                goto done;
1831                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1832                                error = -EFSCORRUPTED;
1833                                goto done;
1834                        }
1835                        error = xfs_bmbt_update(bma->cur, &RIGHT);
1836                        if (error)
1837                                goto done;
1838                }
1839
1840                temp = PREV.br_blockcount - new->br_blockcount;
1841                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1842                        startblockval(PREV.br_startblock));
1843
1844                PREV.br_blockcount = temp;
1845                PREV.br_startblock = nullstartblock(da_new);
1846
1847                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1848                xfs_iext_next(ifp, &bma->icur);
1849                xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1850                break;
1851
1852        case BMAP_RIGHT_FILLING:
1853                /*
1854                 * Filling in the last part of a previous delayed allocation.
1855                 * The right neighbor is not contiguous.
1856                 */
1857                xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1858                ifp->if_nextents++;
1859
1860                if (bma->cur == NULL)
1861                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1862                else {
1863                        rval = XFS_ILOG_CORE;
1864                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1865                        if (error)
1866                                goto done;
1867                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1868                                error = -EFSCORRUPTED;
1869                                goto done;
1870                        }
1871                        error = xfs_btree_insert(bma->cur, &i);
1872                        if (error)
1873                                goto done;
1874                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1875                                error = -EFSCORRUPTED;
1876                                goto done;
1877                        }
1878                }
1879
1880                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1881                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1882                                &bma->cur, 1, &tmp_rval, whichfork);
1883                        rval |= tmp_rval;
1884                        if (error)
1885                                goto done;
1886                }
1887
1888                temp = PREV.br_blockcount - new->br_blockcount;
1889                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1890                        startblockval(PREV.br_startblock) -
1891                        (bma->cur ? bma->cur->bc_ino.allocated : 0));
1892
1893                PREV.br_startblock = nullstartblock(da_new);
1894                PREV.br_blockcount = temp;
1895                xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1896                xfs_iext_next(ifp, &bma->icur);
1897                break;
1898
1899        case 0:
1900                /*
1901                 * Filling in the middle part of a previous delayed allocation.
1902                 * Contiguity is impossible here.
1903                 * This case is avoided almost all the time.
1904                 *
1905                 * We start with a delayed allocation:
1906                 *
1907                 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908                 *  PREV @ idx
1909                 *
1910                 * and we are allocating:
1911                 *                     +rrrrrrrrrrrrrrrrr+
1912                 *                            new
1913                 *
1914                 * and we set it up for insertion as:
1915                 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916                 *                            new
1917                 *  PREV @ idx          LEFT              RIGHT
1918                 *                      inserted at idx + 1
1919                 */
1920                old = PREV;
1921
1922                /* LEFT is the new middle */
1923                LEFT = *new;
1924
1925                /* RIGHT is the new right */
1926                RIGHT.br_state = PREV.br_state;
1927                RIGHT.br_startoff = new_endoff;
1928                RIGHT.br_blockcount =
1929                        PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930                RIGHT.br_startblock =
1931                        nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932                                        RIGHT.br_blockcount));
1933
1934                /* truncate PREV */
1935                PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936                PREV.br_startblock =
1937                        nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938                                        PREV.br_blockcount));
1939                xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1940
1941                xfs_iext_next(ifp, &bma->icur);
1942                xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1943                xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1944                ifp->if_nextents++;
1945
1946                if (bma->cur == NULL)
1947                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948                else {
1949                        rval = XFS_ILOG_CORE;
1950                        error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951                        if (error)
1952                                goto done;
1953                        if (XFS_IS_CORRUPT(mp, i != 0)) {
1954                                error = -EFSCORRUPTED;
1955                                goto done;
1956                        }
1957                        error = xfs_btree_insert(bma->cur, &i);
1958                        if (error)
1959                                goto done;
1960                        if (XFS_IS_CORRUPT(mp, i != 1)) {
1961                                error = -EFSCORRUPTED;
1962                                goto done;
1963                        }
1964                }
1965
1966                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1967                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1968                                        &bma->cur, 1, &tmp_rval, whichfork);
1969                        rval |= tmp_rval;
1970                        if (error)
1971                                goto done;
1972                }
1973
1974                da_new = startblockval(PREV.br_startblock) +
1975                         startblockval(RIGHT.br_startblock);
1976                break;
1977
1978        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1979        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1980        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1981        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1982        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1983        case BMAP_LEFT_CONTIG:
1984        case BMAP_RIGHT_CONTIG:
1985                /*
1986                 * These cases are all impossible.
1987                 */
1988                ASSERT(0);
1989        }
1990
1991        /* add reverse mapping unless caller opted out */
1992        if (!(bma->flags & XFS_BMAPI_NORMAP))
1993                xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1994
1995        /* convert to a btree if necessary */
1996        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1997                int     tmp_logflags;   /* partial log flag return val */
1998
1999                ASSERT(bma->cur == NULL);
2000                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2001                                &bma->cur, da_old > 0, &tmp_logflags,
2002                                whichfork);
2003                bma->logflags |= tmp_logflags;
2004                if (error)
2005                        goto done;
2006        }
2007
2008        if (da_new != da_old)
2009                xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
2010
2011        if (bma->cur) {
2012                da_new += bma->cur->bc_ino.allocated;
2013                bma->cur->bc_ino.allocated = 0;
2014        }
2015
2016        /* adjust for changes in reserved delayed indirect blocks */
2017        if (da_new != da_old) {
2018                ASSERT(state == 0 || da_new < da_old);
2019                error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2020                                false);
2021        }
2022
2023        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2024done:
2025        if (whichfork != XFS_COW_FORK)
2026                bma->logflags |= rval;
2027        return error;
2028#undef  LEFT
2029#undef  RIGHT
2030#undef  PREV
2031}
2032
2033/*
2034 * Convert an unwritten allocation to a real allocation or vice versa.
2035 */
2036int                                     /* error */
2037xfs_bmap_add_extent_unwritten_real(
2038        struct xfs_trans        *tp,
2039        xfs_inode_t             *ip,    /* incore inode pointer */
2040        int                     whichfork,
2041        struct xfs_iext_cursor  *icur,
2042        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2043        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2044        int                     *logflagsp) /* inode logging flags */
2045{
2046        xfs_btree_cur_t         *cur;   /* btree cursor */
2047        int                     error;  /* error return value */
2048        int                     i;      /* temp state */
2049        struct xfs_ifork        *ifp;   /* inode fork pointer */
2050        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2051        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2052                                        /* left is 0, right is 1, prev is 2 */
2053        int                     rval=0; /* return value (logging flags) */
2054        int                     state = xfs_bmap_fork_to_state(whichfork);
2055        struct xfs_mount        *mp = ip->i_mount;
2056        struct xfs_bmbt_irec    old;
2057
2058        *logflagsp = 0;
2059
2060        cur = *curp;
2061        ifp = XFS_IFORK_PTR(ip, whichfork);
2062
2063        ASSERT(!isnullstartblock(new->br_startblock));
2064
2065        XFS_STATS_INC(mp, xs_add_exlist);
2066
2067#define LEFT            r[0]
2068#define RIGHT           r[1]
2069#define PREV            r[2]
2070
2071        /*
2072         * Set up a bunch of variables to make the tests simpler.
2073         */
2074        error = 0;
2075        xfs_iext_get_extent(ifp, icur, &PREV);
2076        ASSERT(new->br_state != PREV.br_state);
2077        new_endoff = new->br_startoff + new->br_blockcount;
2078        ASSERT(PREV.br_startoff <= new->br_startoff);
2079        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2080
2081        /*
2082         * Set flags determining what part of the previous oldext allocation
2083         * extent is being replaced by a newext allocation.
2084         */
2085        if (PREV.br_startoff == new->br_startoff)
2086                state |= BMAP_LEFT_FILLING;
2087        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2088                state |= BMAP_RIGHT_FILLING;
2089
2090        /*
2091         * Check and set flags if this segment has a left neighbor.
2092         * Don't set contiguous if the combined extent would be too large.
2093         */
2094        if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2095                state |= BMAP_LEFT_VALID;
2096                if (isnullstartblock(LEFT.br_startblock))
2097                        state |= BMAP_LEFT_DELAY;
2098        }
2099
2100        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2101            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2102            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2103            LEFT.br_state == new->br_state &&
2104            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2105                state |= BMAP_LEFT_CONTIG;
2106
2107        /*
2108         * Check and set flags if this segment has a right neighbor.
2109         * Don't set contiguous if the combined extent would be too large.
2110         * Also check for all-three-contiguous being too large.
2111         */
2112        if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2113                state |= BMAP_RIGHT_VALID;
2114                if (isnullstartblock(RIGHT.br_startblock))
2115                        state |= BMAP_RIGHT_DELAY;
2116        }
2117
2118        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119            new_endoff == RIGHT.br_startoff &&
2120            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121            new->br_state == RIGHT.br_state &&
2122            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124                       BMAP_RIGHT_FILLING)) !=
2125                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126                       BMAP_RIGHT_FILLING) ||
2127             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128                        <= MAXEXTLEN))
2129                state |= BMAP_RIGHT_CONTIG;
2130
2131        /*
2132         * Switch out based on the FILLING and CONTIG state bits.
2133         */
2134        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138                /*
2139                 * Setting all of a previous oldext extent to newext.
2140                 * The left and right neighbors are both contiguous with new.
2141                 */
2142                LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2143
2144                xfs_iext_remove(ip, icur, state);
2145                xfs_iext_remove(ip, icur, state);
2146                xfs_iext_prev(ifp, icur);
2147                xfs_iext_update_extent(ip, state, icur, &LEFT);
2148                ifp->if_nextents -= 2;
2149                if (cur == NULL)
2150                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2151                else {
2152                        rval = XFS_ILOG_CORE;
2153                        error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2154                        if (error)
2155                                goto done;
2156                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2157                                error = -EFSCORRUPTED;
2158                                goto done;
2159                        }
2160                        if ((error = xfs_btree_delete(cur, &i)))
2161                                goto done;
2162                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2163                                error = -EFSCORRUPTED;
2164                                goto done;
2165                        }
2166                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2167                                goto done;
2168                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2169                                error = -EFSCORRUPTED;
2170                                goto done;
2171                        }
2172                        if ((error = xfs_btree_delete(cur, &i)))
2173                                goto done;
2174                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2175                                error = -EFSCORRUPTED;
2176                                goto done;
2177                        }
2178                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2179                                goto done;
2180                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2181                                error = -EFSCORRUPTED;
2182                                goto done;
2183                        }
2184                        error = xfs_bmbt_update(cur, &LEFT);
2185                        if (error)
2186                                goto done;
2187                }
2188                break;
2189
2190        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2191                /*
2192                 * Setting all of a previous oldext extent to newext.
2193                 * The left neighbor is contiguous, the right is not.
2194                 */
2195                LEFT.br_blockcount += PREV.br_blockcount;
2196
2197                xfs_iext_remove(ip, icur, state);
2198                xfs_iext_prev(ifp, icur);
2199                xfs_iext_update_extent(ip, state, icur, &LEFT);
2200                ifp->if_nextents--;
2201                if (cur == NULL)
2202                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2203                else {
2204                        rval = XFS_ILOG_CORE;
2205                        error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2206                        if (error)
2207                                goto done;
2208                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2209                                error = -EFSCORRUPTED;
2210                                goto done;
2211                        }
2212                        if ((error = xfs_btree_delete(cur, &i)))
2213                                goto done;
2214                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2215                                error = -EFSCORRUPTED;
2216                                goto done;
2217                        }
2218                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2219                                goto done;
2220                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2221                                error = -EFSCORRUPTED;
2222                                goto done;
2223                        }
2224                        error = xfs_bmbt_update(cur, &LEFT);
2225                        if (error)
2226                                goto done;
2227                }
2228                break;
2229
2230        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2231                /*
2232                 * Setting all of a previous oldext extent to newext.
2233                 * The right neighbor is contiguous, the left is not.
2234                 */
2235                PREV.br_blockcount += RIGHT.br_blockcount;
2236                PREV.br_state = new->br_state;
2237
2238                xfs_iext_next(ifp, icur);
2239                xfs_iext_remove(ip, icur, state);
2240                xfs_iext_prev(ifp, icur);
2241                xfs_iext_update_extent(ip, state, icur, &PREV);
2242                ifp->if_nextents--;
2243
2244                if (cur == NULL)
2245                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2246                else {
2247                        rval = XFS_ILOG_CORE;
2248                        error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2249                        if (error)
2250                                goto done;
2251                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2252                                error = -EFSCORRUPTED;
2253                                goto done;
2254                        }
2255                        if ((error = xfs_btree_delete(cur, &i)))
2256                                goto done;
2257                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2258                                error = -EFSCORRUPTED;
2259                                goto done;
2260                        }
2261                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2262                                goto done;
2263                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2264                                error = -EFSCORRUPTED;
2265                                goto done;
2266                        }
2267                        error = xfs_bmbt_update(cur, &PREV);
2268                        if (error)
2269                                goto done;
2270                }
2271                break;
2272
2273        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2274                /*
2275                 * Setting all of a previous oldext extent to newext.
2276                 * Neither the left nor right neighbors are contiguous with
2277                 * the new one.
2278                 */
2279                PREV.br_state = new->br_state;
2280                xfs_iext_update_extent(ip, state, icur, &PREV);
2281
2282                if (cur == NULL)
2283                        rval = XFS_ILOG_DEXT;
2284                else {
2285                        rval = 0;
2286                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2287                        if (error)
2288                                goto done;
2289                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2290                                error = -EFSCORRUPTED;
2291                                goto done;
2292                        }
2293                        error = xfs_bmbt_update(cur, &PREV);
2294                        if (error)
2295                                goto done;
2296                }
2297                break;
2298
2299        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2300                /*
2301                 * Setting the first part of a previous oldext extent to newext.
2302                 * The left neighbor is contiguous.
2303                 */
2304                LEFT.br_blockcount += new->br_blockcount;
2305
2306                old = PREV;
2307                PREV.br_startoff += new->br_blockcount;
2308                PREV.br_startblock += new->br_blockcount;
2309                PREV.br_blockcount -= new->br_blockcount;
2310
2311                xfs_iext_update_extent(ip, state, icur, &PREV);
2312                xfs_iext_prev(ifp, icur);
2313                xfs_iext_update_extent(ip, state, icur, &LEFT);
2314
2315                if (cur == NULL)
2316                        rval = XFS_ILOG_DEXT;
2317                else {
2318                        rval = 0;
2319                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2320                        if (error)
2321                                goto done;
2322                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2323                                error = -EFSCORRUPTED;
2324                                goto done;
2325                        }
2326                        error = xfs_bmbt_update(cur, &PREV);
2327                        if (error)
2328                                goto done;
2329                        error = xfs_btree_decrement(cur, 0, &i);
2330                        if (error)
2331                                goto done;
2332                        error = xfs_bmbt_update(cur, &LEFT);
2333                        if (error)
2334                                goto done;
2335                }
2336                break;
2337
2338        case BMAP_LEFT_FILLING:
2339                /*
2340                 * Setting the first part of a previous oldext extent to newext.
2341                 * The left neighbor is not contiguous.
2342                 */
2343                old = PREV;
2344                PREV.br_startoff += new->br_blockcount;
2345                PREV.br_startblock += new->br_blockcount;
2346                PREV.br_blockcount -= new->br_blockcount;
2347
2348                xfs_iext_update_extent(ip, state, icur, &PREV);
2349                xfs_iext_insert(ip, icur, new, state);
2350                ifp->if_nextents++;
2351
2352                if (cur == NULL)
2353                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354                else {
2355                        rval = XFS_ILOG_CORE;
2356                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2357                        if (error)
2358                                goto done;
2359                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2360                                error = -EFSCORRUPTED;
2361                                goto done;
2362                        }
2363                        error = xfs_bmbt_update(cur, &PREV);
2364                        if (error)
2365                                goto done;
2366                        cur->bc_rec.b = *new;
2367                        if ((error = xfs_btree_insert(cur, &i)))
2368                                goto done;
2369                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2370                                error = -EFSCORRUPTED;
2371                                goto done;
2372                        }
2373                }
2374                break;
2375
2376        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2377                /*
2378                 * Setting the last part of a previous oldext extent to newext.
2379                 * The right neighbor is contiguous with the new allocation.
2380                 */
2381                old = PREV;
2382                PREV.br_blockcount -= new->br_blockcount;
2383
2384                RIGHT.br_startoff = new->br_startoff;
2385                RIGHT.br_startblock = new->br_startblock;
2386                RIGHT.br_blockcount += new->br_blockcount;
2387
2388                xfs_iext_update_extent(ip, state, icur, &PREV);
2389                xfs_iext_next(ifp, icur);
2390                xfs_iext_update_extent(ip, state, icur, &RIGHT);
2391
2392                if (cur == NULL)
2393                        rval = XFS_ILOG_DEXT;
2394                else {
2395                        rval = 0;
2396                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2397                        if (error)
2398                                goto done;
2399                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2400                                error = -EFSCORRUPTED;
2401                                goto done;
2402                        }
2403                        error = xfs_bmbt_update(cur, &PREV);
2404                        if (error)
2405                                goto done;
2406                        error = xfs_btree_increment(cur, 0, &i);
2407                        if (error)
2408                                goto done;
2409                        error = xfs_bmbt_update(cur, &RIGHT);
2410                        if (error)
2411                                goto done;
2412                }
2413                break;
2414
2415        case BMAP_RIGHT_FILLING:
2416                /*
2417                 * Setting the last part of a previous oldext extent to newext.
2418                 * The right neighbor is not contiguous.
2419                 */
2420                old = PREV;
2421                PREV.br_blockcount -= new->br_blockcount;
2422
2423                xfs_iext_update_extent(ip, state, icur, &PREV);
2424                xfs_iext_next(ifp, icur);
2425                xfs_iext_insert(ip, icur, new, state);
2426                ifp->if_nextents++;
2427
2428                if (cur == NULL)
2429                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2430                else {
2431                        rval = XFS_ILOG_CORE;
2432                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2433                        if (error)
2434                                goto done;
2435                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2436                                error = -EFSCORRUPTED;
2437                                goto done;
2438                        }
2439                        error = xfs_bmbt_update(cur, &PREV);
2440                        if (error)
2441                                goto done;
2442                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2443                        if (error)
2444                                goto done;
2445                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2446                                error = -EFSCORRUPTED;
2447                                goto done;
2448                        }
2449                        if ((error = xfs_btree_insert(cur, &i)))
2450                                goto done;
2451                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2452                                error = -EFSCORRUPTED;
2453                                goto done;
2454                        }
2455                }
2456                break;
2457
2458        case 0:
2459                /*
2460                 * Setting the middle part of a previous oldext extent to
2461                 * newext.  Contiguity is impossible here.
2462                 * One extent becomes three extents.
2463                 */
2464                old = PREV;
2465                PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2466
2467                r[0] = *new;
2468                r[1].br_startoff = new_endoff;
2469                r[1].br_blockcount =
2470                        old.br_startoff + old.br_blockcount - new_endoff;
2471                r[1].br_startblock = new->br_startblock + new->br_blockcount;
2472                r[1].br_state = PREV.br_state;
2473
2474                xfs_iext_update_extent(ip, state, icur, &PREV);
2475                xfs_iext_next(ifp, icur);
2476                xfs_iext_insert(ip, icur, &r[1], state);
2477                xfs_iext_insert(ip, icur, &r[0], state);
2478                ifp->if_nextents += 2;
2479
2480                if (cur == NULL)
2481                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2482                else {
2483                        rval = XFS_ILOG_CORE;
2484                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2485                        if (error)
2486                                goto done;
2487                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2488                                error = -EFSCORRUPTED;
2489                                goto done;
2490                        }
2491                        /* new right extent - oldext */
2492                        error = xfs_bmbt_update(cur, &r[1]);
2493                        if (error)
2494                                goto done;
2495                        /* new left extent - oldext */
2496                        cur->bc_rec.b = PREV;
2497                        if ((error = xfs_btree_insert(cur, &i)))
2498                                goto done;
2499                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2500                                error = -EFSCORRUPTED;
2501                                goto done;
2502                        }
2503                        /*
2504                         * Reset the cursor to the position of the new extent
2505                         * we are about to insert as we can't trust it after
2506                         * the previous insert.
2507                         */
2508                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2509                        if (error)
2510                                goto done;
2511                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2512                                error = -EFSCORRUPTED;
2513                                goto done;
2514                        }
2515                        /* new middle extent - newext */
2516                        if ((error = xfs_btree_insert(cur, &i)))
2517                                goto done;
2518                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2519                                error = -EFSCORRUPTED;
2520                                goto done;
2521                        }
2522                }
2523                break;
2524
2525        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2526        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2527        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2528        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2529        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2530        case BMAP_LEFT_CONTIG:
2531        case BMAP_RIGHT_CONTIG:
2532                /*
2533                 * These cases are all impossible.
2534                 */
2535                ASSERT(0);
2536        }
2537
2538        /* update reverse mappings */
2539        xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2540
2541        /* convert to a btree if necessary */
2542        if (xfs_bmap_needs_btree(ip, whichfork)) {
2543                int     tmp_logflags;   /* partial log flag return val */
2544
2545                ASSERT(cur == NULL);
2546                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2547                                &tmp_logflags, whichfork);
2548                *logflagsp |= tmp_logflags;
2549                if (error)
2550                        goto done;
2551        }
2552
2553        /* clear out the allocated field, done with it now in any case. */
2554        if (cur) {
2555                cur->bc_ino.allocated = 0;
2556                *curp = cur;
2557        }
2558
2559        xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2560done:
2561        *logflagsp |= rval;
2562        return error;
2563#undef  LEFT
2564#undef  RIGHT
2565#undef  PREV
2566}
2567
2568/*
2569 * Convert a hole to a delayed allocation.
     *
     * Adds the delayed-allocation extent @new to the in-core extent list of
     * the @whichfork fork of @ip at the position addressed by @icur.  If the
     * left and/or right neighbour is also a delayed allocation, is contiguous
     * in file offset, and the combined length stays within MAXEXTLEN, @new is
     * merged into that neighbour instead of being inserted as a new record.
     *
     * On a merge, the indirect-block reservations carried in the null start
     * blocks of the merged pieces are summed (oldlen) and the result is capped
     * at xfs_bmap_worst_indlen() for the merged length (newlen).  Any surplus
     * is passed to xfs_mod_fdblocks() and the delalloc counter is adjusted by
     * the same amount through xfs_mod_delalloc().
     *
     * The function takes no transaction or btree cursor and only calls the
     * xfs_iext_* helpers on the in-core fork.  On return @icur addresses the
     * record that now contains @new (as positioned by the calls in each case).
2570 */
2571STATIC void
2572xfs_bmap_add_extent_hole_delay(
2573        xfs_inode_t             *ip,    /* incore inode pointer */
2574        int                     whichfork,      /* fork being modified */
2575        struct xfs_iext_cursor  *icur,  /* in-core extent list cursor */
2576        xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2577{
2578        struct xfs_ifork        *ifp;   /* inode fork pointer */
2579        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2580        xfs_filblks_t           newlen=0;       /* new indirect size */
2581        xfs_filblks_t           oldlen=0;       /* old indirect size */
2582        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2583        int                     state = xfs_bmap_fork_to_state(whichfork);
2584        xfs_filblks_t           temp;    /* block count of the merged extent */
2585
2586        ifp = XFS_IFORK_PTR(ip, whichfork);
            /* The caller must hand in a delalloc (null start block) extent. */
2587        ASSERT(isnullstartblock(new->br_startblock));
2588
2589        /*
2590         * Check and set flags if this segment has a left neighbor
2591         */
2592        if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2593                state |= BMAP_LEFT_VALID;
2594                if (isnullstartblock(left.br_startblock))
2595                        state |= BMAP_LEFT_DELAY;
2596        }
2597
2598        /*
2599         * Check and set flags if the current (right) segment exists.
2600         * If it doesn't exist, we're converting the hole at end-of-file.
2601         */
2602        if (xfs_iext_get_extent(ifp, icur, &right)) {
2603                state |= BMAP_RIGHT_VALID;
2604                if (isnullstartblock(right.br_startblock))
2605                        state |= BMAP_RIGHT_DELAY;
2606        }
2607
2608        /*
2609         * Set contiguity flags on the left and right neighbors.
2610         * Don't let extents get too large, even if the pieces are contiguous.
             * Only delalloc neighbours qualify, and only file-offset adjacency
             * is compared (no disk block comparison is made here).
2611         */
2612        if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2613            left.br_startoff + left.br_blockcount == new->br_startoff &&
2614            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2615                state |= BMAP_LEFT_CONTIG;
2616
            /*
             * If the left neighbour is already going to be merged, the right
             * one may only join when all three pieces together fit MAXEXTLEN.
             */
2617        if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2618            new->br_startoff + new->br_blockcount == right.br_startoff &&
2619            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2620            (!(state & BMAP_LEFT_CONTIG) ||
2621             (left.br_blockcount + new->br_blockcount +
2622              right.br_blockcount <= MAXEXTLEN)))
2623                state |= BMAP_RIGHT_CONTIG;
2624
2625        /*
2626         * Switch out based on the contiguity flags.
2627         */
2628        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2629        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2630                /*
2631                 * New allocation is contiguous with delayed allocations
2632                 * on the left and on the right.
2633                 * Merge all three into a single extent record.
2634                 */
2635                temp = left.br_blockcount + new->br_blockcount +
2636                        right.br_blockcount;
2637
                    /*
                     * Sum the three existing indirect reservations, then keep
                     * no more than the worst case for the merged length.
                     */
2638                oldlen = startblockval(left.br_startblock) +
2639                        startblockval(new->br_startblock) +
2640                        startblockval(right.br_startblock);
2641                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2642                                         oldlen);
2643                left.br_startblock = nullstartblock(newlen);
2644                left.br_blockcount = temp;
2645
                    /*
                     * icur addresses the record read as "right" above: remove
                     * it, step back to the left neighbour and store the merged
                     * extent there.
                     */
2646                xfs_iext_remove(ip, icur, state);
2647                xfs_iext_prev(ifp, icur);
2648                xfs_iext_update_extent(ip, state, icur, &left);
2649                break;
2650
2651        case BMAP_LEFT_CONTIG:
2652                /*
2653                 * New allocation is contiguous with a delayed allocation
2654                 * on the left.
2655                 * Merge the new allocation with the left neighbor.
2656                 */
2657                temp = left.br_blockcount + new->br_blockcount;
2658
2659                oldlen = startblockval(left.br_startblock) +
2660                        startblockval(new->br_startblock);
2661                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2662                                         oldlen);
2663                left.br_blockcount = temp;
2664                left.br_startblock = nullstartblock(newlen);
2665
                    /* Step back onto the left neighbour and grow it in place. */
2666                xfs_iext_prev(ifp, icur);
2667                xfs_iext_update_extent(ip, state, icur, &left);
2668                break;
2669
2670        case BMAP_RIGHT_CONTIG:
2671                /*
2672                 * New allocation is contiguous with a delayed allocation
2673                 * on the right.
2674                 * Merge the new allocation with the right neighbor.
2675                 */
2676                temp = new->br_blockcount + right.br_blockcount;
2677                oldlen = startblockval(new->br_startblock) +
2678                        startblockval(right.br_startblock);
2679                newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2680                                         oldlen);
                    /*
                     * The merged record starts where the new extent starts;
                     * icur already addresses "right", so no cursor move is
                     * needed before the update.
                     */
2681                right.br_startoff = new->br_startoff;
2682                right.br_startblock = nullstartblock(newlen);
2683                right.br_blockcount = temp;
2684                xfs_iext_update_extent(ip, state, icur, &right);
2685                break;
2686
2687        case 0:
2688                /*
2689                 * New allocation is not contiguous with another
2690                 * delayed allocation.
2691                 * Insert a new entry.
                     * Both lengths are zeroed so the accounting adjustment
                     * after the switch is skipped.
2692                 */
2693                oldlen = newlen = 0;
2694                xfs_iext_insert(ip, icur, new, state);
2695                break;
2696        }
            /*
             * A merge reduced the combined indirect reservation: pass the
             * surplus (oldlen - newlen) to xfs_mod_fdblocks() as a positive
             * delta and apply the matching negative delta to the delalloc
             * counter via xfs_mod_delalloc().
             */
2697        if (oldlen != newlen) {
2698                ASSERT(oldlen > newlen);
2699                xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2700                                 false);
2701                /*
2702                 * Nothing to do for disk quota accounting here.
2703                 */
2704                xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
2705        }
2706}
2707
2708/*
2709 * Convert a hole to a real allocation.
     *
     * Adds the real (non-delalloc) extent @new to the @whichfork fork of @ip
     * at the position addressed by @icur.  A neighbour is merged with @new
     * only if it is a real extent, is contiguous in both file offset and disk
     * block, has the same br_state, and the combined length stays within
     * MAXEXTLEN.
     *
     * The in-core extent list is always updated.  When *curp is non-NULL the
     * same change is also applied to the bmap btree through that cursor;
     * when it is NULL the value of xfs_ilog_fext(@whichfork) is added to the
     * returned log flags instead.  XFS_ILOG_CORE is added in the cases that
     * change ifp->if_nextents.
     *
     * Unless @flags contains XFS_BMAPI_NORMAP, xfs_rmap_map_extent() is called
     * for @new.  If xfs_bmap_needs_btree() reports true afterwards, the fork
     * is converted with xfs_bmap_extents_to_btree(), which is passed @curp and
     * whose resulting *curp is picked up as the cursor from then on.
     *
     * Log flags are OR-ed into *@logflagsp on both success and error paths.
     * Returns 0 on success or a negative errno; -EFSCORRUPTED is returned
     * when a btree lookup/delete/decrement/insert reports an unexpected status.
2710 */
2711STATIC int                              /* error */
2712xfs_bmap_add_extent_hole_real(
2713        struct xfs_trans        *tp,
2714        struct xfs_inode        *ip,
2715        int                     whichfork,
2716        struct xfs_iext_cursor  *icur,
2717        struct xfs_btree_cur    **curp,
2718        struct xfs_bmbt_irec    *new,
2719        int                     *logflagsp,
2720        int                     flags)
2721{
2722        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2723        struct xfs_mount        *mp = ip->i_mount;
2724        struct xfs_btree_cur    *cur = *curp;
2725        int                     error;  /* error return value */
2726        int                     i;      /* temp state */
2727        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2728        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2729        int                     rval=0; /* return value (logging flags) */
2730        int                     state = xfs_bmap_fork_to_state(whichfork);
2731        struct xfs_bmbt_irec    old;    /* pre-merge neighbour, btree lookup key */
2732
            /*
             * The new extent must be a real allocation, and a supplied btree
             * cursor must not carry the XFS_BTCUR_BMBT_WASDEL flag.
             */
2733        ASSERT(!isnullstartblock(new->br_startblock));
2734        ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
2735
2736        XFS_STATS_INC(mp, xs_add_exlist);
2737
2738        /*
2739         * Check and set flags if this segment has a left neighbor.
2740         */
2741        if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2742                state |= BMAP_LEFT_VALID;
2743                if (isnullstartblock(left.br_startblock))
2744                        state |= BMAP_LEFT_DELAY;
2745        }
2746
2747        /*
2748         * Check and set flags if this segment has a current value.
2749         * Not true if we're inserting into the "hole" at eof.
2750         */
2751        if (xfs_iext_get_extent(ifp, icur, &right)) {
2752                state |= BMAP_RIGHT_VALID;
2753                if (isnullstartblock(right.br_startblock))
2754                        state |= BMAP_RIGHT_DELAY;
2755        }
2756
2757        /*
2758         * We're inserting a real allocation between "left" and "right".
2759         * Set the contiguity flags.  Don't let extents get too large.
             * Delalloc neighbours never qualify; a real neighbour must match
             * in file offset, disk block and extent state.
2760         */
2761        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2762            left.br_startoff + left.br_blockcount == new->br_startoff &&
2763            left.br_startblock + left.br_blockcount == new->br_startblock &&
2764            left.br_state == new->br_state &&
2765            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2766                state |= BMAP_LEFT_CONTIG;
2767
            /*
             * If the left neighbour is already going to be merged, the right
             * one may only join when all three pieces together fit MAXEXTLEN.
             */
2768        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2769            new->br_startoff + new->br_blockcount == right.br_startoff &&
2770            new->br_startblock + new->br_blockcount == right.br_startblock &&
2771            new->br_state == right.br_state &&
2772            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2773            (!(state & BMAP_LEFT_CONTIG) ||
2774             left.br_blockcount + new->br_blockcount +
2775             right.br_blockcount <= MAXEXTLEN))
2776                state |= BMAP_RIGHT_CONTIG;
2777
            /*
             * Initialise here: the cases below assign error only on the
             * btree-cursor paths, yet it is returned unconditionally.
             */
2778        error = 0;
2779        /*
2780         * Select which case we're in here, and implement it.
2781         */
2782        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2783        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2784                /*
2785                 * New allocation is contiguous with real allocations on the
2786                 * left and on the right.
2787                 * Merge all three into a single extent record.
2788                 */
2789                left.br_blockcount += new->br_blockcount + right.br_blockcount;
2790
                    /*
                     * icur addresses the record read as "right" above: remove
                     * it, step back to the left neighbour and store the merged
                     * extent there.  The fork now has one record fewer.
                     */
2791                xfs_iext_remove(ip, icur, state);
2792                xfs_iext_prev(ifp, icur);
2793                xfs_iext_update_extent(ip, state, icur, &left);
2794                ifp->if_nextents--;
2795
2796                if (cur == NULL) {
2797                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2798                } else {
                            /*
                             * Mirror the merge in the btree: look up and
                             * delete the right record, step back one record
                             * (expected to be the left neighbour) and rewrite
                             * it with the merged extent.  A status other than
                             * 1 from any step is treated as corruption.
                             */
2799                        rval = XFS_ILOG_CORE;
2800                        error = xfs_bmbt_lookup_eq(cur, &right, &i);
2801                        if (error)
2802                                goto done;
2803                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2804                                error = -EFSCORRUPTED;
2805                                goto done;
2806                        }
2807                        error = xfs_btree_delete(cur, &i);
2808                        if (error)
2809                                goto done;
2810                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2811                                error = -EFSCORRUPTED;
2812                                goto done;
2813                        }
2814                        error = xfs_btree_decrement(cur, 0, &i);
2815                        if (error)
2816                                goto done;
2817                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2818                                error = -EFSCORRUPTED;
2819                                goto done;
2820                        }
2821                        error = xfs_bmbt_update(cur, &left);
2822                        if (error)
2823                                goto done;
2824                }
2825                break;
2826
2827        case BMAP_LEFT_CONTIG:
2828                /*
2829                 * New allocation is contiguous with a real allocation
2830                 * on the left.
2831                 * Merge the new allocation with the left neighbor.
2832                 */
                    /* Keep the unmodified left record as the btree lookup key. */
2833                old = left;
2834                left.br_blockcount += new->br_blockcount;
2835
2836                xfs_iext_prev(ifp, icur);
2837                xfs_iext_update_extent(ip, state, icur, &left);
2838
2839                if (cur == NULL) {
2840                        rval = xfs_ilog_fext(whichfork);
2841                } else {
                            /* Find the old left record and overwrite it. */
2842                        rval = 0;
2843                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2844                        if (error)
2845                                goto done;
2846                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2847                                error = -EFSCORRUPTED;
2848                                goto done;
2849                        }
2850                        error = xfs_bmbt_update(cur, &left);
2851                        if (error)
2852                                goto done;
2853                }
2854                break;
2855
2856        case BMAP_RIGHT_CONTIG:
2857                /*
2858                 * New allocation is contiguous with a real allocation
2859                 * on the right.
2860                 * Merge the new allocation with the right neighbor.
2861                 */
                    /* Keep the unmodified right record as the btree lookup key. */
2862                old = right;
2863
                    /*
                     * The merged record starts at the new extent's offset and
                     * block; icur already addresses "right", so it is updated
                     * without moving the cursor.
                     */
2864                right.br_startoff = new->br_startoff;
2865                right.br_startblock = new->br_startblock;
2866                right.br_blockcount += new->br_blockcount;
2867                xfs_iext_update_extent(ip, state, icur, &right);
2868
2869                if (cur == NULL) {
2870                        rval = xfs_ilog_fext(whichfork);
2871                } else {
                            /* Find the old right record and overwrite it. */
2872                        rval = 0;
2873                        error = xfs_bmbt_lookup_eq(cur, &old, &i);
2874                        if (error)
2875                                goto done;
2876                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2877                                error = -EFSCORRUPTED;
2878                                goto done;
2879                        }
2880                        error = xfs_bmbt_update(cur, &right);
2881                        if (error)
2882                                goto done;
2883                }
2884                break;
2885
2886        case 0:
2887                /*
2888                 * New allocation is not contiguous with another
2889                 * real allocation.
2890                 * Insert a new entry.
2891                 */
2892                xfs_iext_insert(ip, icur, new, state);
2893                ifp->if_nextents++;
2894
2895                if (cur == NULL) {
2896                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2897                } else {
                            /*
                             * The record must not exist yet, so the lookup is
                             * required to report "not found" (i == 0) before
                             * the insert; the insert must then report 1.
                             * NOTE(review): the lookup presumably also leaves
                             * the cursor primed with @new for the insert --
                             * confirm in xfs_bmbt_lookup_eq().
                             */
2898                        rval = XFS_ILOG_CORE;
2899                        error = xfs_bmbt_lookup_eq(cur, new, &i);
2900                        if (error)
2901                                goto done;
2902                        if (XFS_IS_CORRUPT(mp, i != 0)) {
2903                                error = -EFSCORRUPTED;
2904                                goto done;
2905                        }
2906                        error = xfs_btree_insert(cur, &i);
2907                        if (error)
2908                                goto done;
2909                        if (XFS_IS_CORRUPT(mp, i != 1)) {
2910                                error = -EFSCORRUPTED;
2911                                goto done;
2912                        }
2913                }
2914                break;
2915        }
2916
2917        /* add reverse mapping unless caller opted out */
2918        if (!(flags & XFS_BMAPI_NORMAP))
2919                xfs_rmap_map_extent(tp, ip, whichfork, new);
2920
2921        /* convert to a btree if necessary */
2922        if (xfs_bmap_needs_btree(ip, whichfork)) {
2923                int     tmp_logflags;   /* partial log flag return val */
2924
                    /*
                     * Conversion is only expected when no cursor was passed in.
                     * The conversion's log flags are merged before the error
                     * check, and cur is reloaded from *curp either way.
                     */
2925                ASSERT(cur == NULL);
2926                error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2927                                &tmp_logflags, whichfork);
2928                *logflagsp |= tmp_logflags;
2929                cur = *curp;
2930                if (error)
2931                        goto done;
2932        }
2933
2934        /* clear out the allocated field, done with it now in any case. */
2935        if (cur)
2936                cur->bc_ino.allocated = 0;
2937
2938        xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2939done:
            /* Reached on success and on error: always report the log flags. */
2940        *logflagsp |= rval;
2941        return error;
2942}
2943
2944/*
2945 * Functions used in the extent read, allocate and remove paths
2946 */
2947
2948/*
2949 * Adjust the size of the new extent based on di_extsize and rt extsize.
     *
     * The requested range (*offp, *lenp) is widened so that it starts and ends
     * on a multiple of extsz, shortened in extsz steps while it is longer than
     * MAXEXTLEN, and then shifted or trimmed against the previous (prevp) and
     * next (gotp) extents.  When rt is set the length is additionally trimmed
     * to a multiple of the realtime extent size (mp->m_sb.sb_rextsize).
     *
     * Returns 0 with *offp and *lenp updated on success.  Also returns 0, with
     * *offp and *lenp left untouched, when convert is set or when a request
     * that is neither delalloc nor at EOF already lies entirely inside gotp.
     * Returns -EINVAL when the realtime trimming cannot produce a range that
     * still covers the original request.
2950 */
2951int
2952xfs_bmap_extsize_align(
2953        xfs_mount_t     *mp,
2954        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
2955        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
2956        xfs_extlen_t    extsz,          /* align to this extent size */
2957        int             rt,             /* is this a realtime inode? */
2958        int             eof,            /* is extent at end-of-file? */
2959        int             delay,          /* creating delalloc extent? */
2960        int             convert,        /* overwriting unwritten extent? */
2961        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
2962        xfs_extlen_t    *lenp)          /* in/out: aligned length */
2963{
2964        xfs_fileoff_t   orig_off;       /* original offset */
2965        xfs_extlen_t    orig_alen;      /* original length */
2966        xfs_fileoff_t   orig_end;       /* original off+len */
2967        xfs_fileoff_t   nexto;          /* next file offset */
2968        xfs_fileoff_t   prevo;          /* previous file offset */
2969        xfs_fileoff_t   align_off;      /* temp for offset */
2970        xfs_extlen_t    align_alen;     /* temp for length */
2971        xfs_extlen_t    temp;           /* temp for calculations */
2972
            /* Unwritten extent conversion: leave the request exactly as it is. */
2973        if (convert)
2974                return 0;
2975
            /* Keep the original request; only the align_* copies get adjusted. */
2976        orig_off = align_off = *offp;
2977        orig_alen = align_alen = *lenp;
2978        orig_end = orig_off + orig_alen;
2979
2980        /*
2981         * If this request overlaps an existing extent, then don't
2982         * attempt to perform any additional alignment.
2983         */
2984        if (!delay && !eof &&
2985            (orig_off >= gotp->br_startoff) &&
2986            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2987                return 0;
2988        }
2989
2990        /*
2991         * If the file offset is unaligned vs. the extent size
2992         * we need to align it.  This will be possible unless
2993         * the file was previously written with a kernel that didn't
2994         * perform this alignment, or if a truncate shot us in the
2995         * foot.
2996         */
            /* temp receives orig_off % extsz, i.e. the distance past a boundary. */
2997        div_u64_rem(orig_off, extsz, &temp);
2998        if (temp) {
2999                align_alen += temp;
3000                align_off -= temp;
3001        }
3002
3003        /* Same adjustment for the end of the requested area. */
3004        temp = (align_alen % extsz);
3005        if (temp)
3006                align_alen += extsz - temp;
3007
3008        /*
3009         * For large extent hint sizes, the aligned extent might be larger than
3010         * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3011         * the length back under MAXEXTLEN. The outer allocation loops handle
3012         * short allocation just fine, so it is safe to do this. We only want to
3013         * do it when we are forced to, though, because it means more allocation
3014         * operations are required.
3015         */
3016        while (align_alen > MAXEXTLEN)
3017                align_alen -= extsz;
3018        ASSERT(align_alen <= MAXEXTLEN);
3019
3020        /*
3021         * If the previous block overlaps with this proposed allocation
3022         * then move the start forward without adjusting the length.
3023         */
            /* prevo: lowest file offset the aligned range may start at. */
3024        if (prevp->br_startoff != NULLFILEOFF) {
3025                if (prevp->br_startblock == HOLESTARTBLOCK)
3026                        prevo = prevp->br_startoff;
3027                else
3028                        prevo = prevp->br_startoff + prevp->br_blockcount;
3029        } else
3030                prevo = 0;
3031        if (align_off != orig_off && align_off < prevo)
3032                align_off = prevo;
3033        /*
3034         * If the next block overlaps with this proposed allocation
3035         * then move the start back without adjusting the length,
3036         * but not before offset 0.
3037         * This may of course make the start overlap previous block,
3038         * and if we hit the offset 0 limit then the next block
3039         * can still overlap too.
3040         */
            /* nexto: file offset the aligned range should end at or before. */
3041        if (!eof && gotp->br_startoff != NULLFILEOFF) {
3042                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3043                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3044                        nexto = gotp->br_startoff + gotp->br_blockcount;
3045                else
3046                        nexto = gotp->br_startoff;
3047        } else
3048                nexto = NULLFILEOFF;
3049        if (!eof &&
3050            align_off + align_alen != orig_end &&
3051            align_off + align_alen > nexto)
3052                align_off = nexto > align_alen ? nexto - align_alen : 0;
3053        /*
3054         * If we're now overlapping the next or previous extent that
3055         * means we can't fit an extsz piece in this hole.  Just move
3056         * the start forward to the first valid spot and set
3057         * the length so we hit the end.
3058         */
3059        if (align_off != orig_off && align_off < prevo)
3060                align_off = prevo;
3061        if (align_off + align_alen != orig_end &&
3062            align_off + align_alen > nexto &&
3063            nexto != NULLFILEOFF) {
3064                ASSERT(nexto > prevo);
3065                align_alen = nexto - align_off;
3066        }
3067
3068        /*
3069         * If realtime, and the result isn't a multiple of the realtime
3070         * extent size we need to remove blocks until it is.
3071         */
            /* temp = blocks left over beyond a whole number of rt extents. */
3072        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3073                /*
3074                 * We're not covering the original request, or
3075                 * we won't be able to once we fix the length.
3076                 */
3077                if (orig_off < align_off ||
3078                    orig_end > align_off + align_alen ||
3079                    align_alen - temp < orig_alen)
3080                        return -EINVAL;
3081                /*
3082                 * Try to fix it by moving the start up.
3083                 */
3084                if (align_off + temp <= orig_off) {
3085                        align_alen -= temp;
3086                        align_off += temp;
3087                }
3088                /*
3089                 * Try to fix it by moving the end in.
3090                 */
3091                else if (align_off + align_alen - temp >= orig_end)
3092                        align_alen -= temp;
3093                /*
3094                 * Set the start to the minimum then trim the length.
3095                 */
3096                else {
3097                        align_alen -= orig_off - align_off;
3098                        align_off = orig_off;
3099                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
3100                }
3101                /*
3102                 * Result doesn't cover the request, fail it.
3103                 */
3104                if (orig_off < align_off || orig_end > align_off + align_alen)
3105                        return -EINVAL;
3106        } else {
3107                ASSERT(orig_off >= align_off);
3108                /* see MAXEXTLEN handling above */
3109                ASSERT(orig_end <= align_off + align_alen ||
3110                       align_alen + extsz > MAXEXTLEN);
3111        }
3112
            /* Debug builds only: the result must not overlap either neighbour. */
3113#ifdef DEBUG
3114        if (!eof && gotp->br_startoff != NULLFILEOFF)
3115                ASSERT(align_off + align_alen <= gotp->br_startoff);
3116        if (prevp->br_startoff != NULLFILEOFF)
3117                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3118#endif
3119
            /* Hand the aligned range back to the caller. */
3120        *lenp = align_alen;
3121        *offp = align_off;
3122        return 0;
3123}
3124
3125#define XFS_ALLOC_GAP_UNITS     4
3126
    /*
     * Pick a target start block (ap->blkno) for a new allocation from the
     * extents that neighbour it in file offset: ap->prev on the left and
     * ap->got on the right.
     *
     * At EOF only the previous extent is considered.  Otherwise both sides are
     * evaluated and the one with the smaller recorded distance wins.  A file
     * offset gap is mirrored into the block number only while it is no larger
     * than XFS_ALLOC_GAP_UNITS times ap->length and the resulting block passes
     * ISVALID().  ap->blkno is left unchanged when neither neighbour yields a
     * usable block.
     */
3127void
3128xfs_bmap_adjacent(
3129        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3130{
3131        xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3132        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3133        xfs_mount_t     *mp;            /* mount point structure */
3134        int             nullfb;         /* true if ap->firstblock isn't set */
3135        int             rt;             /* true if inode is realtime */
3136
            /*
             * ISVALID(x, y): is block x an acceptable target relative to block y?
             * Realtime: x must lie below sb_rblocks.  Otherwise x must be in the
             * same AG as y, that AG number must be below sb_agcount and the block
             * offset within the AG must be below sb_agblocks.  The macro reads the
             * locals rt and mp, so it is only usable inside this function.
             */
3137#define ISVALID(x,y)    \
3138        (rt ? \
3139                (x) < mp->m_sb.sb_rblocks : \
3140                XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3141                XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3142                XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3143
3144        mp = ap->ip->i_mount;
3145        nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3146        rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3147                (ap->datatype & XFS_ALLOC_USERDATA);
3148        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3149                                                        ap->tp->t_firstblock);
3150        /*
3151         * If allocating at eof, and there's a previous real block,
3152         * try to use its last block as our starting point.
3153         */
3154        if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3155            !isnullstartblock(ap->prev.br_startblock) &&
3156            ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3157                    ap->prev.br_startblock)) {
3158                ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3159                /*
3160                 * Adjust for the gap between prevp and us.
3161                 */
3162                adjust = ap->offset -
3163                        (ap->prev.br_startoff + ap->prev.br_blockcount);
3164                if (adjust &&
3165                    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3166                        ap->blkno += adjust;
3167        }
3168        /*
3169         * If not at eof, then compare the two neighbor blocks.
3170         * Figure out whether either one gives us a good starting point,
3171         * and pick the better one.
3172         */
3173        else if (!ap->eof) {
3174                xfs_fsblock_t   gotbno;         /* right side block number */
3175                xfs_fsblock_t   gotdiff=0;      /* right side difference */
3176                xfs_fsblock_t   prevbno;        /* left side block number */
3177                xfs_fsblock_t   prevdiff=0;     /* left side difference */
3178
3179                /*
3180                 * If there's a previous (left) block, select a requested
3181                 * start block based on it.
3182                 */
                    /*
                     * Note the assignment inside the condition: prevbno is set to
                     * the block just past the previous extent, and a value of 0
                     * fails the test.
                     */
3183                if (ap->prev.br_startoff != NULLFILEOFF &&
3184                    !isnullstartblock(ap->prev.br_startblock) &&
3185                    (prevbno = ap->prev.br_startblock +
3186                               ap->prev.br_blockcount) &&
3187                    ISVALID(prevbno, ap->prev.br_startblock)) {
3188                        /*
3189                         * Calculate gap to end of previous block.
3190                         */
3191                        adjust = prevdiff = ap->offset -
3192                                (ap->prev.br_startoff +
3193                                 ap->prev.br_blockcount);
3194                        /*
3195                         * Figure the startblock based on the previous block's
3196                         * end and the gap size.
3197                         * Heuristic!
3198                         * If the gap is large relative to the piece we're
3199                         * allocating, or using it gives us an invalid block
3200                         * number, then just use the end of the previous block.
3201                         */
3202                        if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3203                            ISVALID(prevbno + prevdiff,
3204                                    ap->prev.br_startblock))
3205                                prevbno += adjust;
3206                        else
                                    /*
                                     * adjust equals prevdiff here, so the recorded
                                     * distance becomes twice the gap; that weighs
                                     * against this side in the comparison below.
                                     */
3207                                prevdiff += adjust;
3208                        /*
3209                         * If the firstblock forbids it, can't use it,
3210                         * must use default.
3211                         */
3212                        if (!rt && !nullfb &&
3213                            XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3214                                prevbno = NULLFSBLOCK;
3215                }
3216                /*
3217                 * No previous block or can't follow it, just default.
3218                 */
3219                else
3220                        prevbno = NULLFSBLOCK;
3221                /*
3222                 * If there's a following (right) block, select a requested
3223                 * start block based on it.
3224                 */
3225                if (!isnullstartblock(ap->got.br_startblock)) {
3226                        /*
3227                         * Calculate gap to start of next block.
3228                         */
3229                        adjust = gotdiff = ap->got.br_startoff - ap->offset;
3230                        /*
3231                         * Figure the startblock based on the next block's
3232                         * start and the gap size.
3233                         */
3234                        gotbno = ap->got.br_startblock;
3235                        /*
3236                         * Heuristic!
3237                         * If the gap is large relative to the piece we're
3238                         * allocating, or using it gives us an invalid block
3239                         * number, then just use the start of the next block
3240                         * offset by our length.
3241                         */
3242                        if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3243                            ISVALID(gotbno - gotdiff, gotbno))
3244                                gotbno -= adjust;
3245                        else if (ISVALID(gotbno - ap->length, gotbno)) {
3246                                gotbno -= ap->length;
                                    /* adjust equals gotdiff: result is 2 * gap - length. */
3247                                gotdiff += adjust - ap->length;
3248                        } else
                                    /* adjust equals gotdiff: result is twice the gap. */
3249                                gotdiff += adjust;
3250                        /*
3251                         * If the firstblock forbids it, can't use it,
3252                         * must use default.
3253                         */
3254                        if (!rt && !nullfb &&
3255                            XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3256                                gotbno = NULLFSBLOCK;
3257                }
3258                /*
3259                 * No next block, just default.
3260                 */
3261                else
3262                        gotbno = NULLFSBLOCK;
3263                /*
3264                 * If both valid, pick the better one, else the only good
3265                 * one, else ap->blkno is already set (to 0 or the inode block).
3266                 */
3267                if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3268                        ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3269                else if (prevbno != NULLFSBLOCK)
3270                        ap->blkno = prevbno;
3271                else if (gotbno != NULLFSBLOCK)
3272                        ap->blkno = gotbno;
3273        }
3274#undef ISVALID
3275}
3276
3277static int
3278xfs_bmap_longest_free_extent(
3279        struct xfs_trans        *tp,
3280        xfs_agnumber_t          ag,
3281        xfs_extlen_t            *blen,
3282        int                     *notinit)
3283{
3284        struct xfs_mount        *mp = tp->t_mountp;
3285        struct xfs_perag        *pag;
3286        xfs_extlen_t            longest;
3287        int                     error = 0;
3288
3289        pag = xfs_perag_get(mp, ag);
3290        if (!pag->pagf_init) {
3291                error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3292                if (error) {
3293                        /* Couldn't lock the AGF, so skip this AG. */
3294                        if (error == -EAGAIN) {
3295                                *notinit = 1;
3296                                error = 0;
3297                        }
3298                        goto out;
3299                }
3300        }
3301
3302        longest = xfs_alloc_longest_free_extent(pag,
3303                                xfs_alloc_min_freelist(mp, pag),
3304                                xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3305        if (*blen < longest)
3306                *blen = longest;
3307
3308out:
3309        xfs_perag_put(pag);
3310        return error;
3311}
3312
3313static void
3314xfs_bmap_select_minlen(
3315        struct xfs_bmalloca     *ap,
3316        struct xfs_alloc_arg    *args,
3317        xfs_extlen_t            *blen,
3318        int                     notinit)
3319{
3320        if (notinit || *blen < ap->minlen) {
3321                /*
3322                 * Since we did a BUF_TRYLOCK above, it is possible that
3323                 * there is space for this request.
3324                 */
3325                args->minlen = ap->minlen;
3326        } else if (*blen < args->maxlen) {
3327                /*
3328                 * If the best seen length is less than the request length,
3329                 * use the best as the minimum.
3330                 */
3331                args->minlen = *blen;
3332        } else {
3333                /*
3334                 * Otherwise we've seen an extent as big as maxlen, use that
3335                 * as the minimum.
3336                 */
3337                args->minlen = args->maxlen;
3338        }
3339}
3340
3341STATIC int
3342xfs_bmap_btalloc_nullfb(
3343        struct xfs_bmalloca     *ap,
3344        struct xfs_alloc_arg    *args,
3345        xfs_extlen_t            *blen)
3346{
3347        struct xfs_mount        *mp = ap->ip->i_mount;
3348        xfs_agnumber_t          ag, startag;
3349        int                     notinit = 0;
3350        int                     error;
3351
3352        args->type = XFS_ALLOCTYPE_START_BNO;
3353        args->total = ap->total;
3354
3355        startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3356        if (startag == NULLAGNUMBER)
3357                startag = ag = 0;
3358
3359        while (*blen < args->maxlen) {
3360                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3361                                                     &notinit);
3362                if (error)
3363                        return error;
3364
3365                if (++ag == mp->m_sb.sb_agcount)
3366                        ag = 0;
3367                if (ag == startag)
3368                        break;
3369        }
3370
3371        xfs_bmap_select_minlen(ap, args, blen, notinit);
3372        return 0;
3373}
3374
3375STATIC int
3376xfs_bmap_btalloc_filestreams(
3377        struct xfs_bmalloca     *ap,
3378        struct xfs_alloc_arg    *args,
3379        xfs_extlen_t            *blen)
3380{
3381        struct xfs_mount        *mp = ap->ip->i_mount;
3382        xfs_agnumber_t          ag;
3383        int                     notinit = 0;
3384        int                     error;
3385
3386        args->type = XFS_ALLOCTYPE_NEAR_BNO;
3387        args->total = ap->total;
3388
3389        ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3390        if (ag == NULLAGNUMBER)
3391                ag = 0;
3392
3393        error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3394        if (error)
3395                return error;
3396
3397        if (*blen < args->maxlen) {
3398                error = xfs_filestream_new_ag(ap, &ag);
3399                if (error)
3400                        return error;
3401
3402                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3403                                                     &notinit);
3404                if (error)
3405                        return error;
3406
3407        }
3408
3409        xfs_bmap_select_minlen(ap, args, blen, notinit);
3410
3411        /*
3412         * Set the failure fallback case to look in the selected AG as stream
3413         * may have moved.
3414         */
3415        ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3416        return 0;
3417}
3418
3419/* Update all inode and quota accounting for the allocation we just did. */
3420static void
3421xfs_bmap_btalloc_accounting(
3422        struct xfs_bmalloca     *ap,
3423        struct xfs_alloc_arg    *args)
3424{
3425        if (ap->flags & XFS_BMAPI_COWFORK) {
3426                /*
3427                 * COW fork blocks are in-core only and thus are treated as
3428                 * in-core quota reservation (like delalloc blocks) even when
3429                 * converted to real blocks. The quota reservation is not
3430                 * accounted to disk until blocks are remapped to the data
3431                 * fork. So if these blocks were previously delalloc, we
3432                 * already have quota reservation and there's nothing to do
3433                 * yet.
3434                 */
3435                if (ap->wasdel) {
3436                        xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3437                        return;
3438                }
3439
3440                /*
3441                 * Otherwise, we've allocated blocks in a hole. The transaction
3442                 * has acquired in-core quota reservation for this extent.
3443                 * Rather than account these as real blocks, however, we reduce
3444                 * the transaction quota reservation based on the allocation.
3445                 * This essentially transfers the transaction quota reservation
3446                 * to that of a delalloc extent.
3447                 */
3448                ap->ip->i_delayed_blks += args->len;
3449                xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3450                                -(long)args->len);
3451                return;
3452        }
3453
3454        /* data/attr fork only */
3455        ap->ip->i_d.di_nblocks += args->len;
3456        xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3457        if (ap->wasdel) {
3458                ap->ip->i_delayed_blks -= args->len;
3459                xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
3460        }
3461        xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3462                ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3463                args->len);
3464}
3465
3466STATIC int
3467xfs_bmap_btalloc(
3468        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3469{
3470        xfs_mount_t     *mp;            /* mount point structure */
3471        xfs_alloctype_t atype = 0;      /* type for allocation routines */
3472        xfs_extlen_t    align = 0;      /* minimum allocation alignment */
3473        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3474        xfs_agnumber_t  ag;
3475        xfs_alloc_arg_t args;
3476        xfs_fileoff_t   orig_offset;
3477        xfs_extlen_t    orig_length;
3478        xfs_extlen_t    blen;
3479        xfs_extlen_t    nextminlen = 0;
3480        int             nullfb;         /* true if ap->firstblock isn't set */
3481        int             isaligned;
3482        int             tryagain;
3483        int             error;
3484        int             stripe_align;
3485
3486        ASSERT(ap->length);
3487        orig_offset = ap->offset;
3488        orig_length = ap->length;
3489
3490        mp = ap->ip->i_mount;
3491
3492        /* stripe alignment for allocation is determined by mount parameters */
3493        stripe_align = 0;
3494        if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3495                stripe_align = mp->m_swidth;
3496        else if (mp->m_dalign)
3497                stripe_align = mp->m_dalign;
3498
3499        if (ap->flags & XFS_BMAPI_COWFORK)
3500                align = xfs_get_cowextsz_hint(ap->ip);
3501        else if (ap->datatype & XFS_ALLOC_USERDATA)
3502                align = xfs_get_extsz_hint(ap->ip);
3503        if (align) {
3504                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3505                                                align, 0, ap->eof, 0, ap->conv,
3506                                                &ap->offset, &ap->length);
3507                ASSERT(!error);
3508                ASSERT(ap->length);
3509        }
3510
3511
3512        nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3513        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3514                                                        ap->tp->t_firstblock);
3515        if (nullfb) {
3516                if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3517                    xfs_inode_is_filestream(ap->ip)) {
3518                        ag = xfs_filestream_lookup_ag(ap->ip);
3519                        ag = (ag != NULLAGNUMBER) ? ag : 0;
3520                        ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3521                } else {
3522                        ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3523                }
3524        } else
3525                ap->blkno = ap->tp->t_firstblock;
3526
3527        xfs_bmap_adjacent(ap);
3528
3529        /*
3530         * If allowed, use ap->blkno; otherwise must use firstblock since
3531         * it's in the right allocation group.
3532         */
3533        if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3534                ;
3535        else
3536                ap->blkno = ap->tp->t_firstblock;
3537        /*
3538         * Normal allocation, done through xfs_alloc_vextent.
3539         */
3540        tryagain = isaligned = 0;
3541        memset(&args, 0, sizeof(args));
3542        args.tp = ap->tp;
3543        args.mp = mp;
3544        args.fsbno = ap->blkno;
3545        args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
3546
3547        /* Trim the allocation back to the maximum an AG can fit. */
3548        args.maxlen = min(ap->length, mp->m_ag_max_usable);
3549        blen = 0;
3550        if (nullfb) {
3551                /*
3552                 * Search for an allocation group with a single extent large
3553                 * enough for the request.  If one isn't found, then adjust
3554                 * the minimum allocation size to the largest space found.
3555                 */
3556                if ((ap->datatype & XFS_ALLOC_USERDATA) &&
3557                    xfs_inode_is_filestream(ap->ip))
3558                        error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3559                else
3560                        error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3561                if (error)
3562                        return error;
3563        } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3564                if (xfs_inode_is_filestream(ap->ip))
3565                        args.type = XFS_ALLOCTYPE_FIRST_AG;
3566                else
3567                        args.type = XFS_ALLOCTYPE_START_BNO;
3568                args.total = args.minlen = ap->minlen;
3569        } else {
3570                args.type = XFS_ALLOCTYPE_NEAR_BNO;
3571                args.total = ap->total;
3572                args.minlen = ap->minlen;
3573        }
3574        /* apply extent size hints if obtained earlier */
3575        if (align) {
3576                args.prod = align;
3577                div_u64_rem(ap->offset, args.prod, &args.mod);
3578                if (args.mod)
3579                        args.mod = args.prod - args.mod;
3580        } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3581                args.prod = 1;
3582                args.mod = 0;
3583        } else {
3584                args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3585                div_u64_rem(ap->offset, args.prod, &args.mod);
3586                if (args.mod)
3587                        args.mod = args.prod - args.mod;
3588        }
3589        /*
3590         * If we are not low on available data blocks, and the underlying
3591         * logical volume manager is a stripe, and the file offset is zero then
3592         * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
3593         * is only set if the allocation length is >= the stripe unit and the
3594         * allocation offset is at the end of file.
3595         */
3596        if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3597                if (!ap->offset) {
3598                        args.alignment = stripe_align;
3599                        atype = args.type;
3600                        isaligned = 1;
3601                        /*
3602                         * Adjust minlen to try and preserve alignment if we
3603                         * can't guarantee an aligned maxlen extent.
3604                         */
3605                        if (blen > args.alignment &&
3606                            blen <= args.maxlen + args.alignment)
3607                                args.minlen = blen - args.alignment;
3608                        args.minalignslop = 0;
3609                } else {
3610                        /*
3611                         * First try an exact bno allocation.
3612                         * If it fails then do a near or start bno
3613                         * allocation with alignment turned on.
3614                         */
3615                        atype = args.type;
3616                        tryagain = 1;
3617                        args.type = XFS_ALLOCTYPE_THIS_BNO;
3618                        args.alignment = 1;
3619                        /*
3620                         * Compute the minlen+alignment for the
3621                         * next case.  Set slop so that the value
3622                         * of minlen+alignment+slop doesn't go up
3623                         * between the calls.
3624                         */
3625                        if (blen > stripe_align && blen <= args.maxlen)
3626                                nextminlen = blen - stripe_align;
3627                        else
3628                                nextminlen = args.minlen;
3629                        if (nextminlen + stripe_align > args.minlen + 1)
3630                                args.minalignslop =
3631                                        nextminlen + stripe_align -
3632                                        args.minlen - 1;
3633                        else
3634                                args.minalignslop = 0;
3635                }
3636        } else {
3637                args.alignment = 1;
3638                args.minalignslop = 0;
3639        }
3640        args.minleft = ap->minleft;
3641        args.wasdel = ap->wasdel;
3642        args.resv = XFS_AG_RESV_NONE;
3643        args.datatype = ap->datatype;
3644
3645        error = xfs_alloc_vextent(&args);
3646        if (error)
3647                return error;
3648
3649        if (tryagain && args.fsbno == NULLFSBLOCK) {
3650                /*
3651                 * Exact allocation failed. Now try with alignment
3652                 * turned on.
3653                 */
3654                args.type = atype;
3655                args.fsbno = ap->blkno;
3656                args.alignment = stripe_align;
3657                args.minlen = nextminlen;
3658                args.minalignslop = 0;
3659                isaligned = 1;
3660                if ((error = xfs_alloc_vextent(&args)))
3661                        return error;
3662        }
3663        if (isaligned && args.fsbno == NULLFSBLOCK) {
3664                /*
3665                 * allocation failed, so turn off alignment and
3666                 * try again.
3667                 */
3668                args.type = atype;
3669                args.fsbno = ap->blkno;
3670                args.alignment = 0;
3671                if ((error = xfs_alloc_vextent(&args)))
3672                        return error;
3673        }
3674        if (args.fsbno == NULLFSBLOCK && nullfb &&
3675            args.minlen > ap->minlen) {
3676                args.minlen = ap->minlen;
3677                args.type = XFS_ALLOCTYPE_START_BNO;
3678                args.fsbno = ap->blkno;
3679                if ((error = xfs_alloc_vextent(&args)))
3680                        return error;
3681        }
3682        if (args.fsbno == NULLFSBLOCK && nullfb) {
3683                args.fsbno = 0;
3684                args.type = XFS_ALLOCTYPE_FIRST_AG;
3685                args.total = ap->minlen;
3686                if ((error = xfs_alloc_vextent(&args)))
3687                        return error;
3688                ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3689        }
3690        if (args.fsbno != NULLFSBLOCK) {
3691                /*
3692                 * check the allocation happened at the same or higher AG than
3693                 * the first block that was allocated.
3694                 */
3695                ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
3696                       XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
3697                       XFS_FSB_TO_AGNO(mp, args.fsbno));
3698
3699                ap->blkno = args.fsbno;
3700                if (ap->tp->t_firstblock == NULLFSBLOCK)
3701                        ap->tp->t_firstblock = args.fsbno;
3702                ASSERT(nullfb || fb_agno <= args.agno);
3703                ap->length = args.len;
3704                /*
3705                 * If the extent size hint is active, we tried to round the
3706                 * caller's allocation request offset down to extsz and the
3707                 * length up to another extsz boundary.  If we found a free
3708                 * extent we mapped it in starting at this new offset.  If the
3709                 * newly mapped space isn't long enough to cover any of the
3710                 * range of offsets that was originally requested, move the
3711                 * mapping up so that we can fill as much of the caller's
3712                 * original request as possible.  Free space is apparently
3713                 * very fragmented so we're unlikely to be able to satisfy the
3714                 * hints anyway.
3715                 */
3716                if (ap->length <= orig_length)
3717                        ap->offset = orig_offset;
3718                else if (ap->offset + ap->length < orig_offset + orig_length)
3719                        ap->offset = orig_offset + orig_length - ap->length;
3720                xfs_bmap_btalloc_accounting(ap, &args);
3721        } else {
3722                ap->blkno = NULLFSBLOCK;
3723                ap->length = 0;
3724        }
3725        return 0;
3726}
3727
3728/* Trim extent to fit a logical block range. */
3729void
3730xfs_trim_extent(
3731        struct xfs_bmbt_irec    *irec,
3732        xfs_fileoff_t           bno,
3733        xfs_filblks_t           len)
3734{
3735        xfs_fileoff_t           distance;
3736        xfs_fileoff_t           end = bno + len;
3737
3738        if (irec->br_startoff + irec->br_blockcount <= bno ||
3739            irec->br_startoff >= end) {
3740                irec->br_blockcount = 0;
3741                return;
3742        }
3743
3744        if (irec->br_startoff < bno) {
3745                distance = bno - irec->br_startoff;
3746                if (isnullstartblock(irec->br_startblock))
3747                        irec->br_startblock = DELAYSTARTBLOCK;
3748                if (irec->br_startblock != DELAYSTARTBLOCK &&
3749                    irec->br_startblock != HOLESTARTBLOCK)
3750                        irec->br_startblock += distance;
3751                irec->br_startoff += distance;
3752                irec->br_blockcount -= distance;
3753        }
3754
3755        if (end < irec->br_startoff + irec->br_blockcount) {
3756                distance = irec->br_startoff + irec->br_blockcount - end;
3757                irec->br_blockcount -= distance;
3758        }
3759}
3760
3761/*
3762 * Trim the returned map to the required bounds
3763 */
3764STATIC void
3765xfs_bmapi_trim_map(
3766        struct xfs_bmbt_irec    *mval,
3767        struct xfs_bmbt_irec    *got,
3768        xfs_fileoff_t           *bno,
3769        xfs_filblks_t           len,
3770        xfs_fileoff_t           obno,
3771        xfs_fileoff_t           end,
3772        int                     n,
3773        int                     flags)
3774{
3775        if ((flags & XFS_BMAPI_ENTIRE) ||
3776            got->br_startoff + got->br_blockcount <= obno) {
3777                *mval = *got;
3778                if (isnullstartblock(got->br_startblock))
3779                        mval->br_startblock = DELAYSTARTBLOCK;
3780                return;
3781        }
3782
3783        if (obno > *bno)
3784                *bno = obno;
3785        ASSERT((*bno >= obno) || (n == 0));
3786        ASSERT(*bno < end);
3787        mval->br_startoff = *bno;
3788        if (isnullstartblock(got->br_startblock))
3789                mval->br_startblock = DELAYSTARTBLOCK;
3790        else
3791                mval->br_startblock = got->br_startblock +
3792                                        (*bno - got->br_startoff);
3793        /*
3794         * Return the minimum of what we got and what we asked for for
3795         * the length.  We can use the len variable here because it is
3796         * modified below and we could have been there before coming
3797         * here if the first part of the allocation didn't overlap what
3798         * was asked for.
3799         */
3800        mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3801                        got->br_blockcount - (*bno - got->br_startoff));
3802        mval->br_state = got->br_state;
3803        ASSERT(mval->br_blockcount <= len);
3804        return;
3805}
3806
3807/*
3808 * Update and validate the extent map to return
3809 */
3810STATIC void
3811xfs_bmapi_update_map(
3812        struct xfs_bmbt_irec    **map,
3813        xfs_fileoff_t           *bno,
3814        xfs_filblks_t           *len,
3815        xfs_fileoff_t           obno,
3816        xfs_fileoff_t           end,
3817        int                     *n,
3818        int                     flags)
3819{
3820        xfs_bmbt_irec_t *mval = *map;
3821
3822        ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3823               ((mval->br_startoff + mval->br_blockcount) <= end));
3824        ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3825               (mval->br_startoff < obno));
3826
3827        *bno = mval->br_startoff + mval->br_blockcount;
3828        *len = end - *bno;
3829        if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3830                /* update previous map with new information */
3831                ASSERT(mval->br_startblock == mval[-1].br_startblock);
3832                ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3833                ASSERT(mval->br_state == mval[-1].br_state);
3834                mval[-1].br_blockcount = mval->br_blockcount;
3835                mval[-1].br_state = mval->br_state;
3836        } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3837                   mval[-1].br_startblock != DELAYSTARTBLOCK &&
3838                   mval[-1].br_startblock != HOLESTARTBLOCK &&
3839                   mval->br_startblock == mval[-1].br_startblock +
3840                                          mval[-1].br_blockcount &&
3841                   mval[-1].br_state == mval->br_state) {
3842                ASSERT(mval->br_startoff ==
3843                       mval[-1].br_startoff + mval[-1].br_blockcount);
3844                mval[-1].br_blockcount += mval->br_blockcount;
3845        } else if (*n > 0 &&
3846                   mval->br_startblock == DELAYSTARTBLOCK &&
3847                   mval[-1].br_startblock == DELAYSTARTBLOCK &&
3848                   mval->br_startoff ==
3849                   mval[-1].br_startoff + mval[-1].br_blockcount) {
3850                mval[-1].br_blockcount += mval->br_blockcount;
3851                mval[-1].br_state = mval->br_state;
3852        } else if (!((*n == 0) &&
3853                     ((mval->br_startoff + mval->br_blockcount) <=
3854                      obno))) {
3855                mval++;
3856                (*n)++;
3857        }
3858        *map = mval;
3859}
3860
3861/*
3862 * Map file blocks to filesystem blocks without allocation.
3863 */
3864int
3865xfs_bmapi_read(
3866        struct xfs_inode        *ip,
3867        xfs_fileoff_t           bno,
3868        xfs_filblks_t           len,
3869        struct xfs_bmbt_irec    *mval,
3870        int                     *nmap,
3871        int                     flags)
3872{
3873        struct xfs_mount        *mp = ip->i_mount;
3874        int                     whichfork = xfs_bmapi_whichfork(flags);
3875        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3876        struct xfs_bmbt_irec    got;
3877        xfs_fileoff_t           obno;
3878        xfs_fileoff_t           end;
3879        struct xfs_iext_cursor  icur;
3880        int                     error;
3881        bool                    eof = false;
3882        int                     n = 0;
3883
3884        ASSERT(*nmap >= 1);
3885        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3886        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3887
3888        if (WARN_ON_ONCE(!ifp))
3889                return -EFSCORRUPTED;
3890
3891        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3892            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
3893                return -EFSCORRUPTED;
3894
3895        if (XFS_FORCED_SHUTDOWN(mp))
3896                return -EIO;
3897
3898        XFS_STATS_INC(mp, xs_blk_mapr);
3899
3900        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3901                error = xfs_iread_extents(NULL, ip, whichfork);
3902                if (error)
3903                        return error;
3904        }
3905
3906        if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3907                eof = true;
3908        end = bno + len;
3909        obno = bno;
3910
3911        while (bno < end && n < *nmap) {
3912                /* Reading past eof, act as though there's a hole up to end. */
3913                if (eof)
3914                        got.br_startoff = end;
3915                if (got.br_startoff > bno) {
3916                        /* Reading in a hole.  */
3917                        mval->br_startoff = bno;
3918                        mval->br_startblock = HOLESTARTBLOCK;
3919                        mval->br_blockcount =
3920                                XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3921                        mval->br_state = XFS_EXT_NORM;
3922                        bno += mval->br_blockcount;
3923                        len -= mval->br_blockcount;
3924                        mval++;
3925                        n++;
3926                        continue;
3927                }
3928
3929                /* set up the extent map to return. */
3930                xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3931                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3932
3933                /* If we're done, stop now. */
3934                if (bno >= end || n >= *nmap)
3935                        break;
3936
3937                /* Else go on to the next record. */
3938                if (!xfs_iext_next_extent(ifp, &icur, &got))
3939                        eof = true;
3940        }
3941        *nmap = n;
3942        return 0;
3943}
3944
3945/*
3946 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3947 * global pool and the extent inserted into the inode in-core extent tree.
3948 *
3949 * On entry, got refers to the first extent beyond the offset of the extent to
3950 * allocate or eof is specified if no such extent exists. On return, got refers
3951 * to the extent record that was inserted to the inode fork.
3952 *
3953 * Note that the allocated extent may have been merged with contiguous extents
3954 * during insertion into the inode fork. Thus, got does not reflect the current
3955 * state of the inode fork on return. If necessary, the caller can use lastx to
3956 * look up the updated record in the inode fork.
3957 */
3958int
3959xfs_bmapi_reserve_delalloc(
3960        struct xfs_inode        *ip,
3961        int                     whichfork,
3962        xfs_fileoff_t           off,
3963        xfs_filblks_t           len,
3964        xfs_filblks_t           prealloc,
3965        struct xfs_bmbt_irec    *got,
3966        struct xfs_iext_cursor  *icur,
3967        int                     eof)
3968{
3969        struct xfs_mount        *mp = ip->i_mount;
3970        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
3971        xfs_extlen_t            alen;
3972        xfs_extlen_t            indlen;
3973        int                     error;
3974        xfs_fileoff_t           aoff = off;
3975
3976        /*
3977         * Cap the alloc length. Keep track of prealloc so we know whether to
3978         * tag the inode before we return.
3979         */
3980        alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3981        if (!eof)
3982                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3983        if (prealloc && alen >= len)
3984                prealloc = alen - len;
3985
3986        /* Figure out the extent size, adjust alen */
3987        if (whichfork == XFS_COW_FORK) {
3988                struct xfs_bmbt_irec    prev;
3989                xfs_extlen_t            extsz = xfs_get_cowextsz_hint(ip);
3990
3991                if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3992                        prev.br_startoff = NULLFILEOFF;
3993
3994                error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3995                                               1, 0, &aoff, &alen);
3996                ASSERT(!error);
3997        }
3998
3999        /*
4000         * Make a transaction-less quota reservation for delayed allocation
4001         * blocks.  This number gets adjusted later.  We return if we haven't
4002         * allocated blocks already inside this loop.
4003         */
4004        error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4005                                                XFS_QMOPT_RES_REGBLKS);
4006        if (error)
4007                return error;
4008
4009        /*
4010         * Split changing sb for alen and indlen since they could be coming
4011         * from different places.
4012         */
4013        indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4014        ASSERT(indlen > 0);
4015
4016        error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4017        if (error)
4018                goto out_unreserve_quota;
4019
4020        error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4021        if (error)
4022                goto out_unreserve_blocks;
4023
4024
4025        ip->i_delayed_blks += alen;
4026        xfs_mod_delalloc(ip->i_mount, alen + indlen);
4027
4028        got->br_startoff = aoff;
4029        got->br_startblock = nullstartblock(indlen);
4030        got->br_blockcount = alen;
4031        got->br_state = XFS_EXT_NORM;
4032
4033        xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4034
4035        /*
4036         * Tag the inode if blocks were preallocated. Note that COW fork
4037         * preallocation can occur at the start or end of the extent, even when
4038         * prealloc == 0, so we must also check the aligned offset and length.
4039         */
4040        if (whichfork == XFS_DATA_FORK && prealloc)
4041                xfs_inode_set_eofblocks_tag(ip);
4042        if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
4043                xfs_inode_set_cowblocks_tag(ip);
4044
4045        return 0;
4046
4047out_unreserve_blocks:
4048        xfs_mod_fdblocks(mp, alen, false);
4049out_unreserve_quota:
4050        if (XFS_IS_QUOTA_ON(mp))
4051                xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
4052                                                XFS_QMOPT_RES_REGBLKS);
4053        return error;
4054}
4055
4056static int
4057xfs_bmap_alloc_userdata(
4058        struct xfs_bmalloca     *bma)
4059{
4060        struct xfs_mount        *mp = bma->ip->i_mount;
4061        int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4062        int                     error;
4063
4064        /*
4065         * Set the data type being allocated. For the data fork, the first data
4066         * in the file is treated differently to all other allocations. For the
4067         * attribute fork, we only need to ensure the allocated range is not on
4068         * the busy list.
4069         */
4070        bma->datatype = XFS_ALLOC_NOBUSY;
4071        if (whichfork == XFS_DATA_FORK) {
4072                bma->datatype |= XFS_ALLOC_USERDATA;
4073                if (bma->offset == 0)
4074                        bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4075
4076                if (mp->m_dalign && bma->length >= mp->m_dalign) {
4077                        error = xfs_bmap_isaeof(bma, whichfork);
4078                        if (error)
4079                                return error;
4080                }
4081
4082                if (XFS_IS_REALTIME_INODE(bma->ip))
4083                        return xfs_bmap_rtalloc(bma);
4084        }
4085
4086        return xfs_bmap_btalloc(bma);
4087}
4088
4089static int
4090xfs_bmapi_allocate(
4091        struct xfs_bmalloca     *bma)
4092{
4093        struct xfs_mount        *mp = bma->ip->i_mount;
4094        int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4095        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4096        int                     tmp_logflags = 0;
4097        int                     error;
4098
4099        ASSERT(bma->length > 0);
4100
4101        /*
4102         * For the wasdelay case, we could also just allocate the stuff asked
4103         * for in this bmap call but that wouldn't be as good.
4104         */
4105        if (bma->wasdel) {
4106                bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4107                bma->offset = bma->got.br_startoff;
4108                if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
4109                        bma->prev.br_startoff = NULLFILEOFF;
4110        } else {
4111                bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4112                if (!bma->eof)
4113                        bma->length = XFS_FILBLKS_MIN(bma->length,
4114                                        bma->got.br_startoff - bma->offset);
4115        }
4116
4117        if (bma->flags & XFS_BMAPI_CONTIG)
4118                bma->minlen = bma->length;
4119        else
4120                bma->minlen = 1;
4121
4122        if (bma->flags & XFS_BMAPI_METADATA)
4123                error = xfs_bmap_btalloc(bma);
4124        else
4125                error = xfs_bmap_alloc_userdata(bma);
4126        if (error || bma->blkno == NULLFSBLOCK)
4127                return error;
4128
4129        if (bma->flags & XFS_BMAPI_ZERO) {
4130                error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
4131                if (error)
4132                        return error;
4133        }
4134
4135        if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4136                bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4137        /*
4138         * Bump the number of extents we've allocated
4139         * in this call.
4140         */
4141        bma->nallocs++;
4142
4143        if (bma->cur)
4144                bma->cur->bc_ino.flags =
4145                        bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
4146
4147        bma->got.br_startoff = bma->offset;
4148        bma->got.br_startblock = bma->blkno;
4149        bma->got.br_blockcount = bma->length;
4150        bma->got.br_state = XFS_EXT_NORM;
4151
4152        if (bma->flags & XFS_BMAPI_PREALLOC)
4153                bma->got.br_state = XFS_EXT_UNWRITTEN;
4154
4155        if (bma->wasdel)
4156                error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4157        else
4158                error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4159                                whichfork, &bma->icur, &bma->cur, &bma->got,
4160                                &bma->logflags, bma->flags);
4161
4162        bma->logflags |= tmp_logflags;
4163        if (error)
4164                return error;
4165
4166        /*
4167         * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4168         * or xfs_bmap_add_extent_hole_real might have merged it into one of
4169         * the neighbouring ones.
4170         */
4171        xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4172
4173        ASSERT(bma->got.br_startoff <= bma->offset);
4174        ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4175               bma->offset + bma->length);
4176        ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4177               bma->got.br_state == XFS_EXT_UNWRITTEN);
4178        return 0;
4179}
4180
4181STATIC int
4182xfs_bmapi_convert_unwritten(
4183        struct xfs_bmalloca     *bma,
4184        struct xfs_bmbt_irec    *mval,
4185        xfs_filblks_t           len,
4186        int                     flags)
4187{
4188        int                     whichfork = xfs_bmapi_whichfork(flags);
4189        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4190        int                     tmp_logflags = 0;
4191        int                     error;
4192
4193        /* check if we need to do unwritten->real conversion */
4194        if (mval->br_state == XFS_EXT_UNWRITTEN &&
4195            (flags & XFS_BMAPI_PREALLOC))
4196                return 0;
4197
4198        /* check if we need to do real->unwritten conversion */
4199        if (mval->br_state == XFS_EXT_NORM &&
4200            (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4201                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4202                return 0;
4203
4204        /*
4205         * Modify (by adding) the state flag, if writing.
4206         */
4207        ASSERT(mval->br_blockcount <= len);
4208        if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4209                bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4210                                        bma->ip, whichfork);
4211        }
4212        mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4213                                ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4214
4215        /*
4216         * Before insertion into the bmbt, zero the range being converted
4217         * if required.
4218         */
4219        if (flags & XFS_BMAPI_ZERO) {
4220                error = xfs_zero_extent(bma->ip, mval->br_startblock,
4221                                        mval->br_blockcount);
4222                if (error)
4223                        return error;
4224        }
4225
4226        error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4227                        &bma->icur, &bma->cur, mval, &tmp_logflags);
4228        /*
4229         * Log the inode core unconditionally in the unwritten extent conversion
4230         * path because the conversion might not have done so (e.g., if the
4231         * extent count hasn't changed). We need to make sure the inode is dirty
4232         * in the transaction for the sake of fsync(), even if nothing has
4233         * changed, because fsync() will not force the log for this transaction
4234         * unless it sees the inode pinned.
4235         *
4236         * Note: If we're only converting cow fork extents, there aren't
4237         * any on-disk updates to make, so we don't need to log anything.
4238         */
4239        if (whichfork != XFS_COW_FORK)
4240                bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4241        if (error)
4242                return error;
4243
4244        /*
4245         * Update our extent pointer, given that
4246         * xfs_bmap_add_extent_unwritten_real might have merged it into one
4247         * of the neighbouring ones.
4248         */
4249        xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4250
4251        /*
4252         * We may have combined previously unwritten space with written space,
4253         * so generate another request.
4254         */
4255        if (mval->br_blockcount < len)
4256                return -EAGAIN;
4257        return 0;
4258}
4259
4260static inline xfs_extlen_t
4261xfs_bmapi_minleft(
4262        struct xfs_trans        *tp,
4263        struct xfs_inode        *ip,
4264        int                     fork)
4265{
4266        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, fork);
4267
4268        if (tp && tp->t_firstblock != NULLFSBLOCK)
4269                return 0;
4270        if (ifp->if_format != XFS_DINODE_FMT_BTREE)
4271                return 1;
4272        return be16_to_cpu(ifp->if_broot->bb_level) + 1;
4273}
4274
4275/*
4276 * Log whatever the flags say, even if error.  Otherwise we might miss detecting
4277 * a case where the data is changed, there's an error, and it's not logged so we
4278 * don't shutdown when we should.  Don't bother logging extents/btree changes if
4279 * we converted to the other format.
4280 */
4281static void
4282xfs_bmapi_finish(
4283        struct xfs_bmalloca     *bma,
4284        int                     whichfork,
4285        int                     error)
4286{
4287        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4288
4289        if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4290            ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4291                bma->logflags &= ~xfs_ilog_fext(whichfork);
4292        else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4293                 ifp->if_format != XFS_DINODE_FMT_BTREE)
4294                bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4295
4296        if (bma->logflags)
4297                xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4298        if (bma->cur)
4299                xfs_btree_del_cursor(bma->cur, error);
4300}
4301
4302/*
4303 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4304 * extent state if necessary.  Detailed behaviour is controlled by the flags
4305 * parameter.  Only allocates blocks from a single allocation group, to avoid
4306 * locking problems.
4307 */
4308int
4309xfs_bmapi_write(
4310        struct xfs_trans        *tp,            /* transaction pointer */
4311        struct xfs_inode        *ip,            /* incore inode */
4312        xfs_fileoff_t           bno,            /* starting file offs. mapped */
4313        xfs_filblks_t           len,            /* length to map in file */
4314        int                     flags,          /* XFS_BMAPI_... */
4315        xfs_extlen_t            total,          /* total blocks needed */
4316        struct xfs_bmbt_irec    *mval,          /* output: map values */
4317        int                     *nmap)          /* i/o: mval size/count */
4318{
4319        struct xfs_bmalloca     bma = {
4320                .tp             = tp,
4321                .ip             = ip,
4322                .total          = total,
4323        };
4324        struct xfs_mount        *mp = ip->i_mount;
4325        int                     whichfork = xfs_bmapi_whichfork(flags);
4326        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4327        xfs_fileoff_t           end;            /* end of mapped file region */
4328        bool                    eof = false;    /* after the end of extents */
4329        int                     error;          /* error return */
4330        int                     n;              /* current extent index */
4331        xfs_fileoff_t           obno;           /* old block number (offset) */
4332
4333#ifdef DEBUG
4334        xfs_fileoff_t           orig_bno;       /* original block number value */
4335        int                     orig_flags;     /* original flags arg value */
4336        xfs_filblks_t           orig_len;       /* original value of len arg */
4337        struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4338        int                     orig_nmap;      /* original value of *nmap */
4339
4340        orig_bno = bno;
4341        orig_len = len;
4342        orig_flags = flags;
4343        orig_mval = mval;
4344        orig_nmap = *nmap;
4345#endif
4346
4347        ASSERT(*nmap >= 1);
4348        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4349        ASSERT(tp != NULL);
4350        ASSERT(len > 0);
4351        ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4352        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4353        ASSERT(!(flags & XFS_BMAPI_REMAP));
4354
4355        /* zeroing is currently only for data extents, not metadata */
4356        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4357                        (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4358        /*
4359         * we can allocate unwritten extents or pre-zero allocated blocks,
4360         * but it makes no sense to do both at once. This would result in
4361         * zeroing the unwritten extent twice, but it still being an
4362         * unwritten extent....
4363         */
4364        ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4365                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4366
4367        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4368            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4369                return -EFSCORRUPTED;
4370        }
4371
4372        if (XFS_FORCED_SHUTDOWN(mp))
4373                return -EIO;
4374
4375        XFS_STATS_INC(mp, xs_blk_mapw);
4376
4377        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4378                error = xfs_iread_extents(tp, ip, whichfork);
4379                if (error)
4380                        goto error0;
4381        }
4382
4383        if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4384                eof = true;
4385        if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4386                bma.prev.br_startoff = NULLFILEOFF;
4387        bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4388
4389        n = 0;
4390        end = bno + len;
4391        obno = bno;
4392        while (bno < end && n < *nmap) {
4393                bool                    need_alloc = false, wasdelay = false;
4394
4395                /* in hole or beyond EOF? */
4396                if (eof || bma.got.br_startoff > bno) {
4397                        /*
4398                         * CoW fork conversions should /never/ hit EOF or
4399                         * holes.  There should always be something for us
4400                         * to work on.
4401                         */
4402                        ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4403                                 (flags & XFS_BMAPI_COWFORK)));
4404
4405                        need_alloc = true;
4406                } else if (isnullstartblock(bma.got.br_startblock)) {
4407                        wasdelay = true;
4408                }
4409
4410                /*
4411                 * First, deal with the hole before the allocated space
4412                 * that we found, if any.
4413                 */
4414                if (need_alloc || wasdelay) {
4415                        bma.eof = eof;
4416                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4417                        bma.wasdel = wasdelay;
4418                        bma.offset = bno;
4419                        bma.flags = flags;
4420
4421                        /*
4422                         * There's a 32/64 bit type mismatch between the
4423                         * allocation length request (which can be 64 bits in
4424                         * length) and the bma length request, which is
4425                         * xfs_extlen_t and therefore 32 bits. Hence we have to
4426                         * check for 32-bit overflows and handle them here.
4427                         */
4428                        if (len > (xfs_filblks_t)MAXEXTLEN)
4429                                bma.length = MAXEXTLEN;
4430                        else
4431                                bma.length = len;
4432
4433                        ASSERT(len > 0);
4434                        ASSERT(bma.length > 0);
4435                        error = xfs_bmapi_allocate(&bma);
4436                        if (error)
4437                                goto error0;
4438                        if (bma.blkno == NULLFSBLOCK)
4439                                break;
4440
4441                        /*
4442                         * If this is a CoW allocation, record the data in
4443                         * the refcount btree for orphan recovery.
4444                         */
4445                        if (whichfork == XFS_COW_FORK)
4446                                xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4447                                                bma.length);
4448                }
4449
4450                /* Deal with the allocated space we found.  */
4451                xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4452                                                        end, n, flags);
4453
4454                /* Execute unwritten extent conversion if necessary */
4455                error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4456                if (error == -EAGAIN)
4457                        continue;
4458                if (error)
4459                        goto error0;
4460
4461                /* update the extent map to return */
4462                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4463
4464                /*
4465                 * If we're done, stop now.  Stop when we've allocated
4466                 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4467                 * the transaction may get too big.
4468                 */
4469                if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4470                        break;
4471
4472                /* Else go on to the next record. */
4473                bma.prev = bma.got;
4474                if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4475                        eof = true;
4476        }
4477        *nmap = n;
4478
4479        error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4480                        whichfork);
4481        if (error)
4482                goto error0;
4483
4484        ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4485               ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4486        xfs_bmapi_finish(&bma, whichfork, 0);
4487        xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4488                orig_nmap, *nmap);
4489        return 0;
4490error0:
4491        xfs_bmapi_finish(&bma, whichfork, error);
4492        return error;
4493}
4494
4495/*
4496 * Convert an existing delalloc extent to real blocks based on file offset. This
4497 * attempts to allocate the entire delalloc extent and may require multiple
4498 * invocations to allocate the target offset if a large enough physical extent
4499 * is not available.
4500 */
4501int
4502xfs_bmapi_convert_delalloc(
4503        struct xfs_inode        *ip,
4504        int                     whichfork,
4505        xfs_off_t               offset,
4506        struct iomap            *iomap,
4507        unsigned int            *seq)
4508{
4509        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4510        struct xfs_mount        *mp = ip->i_mount;
4511        xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
4512        struct xfs_bmalloca     bma = { NULL };
4513        uint16_t                flags = 0;
4514        struct xfs_trans        *tp;
4515        int                     error;
4516
4517        if (whichfork == XFS_COW_FORK)
4518                flags |= IOMAP_F_SHARED;
4519
4520        /*
4521         * Space for the extent and indirect blocks was reserved when the
4522         * delalloc extent was created so there's no need to do so here.
4523         */
4524        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4525                                XFS_TRANS_RESERVE, &tp);
4526        if (error)
4527                return error;
4528
4529        xfs_ilock(ip, XFS_ILOCK_EXCL);
4530        xfs_trans_ijoin(tp, ip, 0);
4531
4532        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4533            bma.got.br_startoff > offset_fsb) {
4534                /*
4535                 * No extent found in the range we are trying to convert.  This
4536                 * should only happen for the COW fork, where another thread
4537                 * might have moved the extent to the data fork in the meantime.
4538                 */
4539                WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4540                error = -EAGAIN;
4541                goto out_trans_cancel;
4542        }
4543
4544        /*
4545         * If we find a real extent here we raced with another thread converting
4546         * the extent.  Just return the real extent at this offset.
4547         */
4548        if (!isnullstartblock(bma.got.br_startblock)) {
4549                xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4550                *seq = READ_ONCE(ifp->if_seq);
4551                goto out_trans_cancel;
4552        }
4553
4554        bma.tp = tp;
4555        bma.ip = ip;
4556        bma.wasdel = true;
4557        bma.offset = bma.got.br_startoff;
4558        bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
4559        bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4560
4561        /*
4562         * When we're converting the delalloc reservations backing dirty pages
4563         * in the page cache, we must be careful about how we create the new
4564         * extents:
4565         *
4566         * New CoW fork extents are created unwritten, turned into real extents
4567         * when we're about to write the data to disk, and mapped into the data
4568         * fork after the write finishes.  End of story.
4569         *
4570         * New data fork extents must be mapped in as unwritten and converted
4571         * to real extents after the write succeeds to avoid exposing stale
4572         * disk contents if we crash.
4573         */
4574        bma.flags = XFS_BMAPI_PREALLOC;
4575        if (whichfork == XFS_COW_FORK)
4576                bma.flags |= XFS_BMAPI_COWFORK;
4577
4578        if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4579                bma.prev.br_startoff = NULLFILEOFF;
4580
4581        error = xfs_bmapi_allocate(&bma);
4582        if (error)
4583                goto out_finish;
4584
4585        error = -ENOSPC;
4586        if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
4587                goto out_finish;
4588        error = -EFSCORRUPTED;
4589        if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
4590                goto out_finish;
4591
4592        XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4593        XFS_STATS_INC(mp, xs_xstrat_quick);
4594
4595        ASSERT(!isnullstartblock(bma.got.br_startblock));
4596        xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
4597        *seq = READ_ONCE(ifp->if_seq);
4598
4599        if (whichfork == XFS_COW_FORK)
4600                xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4601
4602        error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603                        whichfork);
4604        if (error)
4605                goto out_finish;
4606
4607        xfs_bmapi_finish(&bma, whichfork, 0);
4608        error = xfs_trans_commit(tp);
4609        xfs_iunlock(ip, XFS_ILOCK_EXCL);
4610        return error;
4611
4612out_finish:
4613        xfs_bmapi_finish(&bma, whichfork, error);
4614out_trans_cancel:
4615        xfs_trans_cancel(tp);
4616        xfs_iunlock(ip, XFS_ILOCK_EXCL);
4617        return error;
4618}
4619
4620int
4621xfs_bmapi_remap(
4622        struct xfs_trans        *tp,
4623        struct xfs_inode        *ip,
4624        xfs_fileoff_t           bno,
4625        xfs_filblks_t           len,
4626        xfs_fsblock_t           startblock,
4627        int                     flags)
4628{
4629        struct xfs_mount        *mp = ip->i_mount;
4630        struct xfs_ifork        *ifp;
4631        struct xfs_btree_cur    *cur = NULL;
4632        struct xfs_bmbt_irec    got;
4633        struct xfs_iext_cursor  icur;
4634        int                     whichfork = xfs_bmapi_whichfork(flags);
4635        int                     logflags = 0, error;
4636
4637        ifp = XFS_IFORK_PTR(ip, whichfork);
4638        ASSERT(len > 0);
4639        ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4640        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4641        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4642                           XFS_BMAPI_NORMAP)));
4643        ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4644                        (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4645
4646        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4647            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4648                return -EFSCORRUPTED;
4649        }
4650
4651        if (XFS_FORCED_SHUTDOWN(mp))
4652                return -EIO;
4653
4654        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4655                error = xfs_iread_extents(tp, ip, whichfork);
4656                if (error)
4657                        return error;
4658        }
4659
4660        if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4661                /* make sure we only reflink into a hole. */
4662                ASSERT(got.br_startoff > bno);
4663                ASSERT(got.br_startoff - bno >= len);
4664        }
4665
4666        ip->i_d.di_nblocks += len;
4667        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4668
4669        if (ifp->if_flags & XFS_IFBROOT) {
4670                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4671                cur->bc_ino.flags = 0;
4672        }
4673
4674        got.br_startoff = bno;
4675        got.br_startblock = startblock;
4676        got.br_blockcount = len;
4677        if (flags & XFS_BMAPI_PREALLOC)
4678                got.br_state = XFS_EXT_UNWRITTEN;
4679        else
4680                got.br_state = XFS_EXT_NORM;
4681
4682        error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4683                        &cur, &got, &logflags, flags);
4684        if (error)
4685                goto error0;
4686
4687        error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4688
4689error0:
4690        if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4691                logflags &= ~XFS_ILOG_DEXT;
4692        else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4693                logflags &= ~XFS_ILOG_DBROOT;
4694
4695        if (logflags)
4696                xfs_trans_log_inode(tp, ip, logflags);
4697        if (cur)
4698                xfs_btree_del_cursor(cur, error);
4699        return error;
4700}
4701
4702/*
4703 * When a delalloc extent is split (e.g., due to a hole punch), the original
4704 * indlen reservation must be shared across the two new extents that are left
4705 * behind.
4706 *
4707 * Given the original reservation and the worst case indlen for the two new
4708 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4709 * reservation fairly across the two new extents. If necessary, steal available
4710 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4711 * ores == 1). The number of stolen blocks is returned. The availability and
4712 * subsequent accounting of stolen blocks is the responsibility of the caller.
4713 */
4714static xfs_filblks_t
4715xfs_bmap_split_indlen(
4716        xfs_filblks_t                   ores,           /* original res. */
4717        xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4718        xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
4719        xfs_filblks_t                   avail)          /* stealable blocks */
4720{
4721        xfs_filblks_t                   len1 = *indlen1;
4722        xfs_filblks_t                   len2 = *indlen2;
4723        xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4724        xfs_filblks_t                   stolen = 0;
4725        xfs_filblks_t                   resfactor;
4726
4727        /*
4728         * Steal as many blocks as we can to try and satisfy the worst case
4729         * indlen for both new extents.
4730         */
4731        if (ores < nres && avail)
4732                stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4733        ores += stolen;
4734
4735         /* nothing else to do if we've satisfied the new reservation */
4736        if (ores >= nres)
4737                return stolen;
4738
4739        /*
4740         * We can't meet the total required reservation for the two extents.
4741         * Calculate the percent of the overall shortage between both extents
4742         * and apply this percentage to each of the requested indlen values.
4743         * This distributes the shortage fairly and reduces the chances that one
4744         * of the two extents is left with nothing when extents are repeatedly
4745         * split.
4746         */
4747        resfactor = (ores * 100);
4748        do_div(resfactor, nres);
4749        len1 *= resfactor;
4750        do_div(len1, 100);
4751        len2 *= resfactor;
4752        do_div(len2, 100);
4753        ASSERT(len1 + len2 <= ores);
4754        ASSERT(len1 < *indlen1 && len2 < *indlen2);
4755
4756        /*
4757         * Hand out the remainder to each extent. If one of the two reservations
4758         * is zero, we want to make sure that one gets a block first. The loop
4759         * below starts with len1, so hand len2 a block right off the bat if it
4760         * is zero.
4761         */
4762        ores -= (len1 + len2);
4763        ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4764        if (ores && !len2 && *indlen2) {
4765                len2++;
4766                ores--;
4767        }
4768        while (ores) {
4769                if (len1 < *indlen1) {
4770                        len1++;
4771                        ores--;
4772                }
4773                if (!ores)
4774                        break;
4775                if (len2 < *indlen2) {
4776                        len2++;
4777                        ores--;
4778                }
4779        }
4780
4781        *indlen1 = len1;
4782        *indlen2 = len2;
4783
4784        return stolen;
4785}
4786
4787int
4788xfs_bmap_del_extent_delay(
4789        struct xfs_inode        *ip,
4790        int                     whichfork,
4791        struct xfs_iext_cursor  *icur,
4792        struct xfs_bmbt_irec    *got,
4793        struct xfs_bmbt_irec    *del)
4794{
4795        struct xfs_mount        *mp = ip->i_mount;
4796        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4797        struct xfs_bmbt_irec    new;
4798        int64_t                 da_old, da_new, da_diff = 0;
4799        xfs_fileoff_t           del_endoff, got_endoff;
4800        xfs_filblks_t           got_indlen, new_indlen, stolen;
4801        int                     state = xfs_bmap_fork_to_state(whichfork);
4802        int                     error = 0;
4803        bool                    isrt;
4804
4805        XFS_STATS_INC(mp, xs_del_exlist);
4806
4807        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4808        del_endoff = del->br_startoff + del->br_blockcount;
4809        got_endoff = got->br_startoff + got->br_blockcount;
4810        da_old = startblockval(got->br_startblock);
4811        da_new = 0;
4812
4813        ASSERT(del->br_blockcount > 0);
4814        ASSERT(got->br_startoff <= del->br_startoff);
4815        ASSERT(got_endoff >= del_endoff);
4816
4817        if (isrt) {
4818                uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4819
4820                do_div(rtexts, mp->m_sb.sb_rextsize);
4821                xfs_mod_frextents(mp, rtexts);
4822        }
4823
4824        /*
4825         * Update the inode delalloc counter now and wait to update the
4826         * sb counters as we might have to borrow some blocks for the
4827         * indirect block accounting.
4828         */
4829        error = xfs_trans_reserve_quota_nblks(NULL, ip,
4830                        -((long)del->br_blockcount), 0,
4831                        isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4832        if (error)
4833                return error;
4834        ip->i_delayed_blks -= del->br_blockcount;
4835
4836        if (got->br_startoff == del->br_startoff)
4837                state |= BMAP_LEFT_FILLING;
4838        if (got_endoff == del_endoff)
4839                state |= BMAP_RIGHT_FILLING;
4840
4841        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4842        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4843                /*
4844                 * Matches the whole extent.  Delete the entry.
4845                 */
4846                xfs_iext_remove(ip, icur, state);
4847                xfs_iext_prev(ifp, icur);
4848                break;
4849        case BMAP_LEFT_FILLING:
4850                /*
4851                 * Deleting the first part of the extent.
4852                 */
4853                got->br_startoff = del_endoff;
4854                got->br_blockcount -= del->br_blockcount;
4855                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4856                                got->br_blockcount), da_old);
4857                got->br_startblock = nullstartblock((int)da_new);
4858                xfs_iext_update_extent(ip, state, icur, got);
4859                break;
4860        case BMAP_RIGHT_FILLING:
4861                /*
4862                 * Deleting the last part of the extent.
4863                 */
4864                got->br_blockcount = got->br_blockcount - del->br_blockcount;
4865                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4866                                got->br_blockcount), da_old);
4867                got->br_startblock = nullstartblock((int)da_new);
4868                xfs_iext_update_extent(ip, state, icur, got);
4869                break;
4870        case 0:
4871                /*
4872                 * Deleting the middle of the extent.
4873                 *
4874                 * Distribute the original indlen reservation across the two new
4875                 * extents.  Steal blocks from the deleted extent if necessary.
4876                 * Stealing blocks simply fudges the fdblocks accounting below.
4877                 * Warn if either of the new indlen reservations is zero as this
4878                 * can lead to delalloc problems.
4879                 */
4880                got->br_blockcount = del->br_startoff - got->br_startoff;
4881                got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4882
4883                new.br_blockcount = got_endoff - del_endoff;
4884                new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4885
4886                WARN_ON_ONCE(!got_indlen || !new_indlen);
4887                stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4888                                                       del->br_blockcount);
4889
4890                got->br_startblock = nullstartblock((int)got_indlen);
4891
4892                new.br_startoff = del_endoff;
4893                new.br_state = got->br_state;
4894                new.br_startblock = nullstartblock((int)new_indlen);
4895
4896                xfs_iext_update_extent(ip, state, icur, got);
4897                xfs_iext_next(ifp, icur);
4898                xfs_iext_insert(ip, icur, &new, state);
4899
4900                da_new = got_indlen + new_indlen - stolen;
4901                del->br_blockcount -= stolen;
4902                break;
4903        }
4904
4905        ASSERT(da_old >= da_new);
4906        da_diff = da_old - da_new;
4907        if (!isrt)
4908                da_diff += del->br_blockcount;
4909        if (da_diff) {
4910                xfs_mod_fdblocks(mp, da_diff, false);
4911                xfs_mod_delalloc(mp, -da_diff);
4912        }
4913        return error;
4914}
4915
4916void
4917xfs_bmap_del_extent_cow(
4918        struct xfs_inode        *ip,
4919        struct xfs_iext_cursor  *icur,
4920        struct xfs_bmbt_irec    *got,
4921        struct xfs_bmbt_irec    *del)
4922{
4923        struct xfs_mount        *mp = ip->i_mount;
4924        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4925        struct xfs_bmbt_irec    new;
4926        xfs_fileoff_t           del_endoff, got_endoff;
4927        int                     state = BMAP_COWFORK;
4928
4929        XFS_STATS_INC(mp, xs_del_exlist);
4930
4931        del_endoff = del->br_startoff + del->br_blockcount;
4932        got_endoff = got->br_startoff + got->br_blockcount;
4933
4934        ASSERT(del->br_blockcount > 0);
4935        ASSERT(got->br_startoff <= del->br_startoff);
4936        ASSERT(got_endoff >= del_endoff);
4937        ASSERT(!isnullstartblock(got->br_startblock));
4938
4939        if (got->br_startoff == del->br_startoff)
4940                state |= BMAP_LEFT_FILLING;
4941        if (got_endoff == del_endoff)
4942                state |= BMAP_RIGHT_FILLING;
4943
4944        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4945        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4946                /*
4947                 * Matches the whole extent.  Delete the entry.
4948                 */
4949                xfs_iext_remove(ip, icur, state);
4950                xfs_iext_prev(ifp, icur);
4951                break;
4952        case BMAP_LEFT_FILLING:
4953                /*
4954                 * Deleting the first part of the extent.
4955                 */
4956                got->br_startoff = del_endoff;
4957                got->br_blockcount -= del->br_blockcount;
4958                got->br_startblock = del->br_startblock + del->br_blockcount;
4959                xfs_iext_update_extent(ip, state, icur, got);
4960                break;
4961        case BMAP_RIGHT_FILLING:
4962                /*
4963                 * Deleting the last part of the extent.
4964                 */
4965                got->br_blockcount -= del->br_blockcount;
4966                xfs_iext_update_extent(ip, state, icur, got);
4967                break;
4968        case 0:
4969                /*
4970                 * Deleting the middle of the extent.
4971                 */
4972                got->br_blockcount = del->br_startoff - got->br_startoff;
4973
4974                new.br_startoff = del_endoff;
4975                new.br_blockcount = got_endoff - del_endoff;
4976                new.br_state = got->br_state;
4977                new.br_startblock = del->br_startblock + del->br_blockcount;
4978
4979                xfs_iext_update_extent(ip, state, icur, got);
4980                xfs_iext_next(ifp, icur);
4981                xfs_iext_insert(ip, icur, &new, state);
4982                break;
4983        }
4984        ip->i_delayed_blks -= del->br_blockcount;
4985}
4986
4987/*
4988 * Called by xfs_bmapi to update file extent records and the btree
4989 * after removing space.
4990 */
4991STATIC int                              /* error */
4992xfs_bmap_del_extent_real(
4993        xfs_inode_t             *ip,    /* incore inode pointer */
4994        xfs_trans_t             *tp,    /* current transaction pointer */
4995        struct xfs_iext_cursor  *icur,
4996        xfs_btree_cur_t         *cur,   /* if null, not a btree */
4997        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4998        int                     *logflagsp, /* inode logging flags */
4999        int                     whichfork, /* data or attr fork */
5000        int                     bflags) /* bmapi flags */
5001{
5002        xfs_fsblock_t           del_endblock=0; /* first block past del */
5003        xfs_fileoff_t           del_endoff;     /* first offset past del */
5004        int                     do_fx;  /* free extent at end of routine */
5005        int                     error;  /* error return value */
5006        int                     flags = 0;/* inode logging flags */
5007        struct xfs_bmbt_irec    got;    /* current extent entry */
5008        xfs_fileoff_t           got_endoff;     /* first offset past got */
5009        int                     i;      /* temp state */
5010        struct xfs_ifork        *ifp;   /* inode fork pointer */
5011        xfs_mount_t             *mp;    /* mount structure */
5012        xfs_filblks_t           nblks;  /* quota/sb block count */
5013        xfs_bmbt_irec_t         new;    /* new record to be inserted */
5014        /* REFERENCED */
5015        uint                    qfield; /* quota field to update */
5016        int                     state = xfs_bmap_fork_to_state(whichfork);
5017        struct xfs_bmbt_irec    old;
5018
5019        mp = ip->i_mount;
5020        XFS_STATS_INC(mp, xs_del_exlist);
5021
5022        ifp = XFS_IFORK_PTR(ip, whichfork);
5023        ASSERT(del->br_blockcount > 0);
5024        xfs_iext_get_extent(ifp, icur, &got);
5025        ASSERT(got.br_startoff <= del->br_startoff);
5026        del_endoff = del->br_startoff + del->br_blockcount;
5027        got_endoff = got.br_startoff + got.br_blockcount;
5028        ASSERT(got_endoff >= del_endoff);
5029        ASSERT(!isnullstartblock(got.br_startblock));
5030        qfield = 0;
5031        error = 0;
5032
5033        /*
5034         * If it's the case where the directory code is running with no block
5035         * reservation, and the deleted block is in the middle of its extent,
5036         * and the resulting insert of an extent would cause transformation to
5037         * btree format, then reject it.  The calling code will then swap blocks
5038         * around instead.  We have to do this now, rather than waiting for the
5039         * conversion to btree format, since the transaction will be dirty then.
5040         */
5041        if (tp->t_blk_res == 0 &&
5042            ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5043            ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5044            del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5045                return -ENOSPC;
5046
5047        flags = XFS_ILOG_CORE;
5048        if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5049                xfs_fsblock_t   bno;
5050                xfs_filblks_t   len;
5051                xfs_extlen_t    mod;
5052
5053                bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
5054                                  &mod);
5055                ASSERT(mod == 0);
5056                len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
5057                                  &mod);
5058                ASSERT(mod == 0);
5059
5060                error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5061                if (error)
5062                        goto done;
5063                do_fx = 0;
5064                nblks = len * mp->m_sb.sb_rextsize;
5065                qfield = XFS_TRANS_DQ_RTBCOUNT;
5066        } else {
5067                do_fx = 1;
5068                nblks = del->br_blockcount;
5069                qfield = XFS_TRANS_DQ_BCOUNT;
5070        }
5071
5072        del_endblock = del->br_startblock + del->br_blockcount;
5073        if (cur) {
5074                error = xfs_bmbt_lookup_eq(cur, &got, &i);
5075                if (error)
5076                        goto done;
5077                if (XFS_IS_CORRUPT(mp, i != 1)) {
5078                        error = -EFSCORRUPTED;
5079                        goto done;
5080                }
5081        }
5082
5083        if (got.br_startoff == del->br_startoff)
5084                state |= BMAP_LEFT_FILLING;
5085        if (got_endoff == del_endoff)
5086                state |= BMAP_RIGHT_FILLING;
5087
5088        switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5089        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5090                /*
5091                 * Matches the whole extent.  Delete the entry.
5092                 */
5093                xfs_iext_remove(ip, icur, state);
5094                xfs_iext_prev(ifp, icur);
5095                ifp->if_nextents--;
5096
5097                flags |= XFS_ILOG_CORE;
5098                if (!cur) {
5099                        flags |= xfs_ilog_fext(whichfork);
5100                        break;
5101                }
5102                if ((error = xfs_btree_delete(cur, &i)))
5103                        goto done;
5104                if (XFS_IS_CORRUPT(mp, i != 1)) {
5105                        error = -EFSCORRUPTED;
5106                        goto done;
5107                }
5108                break;
5109        case BMAP_LEFT_FILLING:
5110                /*
5111                 * Deleting the first part of the extent.
5112                 */
5113                got.br_startoff = del_endoff;
5114                got.br_startblock = del_endblock;
5115                got.br_blockcount -= del->br_blockcount;
5116                xfs_iext_update_extent(ip, state, icur, &got);
5117                if (!cur) {
5118                        flags |= xfs_ilog_fext(whichfork);
5119                        break;
5120                }
5121                error = xfs_bmbt_update(cur, &got);
5122                if (error)
5123                        goto done;
5124                break;
5125        case BMAP_RIGHT_FILLING:
5126                /*
5127                 * Deleting the last part of the extent.
5128                 */
5129                got.br_blockcount -= del->br_blockcount;
5130                xfs_iext_update_extent(ip, state, icur, &got);
5131                if (!cur) {
5132                        flags |= xfs_ilog_fext(whichfork);
5133                        break;
5134                }
5135                error = xfs_bmbt_update(cur, &got);
5136                if (error)
5137                        goto done;
5138                break;
5139        case 0:
5140                /*
5141                 * Deleting the middle of the extent.
5142                 */
5143                old = got;
5144
5145                got.br_blockcount = del->br_startoff - got.br_startoff;
5146                xfs_iext_update_extent(ip, state, icur, &got);
5147
5148                new.br_startoff = del_endoff;
5149                new.br_blockcount = got_endoff - del_endoff;
5150                new.br_state = got.br_state;
5151                new.br_startblock = del_endblock;
5152
5153                flags |= XFS_ILOG_CORE;
5154                if (cur) {
5155                        error = xfs_bmbt_update(cur, &got);
5156                        if (error)
5157                                goto done;
5158                        error = xfs_btree_increment(cur, 0, &i);
5159                        if (error)
5160                                goto done;
5161                        cur->bc_rec.b = new;
5162                        error = xfs_btree_insert(cur, &i);
5163                        if (error && error != -ENOSPC)
5164                                goto done;
5165                        /*
5166                         * If get no-space back from btree insert, it tried a
5167                         * split, and we have a zero block reservation.  Fix up
5168                         * our state and return the error.
5169                         */
5170                        if (error == -ENOSPC) {
5171                                /*
5172                                 * Reset the cursor, don't trust it after any
5173                                 * insert operation.
5174                                 */
5175                                error = xfs_bmbt_lookup_eq(cur, &got, &i);
5176                                if (error)
5177                                        goto done;
5178                                if (XFS_IS_CORRUPT(mp, i != 1)) {
5179                                        error = -EFSCORRUPTED;
5180                                        goto done;
5181                                }
5182                                /*
5183                                 * Update the btree record back
5184                                 * to the original value.
5185                                 */
5186                                error = xfs_bmbt_update(cur, &old);
5187                                if (error)
5188                                        goto done;
5189                                /*
5190                                 * Reset the extent record back
5191                                 * to the original value.
5192                                 */
5193                                xfs_iext_update_extent(ip, state, icur, &old);
5194                                flags = 0;
5195                                error = -ENOSPC;
5196                                goto done;
5197                        }
5198                        if (XFS_IS_CORRUPT(mp, i != 1)) {
5199                                error = -EFSCORRUPTED;
5200                                goto done;
5201                        }
5202                } else
5203                        flags |= xfs_ilog_fext(whichfork);
5204
5205                ifp->if_nextents++;
5206                xfs_iext_next(ifp, icur);
5207                xfs_iext_insert(ip, icur, &new, state);
5208                break;
5209        }
5210
5211        /* remove reverse mapping */
5212        xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5213
5214        /*
5215         * If we need to, add to list of extents to delete.
5216         */
5217        if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5218                if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5219                        xfs_refcount_decrease_extent(tp, del);
5220                } else {
5221                        __xfs_bmap_add_free(tp, del->br_startblock,
5222                                        del->br_blockcount, NULL,
5223                                        (bflags & XFS_BMAPI_NODISCARD) ||
5224                                        del->br_state == XFS_EXT_UNWRITTEN);
5225                }
5226        }
5227
5228        /*
5229         * Adjust inode # blocks in the file.
5230         */
5231        if (nblks)
5232                ip->i_d.di_nblocks -= nblks;
5233        /*
5234         * Adjust quota data.
5235         */
5236        if (qfield && !(bflags & XFS_BMAPI_REMAP))
5237                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5238
5239done:
5240        *logflagsp = flags;
5241        return error;
5242}
5243
5244/*
 * Unmap (remove) blocks from one fork of a file.
 *
 * The range starting at file offset "start" and covering *rlen blocks is
 * processed from its last block back towards its first, one extent (or part
 * of an extent) per loop iteration.  If nexts is nonzero then the number of
 * extents to remove is limited to that value; zero means no limit.  For the
 * data fork of a reflink inode with a transaction, the number of blocks
 * unmapped by one call is additionally capped by xfs_refcount_max_unmap().
 *
 * On successful return *rlen holds the number of blocks at the front of the
 * range that still have to be unmapped, or zero when nothing is left to do.
 * *rlen is not updated when an error is returned from inside the loop.
 *
 * Returns 0 on success or a negative errno.
 */
5250int                                             /* error */
5251__xfs_bunmapi(
5252        struct xfs_trans        *tp,            /* transaction pointer */
5253        struct xfs_inode        *ip,            /* incore inode */
5254        xfs_fileoff_t           start,          /* first file offset deleted */
5255        xfs_filblks_t           *rlen,          /* i/o: amount remaining */
5256        int                     flags,          /* misc flags */
5257        xfs_extnum_t            nexts)          /* number of extents max */
5258{
5259        struct xfs_btree_cur    *cur;           /* bmap btree cursor */
5260        struct xfs_bmbt_irec    del;            /* extent being deleted */
5261        int                     error;          /* error return value */
5262        xfs_extnum_t            extno;          /* extent number in list */
5263        struct xfs_bmbt_irec    got;            /* current extent record */
5264        struct xfs_ifork        *ifp;           /* inode fork pointer */
5265        int                     isrt;           /* freeing in rt area */
5266        int                     logflags;       /* transaction logging flags */
5267        xfs_extlen_t            mod;            /* rt extent offset */
5268        struct xfs_mount        *mp = ip->i_mount;
5269        int                     tmp_logflags;   /* partial logging flags */
5270        int                     wasdel;         /* was a delayed alloc extent */
5271        int                     whichfork;      /* data or attribute fork */
5272        xfs_fsblock_t           sum;
5273        xfs_filblks_t           len = *rlen;    /* length to unmap in file */
5274        xfs_fileoff_t           max_len;
5275        xfs_agnumber_t          prev_agno = NULLAGNUMBER, agno;
5276        xfs_fileoff_t           end;
5277        struct xfs_iext_cursor  icur;
5278        bool                    done = false;
5279
5280        trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5281
5282        whichfork = xfs_bmapi_whichfork(flags);
5283        ASSERT(whichfork != XFS_COW_FORK);
5284        ifp = XFS_IFORK_PTR(ip, whichfork);
5285        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
5286                return -EFSCORRUPTED;
5287        if (XFS_FORCED_SHUTDOWN(mp))
5288                return -EIO;
5289
5290        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5291        ASSERT(len > 0);
5292        ASSERT(nexts >= 0);
5293
5294        /*
5295         * Guesstimate how many blocks we can unmap without running the risk of
5296         * blowing out the transaction with a mix of EFIs and reflink
5297         * adjustments.
5298         */
5299        if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5300                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5301        else
5302                max_len = len;
5303
            /* Make sure the in-core extent list is loaded before walking it. */
5304        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5305            (error = xfs_iread_extents(tp, ip, whichfork)))
5306                return error;
            /* No extents at all: nothing to unmap, report completion. */
5307        if (xfs_iext_count(ifp) == 0) {
5308                *rlen = 0;
5309                return 0;
5310        }
5311        XFS_STATS_INC(mp, xs_blk_unmap);
5312        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5313        end = start + len;
5314
5315        if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5316                *rlen = 0;
5317                return 0;
5318        }
            /*
             * "end" was computed as one block past the range; from here on it
             * names the last block that is still to be unmapped.
             */
5319        end--;
5320
5321        logflags = 0;
5322        if (ifp->if_flags & XFS_IFBROOT) {
5323                ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
5324                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5325                cur->bc_ino.flags = 0;
5326        } else
5327                cur = NULL;
5328
5329        if (isrt) {
5330                /*
5331                 * Synchronize by locking the bitmap inode.
5332                 */
5333                xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5334                xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5335                xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5336                xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5337        }
5338
            /*
             * Walk backwards through the range.  The loop stops when "end"
             * has moved below "start" (or wrapped to -1), when the extent
             * limit "nexts" has been reached, or when the max_len budget is
             * used up.
             */
5339        extno = 0;
5340        while (end != (xfs_fileoff_t)-1 && end >= start &&
5341               (nexts == 0 || extno < nexts) && max_len > 0) {
5342                /*
5343                 * Is the found extent after a hole in which end lives?
5344                 * Just back up to the previous extent, if so.
5345                 */
5346                if (got.br_startoff > end &&
5347                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5348                        done = true;
5349                        break;
5350                }
5351                /*
5352                 * Is the last block of this extent before the range
5353                 * we're supposed to delete?  If so, we're done.
5354                 */
5355                end = XFS_FILEOFF_MIN(end,
5356                        got.br_startoff + got.br_blockcount - 1);
5357                if (end < start)
5358                        break;
5359                /*
5360                 * Then deal with the (possibly delayed) allocated space
5361                 * we found.
5362                 */
5363                del = got;
5364                wasdel = isnullstartblock(del.br_startblock);
5365
5366                /*
5367                 * Make sure we don't touch multiple AGF headers out of order
5368                 * in a single transaction, as that could cause AB-BA deadlocks.
5369                 */
5370                if (!wasdel && !isrt) {
5371                        agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5372                        if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5373                                break;
5374                        prev_agno = agno;
5375                }
                    /*
                     * Clip "del" to the part of the extent that lies inside
                     * [start, end]: first trim the front, then the back.
                     */
5376                if (got.br_startoff < start) {
5377                        del.br_startoff = start;
5378                        del.br_blockcount -= start - got.br_startoff;
5379                        if (!wasdel)
5380                                del.br_startblock += start - got.br_startoff;
5381                }
5382                if (del.br_startoff + del.br_blockcount > end + 1)
5383                        del.br_blockcount = end + 1 - del.br_startoff;
5384
5385                /* How much can we safely unmap? */
5386                if (max_len < del.br_blockcount) {
                            /*
                             * Drop blocks from the front of "del" so that only
                             * its last max_len blocks remain.
                             */
5387                        del.br_startoff += del.br_blockcount - max_len;
5388                        if (!wasdel)
5389                                del.br_startblock += del.br_blockcount - max_len;
5390                        del.br_blockcount = max_len;
5391                }
5392
                    /*
                     * The alignment handling against sb_rextsize below is only
                     * needed for realtime files; everything else goes straight
                     * to the delete step.
                     */
5393                if (!isrt)
5394                        goto delete;
5395
5396                sum = del.br_startblock + del.br_blockcount;
5397                div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5398                if (mod) {
5399                        /*
5400                         * Realtime extent not lined up at the end.
5401                         * The extent could have been split into written
5402                         * and unwritten pieces, or we could just be
5403                         * unmapping part of it.  But we can't really
5404                         * get rid of part of a realtime extent.
5405                         */
5406                        if (del.br_state == XFS_EXT_UNWRITTEN) {
5407                                /*
5408                                 * This piece is unwritten, or we're not
5409                                 * using unwritten extents.  Skip over it.
5410                                 */
5411                                ASSERT(end >= mod);
5412                                end -= mod > del.br_blockcount ?
5413                                        del.br_blockcount : mod;
5414                                if (end < got.br_startoff &&
5415                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5416                                        done = true;
5417                                        break;
5418                                }
5419                                continue;
5420                        }
5421                        /*
5422                         * It's written, turn it unwritten.
5423                         * This is better than zeroing it.
5424                         */
5425                        ASSERT(del.br_state == XFS_EXT_NORM);
5426                        ASSERT(tp->t_blk_res > 0);
5427                        /*
5428                         * If this spans a realtime extent boundary,
5429                         * chop it back to the start of the one we end at.
5430                         */
5431                        if (del.br_blockcount > mod) {
5432                                del.br_startoff += del.br_blockcount - mod;
5433                                del.br_startblock += del.br_blockcount - mod;
5434                                del.br_blockcount = mod;
5435                        }
5436                        del.br_state = XFS_EXT_UNWRITTEN;
5437                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5438                                        whichfork, &icur, &cur, &del,
5439                                        &logflags);
5440                        if (error)
5441                                goto error0;
5442                        goto nodelete;
5443                }
5444                div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5445                if (mod) {
                            /* Blocks from del's start up to the next rt extent boundary. */
5446                        xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
5447
5448                        /*
5449                         * Realtime extent is lined up at the end but not
5450                         * at the front.  We'll get rid of full extents if
5451                         * we can.
5452                         */
5453                        if (del.br_blockcount > off) {
5454                                del.br_blockcount -= off;
5455                                del.br_startoff += off;
5456                                del.br_startblock += off;
5457                        } else if (del.br_startoff == start &&
5458                                   (del.br_state == XFS_EXT_UNWRITTEN ||
5459                                    tp->t_blk_res == 0)) {
5460                                /*
5461                                 * Can't make it unwritten.  There isn't
5462                                 * a full extent here so just skip it.
5463                                 */
5464                                ASSERT(end >= del.br_blockcount);
5465                                end -= del.br_blockcount;
5466                                if (got.br_startoff > end &&
5467                                    !xfs_iext_prev_extent(ifp, &icur, &got)) {
5468                                        done = true;
5469                                        break;
5470                                }
5471                                continue;
5472                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5473                                struct xfs_bmbt_irec    prev;
5474                                xfs_fileoff_t           unwrite_start;
5475
5476                                /*
5477                                 * This one is already unwritten.
5478                                 * It must have a written left neighbor.
5479                                 * Unwrite the killed part of that one and
5480                                 * try again.
5481                                 */
5482                                if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5483                                        ASSERT(0);
5484                                ASSERT(prev.br_state == XFS_EXT_NORM);
5485                                ASSERT(!isnullstartblock(prev.br_startblock));
5486                                ASSERT(del.br_startblock ==
5487                                       prev.br_startblock + prev.br_blockcount);
                                    /*
                                     * The conversion starts at the highest of:
                                     * the start of the unmap range, "mod"
                                     * blocks before del, and the start of the
                                     * left neighbor itself.  "mod" is then
                                     * reused as the distance by which the
                                     * front of prev is trimmed.
                                     */
5488                                unwrite_start = max3(start,
5489                                                     del.br_startoff - mod,
5490                                                     prev.br_startoff);
5491                                mod = unwrite_start - prev.br_startoff;
5492                                prev.br_startoff = unwrite_start;
5493                                prev.br_startblock += mod;
5494                                prev.br_blockcount -= mod;
5495                                prev.br_state = XFS_EXT_UNWRITTEN;
5496                                error = xfs_bmap_add_extent_unwritten_real(tp,
5497                                                ip, whichfork, &icur, &cur,
5498                                                &prev, &logflags);
5499                                if (error)
5500                                        goto error0;
5501                                goto nodelete;
5502                        } else {
5503                                ASSERT(del.br_state == XFS_EXT_NORM);
5504                                del.br_state = XFS_EXT_UNWRITTEN;
5505                                error = xfs_bmap_add_extent_unwritten_real(tp,
5506                                                ip, whichfork, &icur, &cur,
5507                                                &del, &logflags);
5508                                if (error)
5509                                        goto error0;
5510                                goto nodelete;
5511                        }
5512                }
5513
5514delete:
                    /*
                     * Remove "del" from the fork.  Delayed allocations have no
                     * startblock and take the delay path; real extents take the
                     * path that is handed the transaction and btree cursor and
                     * returns logging flags.
                     */
5515                if (wasdel) {
5516                        error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5517                                        &got, &del);
5518                } else {
5519                        error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5520                                        &del, &tmp_logflags, whichfork,
5521                                        flags);
5522                        logflags |= tmp_logflags;
5523                }
5524
5525                if (error)
5526                        goto error0;
5527
                    /*
                     * Charge the removed blocks against the budget and resume
                     * just below the piece that was deleted.
                     */
5528                max_len -= del.br_blockcount;
5529                end = del.br_startoff - 1;
5530nodelete:
5531                /*
5532                 * If not done go on to the next (previous) record.
5533                 */
5534                if (end != (xfs_fileoff_t)-1 && end >= start) {
5535                        if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5536                            (got.br_startoff > end &&
5537                             !xfs_iext_prev_extent(ifp, &icur, &got))) {
5538                                done = true;
5539                                break;
5540                        }
5541                        extno++;
5542                }
5543        }
            /*
             * Tell the caller how much of the range is left: nothing if we ran
             * out of extents or moved below "start", otherwise the blocks from
             * "start" up to and including "end".
             */
5544        if (done || end == (xfs_fileoff_t)-1 || end < start)
5545                *rlen = 0;
5546        else
5547                *rlen = end - start + 1;
5548
5549        /*
5550         * Convert to a btree if necessary.
5551         */
5552        if (xfs_bmap_needs_btree(ip, whichfork)) {
5553                ASSERT(cur == NULL);
5554                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5555                                &tmp_logflags, whichfork);
5556                logflags |= tmp_logflags;
5557        } else {
5558                error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
5559                        whichfork);
5560        }
5561
5562error0:
5563        /*
5564         * Log everything.  Do this after conversion, there's no point in
5565         * logging the extent records if we've converted to btree format.
5566         */
5567        if ((logflags & xfs_ilog_fext(whichfork)) &&
5568            ifp->if_format != XFS_DINODE_FMT_EXTENTS)
5569                logflags &= ~xfs_ilog_fext(whichfork);
5570        else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5571                 ifp->if_format != XFS_DINODE_FMT_BTREE)
5572                logflags &= ~xfs_ilog_fbroot(whichfork);
5573        /*
5574         * Log inode even in the error case, if the transaction
5575         * is dirty we'll need to shut down the filesystem.
5576         */
5577        if (logflags)
5578                xfs_trans_log_inode(tp, ip, logflags);
5579        if (cur) {
5580                if (!error)
5581                        cur->bc_ino.allocated = 0;
5582                xfs_btree_del_cursor(cur, error);
5583        }
5584        return error;
5585}
5586
5587/* Unmap a range of a file. */
5588int
5589xfs_bunmapi(
5590        xfs_trans_t             *tp,
5591        struct xfs_inode        *ip,
5592        xfs_fileoff_t           bno,
5593        xfs_filblks_t           len,
5594        int                     flags,
5595        xfs_extnum_t            nexts,
5596        int                     *done)
5597{
5598        int                     error;
5599
5600        error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5601        *done = (len == 0);
5602        return error;
5603}
5604
5605/*
5606 * Determine whether an extent shift can be accomplished by a merge with the
5607 * extent that precedes the target hole of the shift.
5608 */
5609STATIC bool
5610xfs_bmse_can_merge(
5611        struct xfs_bmbt_irec    *left,  /* preceding extent */
5612        struct xfs_bmbt_irec    *got,   /* current extent to shift */
5613        xfs_fileoff_t           shift)  /* shift fsb */
5614{
5615        xfs_fileoff_t           startoff;
5616
5617        startoff = got->br_startoff - shift;
5618
5619        /*
5620         * The extent, once shifted, must be adjacent in-file and on-disk with
5621         * the preceding extent.
5622         */
5623        if ((left->br_startoff + left->br_blockcount != startoff) ||
5624            (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5625            (left->br_state != got->br_state) ||
5626            (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5627                return false;
5628
5629        return true;
5630}
5631
5632/*
5633 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5634 * hole in the file. If an extent shift would result in the extent being fully
5635 * adjacent to the extent that currently precedes the hole, we can merge with
5636 * the preceding extent rather than do the shift.
5637 *
5638 * This function assumes the caller has verified a shift-by-merge is possible
5639 * with the provided extents via xfs_bmse_can_merge().
5640 */
5641STATIC int
5642xfs_bmse_merge(
5643        struct xfs_trans                *tp,
5644        struct xfs_inode                *ip,
5645        int                             whichfork,
5646        xfs_fileoff_t                   shift,          /* shift fsb */
5647        struct xfs_iext_cursor          *icur,
5648        struct xfs_bmbt_irec            *got,           /* extent to shift */
5649        struct xfs_bmbt_irec            *left,          /* preceding extent */
5650        struct xfs_btree_cur            *cur,
5651        int                             *logflags)      /* output */
5652{
5653        struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
5654        struct xfs_bmbt_irec            new;
5655        xfs_filblks_t                   blockcount;
5656        int                             error, i;
5657        struct xfs_mount                *mp = ip->i_mount;
5658
5659        blockcount = left->br_blockcount + got->br_blockcount;
5660
5661        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5662        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5663        ASSERT(xfs_bmse_can_merge(left, got, shift));
5664
5665        new = *left;
5666        new.br_blockcount = blockcount;
5667
5668        /*
5669         * Update the on-disk extent count, the btree if necessary and log the
5670         * inode.
5671         */
5672        ifp->if_nextents--;
5673        *logflags |= XFS_ILOG_CORE;
5674        if (!cur) {
5675                *logflags |= XFS_ILOG_DEXT;
5676                goto done;
5677        }
5678
5679        /* lookup and remove the extent to merge */
5680        error = xfs_bmbt_lookup_eq(cur, got, &i);
5681        if (error)
5682                return error;
5683        if (XFS_IS_CORRUPT(mp, i != 1))
5684                return -EFSCORRUPTED;
5685
5686        error = xfs_btree_delete(cur, &i);
5687        if (error)
5688                return error;
5689        if (XFS_IS_CORRUPT(mp, i != 1))
5690                return -EFSCORRUPTED;
5691
5692        /* lookup and update size of the previous extent */
5693        error = xfs_bmbt_lookup_eq(cur, left, &i);
5694        if (error)
5695                return error;
5696        if (XFS_IS_CORRUPT(mp, i != 1))
5697                return -EFSCORRUPTED;
5698
5699        error = xfs_bmbt_update(cur, &new);
5700        if (error)
5701                return error;
5702
5703        /* change to extent format if required after extent removal */
5704        error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
5705        if (error)
5706                return error;
5707
5708done:
5709        xfs_iext_remove(ip, icur, 0);
5710        xfs_iext_prev(ifp, icur);
5711        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5712                        &new);
5713
5714        /* update reverse mapping. rmap functions merge the rmaps for us */
5715        xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5716        memcpy(&new, got, sizeof(new));
5717        new.br_startoff = left->br_startoff + left->br_blockcount;
5718        xfs_rmap_map_extent(tp, ip, whichfork, &new);
5719        return 0;
5720}
5721
5722static int
5723xfs_bmap_shift_update_extent(
5724        struct xfs_trans        *tp,
5725        struct xfs_inode        *ip,
5726        int                     whichfork,
5727        struct xfs_iext_cursor  *icur,
5728        struct xfs_bmbt_irec    *got,
5729        struct xfs_btree_cur    *cur,
5730        int                     *logflags,
5731        xfs_fileoff_t           startoff)
5732{
5733        struct xfs_mount        *mp = ip->i_mount;
5734        struct xfs_bmbt_irec    prev = *got;
5735        int                     error, i;
5736
5737        *logflags |= XFS_ILOG_CORE;
5738
5739        got->br_startoff = startoff;
5740
5741        if (cur) {
5742                error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5743                if (error)
5744                        return error;
5745                if (XFS_IS_CORRUPT(mp, i != 1))
5746                        return -EFSCORRUPTED;
5747
5748                error = xfs_bmbt_update(cur, got);
5749                if (error)
5750                        return error;
5751        } else {
5752                *logflags |= XFS_ILOG_DEXT;
5753        }
5754
5755        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5756                        got);
5757
5758        /* update reverse mapping */
5759        xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5760        xfs_rmap_map_extent(tp, ip, whichfork, got);
5761        return 0;
5762}
5763
5764int
5765xfs_bmap_collapse_extents(
5766        struct xfs_trans        *tp,
5767        struct xfs_inode        *ip,
5768        xfs_fileoff_t           *next_fsb,
5769        xfs_fileoff_t           offset_shift_fsb,
5770        bool                    *done)
5771{
5772        int                     whichfork = XFS_DATA_FORK;
5773        struct xfs_mount        *mp = ip->i_mount;
5774        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5775        struct xfs_btree_cur    *cur = NULL;
5776        struct xfs_bmbt_irec    got, prev;
5777        struct xfs_iext_cursor  icur;
5778        xfs_fileoff_t           new_startoff;
5779        int                     error = 0;
5780        int                     logflags = 0;
5781
5782        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5783            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5784                return -EFSCORRUPTED;
5785        }
5786
5787        if (XFS_FORCED_SHUTDOWN(mp))
5788                return -EIO;
5789
5790        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5791
5792        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5793                error = xfs_iread_extents(tp, ip, whichfork);
5794                if (error)
5795                        return error;
5796        }
5797
5798        if (ifp->if_flags & XFS_IFBROOT) {
5799                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5800                cur->bc_ino.flags = 0;
5801        }
5802
5803        if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5804                *done = true;
5805                goto del_cursor;
5806        }
5807        if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5808                error = -EFSCORRUPTED;
5809                goto del_cursor;
5810        }
5811
5812        new_startoff = got.br_startoff - offset_shift_fsb;
5813        if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5814                if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5815                        error = -EINVAL;
5816                        goto del_cursor;
5817                }
5818
5819                if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5820                        error = xfs_bmse_merge(tp, ip, whichfork,
5821                                        offset_shift_fsb, &icur, &got, &prev,
5822                                        cur, &logflags);
5823                        if (error)
5824                                goto del_cursor;
5825                        goto done;
5826                }
5827        } else {
5828                if (got.br_startoff < offset_shift_fsb) {
5829                        error = -EINVAL;
5830                        goto del_cursor;
5831                }
5832        }
5833
5834        error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5835                        cur, &logflags, new_startoff);
5836        if (error)
5837                goto del_cursor;
5838
5839done:
5840        if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5841                *done = true;
5842                goto del_cursor;
5843        }
5844
5845        *next_fsb = got.br_startoff;
5846del_cursor:
5847        if (cur)
5848                xfs_btree_del_cursor(cur, error);
5849        if (logflags)
5850                xfs_trans_log_inode(tp, ip, logflags);
5851        return error;
5852}
5853
5854/* Make sure we won't be right-shifting an extent past the maximum bound. */
5855int
5856xfs_bmap_can_insert_extents(
5857        struct xfs_inode        *ip,
5858        xfs_fileoff_t           off,
5859        xfs_fileoff_t           shift)
5860{
5861        struct xfs_bmbt_irec    got;
5862        int                     is_empty;
5863        int                     error = 0;
5864
5865        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5866
5867        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5868                return -EIO;
5869
5870        xfs_ilock(ip, XFS_ILOCK_EXCL);
5871        error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5872        if (!error && !is_empty && got.br_startoff >= off &&
5873            ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5874                error = -EINVAL;
5875        xfs_iunlock(ip, XFS_ILOCK_EXCL);
5876
5877        return error;
5878}
5879
5880int
5881xfs_bmap_insert_extents(
5882        struct xfs_trans        *tp,
5883        struct xfs_inode        *ip,
5884        xfs_fileoff_t           *next_fsb,
5885        xfs_fileoff_t           offset_shift_fsb,
5886        bool                    *done,
5887        xfs_fileoff_t           stop_fsb)
5888{
5889        int                     whichfork = XFS_DATA_FORK;
5890        struct xfs_mount        *mp = ip->i_mount;
5891        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
5892        struct xfs_btree_cur    *cur = NULL;
5893        struct xfs_bmbt_irec    got, next;
5894        struct xfs_iext_cursor  icur;
5895        xfs_fileoff_t           new_startoff;
5896        int                     error = 0;
5897        int                     logflags = 0;
5898
5899        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
5900            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
5901                return -EFSCORRUPTED;
5902        }
5903
5904        if (XFS_FORCED_SHUTDOWN(mp))
5905                return -EIO;
5906
5907        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5908
5909        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5910                error = xfs_iread_extents(tp, ip, whichfork);
5911                if (error)
5912                        return error;
5913        }
5914
5915        if (ifp->if_flags & XFS_IFBROOT) {
5916                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5917                cur->bc_ino.flags = 0;
5918        }
5919
5920        if (*next_fsb == NULLFSBLOCK) {
5921                xfs_iext_last(ifp, &icur);
5922                if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5923                    stop_fsb > got.br_startoff) {
5924                        *done = true;
5925                        goto del_cursor;
5926                }
5927        } else {
5928                if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5929                        *done = true;
5930                        goto del_cursor;
5931                }
5932        }
5933        if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
5934                error = -EFSCORRUPTED;
5935                goto del_cursor;
5936        }
5937
5938        if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
5939                error = -EFSCORRUPTED;
5940                goto del_cursor;
5941        }
5942
5943        new_startoff = got.br_startoff + offset_shift_fsb;
5944        if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5945                if (new_startoff + got.br_blockcount > next.br_startoff) {
5946                        error = -EINVAL;
5947                        goto del_cursor;
5948                }
5949
5950                /*
5951                 * Unlike a left shift (which involves a hole punch), a right
5952                 * shift does not modify extent neighbors in any way.  We should
5953                 * never find mergeable extents in this scenario.  Check anyways
5954                 * and warn if we encounter two extents that could be one.
5955                 */
5956                if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5957                        WARN_ON_ONCE(1);
5958        }
5959
5960        error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5961                        cur, &logflags, new_startoff);
5962        if (error)
5963                goto del_cursor;
5964
5965        if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5966            stop_fsb >= got.br_startoff + got.br_blockcount) {
5967                *done = true;
5968                goto del_cursor;
5969        }
5970
5971        *next_fsb = got.br_startoff;
5972del_cursor:
5973        if (cur)
5974                xfs_btree_del_cursor(cur, error);
5975        if (logflags)
5976                xfs_trans_log_inode(tp, ip, logflags);
5977        return error;
5978}
5979
5980/*
5981 * Splits an extent into two extents at split_fsb block such that it is the
5982 * first block of the current_ext. @ext is a target extent to be split.
5983 * @split_fsb is a block where the extents is split.  If split_fsb lies in a
5984 * hole or the first block of extents, just return 0.
5985 */
5986int
5987xfs_bmap_split_extent(
5988        struct xfs_trans        *tp,
5989        struct xfs_inode        *ip,
5990        xfs_fileoff_t           split_fsb)
5991{
5992        int                             whichfork = XFS_DATA_FORK;
5993        struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, whichfork);
5994        struct xfs_btree_cur            *cur = NULL;
5995        struct xfs_bmbt_irec            got;
5996        struct xfs_bmbt_irec            new; /* split extent */
5997        struct xfs_mount                *mp = ip->i_mount;
5998        xfs_fsblock_t                   gotblkcnt; /* new block count for got */
5999        struct xfs_iext_cursor          icur;
6000        int                             error = 0;
6001        int                             logflags = 0;
6002        int                             i = 0;
6003
6004        if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
6005            XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
6006                return -EFSCORRUPTED;
6007        }
6008
6009        if (XFS_FORCED_SHUTDOWN(mp))
6010                return -EIO;
6011
6012        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6013                /* Read in all the extents */
6014                error = xfs_iread_extents(tp, ip, whichfork);
6015                if (error)
6016                        return error;
6017        }
6018
6019        /*
6020         * If there are not extents, or split_fsb lies in a hole we are done.
6021         */
6022        if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6023            got.br_startoff >= split_fsb)
6024                return 0;
6025
6026        gotblkcnt = split_fsb - got.br_startoff;
6027        new.br_startoff = split_fsb;
6028        new.br_startblock = got.br_startblock + gotblkcnt;
6029        new.br_blockcount = got.br_blockcount - gotblkcnt;
6030        new.br_state = got.br_state;
6031
6032        if (ifp->if_flags & XFS_IFBROOT) {
6033                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6034                cur->bc_ino.flags = 0;
6035                error = xfs_bmbt_lookup_eq(cur, &got, &i);
6036                if (error)
6037                        goto del_cursor;
6038                if (XFS_IS_CORRUPT(mp, i != 1)) {
6039                        error = -EFSCORRUPTED;
6040                        goto del_cursor;
6041                }
6042        }
6043
6044        got.br_blockcount = gotblkcnt;
6045        xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
6046                        &got);
6047
6048        logflags = XFS_ILOG_CORE;
6049        if (cur) {
6050                error = xfs_bmbt_update(cur, &got);
6051                if (error)
6052                        goto del_cursor;
6053        } else
6054                logflags |= XFS_ILOG_DEXT;
6055
6056        /* Add new extent */
6057        xfs_iext_next(ifp, &icur);
6058        xfs_iext_insert(ip, &icur, &new, 0);
6059        ifp->if_nextents++;
6060
6061        if (cur) {
6062                error = xfs_bmbt_lookup_eq(cur, &new, &i);
6063                if (error)
6064                        goto del_cursor;
6065                if (XFS_IS_CORRUPT(mp, i != 0)) {
6066                        error = -EFSCORRUPTED;
6067                        goto del_cursor;
6068                }
6069                error = xfs_btree_insert(cur, &i);
6070                if (error)
6071                        goto del_cursor;
6072                if (XFS_IS_CORRUPT(mp, i != 1)) {
6073                        error = -EFSCORRUPTED;
6074                        goto del_cursor;
6075                }
6076        }
6077
6078        /*
6079         * Convert to a btree if necessary.
6080         */
6081        if (xfs_bmap_needs_btree(ip, whichfork)) {
6082                int tmp_logflags; /* partial log flag return val */
6083
6084                ASSERT(cur == NULL);
6085                error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
6086                                &tmp_logflags, whichfork);
6087                logflags |= tmp_logflags;
6088        }
6089
6090del_cursor:
6091        if (cur) {
6092                cur->bc_ino.allocated = 0;
6093                xfs_btree_del_cursor(cur, error);
6094        }
6095
6096        if (logflags)
6097                xfs_trans_log_inode(tp, ip, logflags);
6098        return error;
6099}
6100
6101/* Deferred mapping is only for real extents in the data fork. */
6102static bool
6103xfs_bmap_is_update_needed(
6104        struct xfs_bmbt_irec    *bmap)
6105{
6106        return  bmap->br_startblock != HOLESTARTBLOCK &&
6107                bmap->br_startblock != DELAYSTARTBLOCK;
6108}
6109
6110/* Record a bmap intent. */
6111static int
6112__xfs_bmap_add(
6113        struct xfs_trans                *tp,
6114        enum xfs_bmap_intent_type       type,
6115        struct xfs_inode                *ip,
6116        int                             whichfork,
6117        struct xfs_bmbt_irec            *bmap)
6118{
6119        struct xfs_bmap_intent          *bi;
6120
6121        trace_xfs_bmap_defer(tp->t_mountp,
6122                        XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
6123                        type,
6124                        XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
6125                        ip->i_ino, whichfork,
6126                        bmap->br_startoff,
6127                        bmap->br_blockcount,
6128                        bmap->br_state);
6129
6130        bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
6131        INIT_LIST_HEAD(&bi->bi_list);
6132        bi->bi_type = type;
6133        bi->bi_owner = ip;
6134        bi->bi_whichfork = whichfork;
6135        bi->bi_bmap = *bmap;
6136
6137        xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6138        return 0;
6139}
6140
6141/* Map an extent into a file. */
6142void
6143xfs_bmap_map_extent(
6144        struct xfs_trans        *tp,
6145        struct xfs_inode        *ip,
6146        struct xfs_bmbt_irec    *PREV)
6147{
6148        if (!xfs_bmap_is_update_needed(PREV))
6149                return;
6150
6151        __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6152}
6153
6154/* Unmap an extent out of a file. */
6155void
6156xfs_bmap_unmap_extent(
6157        struct xfs_trans        *tp,
6158        struct xfs_inode        *ip,
6159        struct xfs_bmbt_irec    *PREV)
6160{
6161        if (!xfs_bmap_is_update_needed(PREV))
6162                return;
6163
6164        __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6165}
6166
6167/*
6168 * Process one of the deferred bmap operations.  We pass back the
6169 * btree cursor to maintain our lock on the bmapbt between calls.
6170 */
6171int
6172xfs_bmap_finish_one(
6173        struct xfs_trans                *tp,
6174        struct xfs_inode                *ip,
6175        enum xfs_bmap_intent_type       type,
6176        int                             whichfork,
6177        xfs_fileoff_t                   startoff,
6178        xfs_fsblock_t                   startblock,
6179        xfs_filblks_t                   *blockcount,
6180        xfs_exntst_t                    state)
6181{
6182        int                             error = 0;
6183
6184        ASSERT(tp->t_firstblock == NULLFSBLOCK);
6185
6186        trace_xfs_bmap_deferred(tp->t_mountp,
6187                        XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6188                        XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6189                        ip->i_ino, whichfork, startoff, *blockcount, state);
6190
6191        if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6192                return -EFSCORRUPTED;
6193
6194        if (XFS_TEST_ERROR(false, tp->t_mountp,
6195                        XFS_ERRTAG_BMAP_FINISH_ONE))
6196                return -EIO;
6197
6198        switch (type) {
6199        case XFS_BMAP_MAP:
6200                error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6201                                startblock, 0);
6202                *blockcount = 0;
6203                break;
6204        case XFS_BMAP_UNMAP:
6205                error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6206                                XFS_BMAPI_REMAP, 1);
6207                break;
6208        default:
6209                ASSERT(0);
6210                error = -EFSCORRUPTED;
6211        }
6212
6213        return error;
6214}
6215
6216/* Check that an inode's extent does not have invalid flags or bad ranges. */
6217xfs_failaddr_t
6218xfs_bmap_validate_extent(
6219        struct xfs_inode        *ip,
6220        int                     whichfork,
6221        struct xfs_bmbt_irec    *irec)
6222{
6223        struct xfs_mount        *mp = ip->i_mount;
6224        xfs_fsblock_t           endfsb;
6225        bool                    isrt;
6226
6227        isrt = XFS_IS_REALTIME_INODE(ip);
6228        endfsb = irec->br_startblock + irec->br_blockcount - 1;
6229        if (isrt && whichfork == XFS_DATA_FORK) {
6230                if (!xfs_verify_rtbno(mp, irec->br_startblock))
6231                        return __this_address;
6232                if (!xfs_verify_rtbno(mp, endfsb))
6233                        return __this_address;
6234        } else {
6235                if (!xfs_verify_fsbno(mp, irec->br_startblock))
6236                        return __this_address;
6237                if (!xfs_verify_fsbno(mp, endfsb))
6238                        return __this_address;
6239                if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6240                    XFS_FSB_TO_AGNO(mp, endfsb))
6241                        return __this_address;
6242        }
6243        if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
6244                return __this_address;
6245        return NULL;
6246}
6247