linux/fs/xfs/libxfs/xfs_bmap.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_shared.h"
  21#include "xfs_format.h"
  22#include "xfs_log_format.h"
  23#include "xfs_trans_resv.h"
  24#include "xfs_bit.h"
  25#include "xfs_sb.h"
  26#include "xfs_mount.h"
  27#include "xfs_defer.h"
  28#include "xfs_da_format.h"
  29#include "xfs_da_btree.h"
  30#include "xfs_dir2.h"
  31#include "xfs_inode.h"
  32#include "xfs_btree.h"
  33#include "xfs_trans.h"
  34#include "xfs_inode_item.h"
  35#include "xfs_extfree_item.h"
  36#include "xfs_alloc.h"
  37#include "xfs_bmap.h"
  38#include "xfs_bmap_util.h"
  39#include "xfs_bmap_btree.h"
  40#include "xfs_rtalloc.h"
  41#include "xfs_error.h"
  42#include "xfs_quota.h"
  43#include "xfs_trans_space.h"
  44#include "xfs_buf_item.h"
  45#include "xfs_trace.h"
  46#include "xfs_symlink.h"
  47#include "xfs_attr_leaf.h"
  48#include "xfs_filestream.h"
  49#include "xfs_rmap.h"
  50#include "xfs_ag_resv.h"
  51#include "xfs_refcount.h"
  52
  53
  54kmem_zone_t             *xfs_bmap_free_item_zone; /* zone that xfs_bmap_add_free() allocates its xfs_extent_free_item entries from */
  55
  56/*
  57 * Miscellaneous helper functions
  58 */
  59
  60/*
  61 * Compute and fill in the value of the maximum depth of a bmap btree
  62 * in this filesystem.  Done once, during mount.
  63 */
  64void
  65xfs_bmap_compute_maxlevels(
  66        xfs_mount_t     *mp,            /* file system mount structure */
  67        int             whichfork)      /* data or attr fork */
  68{
  69        int             level;          /* btree level */
  70        uint            maxblocks;      /* max blocks at this level */
  71        uint            maxleafents;    /* max leaf entries possible */
  72        int             maxrootrecs;    /* max records in root block */
  73        int             minleafrecs;    /* min records in leaf block */
  74        int             minnoderecs;    /* min records in node block */
  75        int             sz;             /* root block size */
  76
  77        /*
  78         * The maximum number of extents in a file, hence the maximum
  79         * number of leaf entries, is controlled by the type of di_nextents
  80         * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
  81         * (a signed 16-bit number, xfs_aextnum_t).
  82         *
  83         * Note that we can no longer assume that if we are in ATTR1 that
  84         * the fork offset of all the inodes will be
  85         * (xfs_default_attroffset(ip) >> 3) because we could have mounted
  86         * with ATTR2 and then mounted back with ATTR1, keeping the
  87         * di_forkoff's fixed but probably at various positions. Therefore,
  88         * for both ATTR1 and ATTR2 we have to assume the worst case scenario
  89         * of a minimum size available.
  90         */
  91        if (whichfork == XFS_DATA_FORK) {
  92                maxleafents = MAXEXTNUM;
  93                sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
  94        } else {
  95                maxleafents = MAXAEXTNUM;
  96                sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
  97        }
  98        maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
  99        minleafrecs = mp->m_bmap_dmnr[0];
 100        minnoderecs = mp->m_bmap_dmnr[1];
 101        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
 102        for (level = 1; maxblocks > 1; level++) {
 103                if (maxblocks <= maxrootrecs)
 104                        maxblocks = 1;
 105                else
 106                        maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
 107        }
 108        mp->m_bm_maxlevels[whichfork] = level;
 109}
 110
 111STATIC int                              /* error */
 112xfs_bmbt_lookup_eq(
 113        struct xfs_btree_cur    *cur,
 114        xfs_fileoff_t           off,
 115        xfs_fsblock_t           bno,
 116        xfs_filblks_t           len,
 117        int                     *stat)  /* success/failure */
 118{
 119        cur->bc_rec.b.br_startoff = off;
 120        cur->bc_rec.b.br_startblock = bno;
 121        cur->bc_rec.b.br_blockcount = len;
 122        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 123}
 124
 125STATIC int                              /* error */
 126xfs_bmbt_lookup_ge(
 127        struct xfs_btree_cur    *cur,
 128        xfs_fileoff_t           off,
 129        xfs_fsblock_t           bno,
 130        xfs_filblks_t           len,
 131        int                     *stat)  /* success/failure */
 132{
 133        cur->bc_rec.b.br_startoff = off;
 134        cur->bc_rec.b.br_startblock = bno;
 135        cur->bc_rec.b.br_blockcount = len;
 136        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 137}
 138
 139/*
 140 * Check if the inode needs to be converted to btree format.
 141 */
 142static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 143{
 144        return whichfork != XFS_COW_FORK &&
 145                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
 146                XFS_IFORK_NEXTENTS(ip, whichfork) >
 147                        XFS_IFORK_MAXEXT(ip, whichfork);
 148}
 149
 150/*
 151 * Check if the inode should be converted to extent format.
 152 */
 153static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 154{
 155        return whichfork != XFS_COW_FORK &&
 156                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
 157                XFS_IFORK_NEXTENTS(ip, whichfork) <=
 158                        XFS_IFORK_MAXEXT(ip, whichfork);
 159}
 160
 161/*
 162 * Update the record referred to by cur to the value given
 163 * by [off, bno, len, state].
 164 * This either works (return 0) or gets an EFSCORRUPTED error.
 165 */
 166STATIC int
 167xfs_bmbt_update(
 168        struct xfs_btree_cur    *cur,
 169        xfs_fileoff_t           off,
 170        xfs_fsblock_t           bno,
 171        xfs_filblks_t           len,
 172        xfs_exntst_t            state)
 173{
 174        union xfs_btree_rec     rec;
 175
 176        xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
 177        return xfs_btree_update(cur, &rec);
 178}
 179
 180/*
 181 * Compute the worst-case number of indirect blocks that will be used
 182 * for ip's delayed extent of length "len".
 183 */
 184STATIC xfs_filblks_t
 185xfs_bmap_worst_indlen(
 186        xfs_inode_t     *ip,            /* incore inode pointer */
 187        xfs_filblks_t   len)            /* delayed extent length */
 188{
 189        int             level;          /* btree level number */
 190        int             maxrecs;        /* maximum record count at this level */
 191        xfs_mount_t     *mp;            /* mount structure */
 192        xfs_filblks_t   rval;           /* return value */
 193
 194        mp = ip->i_mount;
 195        maxrecs = mp->m_bmap_dmxr[0];
 196        for (level = 0, rval = 0;
 197             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
 198             level++) {
 199                len += maxrecs - 1;
 200                do_div(len, maxrecs);
 201                rval += len;
 202                if (len == 1)
 203                        return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
 204                                level - 1;
 205                if (level == 0)
 206                        maxrecs = mp->m_bmap_dmxr[1];
 207        }
 208        return rval;
 209}
 210
 211/*
 212 * Calculate the default attribute fork offset for newly created inodes.
 213 */
 214uint
 215xfs_default_attroffset(
 216        struct xfs_inode        *ip)
 217{
 218        struct xfs_mount        *mp = ip->i_mount;
 219        uint                    offset;
 220
 221        if (mp->m_sb.sb_inodesize == 256) {
 222                offset = XFS_LITINO(mp, ip->i_d.di_version) -
 223                                XFS_BMDR_SPACE_CALC(MINABTPTRS);
 224        } else {
 225                offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
 226        }
 227
 228        ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
 229        return offset;
 230}
 231
 232/*
 233 * Helper routine to reset inode di_forkoff field when switching
 234 * attribute fork from local to extent format - we reset it where
 235 * possible to make space available for inline data fork extents.
 236 */
 237STATIC void
 238xfs_bmap_forkoff_reset(
 239        xfs_inode_t     *ip,
 240        int             whichfork)
 241{
 242        if (whichfork == XFS_ATTR_FORK &&
 243            ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
 244            ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
 245            ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
 246                uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
 247
 248                if (dfl_forkoff > ip->i_d.di_forkoff)
 249                        ip->i_d.di_forkoff = dfl_forkoff;
 250        }
 251}
 252
 253#ifdef DEBUG
 254STATIC struct xfs_buf *
 255xfs_bmap_get_bp(
 256        struct xfs_btree_cur    *cur,
 257        xfs_fsblock_t           bno)
 258{
 259        struct xfs_log_item_desc *lidp;
 260        int                     i;
 261
 262        if (!cur)
 263                return NULL;
 264
 265        for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
 266                if (!cur->bc_bufs[i])
 267                        break;
 268                if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
 269                        return cur->bc_bufs[i];
 270        }
 271
 272        /* Chase down all the log items to see if the bp is there */
 273        list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
 274                struct xfs_buf_log_item *bip;
 275                bip = (struct xfs_buf_log_item *)lidp->lid_item;
 276                if (bip->bli_item.li_type == XFS_LI_BUF &&
 277                    XFS_BUF_ADDR(bip->bli_buf) == bno)
 278                        return bip->bli_buf;
 279        }
 280
 281        return NULL;
 282}
 283
 284STATIC void
 285xfs_check_block(
 286        struct xfs_btree_block  *block,
 287        xfs_mount_t             *mp,
 288        int                     root,
 289        short                   sz)
 290{
 291        int                     i, j, dmxr;
 292        __be64                  *pp, *thispa;   /* pointer to block address */
 293        xfs_bmbt_key_t          *prevp, *keyp;
 294
 295        ASSERT(be16_to_cpu(block->bb_level) > 0);
 296
 297        prevp = NULL;
 298        for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 299                dmxr = mp->m_bmap_dmxr[0];
 300                keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 301
 302                if (prevp) {
 303                        ASSERT(be64_to_cpu(prevp->br_startoff) <
 304                               be64_to_cpu(keyp->br_startoff));
 305                }
 306                prevp = keyp;
 307
 308                /*
 309                 * Compare the block numbers to see if there are dups.
 310                 */
 311                if (root)
 312                        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
 313                else
 314                        pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 315
 316                for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
 317                        if (root)
 318                                thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
 319                        else
 320                                thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 321                        if (*thispa == *pp) {
 322                                xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 323                                        __func__, j, i,
 324                                        (unsigned long long)be64_to_cpu(*thispa));
 325                                panic("%s: ptrs are equal in node\n",
 326                                        __func__);
 327                        }
 328                }
 329        }
 330}
 331
 332/*
 333 * Check that the extents for the inode ip are in the right order in all
 334 * btree leaves. This becomes prohibitively expensive for large extent count
 335 * files, so don't bother with inodes that have more than 10,000 extents in
 336 * them. The btree record ordering checks will still be done, so for such large
 337 * bmapbt constructs that is going to catch most corruptions.
 *
 * Only btree-format forks are examined; anything else returns immediately.
 * Each block is first looked up with xfs_bmap_get_bp() and, if not found
 * there, read with a NULL transaction.  bp_release records that this
 * function did the read, and only such buffers are released here.
 * A failed read or a failed block-number sanity check ends in panic().
 338 */
 339STATIC void
 340xfs_bmap_check_leaf_extents(
 341        xfs_btree_cur_t         *cur,   /* btree cursor or null */
 342        xfs_inode_t             *ip,            /* incore inode pointer */
 343        int                     whichfork)      /* data or attr fork */
 344{
 345        struct xfs_btree_block  *block; /* current btree block */
 346        xfs_fsblock_t           bno;    /* block # of "block" */
 347        xfs_buf_t               *bp;    /* buffer for "block" */
 348        int                     error;  /* error return value */
 349        xfs_extnum_t            i=0, j; /* index into the extents list */
 350        xfs_ifork_t             *ifp;   /* fork structure */
 351        int                     level;  /* btree level, for checking */
 352        xfs_mount_t             *mp;    /* file system mount structure */
 353        __be64                  *pp;    /* pointer to block address */
 354        xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
 355        xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
 356        xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
 357        int                     bp_release = 0; /* nonzero: bp was read here and must be released here */
 358
 359        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
 360                return;
 361        }
 362
 363        /* skip large extent count inodes */
        /*
         * NOTE(review): the count tested is ip->i_d.di_nextents no matter
         * which fork was requested -- confirm that is intended for non-data
         * forks.
         */
 364        if (ip->i_d.di_nextents > 10000)
 365                return;
 366
 367        bno = NULLFSBLOCK;
 368        mp = ip->i_mount;
 369        ifp = XFS_IFORK_PTR(ip, whichfork);
 370        block = ifp->if_broot;
 371        /*
 372         * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
 373         */
 374        level = be16_to_cpu(block->bb_level);
 375        ASSERT(level > 0);
 376        xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
 377        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 378        bno = be64_to_cpu(*pp);
 379
 380        ASSERT(bno != NULLFSBLOCK);
 381        ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
 382        ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
 383
 384        /*
 385         * Go down the tree until leaf level is reached, following the first
 386         * pointer (leftmost) at each level.
 387         */
 388        while (level-- > 0) {
 389                /* See if buf is in cur first */
 390                bp_release = 0;
 391                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 392                if (!bp) {
 393                        bp_release = 1;
 394                        error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
 395                                                XFS_BMAP_BTREE_REF,
 396                                                &xfs_bmbt_buf_ops);
 397                        if (error)
 398                                goto error_norelse;
 399                }
 400                block = XFS_BUF_TO_BLOCK(bp);
                /*
                 * Leaf reached: leave the loop with bp, block and bp_release
                 * still describing this buffer for the leaf walk below.
                 */
 401                if (level == 0)
 402                        break;
 403
 404                /*
 405                 * Check this block for basic sanity (increasing keys and
 406                 * no duplicate blocks).
 407                 */
 408
 409                xfs_check_block(block, mp, 0, 0);
 410                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 411                bno = be64_to_cpu(*pp);
 412                XFS_WANT_CORRUPTED_GOTO(mp,
 413                                        XFS_FSB_SANITY_CHECK(mp, bno), error0);
 414                if (bp_release) {
 415                        bp_release = 0;
 416                        xfs_trans_brelse(NULL, bp);
 417                }
 418        }
 419
 420        /*
 421         * Here with bp and block set to the leftmost leaf node in the tree.
 422         */
 423        i = 0;
 424
 425        /*
 426         * Loop over all leaf nodes checking that all extents are in the right order.
 427         */
 428        for (;;) {
 429                xfs_fsblock_t   nextbno;
 430                xfs_extnum_t    num_recs;
 431
 432
 433                num_recs = xfs_btree_get_numrecs(block);
 434
 435                /*
 436                 * Remember the right sibling's block number now; the current
 437                 * buffer may be released before we move on to it.
 438                 */
 439                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 440
 441                /*
 442                 * Check all the extents to make sure they are OK.
 443                 * If we had a previous block, the last entry should
 444                 * conform with the first entry in this one.
 445                 */
 446
 447                ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 448                if (i) {
 449                        ASSERT(xfs_bmbt_disk_get_startoff(&last) +
 450                               xfs_bmbt_disk_get_blockcount(&last) <=
 451                               xfs_bmbt_disk_get_startoff(ep));
 452                }
 453                for (j = 1; j < num_recs; j++) {
 454                        nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
 455                        ASSERT(xfs_bmbt_disk_get_startoff(ep) +
 456                               xfs_bmbt_disk_get_blockcount(ep) <=
 457                               xfs_bmbt_disk_get_startoff(nextp));
 458                        ep = nextp;
 459                }
 460
                /*
                 * Copy the final record by value so it can still be compared
                 * against the next leaf after this buffer is released.
                 */
 461                last = *ep;
 462                i += num_recs;
 463                if (bp_release) {
 464                        bp_release = 0;
 465                        xfs_trans_brelse(NULL, bp);
 466                }
 467                bno = nextbno;
 468                /*
 469                 * If we've reached the end, stop.
 470                 */
 471                if (bno == NULLFSBLOCK)
 472                        break;
 473
 474                bp_release = 0;
 475                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
 476                if (!bp) {
 477                        bp_release = 1;
 478                        error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
 479                                                XFS_BMAP_BTREE_REF,
 480                                                &xfs_bmbt_buf_ops);
 481                        if (error)
 482                                goto error_norelse;
 483                }
 484                block = XFS_BUF_TO_BLOCK(bp);
 485        }
 486
 487        return;
 488
        /* Sanity check failed: drop the buffer if it was read here. */
 489error0:
 490        xfs_warn(mp, "%s: at error0", __func__);
 491        if (bp_release)
 492                xfs_trans_brelse(NULL, bp);
        /* Reached directly when a block read failed; both paths panic. */
 493error_norelse:
 494        xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 495                __func__, i);
 496        panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
 497        return;
 498}
 499
 500/*
 501 * Add bmap trace insert entries for all the contents of the extent records.
 502 */
 503void
 504xfs_bmap_trace_exlist(
 505        xfs_inode_t     *ip,            /* incore inode pointer */
 506        xfs_extnum_t    cnt,            /* count of entries in the list */
 507        int             whichfork,      /* data or attr fork */
 508        unsigned long   caller_ip)
 509{
 510        xfs_extnum_t    idx;            /* extent record index */
 511        xfs_ifork_t     *ifp;           /* inode fork pointer */
 512        int             state = 0;
 513
 514        if (whichfork == XFS_ATTR_FORK)
 515                state |= BMAP_ATTRFORK;
 516
 517        ifp = XFS_IFORK_PTR(ip, whichfork);
 518        ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 519        for (idx = 0; idx < cnt; idx++)
 520                trace_xfs_extlist(ip, idx, whichfork, caller_ip);
 521}
 522
 523/*
 524 * Validate that the bmbt_irecs being returned from bmapi are valid
 525 * given the caller's original parameters.  Specifically check the
 526 * ranges of the returned irecs to ensure that they only extend beyond
 527 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 528 */
 529STATIC void
 530xfs_bmap_validate_ret(
 531        xfs_fileoff_t           bno,
 532        xfs_filblks_t           len,
 533        int                     flags,
 534        xfs_bmbt_irec_t         *mval,
 535        int                     nmap,
 536        int                     ret_nmap)
 537{
 538        int                     i;              /* index to map values */
 539
 540        ASSERT(ret_nmap <= nmap);
 541
 542        for (i = 0; i < ret_nmap; i++) {
 543                ASSERT(mval[i].br_blockcount > 0);
 544                if (!(flags & XFS_BMAPI_ENTIRE)) {
 545                        ASSERT(mval[i].br_startoff >= bno);
 546                        ASSERT(mval[i].br_blockcount <= len);
 547                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
 548                               bno + len);
 549                } else {
 550                        ASSERT(mval[i].br_startoff < bno + len);
 551                        ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
 552                               bno);
 553                }
 554                ASSERT(i == 0 ||
 555                       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
 556                       mval[i].br_startoff);
 557                ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
 558                       mval[i].br_startblock != HOLESTARTBLOCK);
 559                ASSERT(mval[i].br_state == XFS_EXT_NORM ||
 560                       mval[i].br_state == XFS_EXT_UNWRITTEN);
 561        }
 562}
 563
 564#else
 565#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
 566#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
 567#endif /* DEBUG */
 568
 569/*
 570 * bmap free list manipulation functions
 571 */
 572
 573/*
 574 * Add the extent to the list of extents to be free at transaction end.
 575 * The list is maintained sorted (by block number).
 576 */
 577void
 578xfs_bmap_add_free(
 579        struct xfs_mount                *mp,
 580        struct xfs_defer_ops            *dfops,
 581        xfs_fsblock_t                   bno,
 582        xfs_filblks_t                   len,
 583        struct xfs_owner_info           *oinfo)
 584{
 585        struct xfs_extent_free_item     *new;           /* new element */
 586#ifdef DEBUG
 587        xfs_agnumber_t          agno;
 588        xfs_agblock_t           agbno;
 589
 590        ASSERT(bno != NULLFSBLOCK);
 591        ASSERT(len > 0);
 592        ASSERT(len <= MAXEXTLEN);
 593        ASSERT(!isnullstartblock(bno));
 594        agno = XFS_FSB_TO_AGNO(mp, bno);
 595        agbno = XFS_FSB_TO_AGBNO(mp, bno);
 596        ASSERT(agno < mp->m_sb.sb_agcount);
 597        ASSERT(agbno < mp->m_sb.sb_agblocks);
 598        ASSERT(len < mp->m_sb.sb_agblocks);
 599        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 600#endif
 601        ASSERT(xfs_bmap_free_item_zone != NULL);
 602
 603        new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
 604        new->xefi_startblock = bno;
 605        new->xefi_blockcount = (xfs_extlen_t)len;
 606        if (oinfo)
 607                new->xefi_oinfo = *oinfo;
 608        else
 609                xfs_rmap_skip_owner_update(&new->xefi_oinfo);
 610        trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
 611                        XFS_FSB_TO_AGBNO(mp, bno), len);
 612        xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 613}
 614
 615/*
 616 * Inode fork format manipulation functions
 617 */
 618
 619/*
 620 * Transform a btree format file with only one leaf node, where the
 621 * extents list will fit in the inode, into an extents format file.
 622 * Since the file extents are already in-core, all we have to do is
 623 * give up the space for the btree root and pitch the leaf block.
 624 */
 625STATIC int                              /* error */
 626xfs_bmap_btree_to_extents(
 627        xfs_trans_t             *tp,    /* transaction pointer */
 628        xfs_inode_t             *ip,    /* incore inode pointer */
 629        xfs_btree_cur_t         *cur,   /* btree cursor */
 630        int                     *logflagsp, /* inode logging flags */
 631        int                     whichfork)  /* data or attr fork */
 632{
 633        /* REFERENCED */
 634        struct xfs_btree_block  *cblock;/* child btree block */
 635        xfs_fsblock_t           cbno;   /* child block number */
 636        xfs_buf_t               *cbp;   /* child block's buffer */
 637        int                     error;  /* error return value */
 638        xfs_ifork_t             *ifp;   /* inode fork data */
 639        xfs_mount_t             *mp;    /* mount point structure */
 640        __be64                  *pp;    /* ptr to block address */
 641        struct xfs_btree_block  *rblock;/* root btree block */
 642        struct xfs_owner_info   oinfo;
 643
 644        mp = ip->i_mount;
 645        ifp = XFS_IFORK_PTR(ip, whichfork);
 646        ASSERT(whichfork != XFS_COW_FORK);
 647        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 648        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 649        rblock = ifp->if_broot;
 650        ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 651        ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
 652        ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
 653        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 654        cbno = be64_to_cpu(*pp);
 655        *logflagsp = 0;
 656#ifdef DEBUG
 657        if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
 658                return error;
 659#endif
 660        error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
 661                                &xfs_bmbt_buf_ops);
 662        if (error)
 663                return error;
 664        cblock = XFS_BUF_TO_BLOCK(cbp);
 665        if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 666                return error;
 667        xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 668        xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
 669        ip->i_d.di_nblocks--;
 670        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 671        xfs_trans_binval(tp, cbp);
 672        if (cur->bc_bufs[0] == cbp)
 673                cur->bc_bufs[0] = NULL;
 674        xfs_iroot_realloc(ip, -1, whichfork);
 675        ASSERT(ifp->if_broot == NULL);
 676        ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
 677        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 678        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
 679        return 0;
 680}
 681
 682/*
 683 * Convert an extents-format file into a btree-format file.
 684 * The new file will have a root block (in the inode) and a single child block.
 685 */
 686STATIC int                                      /* error */
 687xfs_bmap_extents_to_btree(
 688        xfs_trans_t             *tp,            /* transaction pointer */
 689        xfs_inode_t             *ip,            /* incore inode pointer */
 690        xfs_fsblock_t           *firstblock,    /* first-block-allocated */
 691        struct xfs_defer_ops    *dfops,         /* blocks freed in xaction */
 692        xfs_btree_cur_t         **curp,         /* cursor returned to caller */
 693        int                     wasdel,         /* converting a delayed alloc */
 694        int                     *logflagsp,     /* inode logging flags */
 695        int                     whichfork)      /* data or attr fork */
 696{
 697        struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
 698        xfs_buf_t               *abp;           /* buffer for ablock */
 699        xfs_alloc_arg_t         args;           /* allocation arguments */
 700        xfs_bmbt_rec_t          *arp;           /* child record pointer */
 701        struct xfs_btree_block  *block;         /* btree root block */
 702        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
 703        xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
 704        int                     error;          /* error return value */
 705        xfs_extnum_t            i, cnt;         /* extent record index */
 706        xfs_ifork_t             *ifp;           /* inode fork pointer */
 707        xfs_bmbt_key_t          *kp;            /* root block key pointer */
 708        xfs_mount_t             *mp;            /* mount structure */
 709        xfs_extnum_t            nextents;       /* number of file extents */
 710        xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
 711
 712        mp = ip->i_mount;
 713        ASSERT(whichfork != XFS_COW_FORK);
 714        ifp = XFS_IFORK_PTR(ip, whichfork);
 715        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
 716
 717        /*
 718         * Make space in the inode incore.
 719         */
 720        xfs_iroot_realloc(ip, 1, whichfork);
 721        ifp->if_flags |= XFS_IFBROOT;
 722
 723        /*
 724         * Fill in the root.
 725         */
 726        block = ifp->if_broot;
 727        if (xfs_sb_version_hascrc(&mp->m_sb))
 728                xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 729                                 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
 730                                 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 731        else
 732                xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
 733                                 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
 734                                 XFS_BTREE_LONG_PTRS);
 735
 736        /*
 737         * Need a cursor.  Can't allocate until bb_level is filled in.
 738         */
 739        cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 740        cur->bc_private.b.firstblock = *firstblock;
 741        cur->bc_private.b.dfops = dfops;
 742        cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 743        /*
 744         * Convert to a btree with two levels, one record in root.
 745         */
 746        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
 747        memset(&args, 0, sizeof(args));
 748        args.tp = tp;
 749        args.mp = mp;
 750        xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 751        args.firstblock = *firstblock;
 752        if (*firstblock == NULLFSBLOCK) {
 753                args.type = XFS_ALLOCTYPE_START_BNO;
 754                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 755        } else if (dfops->dop_low) {
 756try_another_ag:
 757                args.type = XFS_ALLOCTYPE_START_BNO;
 758                args.fsbno = *firstblock;
 759        } else {
 760                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 761                args.fsbno = *firstblock;
 762        }
 763        args.minlen = args.maxlen = args.prod = 1;
 764        args.wasdel = wasdel;
 765        *logflagsp = 0;
 766        if ((error = xfs_alloc_vextent(&args))) {
 767                xfs_iroot_realloc(ip, -1, whichfork);
 768                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 769                return error;
 770        }
 771
 772        /*
 773         * During a CoW operation, the allocation and bmbt updates occur in
 774         * different transactions.  The mapping code tries to put new bmbt
 775         * blocks near extents being mapped, but the only way to guarantee this
 776         * is if the alloc and the mapping happen in a single transaction that
 777         * has a block reservation.  That isn't the case here, so if we run out
 778         * of space we'll try again with another AG.
 779         */
 780        if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
 781            args.fsbno == NULLFSBLOCK &&
 782            args.type == XFS_ALLOCTYPE_NEAR_BNO) {
 783                dfops->dop_low = true;
 784                goto try_another_ag;
 785        }
 786        /*
 787         * Allocation can't fail, the space was reserved.
 788         */
 789        ASSERT(args.fsbno != NULLFSBLOCK);
 790        ASSERT(*firstblock == NULLFSBLOCK ||
 791               args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
 792               (dfops->dop_low &&
 793                args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
 794        *firstblock = cur->bc_private.b.firstblock = args.fsbno;
 795        cur->bc_private.b.allocated++;
 796        ip->i_d.di_nblocks++;
 797        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 798        abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 799        /*
 800         * Fill in the child block.
 801         */
 802        abp->b_ops = &xfs_bmbt_buf_ops;
 803        ablock = XFS_BUF_TO_BLOCK(abp);
 804        if (xfs_sb_version_hascrc(&mp->m_sb))
 805                xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 806                                XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
 807                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
 808        else
 809                xfs_btree_init_block_int(mp, ablock, abp->b_bn,
 810                                XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
 811                                XFS_BTREE_LONG_PTRS);
 812
 813        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 814        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 815        for (cnt = i = 0; i < nextents; i++) {
 816                ep = xfs_iext_get_ext(ifp, i);
 817                if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
 818                        arp->l0 = cpu_to_be64(ep->l0);
 819                        arp->l1 = cpu_to_be64(ep->l1);
 820                        arp++; cnt++;
 821                }
 822        }
 823        ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
 824        xfs_btree_set_numrecs(ablock, cnt);
 825
 826        /*
 827         * Fill in the root key and pointer.
 828         */
 829        kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
 830        arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 831        kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
 832        pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
 833                                                be16_to_cpu(block->bb_level)));
 834        *pp = cpu_to_be64(args.fsbno);
 835
 836        /*
 837         * Do all this logging at the end so that
 838         * the root is at the right level.
 839         */
 840        xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
 841        xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 842        ASSERT(*curp == NULL);
 843        *curp = cur;
 844        *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 845        return 0;
 846}
 847
 848/*
 849 * Convert a local file to an extents file.
 850 * This code is out of bounds for data forks of regular files,
 851 * since the file data needs to get logged so things will stay consistent.
 852 * (The bmap-level manipulations are ok, though).
 853 */
 854void
 855xfs_bmap_local_to_extents_empty(
 856        struct xfs_inode        *ip,
 857        int                     whichfork)
 858{
 859        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 860
 861        ASSERT(whichfork != XFS_COW_FORK);
 862        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 863        ASSERT(ifp->if_bytes == 0);
 864        ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
 865
 866        xfs_bmap_forkoff_reset(ip, whichfork);
 867        ifp->if_flags &= ~XFS_IFINLINE;
 868        ifp->if_flags |= XFS_IFEXTENTS;
 869        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
 870}
 871
 872
 873STATIC int                              /* error */
 874xfs_bmap_local_to_extents(
 875        xfs_trans_t     *tp,            /* transaction pointer */
 876        xfs_inode_t     *ip,            /* incore inode pointer */
 877        xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
 878        xfs_extlen_t    total,          /* total blocks needed by transaction */
 879        int             *logflagsp,     /* inode logging flags */
 880        int             whichfork,
 881        void            (*init_fn)(struct xfs_trans *tp,
 882                                   struct xfs_buf *bp,
 883                                   struct xfs_inode *ip,
 884                                   struct xfs_ifork *ifp))
 885{
 886        int             error = 0;
 887        int             flags;          /* logging flags returned */
 888        xfs_ifork_t     *ifp;           /* inode fork pointer */
 889        xfs_alloc_arg_t args;           /* allocation arguments */
 890        xfs_buf_t       *bp;            /* buffer for extent block */
 891        xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
 892
 893        /*
 894         * We don't want to deal with the case of keeping inode data inline yet.
 895         * So sending the data fork of a regular inode is invalid.
 896         */
 897        ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
 898        ifp = XFS_IFORK_PTR(ip, whichfork);
 899        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
 900
 901        if (!ifp->if_bytes) {
 902                xfs_bmap_local_to_extents_empty(ip, whichfork);
 903                flags = XFS_ILOG_CORE;
 904                goto done;
 905        }
 906
 907        flags = 0;
 908        error = 0;
 909        ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
 910                                                                XFS_IFINLINE);
 911        memset(&args, 0, sizeof(args));
 912        args.tp = tp;
 913        args.mp = ip->i_mount;
 914        xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 915        args.firstblock = *firstblock;
 916        /*
 917         * Allocate a block.  We know we need only one, since the
 918         * file currently fits in an inode.
 919         */
 920        if (*firstblock == NULLFSBLOCK) {
 921try_another_ag:
 922                args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 923                args.type = XFS_ALLOCTYPE_START_BNO;
 924        } else {
 925                args.fsbno = *firstblock;
 926                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 927        }
 928        args.total = total;
 929        args.minlen = args.maxlen = args.prod = 1;
 930        error = xfs_alloc_vextent(&args);
 931        if (error)
 932                goto done;
 933
 934        /*
 935         * During a CoW operation, the allocation and bmbt updates occur in
 936         * different transactions.  The mapping code tries to put new bmbt
 937         * blocks near extents being mapped, but the only way to guarantee this
 938         * is if the alloc and the mapping happen in a single transaction that
 939         * has a block reservation.  That isn't the case here, so if we run out
 940         * of space we'll try again with another AG.
 941         */
 942        if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) &&
 943            args.fsbno == NULLFSBLOCK &&
 944            args.type == XFS_ALLOCTYPE_NEAR_BNO) {
 945                goto try_another_ag;
 946        }
 947        /* Can't fail, the space was reserved. */
 948        ASSERT(args.fsbno != NULLFSBLOCK);
 949        ASSERT(args.len == 1);
 950        *firstblock = args.fsbno;
 951        bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
 952
 953        /*
 954         * Initialize the block, copy the data and log the remote buffer.
 955         *
 956         * The callout is responsible for logging because the remote format
 957         * might differ from the local format and thus we don't know how much to
 958         * log here. Note that init_fn must also set the buffer log item type
 959         * correctly.
 960         */
 961        init_fn(tp, bp, ip, ifp);
 962
 963        /* account for the change in fork size */
 964        xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
 965        xfs_bmap_local_to_extents_empty(ip, whichfork);
 966        flags |= XFS_ILOG_CORE;
 967
 968        xfs_iext_add(ifp, 0, 1);
 969        ep = xfs_iext_get_ext(ifp, 0);
 970        xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
 971        trace_xfs_bmap_post_update(ip, 0,
 972                        whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
 973                        _THIS_IP_);
 974        XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 975        ip->i_d.di_nblocks = 1;
 976        xfs_trans_mod_dquot_byino(tp, ip,
 977                XFS_TRANS_DQ_BCOUNT, 1L);
 978        flags |= xfs_ilog_fext(whichfork);
 979
 980done:
 981        *logflagsp = flags;
 982        return error;
 983}
 984
 985/*
 986 * Called from xfs_bmap_add_attrfork to handle btree format files.
 987 */
 988STATIC int                                      /* error */
 989xfs_bmap_add_attrfork_btree(
 990        xfs_trans_t             *tp,            /* transaction pointer */
 991        xfs_inode_t             *ip,            /* incore inode pointer */
 992        xfs_fsblock_t           *firstblock,    /* first block allocated */
 993        struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
 994        int                     *flags)         /* inode logging flags */
 995{
 996        xfs_btree_cur_t         *cur;           /* btree cursor */
 997        int                     error;          /* error return value */
 998        xfs_mount_t             *mp;            /* file system mount struct */
 999        int                     stat;           /* newroot status */
1000
1001        mp = ip->i_mount;
1002        if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
1003                *flags |= XFS_ILOG_DBROOT;
1004        else {
1005                cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
1006                cur->bc_private.b.dfops = dfops;
1007                cur->bc_private.b.firstblock = *firstblock;
1008                if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
1009                        goto error0;
1010                /* must be at least one entry */
1011                XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
1012                if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
1013                        goto error0;
1014                if (stat == 0) {
1015                        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1016                        return -ENOSPC;
1017                }
1018                *firstblock = cur->bc_private.b.firstblock;
1019                cur->bc_private.b.allocated = 0;
1020                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1021        }
1022        return 0;
1023error0:
1024        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1025        return error;
1026}
1027
1028/*
1029 * Called from xfs_bmap_add_attrfork to handle extents format files.
1030 */
1031STATIC int                                      /* error */
1032xfs_bmap_add_attrfork_extents(
1033        xfs_trans_t             *tp,            /* transaction pointer */
1034        xfs_inode_t             *ip,            /* incore inode pointer */
1035        xfs_fsblock_t           *firstblock,    /* first block allocated */
1036        struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
1037        int                     *flags)         /* inode logging flags */
1038{
1039        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
1040        int                     error;          /* error return value */
1041
1042        if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1043                return 0;
1044        cur = NULL;
1045        error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
1046                flags, XFS_DATA_FORK);
1047        if (cur) {
1048                cur->bc_private.b.allocated = 0;
1049                xfs_btree_del_cursor(cur,
1050                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1051        }
1052        return error;
1053}
1054
1055/*
1056 * Called from xfs_bmap_add_attrfork to handle local format files. Each
1057 * different data fork content type needs a different callout to do the
1058 * conversion. Some are basic and only require special block initialisation
1059 * callouts for the data formating, others (directories) are so specialised they
1060 * handle everything themselves.
1061 *
1062 * XXX (dgc): investigate whether directory conversion can use the generic
1063 * formatting callout. It should be possible - it's just a very complex
1064 * formatter.
1065 */
1066STATIC int                                      /* error */
1067xfs_bmap_add_attrfork_local(
1068        xfs_trans_t             *tp,            /* transaction pointer */
1069        xfs_inode_t             *ip,            /* incore inode pointer */
1070        xfs_fsblock_t           *firstblock,    /* first block allocated */
1071        struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
1072        int                     *flags)         /* inode logging flags */
1073{
1074        xfs_da_args_t           dargs;          /* args for dir/attr code */
1075
1076        if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1077                return 0;
1078
1079        if (S_ISDIR(VFS_I(ip)->i_mode)) {
1080                memset(&dargs, 0, sizeof(dargs));
1081                dargs.geo = ip->i_mount->m_dir_geo;
1082                dargs.dp = ip;
1083                dargs.firstblock = firstblock;
1084                dargs.dfops = dfops;
1085                dargs.total = dargs.geo->fsbcount;
1086                dargs.whichfork = XFS_DATA_FORK;
1087                dargs.trans = tp;
1088                return xfs_dir2_sf_to_block(&dargs);
1089        }
1090
1091        if (S_ISLNK(VFS_I(ip)->i_mode))
1092                return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
1093                                                 flags, XFS_DATA_FORK,
1094                                                 xfs_symlink_local_to_remote);
1095
1096        /* should only be called for types that support local format data */
1097        ASSERT(0);
1098        return -EFSCORRUPTED;
1099}
1100
1101/*
1102 * Convert inode from non-attributed to attributed.
1103 * Must not be in a transaction, ip must not be locked.
1104 */
1105int                                             /* error code */
1106xfs_bmap_add_attrfork(
1107        xfs_inode_t             *ip,            /* incore inode pointer */
1108        int                     size,           /* space new attribute needs */
1109        int                     rsvd)           /* xact may use reserved blks */
1110{
1111        xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
1112        struct xfs_defer_ops    dfops;          /* freed extent records */
1113        xfs_mount_t             *mp;            /* mount structure */
1114        xfs_trans_t             *tp;            /* transaction pointer */
1115        int                     blks;           /* space reservation */
1116        int                     version = 1;    /* superblock attr version */
1117        int                     logflags;       /* logging flags */
1118        int                     error;          /* error return value */
1119
1120        ASSERT(XFS_IFORK_Q(ip) == 0);
1121
1122        mp = ip->i_mount;
1123        ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1124
1125        blks = XFS_ADDAFORK_SPACE_RES(mp);
1126
1127        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1128                        rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1129        if (error)
1130                return error;
1131
1132        xfs_ilock(ip, XFS_ILOCK_EXCL);
1133        error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1134                        XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1135                        XFS_QMOPT_RES_REGBLKS);
1136        if (error)
1137                goto trans_cancel;
1138        if (XFS_IFORK_Q(ip))
1139                goto trans_cancel;
1140        if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
1141                /*
1142                 * For inodes coming from pre-6.2 filesystems.
1143                 */
1144                ASSERT(ip->i_d.di_aformat == 0);
1145                ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1146        }
1147        ASSERT(ip->i_d.di_anextents == 0);
1148
1149        xfs_trans_ijoin(tp, ip, 0);
1150        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1151
1152        switch (ip->i_d.di_format) {
1153        case XFS_DINODE_FMT_DEV:
1154                ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1155                break;
1156        case XFS_DINODE_FMT_UUID:
1157                ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
1158                break;
1159        case XFS_DINODE_FMT_LOCAL:
1160        case XFS_DINODE_FMT_EXTENTS:
1161        case XFS_DINODE_FMT_BTREE:
1162                ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1163                if (!ip->i_d.di_forkoff)
1164                        ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1165                else if (mp->m_flags & XFS_MOUNT_ATTR2)
1166                        version = 2;
1167                break;
1168        default:
1169                ASSERT(0);
1170                error = -EINVAL;
1171                goto trans_cancel;
1172        }
1173
1174        ASSERT(ip->i_afp == NULL);
1175        ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
1176        ip->i_afp->if_flags = XFS_IFEXTENTS;
1177        logflags = 0;
1178        xfs_defer_init(&dfops, &firstblock);
1179        switch (ip->i_d.di_format) {
1180        case XFS_DINODE_FMT_LOCAL:
1181                error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
1182                        &logflags);
1183                break;
1184        case XFS_DINODE_FMT_EXTENTS:
1185                error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
1186                        &dfops, &logflags);
1187                break;
1188        case XFS_DINODE_FMT_BTREE:
1189                error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
1190                        &logflags);
1191                break;
1192        default:
1193                error = 0;
1194                break;
1195        }
1196        if (logflags)
1197                xfs_trans_log_inode(tp, ip, logflags);
1198        if (error)
1199                goto bmap_cancel;
1200        if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1201           (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1202                bool log_sb = false;
1203
1204                spin_lock(&mp->m_sb_lock);
1205                if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1206                        xfs_sb_version_addattr(&mp->m_sb);
1207                        log_sb = true;
1208                }
1209                if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1210                        xfs_sb_version_addattr2(&mp->m_sb);
1211                        log_sb = true;
1212                }
1213                spin_unlock(&mp->m_sb_lock);
1214                if (log_sb)
1215                        xfs_log_sb(tp);
1216        }
1217
1218        error = xfs_defer_finish(&tp, &dfops, NULL);
1219        if (error)
1220                goto bmap_cancel;
1221        error = xfs_trans_commit(tp);
1222        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1223        return error;
1224
1225bmap_cancel:
1226        xfs_defer_cancel(&dfops);
1227trans_cancel:
1228        xfs_trans_cancel(tp);
1229        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1230        return error;
1231}
1232
1233/*
1234 * Internal and external extent tree search functions.
1235 */
1236
1237/*
1238 * Read in the extents to if_extents.
1239 * All inode fields are set up by caller, we just traverse the btree
1240 * and copy the records in. If the file system cannot contain unwritten
1241 * extents, the records are checked for no "state" flags.
1242 */
1243int                                     /* error */
1244xfs_bmap_read_extents(
1245        xfs_trans_t             *tp,    /* transaction pointer */
1246        xfs_inode_t             *ip,    /* incore inode */
1247        int                     whichfork) /* data or attr fork */
1248{
1249        struct xfs_btree_block  *block; /* current btree block */
1250        xfs_fsblock_t           bno;    /* block # of "block" */
1251        xfs_buf_t               *bp;    /* buffer for "block" */
1252        int                     error;  /* error return value */
1253        xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
1254        xfs_extnum_t            i, j;   /* index into the extents list */
1255        xfs_ifork_t             *ifp;   /* fork structure */
1256        int                     level;  /* btree level, for checking */
1257        xfs_mount_t             *mp;    /* file system mount structure */
1258        __be64                  *pp;    /* pointer to block address */
1259        /* REFERENCED */
1260        xfs_extnum_t            room;   /* number of entries there's room for */
1261
1262        bno = NULLFSBLOCK;
1263        mp = ip->i_mount;
1264        ifp = XFS_IFORK_PTR(ip, whichfork);
1265        exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
1266                                        XFS_EXTFMT_INODE(ip);
1267        block = ifp->if_broot;
1268        /*
1269         * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
1270         */
1271        level = be16_to_cpu(block->bb_level);
1272        ASSERT(level > 0);
1273        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
1274        bno = be64_to_cpu(*pp);
1275        ASSERT(bno != NULLFSBLOCK);
1276        ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
1277        ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
1278        /*
1279         * Go down the tree until leaf level is reached, following the first
1280         * pointer (leftmost) at each level.
1281         */
1282        while (level-- > 0) {
1283                error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1284                                XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1285                if (error)
1286                        return error;
1287                block = XFS_BUF_TO_BLOCK(bp);
1288                if (level == 0)
1289                        break;
1290                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
1291                bno = be64_to_cpu(*pp);
1292                XFS_WANT_CORRUPTED_GOTO(mp,
1293                        XFS_FSB_SANITY_CHECK(mp, bno), error0);
1294                xfs_trans_brelse(tp, bp);
1295        }
1296        /*
1297         * Here with bp and block set to the leftmost leaf node in the tree.
1298         */
1299        room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1300        i = 0;
1301        /*
1302         * Loop over all leaf nodes.  Copy information to the extent records.
1303         */
1304        for (;;) {
1305                xfs_bmbt_rec_t  *frp;
1306                xfs_fsblock_t   nextbno;
1307                xfs_extnum_t    num_recs;
1308                xfs_extnum_t    start;
1309
1310                num_recs = xfs_btree_get_numrecs(block);
1311                if (unlikely(i + num_recs > room)) {
1312                        ASSERT(i + num_recs <= room);
1313                        xfs_warn(ip->i_mount,
1314                                "corrupt dinode %Lu, (btree extents).",
1315                                (unsigned long long) ip->i_ino);
1316                        XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
1317                                XFS_ERRLEVEL_LOW, ip->i_mount, block);
1318                        goto error0;
1319                }
1320                /*
1321                 * Read-ahead the next leaf block, if any.
1322                 */
1323                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
1324                if (nextbno != NULLFSBLOCK)
1325                        xfs_btree_reada_bufl(mp, nextbno, 1,
1326                                             &xfs_bmbt_buf_ops);
1327                /*
1328                 * Copy records into the extent records.
1329                 */
1330                frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1331                start = i;
1332                for (j = 0; j < num_recs; j++, i++, frp++) {
1333                        xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
1334                        trp->l0 = be64_to_cpu(frp->l0);
1335                        trp->l1 = be64_to_cpu(frp->l1);
1336                }
1337                if (exntf == XFS_EXTFMT_NOSTATE) {
1338                        /*
1339                         * Check all attribute bmap btree records and
1340                         * any "older" data bmap btree records for a
1341                         * set bit in the "extent flag" position.
1342                         */
1343                        if (unlikely(xfs_check_nostate_extents(ifp,
1344                                        start, num_recs))) {
1345                                XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
1346                                                 XFS_ERRLEVEL_LOW,
1347                                                 ip->i_mount);
1348                                goto error0;
1349                        }
1350                }
1351                xfs_trans_brelse(tp, bp);
1352                bno = nextbno;
1353                /*
1354                 * If we've reached the end, stop.
1355                 */
1356                if (bno == NULLFSBLOCK)
1357                        break;
1358                error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1359                                XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1360                if (error)
1361                        return error;
1362                block = XFS_BUF_TO_BLOCK(bp);
1363        }
1364        ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
1365        ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
1366        XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
1367        return 0;
1368error0:
1369        xfs_trans_brelse(tp, bp);
1370        return -EFSCORRUPTED;
1371}
1372
1373
1374/*
1375 * Search the extent records for the entry containing block bno.
1376 * If bno lies in a hole, point to the next entry.  If bno lies
1377 * past eof, *eofp will be set, and *prevp will contain the last
1378 * entry (null if none).  Else, *lastxp will be set to the index
1379 * of the found entry; *gotp will contain the entry.
1380 */
1381STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
1382xfs_bmap_search_multi_extents(
1383        xfs_ifork_t     *ifp,           /* inode fork pointer */
1384        xfs_fileoff_t   bno,            /* block number searched for */
1385        int             *eofp,          /* out: end of file found */
1386        xfs_extnum_t    *lastxp,        /* out: last extent index */
1387        xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1388        xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1389{
1390        xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
1391        xfs_extnum_t    lastx;          /* last extent index */
1392
1393        /*
1394         * Initialize the extent entry structure to catch access to
1395         * uninitialized br_startblock field.
1396         */
1397        gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1398        gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1399        gotp->br_state = XFS_EXT_INVALID;
1400        gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1401        prevp->br_startoff = NULLFILEOFF;
1402
1403        ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1404        if (lastx > 0) {
1405                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1406        }
1407        if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1408                xfs_bmbt_get_all(ep, gotp);
1409                *eofp = 0;
1410        } else {
1411                if (lastx > 0) {
1412                        *gotp = *prevp;
1413                }
1414                *eofp = 1;
1415                ep = NULL;
1416        }
1417        *lastxp = lastx;
1418        return ep;
1419}
1420
1421/*
1422 * Search the extents list for the inode, for the extent containing bno.
1423 * If bno lies in a hole, point to the next entry.  If bno lies past eof,
1424 * *eofp will be set, and *prevp will contain the last entry (null if none).
1425 * Else, *lastxp will be set to the index of the found
1426 * entry; *gotp will contain the entry.
1427 */
1428xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
1429xfs_bmap_search_extents(
1430        xfs_inode_t     *ip,            /* incore inode pointer */
1431        xfs_fileoff_t   bno,            /* block number searched for */
1432        int             fork,           /* data or attr fork */
1433        int             *eofp,          /* out: end of file found */
1434        xfs_extnum_t    *lastxp,        /* out: last extent index */
1435        xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1436        xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1437{
1438        xfs_ifork_t     *ifp;           /* inode fork pointer */
1439        xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
1440
1441        XFS_STATS_INC(ip->i_mount, xs_look_exlist);
1442        ifp = XFS_IFORK_PTR(ip, fork);
1443
1444        ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
1445
1446        if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
1447                     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
1448                xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
1449                                "Access to block zero in inode %llu "
1450                                "start_block: %llx start_off: %llx "
1451                                "blkcnt: %llx extent-state: %x lastx: %x",
1452                        (unsigned long long)ip->i_ino,
1453                        (unsigned long long)gotp->br_startblock,
1454                        (unsigned long long)gotp->br_startoff,
1455                        (unsigned long long)gotp->br_blockcount,
1456                        gotp->br_state, *lastxp);
1457                *lastxp = NULLEXTNUM;
1458                *eofp = 1;
1459                return NULL;
1460        }
1461        return ep;
1462}
1463
1464/*
1465 * Returns the file-relative block number of the first unused block(s)
1466 * in the file with at least "len" logically contiguous blocks free.
1467 * This is the lowest-address hole if the file has holes, else the first block
1468 * past the end of file.
1469 * Return 0 if the file is currently local (in-inode).
1470 */
1471int                                             /* error */
1472xfs_bmap_first_unused(
1473        xfs_trans_t     *tp,                    /* transaction pointer */
1474        xfs_inode_t     *ip,                    /* incore inode */
1475        xfs_extlen_t    len,                    /* size of hole to find */
1476        xfs_fileoff_t   *first_unused,          /* unused block */
1477        int             whichfork)              /* data or attr fork */
1478{
1479        int             error;                  /* error return value */
1480        int             idx;                    /* extent record index */
1481        xfs_ifork_t     *ifp;                   /* inode fork pointer */
1482        xfs_fileoff_t   lastaddr;               /* last block number seen */
1483        xfs_fileoff_t   lowest;                 /* lowest useful block */
1484        xfs_fileoff_t   max;                    /* starting useful block */
1485        xfs_fileoff_t   off;                    /* offset for this block */
1486        xfs_extnum_t    nextents;               /* number of extent entries */
1487
1488        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1489               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1490               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1491        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1492                *first_unused = 0;
1493                return 0;
1494        }
1495        ifp = XFS_IFORK_PTR(ip, whichfork);
1496        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1497            (error = xfs_iread_extents(tp, ip, whichfork)))
1498                return error;
1499        lowest = *first_unused;
1500        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1501        for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1502                xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1503                off = xfs_bmbt_get_startoff(ep);
1504                /*
1505                 * See if the hole before this extent will work.
1506                 */
1507                if (off >= lowest + len && off - max >= len) {
1508                        *first_unused = max;
1509                        return 0;
1510                }
1511                lastaddr = off + xfs_bmbt_get_blockcount(ep);
1512                max = XFS_FILEOFF_MAX(lastaddr, lowest);
1513        }
1514        *first_unused = max;
1515        return 0;
1516}
1517
1518/*
1519 * Returns the file-relative block number of the last block - 1 before
1520 * last_block (input value) in the file.
1521 * This is not based on i_size, it is based on the extent records.
1522 * Returns 0 for local files, as they do not have extent records.
1523 */
1524int                                             /* error */
1525xfs_bmap_last_before(
1526        xfs_trans_t     *tp,                    /* transaction pointer */
1527        xfs_inode_t     *ip,                    /* incore inode */
1528        xfs_fileoff_t   *last_block,            /* last block */
1529        int             whichfork)              /* data or attr fork */
1530{
1531        xfs_fileoff_t   bno;                    /* input file offset */
1532        int             eof;                    /* hit end of file */
1533        xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
1534        int             error;                  /* error return value */
1535        xfs_bmbt_irec_t got;                    /* current extent value */
1536        xfs_ifork_t     *ifp;                   /* inode fork pointer */
1537        xfs_extnum_t    lastx;                  /* last extent used */
1538        xfs_bmbt_irec_t prev;                   /* previous extent value */
1539
1540        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1541            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1542            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1543               return -EIO;
1544        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1545                *last_block = 0;
1546                return 0;
1547        }
1548        ifp = XFS_IFORK_PTR(ip, whichfork);
1549        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1550            (error = xfs_iread_extents(tp, ip, whichfork)))
1551                return error;
1552        bno = *last_block - 1;
1553        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
1554                &prev);
1555        if (eof || xfs_bmbt_get_startoff(ep) > bno) {
1556                if (prev.br_startoff == NULLFILEOFF)
1557                        *last_block = 0;
1558                else
1559                        *last_block = prev.br_startoff + prev.br_blockcount;
1560        }
1561        /*
1562         * Otherwise *last_block is already the right answer.
1563         */
1564        return 0;
1565}
1566
1567int
1568xfs_bmap_last_extent(
1569        struct xfs_trans        *tp,
1570        struct xfs_inode        *ip,
1571        int                     whichfork,
1572        struct xfs_bmbt_irec    *rec,
1573        int                     *is_empty)
1574{
1575        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1576        int                     error;
1577        int                     nextents;
1578
1579        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1580                error = xfs_iread_extents(tp, ip, whichfork);
1581                if (error)
1582                        return error;
1583        }
1584
1585        nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1586        if (nextents == 0) {
1587                *is_empty = 1;
1588                return 0;
1589        }
1590
1591        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1592        *is_empty = 0;
1593        return 0;
1594}
1595
1596/*
1597 * Check the last inode extent to determine whether this allocation will result
1598 * in blocks being allocated at the end of the file. When we allocate new data
1599 * blocks at the end of the file which do not start at the previous data block,
1600 * we will try to align the new blocks at stripe unit boundaries.
1601 *
1602 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1603 * at, or past the EOF.
1604 */
1605STATIC int
1606xfs_bmap_isaeof(
1607        struct xfs_bmalloca     *bma,
1608        int                     whichfork)
1609{
1610        struct xfs_bmbt_irec    rec;
1611        int                     is_empty;
1612        int                     error;
1613
1614        bma->aeof = 0;
1615        error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1616                                     &is_empty);
1617        if (error)
1618                return error;
1619
1620        if (is_empty) {
1621                bma->aeof = 1;
1622                return 0;
1623        }
1624
1625        /*
1626         * Check if we are allocation or past the last extent, or at least into
1627         * the last delayed allocated extent.
1628         */
1629        bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1630                (bma->offset >= rec.br_startoff &&
1631                 isnullstartblock(rec.br_startblock));
1632        return 0;
1633}
1634
1635/*
1636 * Returns the file-relative block number of the first block past eof in
1637 * the file.  This is not based on i_size, it is based on the extent records.
1638 * Returns 0 for local files, as they do not have extent records.
1639 */
1640int
1641xfs_bmap_last_offset(
1642        struct xfs_inode        *ip,
1643        xfs_fileoff_t           *last_block,
1644        int                     whichfork)
1645{
1646        struct xfs_bmbt_irec    rec;
1647        int                     is_empty;
1648        int                     error;
1649
1650        *last_block = 0;
1651
1652        if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1653                return 0;
1654
1655        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1656            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1657               return -EIO;
1658
1659        error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1660        if (error || is_empty)
1661                return error;
1662
1663        *last_block = rec.br_startoff + rec.br_blockcount;
1664        return 0;
1665}
1666
1667/*
1668 * Returns whether the selected fork of the inode has exactly one
1669 * block or not.  For the data fork we check this matches di_size,
1670 * implying the file's range is 0..bsize-1.
1671 */
1672int                                     /* 1=>1 block, 0=>otherwise */
1673xfs_bmap_one_block(
1674        xfs_inode_t     *ip,            /* incore inode */
1675        int             whichfork)      /* data or attr fork */
1676{
1677        xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
1678        xfs_ifork_t     *ifp;           /* inode fork pointer */
1679        int             rval;           /* return value */
1680        xfs_bmbt_irec_t s;              /* internal version of extent */
1681
1682#ifndef DEBUG
1683        if (whichfork == XFS_DATA_FORK)
1684                return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1685#endif  /* !DEBUG */
1686        if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1687                return 0;
1688        if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1689                return 0;
1690        ifp = XFS_IFORK_PTR(ip, whichfork);
1691        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1692        ep = xfs_iext_get_ext(ifp, 0);
1693        xfs_bmbt_get_all(ep, &s);
1694        rval = s.br_startoff == 0 && s.br_blockcount == 1;
1695        if (rval && whichfork == XFS_DATA_FORK)
1696                ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1697        return rval;
1698}
1699
1700/*
1701 * Extent tree manipulation functions used during allocation.
1702 */
1703
1704/*
1705 * Convert a delayed allocation to a real allocation.
1706 */
1707STATIC int                              /* error */
1708xfs_bmap_add_extent_delay_real(
1709        struct xfs_bmalloca     *bma,
1710        int                     whichfork)
1711{
1712        struct xfs_bmbt_irec    *new = &bma->got;
1713        int                     diff;   /* temp value */
1714        xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1715        int                     error;  /* error return value */
1716        int                     i;      /* temp state */
1717        xfs_ifork_t             *ifp;   /* inode fork pointer */
1718        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1719        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1720                                        /* left is 0, right is 1, prev is 2 */
1721        int                     rval=0; /* return value (logging flags) */
1722        int                     state = 0;/* state bits, accessed thru macros */
1723        xfs_filblks_t           da_new; /* new count del alloc blocks used */
1724        xfs_filblks_t           da_old; /* old count del alloc blocks used */
1725        xfs_filblks_t           temp=0; /* value for da_new calculations */
1726        xfs_filblks_t           temp2=0;/* value for da_new calculations */
1727        int                     tmp_rval;       /* partial logging flags */
1728        struct xfs_mount        *mp;
1729        xfs_extnum_t            *nextents;
1730
1731        mp = bma->ip->i_mount;
1732        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1733        ASSERT(whichfork != XFS_ATTR_FORK);
1734        nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
1735                                                &bma->ip->i_d.di_nextents);
1736
1737        ASSERT(bma->idx >= 0);
1738        ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
1739        ASSERT(!isnullstartblock(new->br_startblock));
1740        ASSERT(!bma->cur ||
1741               (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1742
1743        XFS_STATS_INC(mp, xs_add_exlist);
1744
1745#define LEFT            r[0]
1746#define RIGHT           r[1]
1747#define PREV            r[2]
1748
1749        if (whichfork == XFS_COW_FORK)
1750                state |= BMAP_COWFORK;
1751
1752        /*
1753         * Set up a bunch of variables to make the tests simpler.
1754         */
1755        ep = xfs_iext_get_ext(ifp, bma->idx);
1756        xfs_bmbt_get_all(ep, &PREV);
1757        new_endoff = new->br_startoff + new->br_blockcount;
1758        ASSERT(PREV.br_startoff <= new->br_startoff);
1759        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1760
1761        da_old = startblockval(PREV.br_startblock);
1762        da_new = 0;
1763
1764        /*
1765         * Set flags determining what part of the previous delayed allocation
1766         * extent is being replaced by a real allocation.
1767         */
1768        if (PREV.br_startoff == new->br_startoff)
1769                state |= BMAP_LEFT_FILLING;
1770        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1771                state |= BMAP_RIGHT_FILLING;
1772
1773        /*
1774         * Check and set flags if this segment has a left neighbor.
1775         * Don't set contiguous if the combined extent would be too large.
1776         */
1777        if (bma->idx > 0) {
1778                state |= BMAP_LEFT_VALID;
1779                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1780
1781                if (isnullstartblock(LEFT.br_startblock))
1782                        state |= BMAP_LEFT_DELAY;
1783        }
1784
1785        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1786            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1787            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1788            LEFT.br_state == new->br_state &&
1789            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1790                state |= BMAP_LEFT_CONTIG;
1791
1792        /*
1793         * Check and set flags if this segment has a right neighbor.
1794         * Don't set contiguous if the combined extent would be too large.
1795         * Also check for all-three-contiguous being too large.
1796         */
1797        if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1798                state |= BMAP_RIGHT_VALID;
1799                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1800
1801                if (isnullstartblock(RIGHT.br_startblock))
1802                        state |= BMAP_RIGHT_DELAY;
1803        }
1804
1805        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1806            new_endoff == RIGHT.br_startoff &&
1807            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1808            new->br_state == RIGHT.br_state &&
1809            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1810            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1811                       BMAP_RIGHT_FILLING)) !=
1812                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1813                       BMAP_RIGHT_FILLING) ||
1814             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1815                        <= MAXEXTLEN))
1816                state |= BMAP_RIGHT_CONTIG;
1817
1818        error = 0;
1819        /*
1820         * Switch out based on the FILLING and CONTIG state bits.
1821         */
1822        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1823                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1824        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1825             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1826                /*
1827                 * Filling in all of a previously delayed allocation extent.
1828                 * The left and right neighbors are both contiguous with new.
1829                 */
1830                bma->idx--;
1831                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1832                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1833                        LEFT.br_blockcount + PREV.br_blockcount +
1834                        RIGHT.br_blockcount);
1835                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1836
1837                xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1838                (*nextents)--;
1839                if (bma->cur == NULL)
1840                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1841                else {
1842                        rval = XFS_ILOG_CORE;
1843                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1844                                        RIGHT.br_startblock,
1845                                        RIGHT.br_blockcount, &i);
1846                        if (error)
1847                                goto done;
1848                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1849                        error = xfs_btree_delete(bma->cur, &i);
1850                        if (error)
1851                                goto done;
1852                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1853                        error = xfs_btree_decrement(bma->cur, 0, &i);
1854                        if (error)
1855                                goto done;
1856                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1857                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1858                                        LEFT.br_startblock,
1859                                        LEFT.br_blockcount +
1860                                        PREV.br_blockcount +
1861                                        RIGHT.br_blockcount, LEFT.br_state);
1862                        if (error)
1863                                goto done;
1864                }
1865                break;
1866
1867        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1868                /*
1869                 * Filling in all of a previously delayed allocation extent.
1870                 * The left neighbor is contiguous, the right is not.
1871                 */
1872                bma->idx--;
1873
1874                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1875                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1876                        LEFT.br_blockcount + PREV.br_blockcount);
1877                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1878
1879                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1880                if (bma->cur == NULL)
1881                        rval = XFS_ILOG_DEXT;
1882                else {
1883                        rval = 0;
1884                        error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1885                                        LEFT.br_startblock, LEFT.br_blockcount,
1886                                        &i);
1887                        if (error)
1888                                goto done;
1889                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1890                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1891                                        LEFT.br_startblock,
1892                                        LEFT.br_blockcount +
1893                                        PREV.br_blockcount, LEFT.br_state);
1894                        if (error)
1895                                goto done;
1896                }
1897                break;
1898
1899        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1900                /*
1901                 * Filling in all of a previously delayed allocation extent.
1902                 * The right neighbor is contiguous, the left is not.
1903                 */
1904                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1905                xfs_bmbt_set_startblock(ep, new->br_startblock);
1906                xfs_bmbt_set_blockcount(ep,
1907                        PREV.br_blockcount + RIGHT.br_blockcount);
1908                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1909
1910                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1911                if (bma->cur == NULL)
1912                        rval = XFS_ILOG_DEXT;
1913                else {
1914                        rval = 0;
1915                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1916                                        RIGHT.br_startblock,
1917                                        RIGHT.br_blockcount, &i);
1918                        if (error)
1919                                goto done;
1920                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1921                        error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
1922                                        new->br_startblock,
1923                                        PREV.br_blockcount +
1924                                        RIGHT.br_blockcount, PREV.br_state);
1925                        if (error)
1926                                goto done;
1927                }
1928                break;
1929
1930        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1931                /*
1932                 * Filling in all of a previously delayed allocation extent.
1933                 * Neither the left nor right neighbors are contiguous with
1934                 * the new one.
1935                 */
1936                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1937                xfs_bmbt_set_startblock(ep, new->br_startblock);
1938                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1939
1940                (*nextents)++;
1941                if (bma->cur == NULL)
1942                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1943                else {
1944                        rval = XFS_ILOG_CORE;
1945                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1946                                        new->br_startblock, new->br_blockcount,
1947                                        &i);
1948                        if (error)
1949                                goto done;
1950                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1951                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1952                        error = xfs_btree_insert(bma->cur, &i);
1953                        if (error)
1954                                goto done;
1955                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1956                }
1957                break;
1958
1959        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1960                /*
1961                 * Filling in the first part of a previous delayed allocation.
1962                 * The left neighbor is contiguous.
1963                 */
1964                trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1965                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1966                        LEFT.br_blockcount + new->br_blockcount);
1967                xfs_bmbt_set_startoff(ep,
1968                        PREV.br_startoff + new->br_blockcount);
1969                trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1970
1971                temp = PREV.br_blockcount - new->br_blockcount;
1972                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1973                xfs_bmbt_set_blockcount(ep, temp);
1974                if (bma->cur == NULL)
1975                        rval = XFS_ILOG_DEXT;
1976                else {
1977                        rval = 0;
1978                        error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1979                                        LEFT.br_startblock, LEFT.br_blockcount,
1980                                        &i);
1981                        if (error)
1982                                goto done;
1983                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1984                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1985                                        LEFT.br_startblock,
1986                                        LEFT.br_blockcount +
1987                                        new->br_blockcount,
1988                                        LEFT.br_state);
1989                        if (error)
1990                                goto done;
1991                }
1992                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1993                        startblockval(PREV.br_startblock));
1994                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1995                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1996
1997                bma->idx--;
1998                break;
1999
2000        case BMAP_LEFT_FILLING:
2001                /*
2002                 * Filling in the first part of a previous delayed allocation.
2003                 * The left neighbor is not contiguous.
2004                 */
2005                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2006                xfs_bmbt_set_startoff(ep, new_endoff);
2007                temp = PREV.br_blockcount - new->br_blockcount;
2008                xfs_bmbt_set_blockcount(ep, temp);
2009                xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
2010                (*nextents)++;
2011                if (bma->cur == NULL)
2012                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2013                else {
2014                        rval = XFS_ILOG_CORE;
2015                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2016                                        new->br_startblock, new->br_blockcount,
2017                                        &i);
2018                        if (error)
2019                                goto done;
2020                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2021                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2022                        error = xfs_btree_insert(bma->cur, &i);
2023                        if (error)
2024                                goto done;
2025                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2026                }
2027
2028                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2029                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2030                                        bma->firstblock, bma->dfops,
2031                                        &bma->cur, 1, &tmp_rval, whichfork);
2032                        rval |= tmp_rval;
2033                        if (error)
2034                                goto done;
2035                }
2036                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2037                        startblockval(PREV.br_startblock) -
2038                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2039                ep = xfs_iext_get_ext(ifp, bma->idx + 1);
2040                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2041                trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2042                break;
2043
2044        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2045                /*
2046                 * Filling in the last part of a previous delayed allocation.
2047                 * The right neighbor is contiguous with the new allocation.
2048                 */
2049                temp = PREV.br_blockcount - new->br_blockcount;
2050                trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2051                xfs_bmbt_set_blockcount(ep, temp);
2052                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
2053                        new->br_startoff, new->br_startblock,
2054                        new->br_blockcount + RIGHT.br_blockcount,
2055                        RIGHT.br_state);
2056                trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2057                if (bma->cur == NULL)
2058                        rval = XFS_ILOG_DEXT;
2059                else {
2060                        rval = 0;
2061                        error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
2062                                        RIGHT.br_startblock,
2063                                        RIGHT.br_blockcount, &i);
2064                        if (error)
2065                                goto done;
2066                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2067                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
2068                                        new->br_startblock,
2069                                        new->br_blockcount +
2070                                        RIGHT.br_blockcount,
2071                                        RIGHT.br_state);
2072                        if (error)
2073                                goto done;
2074                }
2075
2076                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2077                        startblockval(PREV.br_startblock));
2078                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2079                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2080                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2081
2082                bma->idx++;
2083                break;
2084
2085        case BMAP_RIGHT_FILLING:
2086                /*
2087                 * Filling in the last part of a previous delayed allocation.
2088                 * The right neighbor is not contiguous.
2089                 */
2090                temp = PREV.br_blockcount - new->br_blockcount;
2091                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2092                xfs_bmbt_set_blockcount(ep, temp);
2093                xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
2094                (*nextents)++;
2095                if (bma->cur == NULL)
2096                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2097                else {
2098                        rval = XFS_ILOG_CORE;
2099                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2100                                        new->br_startblock, new->br_blockcount,
2101                                        &i);
2102                        if (error)
2103                                goto done;
2104                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2105                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2106                        error = xfs_btree_insert(bma->cur, &i);
2107                        if (error)
2108                                goto done;
2109                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2110                }
2111
2112                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2113                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2114                                bma->firstblock, bma->dfops, &bma->cur, 1,
2115                                &tmp_rval, whichfork);
2116                        rval |= tmp_rval;
2117                        if (error)
2118                                goto done;
2119                }
2120                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2121                        startblockval(PREV.br_startblock) -
2122                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2123                ep = xfs_iext_get_ext(ifp, bma->idx);
2124                xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2125                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2126
2127                bma->idx++;
2128                break;
2129
2130        case 0:
2131                /*
2132                 * Filling in the middle part of a previous delayed allocation.
2133                 * Contiguity is impossible here.
2134                 * This case is avoided almost all the time.
2135                 *
2136                 * We start with a delayed allocation:
2137                 *
2138                 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
2139                 *  PREV @ idx
2140                 *
2141                 * and we are allocating:
2142                 *                     +rrrrrrrrrrrrrrrrr+
2143                 *                            new
2144                 *
2145                 * and we set it up for insertion as:
2146                 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
2147                 *                            new
2148                 *  PREV @ idx          LEFT              RIGHT
2149                 *                      inserted at idx + 1
2150                 */
2151                temp = new->br_startoff - PREV.br_startoff;
2152                temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
2153                trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
2154                xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
2155                LEFT = *new;
2156                RIGHT.br_state = PREV.br_state;
2157                RIGHT.br_startblock = nullstartblock(
2158                                (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2159                RIGHT.br_startoff = new_endoff;
2160                RIGHT.br_blockcount = temp2;
2161                /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
2162                xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
2163                (*nextents)++;
2164                if (bma->cur == NULL)
2165                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2166                else {
2167                        rval = XFS_ILOG_CORE;
2168                        error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2169                                        new->br_startblock, new->br_blockcount,
2170                                        &i);
2171                        if (error)
2172                                goto done;
2173                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2174                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2175                        error = xfs_btree_insert(bma->cur, &i);
2176                        if (error)
2177                                goto done;
2178                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2179                }
2180
2181                if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2182                        error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2183                                        bma->firstblock, bma->dfops, &bma->cur,
2184                                        1, &tmp_rval, whichfork);
2185                        rval |= tmp_rval;
2186                        if (error)
2187                                goto done;
2188                }
2189                temp = xfs_bmap_worst_indlen(bma->ip, temp);
2190                temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2191                diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2192                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2193                if (diff > 0) {
2194                        error = xfs_mod_fdblocks(bma->ip->i_mount,
2195                                                 -((int64_t)diff), false);
2196                        ASSERT(!error);
2197                        if (error)
2198                                goto done;
2199                }
2200
2201                ep = xfs_iext_get_ext(ifp, bma->idx);
2202                xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2203                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2204                trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2205                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2206                        nullstartblock((int)temp2));
2207                trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2208
2209                bma->idx++;
2210                da_new = temp + temp2;
2211                break;
2212
2213        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2214        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2215        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2216        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2217        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2218        case BMAP_LEFT_CONTIG:
2219        case BMAP_RIGHT_CONTIG:
2220                /*
2221                 * These cases are all impossible.
2222                 */
2223                ASSERT(0);
2224        }
2225
2226        /* add reverse mapping */
2227        error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
2228        if (error)
2229                goto done;
2230
2231        /* convert to a btree if necessary */
2232        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2233                int     tmp_logflags;   /* partial log flag return val */
2234
2235                ASSERT(bma->cur == NULL);
2236                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2237                                bma->firstblock, bma->dfops, &bma->cur,
2238                                da_old > 0, &tmp_logflags, whichfork);
2239                bma->logflags |= tmp_logflags;
2240                if (error)
2241                        goto done;
2242        }
2243
2244        /* adjust for changes in reserved delayed indirect blocks */
2245        if (da_old || da_new) {
2246                temp = da_new;
2247                if (bma->cur)
2248                        temp += bma->cur->bc_private.b.allocated;
2249                ASSERT(temp <= da_old);
2250                if (temp < da_old)
2251                        xfs_mod_fdblocks(bma->ip->i_mount,
2252                                        (int64_t)(da_old - temp), false);
2253        }
2254
2255        /* clear out the allocated field, done with it now in any case. */
2256        if (bma->cur)
2257                bma->cur->bc_private.b.allocated = 0;
2258
2259        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2260done:
2261        if (whichfork != XFS_COW_FORK)
2262                bma->logflags |= rval;
2263        return error;
2264#undef  LEFT
2265#undef  RIGHT
2266#undef  PREV
2267}
2268
2269/*
2270 * Convert an unwritten allocation to a real allocation or vice versa.
2271 */
2272STATIC int                              /* error */
2273xfs_bmap_add_extent_unwritten_real(
2274        struct xfs_trans        *tp,
2275        xfs_inode_t             *ip,    /* incore inode pointer */
2276        xfs_extnum_t            *idx,   /* extent number to update/insert */
2277        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2278        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2279        xfs_fsblock_t           *first, /* pointer to firstblock variable */
2280        struct xfs_defer_ops    *dfops, /* list of extents to be freed */
2281        int                     *logflagsp) /* inode logging flags */
2282{
2283        xfs_btree_cur_t         *cur;   /* btree cursor */
2284        xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
2285        int                     error;  /* error return value */
2286        int                     i;      /* temp state */
2287        xfs_ifork_t             *ifp;   /* inode fork pointer */
2288        xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2289        xfs_exntst_t            newext; /* new extent state */
2290        xfs_exntst_t            oldext; /* old extent state */
2291        xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2292                                        /* left is 0, right is 1, prev is 2 */
2293        int                     rval=0; /* return value (logging flags) */
2294        int                     state = 0;/* state bits, accessed thru macros */
2295        struct xfs_mount        *mp = tp->t_mountp;
2296
2297        *logflagsp = 0;
2298
2299        cur = *curp;
2300        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2301
2302        ASSERT(*idx >= 0);
2303        ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2304        ASSERT(!isnullstartblock(new->br_startblock));
2305
2306        XFS_STATS_INC(mp, xs_add_exlist);
2307
2308#define LEFT            r[0]
2309#define RIGHT           r[1]
2310#define PREV            r[2]
2311
2312        /*
2313         * Set up a bunch of variables to make the tests simpler.
2314         */
2315        error = 0;
2316        ep = xfs_iext_get_ext(ifp, *idx);
2317        xfs_bmbt_get_all(ep, &PREV);
2318        newext = new->br_state;
2319        oldext = (newext == XFS_EXT_UNWRITTEN) ?
2320                XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2321        ASSERT(PREV.br_state == oldext);
2322        new_endoff = new->br_startoff + new->br_blockcount;
2323        ASSERT(PREV.br_startoff <= new->br_startoff);
2324        ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2325
2326        /*
2327         * Set flags determining what part of the previous oldext allocation
2328         * extent is being replaced by a newext allocation.
2329         */
2330        if (PREV.br_startoff == new->br_startoff)
2331                state |= BMAP_LEFT_FILLING;
2332        if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2333                state |= BMAP_RIGHT_FILLING;
2334
2335        /*
2336         * Check and set flags if this segment has a left neighbor.
2337         * Don't set contiguous if the combined extent would be too large.
2338         */
2339        if (*idx > 0) {
2340                state |= BMAP_LEFT_VALID;
2341                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2342
2343                if (isnullstartblock(LEFT.br_startblock))
2344                        state |= BMAP_LEFT_DELAY;
2345        }
2346
2347        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2348            LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2349            LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2350            LEFT.br_state == newext &&
2351            LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2352                state |= BMAP_LEFT_CONTIG;
2353
2354        /*
2355         * Check and set flags if this segment has a right neighbor.
2356         * Don't set contiguous if the combined extent would be too large.
2357         * Also check for all-three-contiguous being too large.
2358         */
2359        if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
2360                state |= BMAP_RIGHT_VALID;
2361                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2362                if (isnullstartblock(RIGHT.br_startblock))
2363                        state |= BMAP_RIGHT_DELAY;
2364        }
2365
2366        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2367            new_endoff == RIGHT.br_startoff &&
2368            new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2369            newext == RIGHT.br_state &&
2370            new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2371            ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2372                       BMAP_RIGHT_FILLING)) !=
2373                      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2374                       BMAP_RIGHT_FILLING) ||
2375             LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2376                        <= MAXEXTLEN))
2377                state |= BMAP_RIGHT_CONTIG;
2378
2379        /*
2380         * Switch out based on the FILLING and CONTIG state bits.
2381         */
2382        switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2383                         BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2384        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2385             BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2386                /*
2387                 * Setting all of a previous oldext extent to newext.
2388                 * The left and right neighbors are both contiguous with new.
2389                 */
2390                --*idx;
2391
2392                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2393                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2394                        LEFT.br_blockcount + PREV.br_blockcount +
2395                        RIGHT.br_blockcount);
2396                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2397
2398                xfs_iext_remove(ip, *idx + 1, 2, state);
2399                ip->i_d.di_nextents -= 2;
2400                if (cur == NULL)
2401                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2402                else {
2403                        rval = XFS_ILOG_CORE;
2404                        if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2405                                        RIGHT.br_startblock,
2406                                        RIGHT.br_blockcount, &i)))
2407                                goto done;
2408                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2409                        if ((error = xfs_btree_delete(cur, &i)))
2410                                goto done;
2411                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2412                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2413                                goto done;
2414                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2415                        if ((error = xfs_btree_delete(cur, &i)))
2416                                goto done;
2417                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2418                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2419                                goto done;
2420                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2421                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2422                                LEFT.br_startblock,
2423                                LEFT.br_blockcount + PREV.br_blockcount +
2424                                RIGHT.br_blockcount, LEFT.br_state)))
2425                                goto done;
2426                }
2427                break;
2428
2429        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2430                /*
2431                 * Setting all of a previous oldext extent to newext.
2432                 * The left neighbor is contiguous, the right is not.
2433                 */
2434                --*idx;
2435
2436                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2437                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2438                        LEFT.br_blockcount + PREV.br_blockcount);
2439                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2440
2441                xfs_iext_remove(ip, *idx + 1, 1, state);
2442                ip->i_d.di_nextents--;
2443                if (cur == NULL)
2444                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2445                else {
2446                        rval = XFS_ILOG_CORE;
2447                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2448                                        PREV.br_startblock, PREV.br_blockcount,
2449                                        &i)))
2450                                goto done;
2451                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2452                        if ((error = xfs_btree_delete(cur, &i)))
2453                                goto done;
2454                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2455                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2456                                goto done;
2457                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2458                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2459                                LEFT.br_startblock,
2460                                LEFT.br_blockcount + PREV.br_blockcount,
2461                                LEFT.br_state)))
2462                                goto done;
2463                }
2464                break;
2465
2466        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2467                /*
2468                 * Setting all of a previous oldext extent to newext.
2469                 * The right neighbor is contiguous, the left is not.
2470                 */
2471                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2472                xfs_bmbt_set_blockcount(ep,
2473                        PREV.br_blockcount + RIGHT.br_blockcount);
2474                xfs_bmbt_set_state(ep, newext);
2475                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2476                xfs_iext_remove(ip, *idx + 1, 1, state);
2477                ip->i_d.di_nextents--;
2478                if (cur == NULL)
2479                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2480                else {
2481                        rval = XFS_ILOG_CORE;
2482                        if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2483                                        RIGHT.br_startblock,
2484                                        RIGHT.br_blockcount, &i)))
2485                                goto done;
2486                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2487                        if ((error = xfs_btree_delete(cur, &i)))
2488                                goto done;
2489                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2490                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2491                                goto done;
2492                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2493                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2494                                new->br_startblock,
2495                                new->br_blockcount + RIGHT.br_blockcount,
2496                                newext)))
2497                                goto done;
2498                }
2499                break;
2500
2501        case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2502                /*
2503                 * Setting all of a previous oldext extent to newext.
2504                 * Neither the left nor right neighbors are contiguous with
2505                 * the new one.
2506                 */
2507                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2508                xfs_bmbt_set_state(ep, newext);
2509                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2510
2511                if (cur == NULL)
2512                        rval = XFS_ILOG_DEXT;
2513                else {
2514                        rval = 0;
2515                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2516                                        new->br_startblock, new->br_blockcount,
2517                                        &i)))
2518                                goto done;
2519                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2520                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2521                                new->br_startblock, new->br_blockcount,
2522                                newext)))
2523                                goto done;
2524                }
2525                break;
2526
2527        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2528                /*
2529                 * Setting the first part of a previous oldext extent to newext.
2530                 * The left neighbor is contiguous.
2531                 */
2532                trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
2533                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
2534                        LEFT.br_blockcount + new->br_blockcount);
2535                xfs_bmbt_set_startoff(ep,
2536                        PREV.br_startoff + new->br_blockcount);
2537                trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
2538
2539                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2540                xfs_bmbt_set_startblock(ep,
2541                        new->br_startblock + new->br_blockcount);
2542                xfs_bmbt_set_blockcount(ep,
2543                        PREV.br_blockcount - new->br_blockcount);
2544                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2545
2546                --*idx;
2547
2548                if (cur == NULL)
2549                        rval = XFS_ILOG_DEXT;
2550                else {
2551                        rval = 0;
2552                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2553                                        PREV.br_startblock, PREV.br_blockcount,
2554                                        &i)))
2555                                goto done;
2556                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2557                        if ((error = xfs_bmbt_update(cur,
2558                                PREV.br_startoff + new->br_blockcount,
2559                                PREV.br_startblock + new->br_blockcount,
2560                                PREV.br_blockcount - new->br_blockcount,
2561                                oldext)))
2562                                goto done;
2563                        if ((error = xfs_btree_decrement(cur, 0, &i)))
2564                                goto done;
2565                        error = xfs_bmbt_update(cur, LEFT.br_startoff,
2566                                LEFT.br_startblock,
2567                                LEFT.br_blockcount + new->br_blockcount,
2568                                LEFT.br_state);
2569                        if (error)
2570                                goto done;
2571                }
2572                break;
2573
2574        case BMAP_LEFT_FILLING:
2575                /*
2576                 * Setting the first part of a previous oldext extent to newext.
2577                 * The left neighbor is not contiguous.
2578                 */
2579                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2580                ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
2581                xfs_bmbt_set_startoff(ep, new_endoff);
2582                xfs_bmbt_set_blockcount(ep,
2583                        PREV.br_blockcount - new->br_blockcount);
2584                xfs_bmbt_set_startblock(ep,
2585                        new->br_startblock + new->br_blockcount);
2586                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2587
2588                xfs_iext_insert(ip, *idx, 1, new, state);
2589                ip->i_d.di_nextents++;
2590                if (cur == NULL)
2591                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2592                else {
2593                        rval = XFS_ILOG_CORE;
2594                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2595                                        PREV.br_startblock, PREV.br_blockcount,
2596                                        &i)))
2597                                goto done;
2598                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2599                        if ((error = xfs_bmbt_update(cur,
2600                                PREV.br_startoff + new->br_blockcount,
2601                                PREV.br_startblock + new->br_blockcount,
2602                                PREV.br_blockcount - new->br_blockcount,
2603                                oldext)))
2604                                goto done;
2605                        cur->bc_rec.b = *new;
2606                        if ((error = xfs_btree_insert(cur, &i)))
2607                                goto done;
2608                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2609                }
2610                break;
2611
2612        case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2613                /*
2614                 * Setting the last part of a previous oldext extent to newext.
2615                 * The right neighbor is contiguous with the new allocation.
2616                 */
2617                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2618                xfs_bmbt_set_blockcount(ep,
2619                        PREV.br_blockcount - new->br_blockcount);
2620                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2621
2622                ++*idx;
2623
2624                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2625                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2626                        new->br_startoff, new->br_startblock,
2627                        new->br_blockcount + RIGHT.br_blockcount, newext);
2628                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2629
2630                if (cur == NULL)
2631                        rval = XFS_ILOG_DEXT;
2632                else {
2633                        rval = 0;
2634                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2635                                        PREV.br_startblock,
2636                                        PREV.br_blockcount, &i)))
2637                                goto done;
2638                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2639                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2640                                PREV.br_startblock,
2641                                PREV.br_blockcount - new->br_blockcount,
2642                                oldext)))
2643                                goto done;
2644                        if ((error = xfs_btree_increment(cur, 0, &i)))
2645                                goto done;
2646                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
2647                                new->br_startblock,
2648                                new->br_blockcount + RIGHT.br_blockcount,
2649                                newext)))
2650                                goto done;
2651                }
2652                break;
2653
2654        case BMAP_RIGHT_FILLING:
2655                /*
2656                 * Setting the last part of a previous oldext extent to newext.
2657                 * The right neighbor is not contiguous.
2658                 */
2659                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2660                xfs_bmbt_set_blockcount(ep,
2661                        PREV.br_blockcount - new->br_blockcount);
2662                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2663
2664                ++*idx;
2665                xfs_iext_insert(ip, *idx, 1, new, state);
2666
2667                ip->i_d.di_nextents++;
2668                if (cur == NULL)
2669                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2670                else {
2671                        rval = XFS_ILOG_CORE;
2672                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2673                                        PREV.br_startblock, PREV.br_blockcount,
2674                                        &i)))
2675                                goto done;
2676                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2677                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2678                                PREV.br_startblock,
2679                                PREV.br_blockcount - new->br_blockcount,
2680                                oldext)))
2681                                goto done;
2682                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2683                                        new->br_startblock, new->br_blockcount,
2684                                        &i)))
2685                                goto done;
2686                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2687                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
2688                        if ((error = xfs_btree_insert(cur, &i)))
2689                                goto done;
2690                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2691                }
2692                break;
2693
2694        case 0:
2695                /*
2696                 * Setting the middle part of a previous oldext extent to
2697                 * newext.  Contiguity is impossible here.
2698                 * One extent becomes three extents.
2699                 */
2700                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2701                xfs_bmbt_set_blockcount(ep,
2702                        new->br_startoff - PREV.br_startoff);
2703                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2704
2705                r[0] = *new;
2706                r[1].br_startoff = new_endoff;
2707                r[1].br_blockcount =
2708                        PREV.br_startoff + PREV.br_blockcount - new_endoff;
2709                r[1].br_startblock = new->br_startblock + new->br_blockcount;
2710                r[1].br_state = oldext;
2711
2712                ++*idx;
2713                xfs_iext_insert(ip, *idx, 2, &r[0], state);
2714
2715                ip->i_d.di_nextents += 2;
2716                if (cur == NULL)
2717                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2718                else {
2719                        rval = XFS_ILOG_CORE;
2720                        if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2721                                        PREV.br_startblock, PREV.br_blockcount,
2722                                        &i)))
2723                                goto done;
2724                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2725                        /* new right extent - oldext */
2726                        if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2727                                r[1].br_startblock, r[1].br_blockcount,
2728                                r[1].br_state)))
2729                                goto done;
2730                        /* new left extent - oldext */
2731                        cur->bc_rec.b = PREV;
2732                        cur->bc_rec.b.br_blockcount =
2733                                new->br_startoff - PREV.br_startoff;
2734                        if ((error = xfs_btree_insert(cur, &i)))
2735                                goto done;
2736                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2737                        /*
2738                         * Reset the cursor to the position of the new extent
2739                         * we are about to insert as we can't trust it after
2740                         * the previous insert.
2741                         */
2742                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2743                                        new->br_startblock, new->br_blockcount,
2744                                        &i)))
2745                                goto done;
2746                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2747                        /* new middle extent - newext */
2748                        cur->bc_rec.b.br_state = new->br_state;
2749                        if ((error = xfs_btree_insert(cur, &i)))
2750                                goto done;
2751                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2752                }
2753                break;
2754
2755        case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2756        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2757        case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2758        case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2759        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2760        case BMAP_LEFT_CONTIG:
2761        case BMAP_RIGHT_CONTIG:
2762                /*
2763                 * These cases are all impossible.
2764                 */
2765                ASSERT(0);
2766        }
2767
2768        /* update reverse mappings */
2769        error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
2770        if (error)
2771                goto done;
2772
2773        /* convert to a btree if necessary */
2774        if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2775                int     tmp_logflags;   /* partial log flag return val */
2776
2777                ASSERT(cur == NULL);
2778                error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2779                                0, &tmp_logflags, XFS_DATA_FORK);
2780                *logflagsp |= tmp_logflags;
2781                if (error)
2782                        goto done;
2783        }
2784
2785        /* clear out the allocated field, done with it now in any case. */
2786        if (cur) {
2787                cur->bc_private.b.allocated = 0;
2788                *curp = cur;
2789        }
2790
2791        xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2792done:
2793        *logflagsp |= rval;
2794        return error;
2795#undef  LEFT
2796#undef  RIGHT
2797#undef  PREV
2798}
2799
2800/*
2801 * Convert a hole to a delayed allocation.
2802 */
2803STATIC void
2804xfs_bmap_add_extent_hole_delay(
2805        xfs_inode_t             *ip,    /* incore inode pointer */
2806        int                     whichfork,
2807        xfs_extnum_t            *idx,   /* extent number to update/insert */
2808        xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
2809{
2810        xfs_ifork_t             *ifp;   /* inode fork pointer */
2811        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2812        xfs_filblks_t           newlen=0;       /* new indirect size */
2813        xfs_filblks_t           oldlen=0;       /* old indirect size */
2814        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2815        int                     state;  /* state bits, accessed thru macros */
2816        xfs_filblks_t           temp=0; /* temp for indirect calculations */
2817
2818        ifp = XFS_IFORK_PTR(ip, whichfork);
2819        state = 0;
2820        if (whichfork == XFS_COW_FORK)
2821                state |= BMAP_COWFORK;
2822        ASSERT(isnullstartblock(new->br_startblock));
2823
2824        /*
2825         * Check and set flags if this segment has a left neighbor
2826         */
2827        if (*idx > 0) {
2828                state |= BMAP_LEFT_VALID;
2829                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
2830
2831                if (isnullstartblock(left.br_startblock))
2832                        state |= BMAP_LEFT_DELAY;
2833        }
2834
2835        /*
2836         * Check and set flags if the current (right) segment exists.
2837         * If it doesn't exist, we're converting the hole at end-of-file.
2838         */
2839        if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2840                state |= BMAP_RIGHT_VALID;
2841                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
2842
2843                if (isnullstartblock(right.br_startblock))
2844                        state |= BMAP_RIGHT_DELAY;
2845        }
2846
2847        /*
2848         * Set contiguity flags on the left and right neighbors.
2849         * Don't let extents get too large, even if the pieces are contiguous.
2850         */
2851        if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2852            left.br_startoff + left.br_blockcount == new->br_startoff &&
2853            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2854                state |= BMAP_LEFT_CONTIG;
2855
2856        if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2857            new->br_startoff + new->br_blockcount == right.br_startoff &&
2858            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2859            (!(state & BMAP_LEFT_CONTIG) ||
2860             (left.br_blockcount + new->br_blockcount +
2861              right.br_blockcount <= MAXEXTLEN)))
2862                state |= BMAP_RIGHT_CONTIG;
2863
2864        /*
2865         * Switch out based on the contiguity flags.
2866         */
2867        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2868        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2869                /*
2870                 * New allocation is contiguous with delayed allocations
2871                 * on the left and on the right.
2872                 * Merge all three into a single extent record.
2873                 */
2874                --*idx;
2875                temp = left.br_blockcount + new->br_blockcount +
2876                        right.br_blockcount;
2877
2878                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2879                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2880                oldlen = startblockval(left.br_startblock) +
2881                        startblockval(new->br_startblock) +
2882                        startblockval(right.br_startblock);
2883                newlen = xfs_bmap_worst_indlen(ip, temp);
2884                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2885                        nullstartblock((int)newlen));
2886                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2887
2888                xfs_iext_remove(ip, *idx + 1, 1, state);
2889                break;
2890
2891        case BMAP_LEFT_CONTIG:
2892                /*
2893                 * New allocation is contiguous with a delayed allocation
2894                 * on the left.
2895                 * Merge the new allocation with the left neighbor.
2896                 */
2897                --*idx;
2898                temp = left.br_blockcount + new->br_blockcount;
2899
2900                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2901                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2902                oldlen = startblockval(left.br_startblock) +
2903                        startblockval(new->br_startblock);
2904                newlen = xfs_bmap_worst_indlen(ip, temp);
2905                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
2906                        nullstartblock((int)newlen));
2907                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2908                break;
2909
2910        case BMAP_RIGHT_CONTIG:
2911                /*
2912                 * New allocation is contiguous with a delayed allocation
2913                 * on the right.
2914                 * Merge the new allocation with the right neighbor.
2915                 */
2916                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2917                temp = new->br_blockcount + right.br_blockcount;
2918                oldlen = startblockval(new->br_startblock) +
2919                        startblockval(right.br_startblock);
2920                newlen = xfs_bmap_worst_indlen(ip, temp);
2921                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2922                        new->br_startoff,
2923                        nullstartblock((int)newlen), temp, right.br_state);
2924                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2925                break;
2926
2927        case 0:
2928                /*
2929                 * New allocation is not contiguous with another
2930                 * delayed allocation.
2931                 * Insert a new entry.
2932                 */
2933                oldlen = newlen = 0;
2934                xfs_iext_insert(ip, *idx, 1, new, state);
2935                break;
2936        }
2937        if (oldlen != newlen) {
2938                ASSERT(oldlen > newlen);
2939                xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2940                                 false);
2941                /*
2942                 * Nothing to do for disk quota accounting here.
2943                 */
2944        }
2945}
2946
2947/*
2948 * Convert a hole to a real allocation.
2949 */
2950STATIC int                              /* error */
2951xfs_bmap_add_extent_hole_real(
2952        struct xfs_bmalloca     *bma,
2953        int                     whichfork)
2954{
2955        struct xfs_bmbt_irec    *new = &bma->got;
2956        int                     error;  /* error return value */
2957        int                     i;      /* temp state */
2958        xfs_ifork_t             *ifp;   /* inode fork pointer */
2959        xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
2960        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
2961        int                     rval=0; /* return value (logging flags) */
2962        int                     state;  /* state bits, accessed thru macros */
2963        struct xfs_mount        *mp;
2964
2965        mp = bma->ip->i_mount;
2966        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
2967
2968        ASSERT(bma->idx >= 0);
2969        ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2970        ASSERT(!isnullstartblock(new->br_startblock));
2971        ASSERT(!bma->cur ||
2972               !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2973        ASSERT(whichfork != XFS_COW_FORK);
2974
2975        XFS_STATS_INC(mp, xs_add_exlist);
2976
2977        state = 0;
2978        if (whichfork == XFS_ATTR_FORK)
2979                state |= BMAP_ATTRFORK;
2980
2981        /*
2982         * Check and set flags if this segment has a left neighbor.
2983         */
2984        if (bma->idx > 0) {
2985                state |= BMAP_LEFT_VALID;
2986                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
2987                if (isnullstartblock(left.br_startblock))
2988                        state |= BMAP_LEFT_DELAY;
2989        }
2990
2991        /*
2992         * Check and set flags if this segment has a current value.
2993         * Not true if we're inserting into the "hole" at eof.
2994         */
2995        if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2996                state |= BMAP_RIGHT_VALID;
2997                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
2998                if (isnullstartblock(right.br_startblock))
2999                        state |= BMAP_RIGHT_DELAY;
3000        }
3001
3002        /*
3003         * We're inserting a real allocation between "left" and "right".
3004         * Set the contiguity flags.  Don't let extents get too large.
3005         */
3006        if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
3007            left.br_startoff + left.br_blockcount == new->br_startoff &&
3008            left.br_startblock + left.br_blockcount == new->br_startblock &&
3009            left.br_state == new->br_state &&
3010            left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
3011                state |= BMAP_LEFT_CONTIG;
3012
3013        if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
3014            new->br_startoff + new->br_blockcount == right.br_startoff &&
3015            new->br_startblock + new->br_blockcount == right.br_startblock &&
3016            new->br_state == right.br_state &&
3017            new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
3018            (!(state & BMAP_LEFT_CONTIG) ||
3019             left.br_blockcount + new->br_blockcount +
3020             right.br_blockcount <= MAXEXTLEN))
3021                state |= BMAP_RIGHT_CONTIG;
3022
3023        error = 0;
3024        /*
3025         * Select which case we're in here, and implement it.
3026         */
3027        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
3028        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
3029                /*
3030                 * New allocation is contiguous with real allocations on the
3031                 * left and on the right.
3032                 * Merge all three into a single extent record.
3033                 */
3034                --bma->idx;
3035                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3036                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
3037                        left.br_blockcount + new->br_blockcount +
3038                        right.br_blockcount);
3039                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3040
3041                xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
3042
3043                XFS_IFORK_NEXT_SET(bma->ip, whichfork,
3044                        XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
3045                if (bma->cur == NULL) {
3046                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3047                } else {
3048                        rval = XFS_ILOG_CORE;
3049                        error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
3050                                        right.br_startblock, right.br_blockcount,
3051                                        &i);
3052                        if (error)
3053                                goto done;
3054                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3055                        error = xfs_btree_delete(bma->cur, &i);
3056                        if (error)
3057                                goto done;
3058                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3059                        error = xfs_btree_decrement(bma->cur, 0, &i);
3060                        if (error)
3061                                goto done;
3062                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3063                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
3064                                        left.br_startblock,
3065                                        left.br_blockcount +
3066                                                new->br_blockcount +
3067                                                right.br_blockcount,
3068                                        left.br_state);
3069                        if (error)
3070                                goto done;
3071                }
3072                break;
3073
3074        case BMAP_LEFT_CONTIG:
3075                /*
3076                 * New allocation is contiguous with a real allocation
3077                 * on the left.
3078                 * Merge the new allocation with the left neighbor.
3079                 */
3080                --bma->idx;
3081                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3082                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
3083                        left.br_blockcount + new->br_blockcount);
3084                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3085
3086                if (bma->cur == NULL) {
3087                        rval = xfs_ilog_fext(whichfork);
3088                } else {
3089                        rval = 0;
3090                        error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
3091                                        left.br_startblock, left.br_blockcount,
3092                                        &i);
3093                        if (error)
3094                                goto done;
3095                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3096                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
3097                                        left.br_startblock,
3098                                        left.br_blockcount +
3099                                                new->br_blockcount,
3100                                        left.br_state);
3101                        if (error)
3102                                goto done;
3103                }
3104                break;
3105
3106        case BMAP_RIGHT_CONTIG:
3107                /*
3108                 * New allocation is contiguous with a real allocation
3109                 * on the right.
3110                 * Merge the new allocation with the right neighbor.
3111                 */
3112                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
3113                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
3114                        new->br_startoff, new->br_startblock,
3115                        new->br_blockcount + right.br_blockcount,
3116                        right.br_state);
3117                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
3118
3119                if (bma->cur == NULL) {
3120                        rval = xfs_ilog_fext(whichfork);
3121                } else {
3122                        rval = 0;
3123                        error = xfs_bmbt_lookup_eq(bma->cur,
3124                                        right.br_startoff,
3125                                        right.br_startblock,
3126                                        right.br_blockcount, &i);
3127                        if (error)
3128                                goto done;
3129                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3130                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
3131                                        new->br_startblock,
3132                                        new->br_blockcount +
3133                                                right.br_blockcount,
3134                                        right.br_state);
3135                        if (error)
3136                                goto done;
3137                }
3138                break;
3139
3140        case 0:
3141                /*
3142                 * New allocation is not contiguous with another
3143                 * real allocation.
3144                 * Insert a new entry.
3145                 */
3146                xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
3147                XFS_IFORK_NEXT_SET(bma->ip, whichfork,
3148                        XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
3149                if (bma->cur == NULL) {
3150                        rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3151                } else {
3152                        rval = XFS_ILOG_CORE;
3153                        error = xfs_bmbt_lookup_eq(bma->cur,
3154                                        new->br_startoff,
3155                                        new->br_startblock,
3156                                        new->br_blockcount, &i);
3157                        if (error)
3158                                goto done;
3159                        XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
3160                        bma->cur->bc_rec.b.br_state = new->br_state;
3161                        error = xfs_btree_insert(bma->cur, &i);
3162                        if (error)
3163                                goto done;
3164                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3165                }
3166                break;
3167        }
3168
3169        /* add reverse mapping */
3170        error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
3171        if (error)
3172                goto done;
3173
3174        /* convert to a btree if necessary */
3175        if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
3176                int     tmp_logflags;   /* partial log flag return val */
3177
3178                ASSERT(bma->cur == NULL);
3179                error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
3180                                bma->firstblock, bma->dfops, &bma->cur,
3181                                0, &tmp_logflags, whichfork);
3182                bma->logflags |= tmp_logflags;
3183                if (error)
3184                        goto done;
3185        }
3186
3187        /* clear out the allocated field, done with it now in any case. */
3188        if (bma->cur)
3189                bma->cur->bc_private.b.allocated = 0;
3190
3191        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
3192done:
3193        bma->logflags |= rval;
3194        return error;
3195}
3196
3197/*
3198 * Functions used in the extent read, allocate and remove paths
3199 */
3200
3201/*
3202 * Adjust the size of the new extent based on di_extsize and rt extsize.
3203 */
3204int
3205xfs_bmap_extsize_align(
3206        xfs_mount_t     *mp,
3207        xfs_bmbt_irec_t *gotp,          /* next extent pointer */
3208        xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
3209        xfs_extlen_t    extsz,          /* align to this extent size */
3210        int             rt,             /* is this a realtime inode? */
3211        int             eof,            /* is extent at end-of-file? */
3212        int             delay,          /* creating delalloc extent? */
3213        int             convert,        /* overwriting unwritten extent? */
3214        xfs_fileoff_t   *offp,          /* in/out: aligned offset */
3215        xfs_extlen_t    *lenp)          /* in/out: aligned length */
3216{
3217        xfs_fileoff_t   orig_off;       /* original offset */
3218        xfs_extlen_t    orig_alen;      /* original length */
3219        xfs_fileoff_t   orig_end;       /* original off+len */
3220        xfs_fileoff_t   nexto;          /* next file offset */
3221        xfs_fileoff_t   prevo;          /* previous file offset */
3222        xfs_fileoff_t   align_off;      /* temp for offset */
3223        xfs_extlen_t    align_alen;     /* temp for length */
3224        xfs_extlen_t    temp;           /* temp for calculations */
3225
3226        if (convert)
3227                return 0;
3228
3229        orig_off = align_off = *offp;
3230        orig_alen = align_alen = *lenp;
3231        orig_end = orig_off + orig_alen;
3232
3233        /*
3234         * If this request overlaps an existing extent, then don't
3235         * attempt to perform any additional alignment.
3236         */
3237        if (!delay && !eof &&
3238            (orig_off >= gotp->br_startoff) &&
3239            (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
3240                return 0;
3241        }
3242
3243        /*
3244         * If the file offset is unaligned vs. the extent size
3245         * we need to align it.  This will be possible unless
3246         * the file was previously written with a kernel that didn't
3247         * perform this alignment, or if a truncate shot us in the
3248         * foot.
3249         */
3250        temp = do_mod(orig_off, extsz);
3251        if (temp) {
3252                align_alen += temp;
3253                align_off -= temp;
3254        }
3255
3256        /* Same adjustment for the end of the requested area. */
3257        temp = (align_alen % extsz);
3258        if (temp)
3259                align_alen += extsz - temp;
3260
3261        /*
3262         * For large extent hint sizes, the aligned extent might be larger than
3263         * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
3264         * the length back under MAXEXTLEN. The outer allocation loops handle
3265         * short allocation just fine, so it is safe to do this. We only want to
3266         * do it when we are forced to, though, because it means more allocation
3267         * operations are required.
3268         */
3269        while (align_alen > MAXEXTLEN)
3270                align_alen -= extsz;
3271        ASSERT(align_alen <= MAXEXTLEN);
3272
3273        /*
3274         * If the previous block overlaps with this proposed allocation
3275         * then move the start forward without adjusting the length.
3276         */
3277        if (prevp->br_startoff != NULLFILEOFF) {
3278                if (prevp->br_startblock == HOLESTARTBLOCK)
3279                        prevo = prevp->br_startoff;
3280                else
3281                        prevo = prevp->br_startoff + prevp->br_blockcount;
3282        } else
3283                prevo = 0;
3284        if (align_off != orig_off && align_off < prevo)
3285                align_off = prevo;
3286        /*
3287         * If the next block overlaps with this proposed allocation
3288         * then move the start back without adjusting the length,
3289         * but not before offset 0.
3290         * This may of course make the start overlap previous block,
3291         * and if we hit the offset 0 limit then the next block
3292         * can still overlap too.
3293         */
3294        if (!eof && gotp->br_startoff != NULLFILEOFF) {
3295                if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3296                    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3297                        nexto = gotp->br_startoff + gotp->br_blockcount;
3298                else
3299                        nexto = gotp->br_startoff;
3300        } else
3301                nexto = NULLFILEOFF;
3302        if (!eof &&
3303            align_off + align_alen != orig_end &&
3304            align_off + align_alen > nexto)
3305                align_off = nexto > align_alen ? nexto - align_alen : 0;
3306        /*
3307         * If we're now overlapping the next or previous extent that
3308         * means we can't fit an extsz piece in this hole.  Just move
3309         * the start forward to the first valid spot and set
3310         * the length so we hit the end.
3311         */
3312        if (align_off != orig_off && align_off < prevo)
3313                align_off = prevo;
3314        if (align_off + align_alen != orig_end &&
3315            align_off + align_alen > nexto &&
3316            nexto != NULLFILEOFF) {
3317                ASSERT(nexto > prevo);
3318                align_alen = nexto - align_off;
3319        }
3320
3321        /*
3322         * If realtime, and the result isn't a multiple of the realtime
3323         * extent size we need to remove blocks until it is.
3324         */
3325        if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
3326                /*
3327                 * We're not covering the original request, or
3328                 * we won't be able to once we fix the length.
3329                 */
3330                if (orig_off < align_off ||
3331                    orig_end > align_off + align_alen ||
3332                    align_alen - temp < orig_alen)
3333                        return -EINVAL;
3334                /*
3335                 * Try to fix it by moving the start up.
3336                 */
3337                if (align_off + temp <= orig_off) {
3338                        align_alen -= temp;
3339                        align_off += temp;
3340                }
3341                /*
3342                 * Try to fix it by moving the end in.
3343                 */
3344                else if (align_off + align_alen - temp >= orig_end)
3345                        align_alen -= temp;
3346                /*
3347                 * Set the start to the minimum then trim the length.
3348                 */
3349                else {
3350                        align_alen -= orig_off - align_off;
3351                        align_off = orig_off;
3352                        align_alen -= align_alen % mp->m_sb.sb_rextsize;
3353                }
3354                /*
3355                 * Result doesn't cover the request, fail it.
3356                 */
3357                if (orig_off < align_off || orig_end > align_off + align_alen)
3358                        return -EINVAL;
3359        } else {
3360                ASSERT(orig_off >= align_off);
3361                /* see MAXEXTLEN handling above */
3362                ASSERT(orig_end <= align_off + align_alen ||
3363                       align_alen + extsz > MAXEXTLEN);
3364        }
3365
3366#ifdef DEBUG
3367        if (!eof && gotp->br_startoff != NULLFILEOFF)
3368                ASSERT(align_off + align_alen <= gotp->br_startoff);
3369        if (prevp->br_startoff != NULLFILEOFF)
3370                ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3371#endif
3372
3373        *lenp = align_alen;
3374        *offp = align_off;
3375        return 0;
3376}
3377
3378#define XFS_ALLOC_GAP_UNITS     4
3379
3380void
3381xfs_bmap_adjacent(
3382        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3383{
3384        xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3385        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3386        xfs_mount_t     *mp;            /* mount point structure */
3387        int             nullfb;         /* true if ap->firstblock isn't set */
3388        int             rt;             /* true if inode is realtime */
3389
3390#define ISVALID(x,y)    \
3391        (rt ? \
3392                (x) < mp->m_sb.sb_rblocks : \
3393                XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3394                XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3395                XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3396
3397        mp = ap->ip->i_mount;
3398        nullfb = *ap->firstblock == NULLFSBLOCK;
3399        rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3400                xfs_alloc_is_userdata(ap->datatype);
3401        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3402        /*
3403         * If allocating at eof, and there's a previous real block,
3404         * try to use its last block as our starting point.
3405         */
3406        if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3407            !isnullstartblock(ap->prev.br_startblock) &&
3408            ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3409                    ap->prev.br_startblock)) {
3410                ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3411                /*
3412                 * Adjust for the gap between prevp and us.
3413                 */
3414                adjust = ap->offset -
3415                        (ap->prev.br_startoff + ap->prev.br_blockcount);
3416                if (adjust &&
3417                    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3418                        ap->blkno += adjust;
3419        }
3420        /*
3421         * If not at eof, then compare the two neighbor blocks.
3422         * Figure out whether either one gives us a good starting point,
3423         * and pick the better one.
3424         */
3425        else if (!ap->eof) {
3426                xfs_fsblock_t   gotbno;         /* right side block number */
3427                xfs_fsblock_t   gotdiff=0;      /* right side difference */
3428                xfs_fsblock_t   prevbno;        /* left side block number */
3429                xfs_fsblock_t   prevdiff=0;     /* left side difference */
3430
3431                /*
3432                 * If there's a previous (left) block, select a requested
3433                 * start block based on it.
3434                 */
3435                if (ap->prev.br_startoff != NULLFILEOFF &&
3436                    !isnullstartblock(ap->prev.br_startblock) &&
3437                    (prevbno = ap->prev.br_startblock +
3438                               ap->prev.br_blockcount) &&
3439                    ISVALID(prevbno, ap->prev.br_startblock)) {
3440                        /*
3441                         * Calculate gap to end of previous block.
3442                         */
3443                        adjust = prevdiff = ap->offset -
3444                                (ap->prev.br_startoff +
3445                                 ap->prev.br_blockcount);
3446                        /*
3447                         * Figure the startblock based on the previous block's
3448                         * end and the gap size.
3449                         * Heuristic!
3450                         * If the gap is large relative to the piece we're
3451                         * allocating, or using it gives us an invalid block
3452                         * number, then just use the end of the previous block.
3453                         */
3454                        if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3455                            ISVALID(prevbno + prevdiff,
3456                                    ap->prev.br_startblock))
3457                                prevbno += adjust;
3458                        else
3459                                prevdiff += adjust;
3460                        /*
3461                         * If the firstblock forbids it, can't use it,
3462                         * must use default.
3463                         */
3464                        if (!rt && !nullfb &&
3465                            XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3466                                prevbno = NULLFSBLOCK;
3467                }
3468                /*
3469                 * No previous block or can't follow it, just default.
3470                 */
3471                else
3472                        prevbno = NULLFSBLOCK;
3473                /*
3474                 * If there's a following (right) block, select a requested
3475                 * start block based on it.
3476                 */
3477                if (!isnullstartblock(ap->got.br_startblock)) {
3478                        /*
3479                         * Calculate gap to start of next block.
3480                         */
3481                        adjust = gotdiff = ap->got.br_startoff - ap->offset;
3482                        /*
3483                         * Figure the startblock based on the next block's
3484                         * start and the gap size.
3485                         */
3486                        gotbno = ap->got.br_startblock;
3487                        /*
3488                         * Heuristic!
3489                         * If the gap is large relative to the piece we're
3490                         * allocating, or using it gives us an invalid block
3491                         * number, then just use the start of the next block
3492                         * offset by our length.
3493                         */
3494                        if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3495                            ISVALID(gotbno - gotdiff, gotbno))
3496                                gotbno -= adjust;
3497                        else if (ISVALID(gotbno - ap->length, gotbno)) {
3498                                gotbno -= ap->length;
3499                                gotdiff += adjust - ap->length;
3500                        } else
3501                                gotdiff += adjust;
3502                        /*
3503                         * If the firstblock forbids it, can't use it,
3504                         * must use default.
3505                         */
3506                        if (!rt && !nullfb &&
3507                            XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3508                                gotbno = NULLFSBLOCK;
3509                }
3510                /*
3511                 * No next block, just default.
3512                 */
3513                else
3514                        gotbno = NULLFSBLOCK;
3515                /*
3516                 * If both valid, pick the better one, else the only good
3517                 * one, else ap->blkno is already set (to 0 or the inode block).
3518                 */
3519                if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3520                        ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3521                else if (prevbno != NULLFSBLOCK)
3522                        ap->blkno = prevbno;
3523                else if (gotbno != NULLFSBLOCK)
3524                        ap->blkno = gotbno;
3525        }
3526#undef ISVALID
3527}
3528
3529static int
3530xfs_bmap_longest_free_extent(
3531        struct xfs_trans        *tp,
3532        xfs_agnumber_t          ag,
3533        xfs_extlen_t            *blen,
3534        int                     *notinit)
3535{
3536        struct xfs_mount        *mp = tp->t_mountp;
3537        struct xfs_perag        *pag;
3538        xfs_extlen_t            longest;
3539        int                     error = 0;
3540
3541        pag = xfs_perag_get(mp, ag);
3542        if (!pag->pagf_init) {
3543                error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3544                if (error)
3545                        goto out;
3546
3547                if (!pag->pagf_init) {
3548                        *notinit = 1;
3549                        goto out;
3550                }
3551        }
3552
3553        longest = xfs_alloc_longest_free_extent(mp, pag,
3554                                xfs_alloc_min_freelist(mp, pag),
3555                                xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3556        if (*blen < longest)
3557                *blen = longest;
3558
3559out:
3560        xfs_perag_put(pag);
3561        return error;
3562}
3563
3564static void
3565xfs_bmap_select_minlen(
3566        struct xfs_bmalloca     *ap,
3567        struct xfs_alloc_arg    *args,
3568        xfs_extlen_t            *blen,
3569        int                     notinit)
3570{
3571        if (notinit || *blen < ap->minlen) {
3572                /*
3573                 * Since we did a BUF_TRYLOCK above, it is possible that
3574                 * there is space for this request.
3575                 */
3576                args->minlen = ap->minlen;
3577        } else if (*blen < args->maxlen) {
3578                /*
3579                 * If the best seen length is less than the request length,
3580                 * use the best as the minimum.
3581                 */
3582                args->minlen = *blen;
3583        } else {
3584                /*
3585                 * Otherwise we've seen an extent as big as maxlen, use that
3586                 * as the minimum.
3587                 */
3588                args->minlen = args->maxlen;
3589        }
3590}
3591
3592STATIC int
3593xfs_bmap_btalloc_nullfb(
3594        struct xfs_bmalloca     *ap,
3595        struct xfs_alloc_arg    *args,
3596        xfs_extlen_t            *blen)
3597{
3598        struct xfs_mount        *mp = ap->ip->i_mount;
3599        xfs_agnumber_t          ag, startag;
3600        int                     notinit = 0;
3601        int                     error;
3602
3603        args->type = XFS_ALLOCTYPE_START_BNO;
3604        args->total = ap->total;
3605
3606        startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3607        if (startag == NULLAGNUMBER)
3608                startag = ag = 0;
3609
3610        while (*blen < args->maxlen) {
3611                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3612                                                     &notinit);
3613                if (error)
3614                        return error;
3615
3616                if (++ag == mp->m_sb.sb_agcount)
3617                        ag = 0;
3618                if (ag == startag)
3619                        break;
3620        }
3621
3622        xfs_bmap_select_minlen(ap, args, blen, notinit);
3623        return 0;
3624}
3625
3626STATIC int
3627xfs_bmap_btalloc_filestreams(
3628        struct xfs_bmalloca     *ap,
3629        struct xfs_alloc_arg    *args,
3630        xfs_extlen_t            *blen)
3631{
3632        struct xfs_mount        *mp = ap->ip->i_mount;
3633        xfs_agnumber_t          ag;
3634        int                     notinit = 0;
3635        int                     error;
3636
3637        args->type = XFS_ALLOCTYPE_NEAR_BNO;
3638        args->total = ap->total;
3639
3640        ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3641        if (ag == NULLAGNUMBER)
3642                ag = 0;
3643
3644        error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3645        if (error)
3646                return error;
3647
3648        if (*blen < args->maxlen) {
3649                error = xfs_filestream_new_ag(ap, &ag);
3650                if (error)
3651                        return error;
3652
3653                error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3654                                                     &notinit);
3655                if (error)
3656                        return error;
3657
3658        }
3659
3660        xfs_bmap_select_minlen(ap, args, blen, notinit);
3661
3662        /*
3663         * Set the failure fallback case to look in the selected AG as stream
3664         * may have moved.
3665         */
3666        ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3667        return 0;
3668}
3669
3670STATIC int
3671xfs_bmap_btalloc(
3672        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3673{
3674        xfs_mount_t     *mp;            /* mount point structure */
3675        xfs_alloctype_t atype = 0;      /* type for allocation routines */
3676        xfs_extlen_t    align = 0;      /* minimum allocation alignment */
3677        xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3678        xfs_agnumber_t  ag;
3679        xfs_alloc_arg_t args;
3680        xfs_extlen_t    blen;
3681        xfs_extlen_t    nextminlen = 0;
3682        int             nullfb;         /* true if ap->firstblock isn't set */
3683        int             isaligned;
3684        int             tryagain;
3685        int             error;
3686        int             stripe_align;
3687
3688        ASSERT(ap->length);
3689
3690        mp = ap->ip->i_mount;
3691
3692        /* stripe alignment for allocation is determined by mount parameters */
3693        stripe_align = 0;
3694        if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3695                stripe_align = mp->m_swidth;
3696        else if (mp->m_dalign)
3697                stripe_align = mp->m_dalign;
3698
3699        if (ap->flags & XFS_BMAPI_COWFORK)
3700                align = xfs_get_cowextsz_hint(ap->ip);
3701        else if (xfs_alloc_is_userdata(ap->datatype))
3702                align = xfs_get_extsz_hint(ap->ip);
3703        if (unlikely(align)) {
3704                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3705                                                align, 0, ap->eof, 0, ap->conv,
3706                                                &ap->offset, &ap->length);
3707                ASSERT(!error);
3708                ASSERT(ap->length);
3709        }
3710
3711
3712        nullfb = *ap->firstblock == NULLFSBLOCK;
3713        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3714        if (nullfb) {
3715                if (xfs_alloc_is_userdata(ap->datatype) &&
3716                    xfs_inode_is_filestream(ap->ip)) {
3717                        ag = xfs_filestream_lookup_ag(ap->ip);
3718                        ag = (ag != NULLAGNUMBER) ? ag : 0;
3719                        ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3720                } else {
3721                        ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3722                }
3723        } else
3724                ap->blkno = *ap->firstblock;
3725
3726        xfs_bmap_adjacent(ap);
3727
3728        /*
3729         * If allowed, use ap->blkno; otherwise must use firstblock since
3730         * it's in the right allocation group.
3731         */
3732        if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3733                ;
3734        else
3735                ap->blkno = *ap->firstblock;
3736        /*
3737         * Normal allocation, done through xfs_alloc_vextent.
3738         */
3739        tryagain = isaligned = 0;
3740        memset(&args, 0, sizeof(args));
3741        args.tp = ap->tp;
3742        args.mp = mp;
3743        args.fsbno = ap->blkno;
3744        xfs_rmap_skip_owner_update(&args.oinfo);
3745
3746        /* Trim the allocation back to the maximum an AG can fit. */
3747        args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
3748        args.firstblock = *ap->firstblock;
3749        blen = 0;
3750        if (nullfb) {
3751                /*
3752                 * Search for an allocation group with a single extent large
3753                 * enough for the request.  If one isn't found, then adjust
3754                 * the minimum allocation size to the largest space found.
3755                 */
3756                if (xfs_alloc_is_userdata(ap->datatype) &&
3757                    xfs_inode_is_filestream(ap->ip))
3758                        error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3759                else
3760                        error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3761                if (error)
3762                        return error;
3763        } else if (ap->dfops->dop_low) {
3764                if (xfs_inode_is_filestream(ap->ip))
3765                        args.type = XFS_ALLOCTYPE_FIRST_AG;
3766                else
3767                        args.type = XFS_ALLOCTYPE_START_BNO;
3768                args.total = args.minlen = ap->minlen;
3769        } else {
3770                args.type = XFS_ALLOCTYPE_NEAR_BNO;
3771                args.total = ap->total;
3772                args.minlen = ap->minlen;
3773        }
3774        /* apply extent size hints if obtained earlier */
3775        if (unlikely(align)) {
3776                args.prod = align;
3777                if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3778                        args.mod = (xfs_extlen_t)(args.prod - args.mod);
3779        } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3780                args.prod = 1;
3781                args.mod = 0;
3782        } else {
3783                args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3784                if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
3785                        args.mod = (xfs_extlen_t)(args.prod - args.mod);
3786        }
3787        /*
3788         * If we are not low on available data blocks, and the
3789         * underlying logical volume manager is a stripe, and
3790         * the file offset is zero then try to allocate data
3791         * blocks on stripe unit boundary.
3792         * NOTE: ap->aeof is only set if the allocation length
3793         * is >= the stripe unit and the allocation offset is
3794         * at the end of file.
3795         */
3796        if (!ap->dfops->dop_low && ap->aeof) {
3797                if (!ap->offset) {
3798                        args.alignment = stripe_align;
3799                        atype = args.type;
3800                        isaligned = 1;
3801                        /*
3802                         * Adjust for alignment
3803                         */
3804                        if (blen > args.alignment && blen <= args.maxlen)
3805                                args.minlen = blen - args.alignment;
3806                        args.minalignslop = 0;
3807                } else {
3808                        /*
3809                         * First try an exact bno allocation.
3810                         * If it fails then do a near or start bno
3811                         * allocation with alignment turned on.
3812                         */
3813                        atype = args.type;
3814                        tryagain = 1;
3815                        args.type = XFS_ALLOCTYPE_THIS_BNO;
3816                        args.alignment = 1;
3817                        /*
3818                         * Compute the minlen+alignment for the
3819                         * next case.  Set slop so that the value
3820                         * of minlen+alignment+slop doesn't go up
3821                         * between the calls.
3822                         */
3823                        if (blen > stripe_align && blen <= args.maxlen)
3824                                nextminlen = blen - stripe_align;
3825                        else
3826                                nextminlen = args.minlen;
3827                        if (nextminlen + stripe_align > args.minlen + 1)
3828                                args.minalignslop =
3829                                        nextminlen + stripe_align -
3830                                        args.minlen - 1;
3831                        else
3832                                args.minalignslop = 0;
3833                }
3834        } else {
3835                args.alignment = 1;
3836                args.minalignslop = 0;
3837        }
3838        args.minleft = ap->minleft;
3839        args.wasdel = ap->wasdel;
3840        args.resv = XFS_AG_RESV_NONE;
3841        args.datatype = ap->datatype;
3842        if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
3843                args.ip = ap->ip;
3844
3845        error = xfs_alloc_vextent(&args);
3846        if (error)
3847                return error;
3848
3849        if (tryagain && args.fsbno == NULLFSBLOCK) {
3850                /*
3851                 * Exact allocation failed. Now try with alignment
3852                 * turned on.
3853                 */
3854                args.type = atype;
3855                args.fsbno = ap->blkno;
3856                args.alignment = stripe_align;
3857                args.minlen = nextminlen;
3858                args.minalignslop = 0;
3859                isaligned = 1;
3860                if ((error = xfs_alloc_vextent(&args)))
3861                        return error;
3862        }
3863        if (isaligned && args.fsbno == NULLFSBLOCK) {
3864                /*
3865                 * allocation failed, so turn off alignment and
3866                 * try again.
3867                 */
3868                args.type = atype;
3869                args.fsbno = ap->blkno;
3870                args.alignment = 0;
3871                if ((error = xfs_alloc_vextent(&args)))
3872                        return error;
3873        }
3874        if (args.fsbno == NULLFSBLOCK && nullfb &&
3875            args.minlen > ap->minlen) {
3876                args.minlen = ap->minlen;
3877                args.type = XFS_ALLOCTYPE_START_BNO;
3878                args.fsbno = ap->blkno;
3879                if ((error = xfs_alloc_vextent(&args)))
3880                        return error;
3881        }
3882        if (args.fsbno == NULLFSBLOCK && nullfb) {
3883                args.fsbno = 0;
3884                args.type = XFS_ALLOCTYPE_FIRST_AG;
3885                args.total = ap->minlen;
3886                args.minleft = 0;
3887                if ((error = xfs_alloc_vextent(&args)))
3888                        return error;
3889                ap->dfops->dop_low = true;
3890        }
3891        if (args.fsbno != NULLFSBLOCK) {
3892                /*
3893                 * check the allocation happened at the same or higher AG than
3894                 * the first block that was allocated.
3895                 */
3896                ASSERT(*ap->firstblock == NULLFSBLOCK ||
3897                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
3898                       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
3899                       (ap->dfops->dop_low &&
3900                        XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
3901                        XFS_FSB_TO_AGNO(mp, args.fsbno)));
3902
3903                ap->blkno = args.fsbno;
3904                if (*ap->firstblock == NULLFSBLOCK)
3905                        *ap->firstblock = args.fsbno;
3906                ASSERT(nullfb || fb_agno == args.agno ||
3907                       (ap->dfops->dop_low && fb_agno < args.agno));
3908                ap->length = args.len;
3909                if (!(ap->flags & XFS_BMAPI_COWFORK))
3910                        ap->ip->i_d.di_nblocks += args.len;
3911                xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3912                if (ap->wasdel)
3913                        ap->ip->i_delayed_blks -= args.len;
3914                /*
3915                 * Adjust the disk quota also. This was reserved
3916                 * earlier.
3917                 */
3918                xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3919                        ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3920                                        XFS_TRANS_DQ_BCOUNT,
3921                        (long) args.len);
3922        } else {
3923                ap->blkno = NULLFSBLOCK;
3924                ap->length = 0;
3925        }
3926        return 0;
3927}
3928
3929/*
3930 * For a remap operation, just "allocate" an extent at the address that the
3931 * caller passed in, and ensure that the AGFL is the right size.  The caller
3932 * will then map the "allocated" extent into the file somewhere.
3933 */
3934STATIC int
3935xfs_bmap_remap_alloc(
3936        struct xfs_bmalloca     *ap)
3937{
3938        struct xfs_trans        *tp = ap->tp;
3939        struct xfs_mount        *mp = tp->t_mountp;
3940        xfs_agblock_t           bno;
3941        struct xfs_alloc_arg    args;
3942        int                     error;
3943
3944        /*
3945         * validate that the block number is legal - the enables us to detect
3946         * and handle a silent filesystem corruption rather than crashing.
3947         */
3948        memset(&args, 0, sizeof(struct xfs_alloc_arg));
3949        args.tp = ap->tp;
3950        args.mp = ap->tp->t_mountp;
3951        bno = *ap->firstblock;
3952        args.agno = XFS_FSB_TO_AGNO(mp, bno);
3953        args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
3954        if (args.agno >= mp->m_sb.sb_agcount ||
3955            args.agbno >= mp->m_sb.sb_agblocks)
3956                return -EFSCORRUPTED;
3957
3958        /* "Allocate" the extent from the range we passed in. */
3959        trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
3960        ap->blkno = bno;
3961        ap->ip->i_d.di_nblocks += ap->length;
3962        xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3963
3964        /* Fix the freelist, like a real allocator does. */
3965        args.datatype = ap->datatype;
3966        args.pag = xfs_perag_get(args.mp, args.agno);
3967        ASSERT(args.pag);
3968
3969        /*
3970         * The freelist fixing code will decline the allocation if
3971         * the size and shape of the free space doesn't allow for
3972         * allocating the extent and updating all the metadata that
3973         * happens during an allocation.  We're remapping, not
3974         * allocating, so skip that check by pretending to be freeing.
3975         */
3976        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3977        xfs_perag_put(args.pag);
3978        if (error)
3979                trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
3980        return error;
3981}
3982
3983/*
3984 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3985 * It figures out where to ask the underlying allocator to put the new extent.
3986 */
3987STATIC int
3988xfs_bmap_alloc(
3989        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3990{
3991        if (ap->flags & XFS_BMAPI_REMAP)
3992                return xfs_bmap_remap_alloc(ap);
3993        if (XFS_IS_REALTIME_INODE(ap->ip) &&
3994            xfs_alloc_is_userdata(ap->datatype))
3995                return xfs_bmap_rtalloc(ap);
3996        return xfs_bmap_btalloc(ap);
3997}
3998
3999/* Trim extent to fit a logical block range. */
4000void
4001xfs_trim_extent(
4002        struct xfs_bmbt_irec    *irec,
4003        xfs_fileoff_t           bno,
4004        xfs_filblks_t           len)
4005{
4006        xfs_fileoff_t           distance;
4007        xfs_fileoff_t           end = bno + len;
4008
4009        if (irec->br_startoff + irec->br_blockcount <= bno ||
4010            irec->br_startoff >= end) {
4011                irec->br_blockcount = 0;
4012                return;
4013        }
4014
4015        if (irec->br_startoff < bno) {
4016                distance = bno - irec->br_startoff;
4017                if (isnullstartblock(irec->br_startblock))
4018                        irec->br_startblock = DELAYSTARTBLOCK;
4019                if (irec->br_startblock != DELAYSTARTBLOCK &&
4020                    irec->br_startblock != HOLESTARTBLOCK)
4021                        irec->br_startblock += distance;
4022                irec->br_startoff += distance;
4023                irec->br_blockcount -= distance;
4024        }
4025
4026        if (end < irec->br_startoff + irec->br_blockcount) {
4027                distance = irec->br_startoff + irec->br_blockcount - end;
4028                irec->br_blockcount -= distance;
4029        }
4030}
4031
4032/*
4033 * Trim the returned map to the required bounds
4034 */
4035STATIC void
4036xfs_bmapi_trim_map(
4037        struct xfs_bmbt_irec    *mval,
4038        struct xfs_bmbt_irec    *got,
4039        xfs_fileoff_t           *bno,
4040        xfs_filblks_t           len,
4041        xfs_fileoff_t           obno,
4042        xfs_fileoff_t           end,
4043        int                     n,
4044        int                     flags)
4045{
4046        if ((flags & XFS_BMAPI_ENTIRE) ||
4047            got->br_startoff + got->br_blockcount <= obno) {
4048                *mval = *got;
4049                if (isnullstartblock(got->br_startblock))
4050                        mval->br_startblock = DELAYSTARTBLOCK;
4051                return;
4052        }
4053
4054        if (obno > *bno)
4055                *bno = obno;
4056        ASSERT((*bno >= obno) || (n == 0));
4057        ASSERT(*bno < end);
4058        mval->br_startoff = *bno;
4059        if (isnullstartblock(got->br_startblock))
4060                mval->br_startblock = DELAYSTARTBLOCK;
4061        else
4062                mval->br_startblock = got->br_startblock +
4063                                        (*bno - got->br_startoff);
4064        /*
4065         * Return the minimum of what we got and what we asked for for
4066         * the length.  We can use the len variable here because it is
4067         * modified below and we could have been there before coming
4068         * here if the first part of the allocation didn't overlap what
4069         * was asked for.
4070         */
4071        mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
4072                        got->br_blockcount - (*bno - got->br_startoff));
4073        mval->br_state = got->br_state;
4074        ASSERT(mval->br_blockcount <= len);
4075        return;
4076}
4077
4078/*
4079 * Update and validate the extent map to return
4080 */
4081STATIC void
4082xfs_bmapi_update_map(
4083        struct xfs_bmbt_irec    **map,
4084        xfs_fileoff_t           *bno,
4085        xfs_filblks_t           *len,
4086        xfs_fileoff_t           obno,
4087        xfs_fileoff_t           end,
4088        int                     *n,
4089        int                     flags)
4090{
4091        xfs_bmbt_irec_t *mval = *map;
4092
4093        ASSERT((flags & XFS_BMAPI_ENTIRE) ||
4094               ((mval->br_startoff + mval->br_blockcount) <= end));
4095        ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
4096               (mval->br_startoff < obno));
4097
4098        *bno = mval->br_startoff + mval->br_blockcount;
4099        *len = end - *bno;
4100        if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
4101                /* update previous map with new information */
4102                ASSERT(mval->br_startblock == mval[-1].br_startblock);
4103                ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
4104                ASSERT(mval->br_state == mval[-1].br_state);
4105                mval[-1].br_blockcount = mval->br_blockcount;
4106                mval[-1].br_state = mval->br_state;
4107        } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
4108                   mval[-1].br_startblock != DELAYSTARTBLOCK &&
4109                   mval[-1].br_startblock != HOLESTARTBLOCK &&
4110                   mval->br_startblock == mval[-1].br_startblock +
4111                                          mval[-1].br_blockcount &&
4112                   ((flags & XFS_BMAPI_IGSTATE) ||
4113                        mval[-1].br_state == mval->br_state)) {
4114                ASSERT(mval->br_startoff ==
4115                       mval[-1].br_startoff + mval[-1].br_blockcount);
4116                mval[-1].br_blockcount += mval->br_blockcount;
4117        } else if (*n > 0 &&
4118                   mval->br_startblock == DELAYSTARTBLOCK &&
4119                   mval[-1].br_startblock == DELAYSTARTBLOCK &&
4120                   mval->br_startoff ==
4121                   mval[-1].br_startoff + mval[-1].br_blockcount) {
4122                mval[-1].br_blockcount += mval->br_blockcount;
4123                mval[-1].br_state = mval->br_state;
4124        } else if (!((*n == 0) &&
4125                     ((mval->br_startoff + mval->br_blockcount) <=
4126                      obno))) {
4127                mval++;
4128                (*n)++;
4129        }
4130        *map = mval;
4131}
4132
4133/*
4134 * Map file blocks to filesystem blocks without allocation.
4135 */
4136int
4137xfs_bmapi_read(
4138        struct xfs_inode        *ip,
4139        xfs_fileoff_t           bno,
4140        xfs_filblks_t           len,
4141        struct xfs_bmbt_irec    *mval,
4142        int                     *nmap,
4143        int                     flags)
4144{
4145        struct xfs_mount        *mp = ip->i_mount;
4146        struct xfs_ifork        *ifp;
4147        struct xfs_bmbt_irec    got;
4148        struct xfs_bmbt_irec    prev;
4149        xfs_fileoff_t           obno;
4150        xfs_fileoff_t           end;
4151        xfs_extnum_t            lastx;
4152        int                     error;
4153        int                     eof;
4154        int                     n = 0;
4155        int                     whichfork = xfs_bmapi_whichfork(flags);
4156
4157        ASSERT(*nmap >= 1);
4158        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
4159                           XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
4160        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
4161
4162        if (unlikely(XFS_TEST_ERROR(
4163            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4164             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4165             mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4166                XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
4167                return -EFSCORRUPTED;
4168        }
4169
4170        if (XFS_FORCED_SHUTDOWN(mp))
4171                return -EIO;
4172
4173        XFS_STATS_INC(mp, xs_blk_mapr);
4174
4175        ifp = XFS_IFORK_PTR(ip, whichfork);
4176
4177        /* No CoW fork?  Return a hole. */
4178        if (whichfork == XFS_COW_FORK && !ifp) {
4179                mval->br_startoff = bno;
4180                mval->br_startblock = HOLESTARTBLOCK;
4181                mval->br_blockcount = len;
4182                mval->br_state = XFS_EXT_NORM;
4183                *nmap = 1;
4184                return 0;
4185        }
4186
4187        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4188                error = xfs_iread_extents(NULL, ip, whichfork);
4189                if (error)
4190                        return error;
4191        }
4192
4193        xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
4194        end = bno + len;
4195        obno = bno;
4196
4197        while (bno < end && n < *nmap) {
4198                /* Reading past eof, act as though there's a hole up to end. */
4199                if (eof)
4200                        got.br_startoff = end;
4201                if (got.br_startoff > bno) {
4202                        /* Reading in a hole.  */
4203                        mval->br_startoff = bno;
4204                        mval->br_startblock = HOLESTARTBLOCK;
4205                        mval->br_blockcount =
4206                                XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4207                        mval->br_state = XFS_EXT_NORM;
4208                        bno += mval->br_blockcount;
4209                        len -= mval->br_blockcount;
4210                        mval++;
4211                        n++;
4212                        continue;
4213                }
4214
4215                /* set up the extent map to return. */
4216                xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4217                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4218
4219                /* If we're done, stop now. */
4220                if (bno >= end || n >= *nmap)
4221                        break;
4222
4223                /* Else go on to the next record. */
4224                if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4225                        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4226                else
4227                        eof = 1;
4228        }
4229        *nmap = n;
4230        return 0;
4231}
4232
4233int
4234xfs_bmapi_reserve_delalloc(
4235        struct xfs_inode        *ip,
4236        int                     whichfork,
4237        xfs_fileoff_t           aoff,
4238        xfs_filblks_t           len,
4239        struct xfs_bmbt_irec    *got,
4240        struct xfs_bmbt_irec    *prev,
4241        xfs_extnum_t            *lastx,
4242        int                     eof)
4243{
4244        struct xfs_mount        *mp = ip->i_mount;
4245        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4246        xfs_extlen_t            alen;
4247        xfs_extlen_t            indlen;
4248        char                    rt = XFS_IS_REALTIME_INODE(ip);
4249        xfs_extlen_t            extsz;
4250        int                     error;
4251
4252        alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
4253        if (!eof)
4254                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4255
4256        /* Figure out the extent size, adjust alen */
4257        if (whichfork == XFS_COW_FORK)
4258                extsz = xfs_get_cowextsz_hint(ip);
4259        else
4260                extsz = xfs_get_extsz_hint(ip);
4261        if (extsz) {
4262                error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
4263                                               1, 0, &aoff, &alen);
4264                ASSERT(!error);
4265        }
4266
4267        if (rt)
4268                extsz = alen / mp->m_sb.sb_rextsize;
4269
4270        /*
4271         * Make a transaction-less quota reservation for delayed allocation
4272         * blocks.  This number gets adjusted later.  We return if we haven't
4273         * allocated blocks already inside this loop.
4274         */
4275        error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4276                        rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4277        if (error)
4278                return error;
4279
4280        /*
4281         * Split changing sb for alen and indlen since they could be coming
4282         * from different places.
4283         */
4284        indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4285        ASSERT(indlen > 0);
4286
4287        if (rt) {
4288                error = xfs_mod_frextents(mp, -((int64_t)extsz));
4289        } else {
4290                error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4291        }
4292
4293        if (error)
4294                goto out_unreserve_quota;
4295
4296        error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4297        if (error)
4298                goto out_unreserve_blocks;
4299
4300
4301        ip->i_delayed_blks += alen;
4302
4303        got->br_startoff = aoff;
4304        got->br_startblock = nullstartblock(indlen);
4305        got->br_blockcount = alen;
4306        got->br_state = XFS_EXT_NORM;
4307        xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
4308
4309        /*
4310         * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
4311         * might have merged it into one of the neighbouring ones.
4312         */
4313        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
4314
4315        ASSERT(got->br_startoff <= aoff);
4316        ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
4317        ASSERT(isnullstartblock(got->br_startblock));
4318        ASSERT(got->br_state == XFS_EXT_NORM);
4319        return 0;
4320
4321out_unreserve_blocks:
4322        if (rt)
4323                xfs_mod_frextents(mp, extsz);
4324        else
4325                xfs_mod_fdblocks(mp, alen, false);
4326out_unreserve_quota:
4327        if (XFS_IS_QUOTA_ON(mp))
4328                xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
4329                                XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4330        return error;
4331}
4332
4333static int
4334xfs_bmapi_allocate(
4335        struct xfs_bmalloca     *bma)
4336{
4337        struct xfs_mount        *mp = bma->ip->i_mount;
4338        int                     whichfork = xfs_bmapi_whichfork(bma->flags);
4339        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4340        int                     tmp_logflags = 0;
4341        int                     error;
4342
4343        ASSERT(bma->length > 0);
4344
4345        /*
4346         * For the wasdelay case, we could also just allocate the stuff asked
4347         * for in this bmap call but that wouldn't be as good.
4348         */
4349        if (bma->wasdel) {
4350                bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4351                bma->offset = bma->got.br_startoff;
4352                if (bma->idx != NULLEXTNUM && bma->idx) {
4353                        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
4354                                         &bma->prev);
4355                }
4356        } else {
4357                bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4358                if (!bma->eof)
4359                        bma->length = XFS_FILBLKS_MIN(bma->length,
4360                                        bma->got.br_startoff - bma->offset);
4361        }
4362
4363        /*
4364         * Set the data type being allocated. For the data fork, the first data
4365         * in the file is treated differently to all other allocations. For the
4366         * attribute fork, we only need to ensure the allocated range is not on
4367         * the busy list.
4368         */
4369        if (!(bma->flags & XFS_BMAPI_METADATA)) {
4370                bma->datatype = XFS_ALLOC_NOBUSY;
4371                if (whichfork == XFS_DATA_FORK) {
4372                        if (bma->offset == 0)
4373                                bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4374                        else
4375                                bma->datatype |= XFS_ALLOC_USERDATA;
4376                }
4377                if (bma->flags & XFS_BMAPI_ZERO)
4378                        bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
4379        }
4380
4381        bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4382
4383        /*
4384         * Only want to do the alignment at the eof if it is userdata and
4385         * allocation length is larger than a stripe unit.
4386         */
4387        if (mp->m_dalign && bma->length >= mp->m_dalign &&
4388            !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4389                error = xfs_bmap_isaeof(bma, whichfork);
4390                if (error)
4391                        return error;
4392        }
4393
4394        error = xfs_bmap_alloc(bma);
4395        if (error)
4396                return error;
4397
4398        if (bma->dfops->dop_low)
4399                bma->minleft = 0;
4400        if (bma->cur)
4401                bma->cur->bc_private.b.firstblock = *bma->firstblock;
4402        if (bma->blkno == NULLFSBLOCK)
4403                return 0;
4404        if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4405                bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4406                bma->cur->bc_private.b.firstblock = *bma->firstblock;
4407                bma->cur->bc_private.b.dfops = bma->dfops;
4408        }
4409        /*
4410         * Bump the number of extents we've allocated
4411         * in this call.
4412         */
4413        bma->nallocs++;
4414
4415        if (bma->cur)
4416                bma->cur->bc_private.b.flags =
4417                        bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4418
4419        bma->got.br_startoff = bma->offset;
4420        bma->got.br_startblock = bma->blkno;
4421        bma->got.br_blockcount = bma->length;
4422        bma->got.br_state = XFS_EXT_NORM;
4423
4424        /*
4425         * A wasdelay extent has been initialized, so shouldn't be flagged
4426         * as unwritten.
4427         */
4428        if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4429            xfs_sb_version_hasextflgbit(&mp->m_sb))
4430                bma->got.br_state = XFS_EXT_UNWRITTEN;
4431
4432        if (bma->wasdel)
4433                error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4434        else
4435                error = xfs_bmap_add_extent_hole_real(bma, whichfork);
4436
4437        bma->logflags |= tmp_logflags;
4438        if (error)
4439                return error;
4440
4441        /*
4442         * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4443         * or xfs_bmap_add_extent_hole_real might have merged it into one of
4444         * the neighbouring ones.
4445         */
4446        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4447
4448        ASSERT(bma->got.br_startoff <= bma->offset);
4449        ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4450               bma->offset + bma->length);
4451        ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4452               bma->got.br_state == XFS_EXT_UNWRITTEN);
4453        return 0;
4454}
4455
4456STATIC int
4457xfs_bmapi_convert_unwritten(
4458        struct xfs_bmalloca     *bma,
4459        struct xfs_bmbt_irec    *mval,
4460        xfs_filblks_t           len,
4461        int                     flags)
4462{
4463        int                     whichfork = xfs_bmapi_whichfork(flags);
4464        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4465        int                     tmp_logflags = 0;
4466        int                     error;
4467
4468        /* check if we need to do unwritten->real conversion */
4469        if (mval->br_state == XFS_EXT_UNWRITTEN &&
4470            (flags & XFS_BMAPI_PREALLOC))
4471                return 0;
4472
4473        /* check if we need to do real->unwritten conversion */
4474        if (mval->br_state == XFS_EXT_NORM &&
4475            (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4476                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4477                return 0;
4478
4479        ASSERT(whichfork != XFS_COW_FORK);
4480
4481        /*
4482         * Modify (by adding) the state flag, if writing.
4483         */
4484        ASSERT(mval->br_blockcount <= len);
4485        if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4486                bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4487                                        bma->ip, whichfork);
4488                bma->cur->bc_private.b.firstblock = *bma->firstblock;
4489                bma->cur->bc_private.b.dfops = bma->dfops;
4490        }
4491        mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4492                                ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4493
4494        /*
4495         * Before insertion into the bmbt, zero the range being converted
4496         * if required.
4497         */
4498        if (flags & XFS_BMAPI_ZERO) {
4499                error = xfs_zero_extent(bma->ip, mval->br_startblock,
4500                                        mval->br_blockcount);
4501                if (error)
4502                        return error;
4503        }
4504
4505        error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4506                        &bma->cur, mval, bma->firstblock, bma->dfops,
4507                        &tmp_logflags);
4508        /*
4509         * Log the inode core unconditionally in the unwritten extent conversion
4510         * path because the conversion might not have done so (e.g., if the
4511         * extent count hasn't changed). We need to make sure the inode is dirty
4512         * in the transaction for the sake of fsync(), even if nothing has
4513         * changed, because fsync() will not force the log for this transaction
4514         * unless it sees the inode pinned.
4515         */
4516        bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4517        if (error)
4518                return error;
4519
4520        /*
4521         * Update our extent pointer, given that
4522         * xfs_bmap_add_extent_unwritten_real might have merged it into one
4523         * of the neighbouring ones.
4524         */
4525        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4526
4527        /*
4528         * We may have combined previously unwritten space with written space,
4529         * so generate another request.
4530         */
4531        if (mval->br_blockcount < len)
4532                return -EAGAIN;
4533        return 0;
4534}
4535
4536/*
4537 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4538 * extent state if necessary.  Detailed behaviour is controlled by the flags
4539 * parameter.  Only allocates blocks from a single allocation group, to avoid
4540 * locking problems.
4541 *
4542 * The returned value in "firstblock" from the first call in a transaction
4543 * must be remembered and presented to subsequent calls in "firstblock".
4544 * An upper bound for the number of blocks to be allocated is supplied to
4545 * the first call in "total"; if no allocation group has that many free
4546 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
     *
     * On success *nmap is set to the number of mappings stored in mval.
     * Returns 0 on success, -EFSCORRUPTED if the fork is in neither extents
     * nor btree format, -EIO after a forced shutdown, or the error returned
     * by a helper.  Accumulated inode log flags are logged even on error.
4547 */
4548int
4549xfs_bmapi_write(
4550        struct xfs_trans        *tp,            /* transaction pointer */
4551        struct xfs_inode        *ip,            /* incore inode */
4552        xfs_fileoff_t           bno,            /* starting file offs. mapped */
4553        xfs_filblks_t           len,            /* length to map in file */
4554        int                     flags,          /* XFS_BMAPI_... */
4555        xfs_fsblock_t           *firstblock,    /* first allocated block
4556                                                   controls a.g. for allocs */
4557        xfs_extlen_t            total,          /* total blocks needed */
4558        struct xfs_bmbt_irec    *mval,          /* output: map values */
4559        int                     *nmap,          /* i/o: mval size/count */
4560        struct xfs_defer_ops    *dfops)         /* i/o: list extents to free */
4561{
4562        struct xfs_mount        *mp = ip->i_mount;
4563        struct xfs_ifork        *ifp;
4564        struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
4565        xfs_fileoff_t           end;            /* end of mapped file region */
4566        int                     eof;            /* after the end of extents */
4567        int                     error;          /* error return */
4568        int                     n;              /* current extent index */
4569        xfs_fileoff_t           obno;           /* old block number (offset) */
4570        int                     whichfork;      /* data or attr fork */
4571        char                    inhole;         /* current location is hole in file */
4572        char                    wasdelay;       /* old extent was delayed */
4573
4574#ifdef DEBUG
4575        xfs_fileoff_t           orig_bno;       /* original block number value */
4576        int                     orig_flags;     /* original flags arg value */
4577        xfs_filblks_t           orig_len;       /* original value of len arg */
4578        struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4579        int                     orig_nmap;      /* original value of *nmap */
4580
4581        orig_bno = bno;
4582        orig_len = len;
4583        orig_flags = flags;
4584        orig_mval = mval;
4585        orig_nmap = *nmap;
4586#endif
4587        whichfork = xfs_bmapi_whichfork(flags);
4588
4589        ASSERT(*nmap >= 1);
4590        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4591        ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4592        ASSERT(tp != NULL);
4593        ASSERT(len > 0);
4594        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4595        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4596        ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
4597        ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
4598        ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
4599        ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
4600        ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
4601
4602        /* zeroing is currently only for data extents, not metadata */
4603        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4604                        (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4605        /*
4606         * we can allocate unwritten extents or pre-zero allocated blocks,
4607         * but it makes no sense to do both at once. This would result in
4608         * zeroing the unwritten extent twice, but it still being an
4609         * unwritten extent....
4610         */
4611        ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4612                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4613
4614        if (unlikely(XFS_TEST_ERROR(
4615            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4616             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4617             mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4618                XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4619                return -EFSCORRUPTED;
4620        }
4621
4622        if (XFS_FORCED_SHUTDOWN(mp))
4623                return -EIO;
4624
4625        ifp = XFS_IFORK_PTR(ip, whichfork);
4626
4627        XFS_STATS_INC(mp, xs_blk_mapw);
4628
            /*
             * While firstblock is still NULLFSBLOCK, set minleft to one more
             * than the bmbt root level for a btree-format fork, or to one
             * otherwise.  Once firstblock has been set, minleft is zero.
             */
4629        if (*firstblock == NULLFSBLOCK) {
4630                if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4631                        bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4632                else
4633                        bma.minleft = 1;
4634        } else {
4635                bma.minleft = 0;
4636        }
4637
            /* Read the extent list into memory if that has not been done yet. */
4638        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4639                error = xfs_iread_extents(tp, ip, whichfork);
4640                if (error)
4641                        goto error0;
4642        }
4643
4644        xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
4645                                &bma.prev);
4646        n = 0;
4647        end = bno + len;
4648        obno = bno;
4649
4650        bma.tp = tp;
4651        bma.ip = ip;
4652        bma.total = total;
4653        bma.datatype = 0;
4654        bma.dfops = dfops;
4655        bma.firstblock = firstblock;
4656
4657        while (bno < end && n < *nmap) {
4658                inhole = eof || bma.got.br_startoff > bno;
4659                wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
4660
4661                /*
4662                 * Make sure we only reflink into a hole.
4663                 */
4664                if (flags & XFS_BMAPI_REMAP)
4665                        ASSERT(inhole);
4666                if (flags & XFS_BMAPI_COWFORK)
4667                        ASSERT(!inhole);
4668
4669                /*
4670                 * First, deal with the hole before the allocated space
4671                 * that we found, if any.
4672                 */
4673                if (inhole || wasdelay) {
4674                        bma.eof = eof;
4675                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4676                        bma.wasdel = wasdelay;
4677                        bma.offset = bno;
4678                        bma.flags = flags;
4679
4680                        /*
4681                         * There's a 32/64 bit type mismatch between the
4682                         * allocation length request (which can be 64 bits in
4683                         * length) and the bma length request, which is
4684                         * xfs_extlen_t and therefore 32 bits. Hence we have to
4685                         * check for 32-bit overflows and handle them here.
4686                         */
4687                        if (len > (xfs_filblks_t)MAXEXTLEN)
4688                                bma.length = MAXEXTLEN;
4689                        else
4690                                bma.length = len;
4691
4692                        ASSERT(len > 0);
4693                        ASSERT(bma.length > 0);
4694                        error = xfs_bmapi_allocate(&bma);
4695                        if (error)
4696                                goto error0;
                            /*
                             * xfs_bmapi_allocate() returned success without
                             * allocating anything: stop here and return the
                             * mappings collected so far.
                             */
4697                        if (bma.blkno == NULLFSBLOCK)
4698                                break;
4699
4700                        /*
4701                         * If this is a CoW allocation, record the data in
4702                         * the refcount btree for orphan recovery.
4703                         */
4704                        if (whichfork == XFS_COW_FORK) {
4705                                error = xfs_refcount_alloc_cow_extent(mp, dfops,
4706                                                bma.blkno, bma.length);
4707                                if (error)
4708                                        goto error0;
4709                        }
4710                }
4711
4712                /* Deal with the allocated space we found.  */
4713                xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4714                                                        end, n, flags);
4715
4716                /* Execute unwritten extent conversion if necessary */
4717                error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
                    /*
                     * -EAGAIN means the converted mapping was shorter than len;
                     * go round the loop again without advancing bno or n.
                     */
4718                if (error == -EAGAIN)
4719                        continue;
4720                if (error)
4721                        goto error0;
4722
4723                /* update the extent map to return */
4724                xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4725
4726                /*
4727                 * If we're done, stop now.  Stop when we've allocated
4728                 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4729                 * the transaction may get too big.
4730                 */
4731                if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4732                        break;
4733
4734                /* Else go on to the next record. */
4735                bma.prev = bma.got;
4736                if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
4737                        xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
4738                                         &bma.got);
4739                } else
4740                        eof = 1;
4741        }
4742        *nmap = n;
4743
4744        /*
4745         * Transform from btree to extents, give it cur.
4746         */
4747        if (xfs_bmap_wants_extents(ip, whichfork)) {
4748                int             tmp_logflags = 0;
4749
4750                ASSERT(bma.cur);
4751                error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
4752                        &tmp_logflags, whichfork);
4753                bma.logflags |= tmp_logflags;
4754                if (error)
4755                        goto error0;
4756        }
4757
4758        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4759               XFS_IFORK_NEXTENTS(ip, whichfork) >
4760                XFS_IFORK_MAXEXT(ip, whichfork));
4761        error = 0;
4762error0:
4763        /*
4764         * Log everything.  Do this after conversion, there's no point in
4765         * logging the extent records if we've converted to btree format.
4766         */
4767        if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
4768            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4769                bma.logflags &= ~xfs_ilog_fext(whichfork);
4770        else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
4771                 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
4772                bma.logflags &= ~xfs_ilog_fbroot(whichfork);
4773        /*
4774         * Log whatever the flags say, even if error.  Otherwise we might miss
4775         * detecting a case where the data is changed, there's an error,
4776         * and it's not logged so we don't shutdown when we should.
4777         */
4778        if (bma.logflags)
4779                xfs_trans_log_inode(tp, ip, bma.logflags);
4780
            /*
             * On success hand the cursor's firstblock back to the caller, then
             * release the cursor, flagging it as errored if we failed.
             */
4781        if (bma.cur) {
4782                if (!error) {
4783                        ASSERT(*firstblock == NULLFSBLOCK ||
4784                               XFS_FSB_TO_AGNO(mp, *firstblock) ==
4785                               XFS_FSB_TO_AGNO(mp,
4786                                       bma.cur->bc_private.b.firstblock) ||
4787                               (dfops->dop_low &&
4788                                XFS_FSB_TO_AGNO(mp, *firstblock) <
4789                                XFS_FSB_TO_AGNO(mp,
4790                                        bma.cur->bc_private.b.firstblock)));
4791                        *firstblock = bma.cur->bc_private.b.firstblock;
4792                }
4793                xfs_btree_del_cursor(bma.cur,
4794                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4795        }
            /*
             * NOTE(review): the orig_* variables only exist under DEBUG, so
             * xfs_bmap_validate_ret() presumably compiles away to nothing in
             * non-DEBUG builds -- confirm against its definition.
             */
4796        if (!error)
4797                xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4798                        orig_nmap, *nmap);
4799        return error;
4800}
4801
4802/*
4803 * When a delalloc extent is split (e.g., due to a hole punch), the original
4804 * indlen reservation must be shared across the two new extents that are left
4805 * behind.
4806 *
4807 * Given the original reservation and the worst case indlen for the two new
4808 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4809 * reservation fairly across the two new extents. If necessary, steal available
4810 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4811 * ores == 1). The number of stolen blocks is returned. The availability and
4812 * subsequent accounting of stolen blocks is the responsibility of the caller.
4813 */
4814static xfs_filblks_t
4815xfs_bmap_split_indlen(
4816        xfs_filblks_t                   ores,           /* original res. */
4817        xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4818        xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
4819        xfs_filblks_t                   avail)          /* stealable blocks */
4820{
4821        xfs_filblks_t                   len1 = *indlen1;
4822        xfs_filblks_t                   len2 = *indlen2;
4823        xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4824        xfs_filblks_t                   stolen = 0;
4825
4826        /*
4827         * Steal as many blocks as we can to try and satisfy the worst case
4828         * indlen for both new extents.
4829         */
4830        while (nres > ores && avail) {
4831                nres--;
4832                avail--;
4833                stolen++;
4834        }
4835
4836        /*
4837         * The only blocks available are those reserved for the original
4838         * extent and what we can steal from the extent being removed.
4839         * If this still isn't enough to satisfy the combined
4840         * requirements for the two new extents, skim blocks off of each
4841         * of the new reservations until they match what is available.
4842         */
4843        while (nres > ores) {
4844                if (len1) {
4845                        len1--;
4846                        nres--;
4847                }
4848                if (nres == ores)
4849                        break;
4850                if (len2) {
4851                        len2--;
4852                        nres--;
4853                }
4854        }
4855
4856        *indlen1 = len1;
4857        *indlen2 = len2;
4858
4859        return stolen;
4860}
4861
4862int
4863xfs_bmap_del_extent_delay(
4864        struct xfs_inode        *ip,
4865        int                     whichfork,
4866        xfs_extnum_t            *idx,
4867        struct xfs_bmbt_irec    *got,
4868        struct xfs_bmbt_irec    *del)
4869{
4870        struct xfs_mount        *mp = ip->i_mount;
4871        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
4872        struct xfs_bmbt_irec    new;
4873        int64_t                 da_old, da_new, da_diff = 0;
4874        xfs_fileoff_t           del_endoff, got_endoff;
4875        xfs_filblks_t           got_indlen, new_indlen, stolen;
4876        int                     error = 0, state = 0;
4877        bool                    isrt;
4878
4879        XFS_STATS_INC(mp, xs_del_exlist);
4880
4881        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4882        del_endoff = del->br_startoff + del->br_blockcount;
4883        got_endoff = got->br_startoff + got->br_blockcount;
4884        da_old = startblockval(got->br_startblock);
4885        da_new = 0;
4886
4887        ASSERT(*idx >= 0);
4888        ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
4889        ASSERT(del->br_blockcount > 0);
4890        ASSERT(got->br_startoff <= del->br_startoff);
4891        ASSERT(got_endoff >= del_endoff);
4892
4893        if (isrt) {
4894                int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4895
4896                do_div(rtexts, mp->m_sb.sb_rextsize);
4897                xfs_mod_frextents(mp, rtexts);
4898        }
4899
4900        /*
4901         * Update the inode delalloc counter now and wait to update the
4902         * sb counters as we might have to borrow some blocks for the
4903         * indirect block accounting.
4904         */
4905        xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
4906                        isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4907        ip->i_delayed_blks -= del->br_blockcount;
4908
4909        if (whichfork == XFS_COW_FORK)
4910                state |= BMAP_COWFORK;
4911
4912        if (got->br_startoff == del->br_startoff)
4913                state |= BMAP_LEFT_CONTIG;
4914        if (got_endoff == del_endoff)
4915                state |= BMAP_RIGHT_CONTIG;
4916
4917        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
4918        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
4919                /*
4920                 * Matches the whole extent.  Delete the entry.
4921                 */
4922                xfs_iext_remove(ip, *idx, 1, state);
4923                --*idx;
4924                break;
4925        case BMAP_LEFT_CONTIG:
4926                /*
4927                 * Deleting the first part of the extent.
4928                 */
4929                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4930                got->br_startoff = del_endoff;
4931                got->br_blockcount -= del->br_blockcount;
4932                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4933                                got->br_blockcount), da_old);
4934                got->br_startblock = nullstartblock((int)da_new);
4935                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4936                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4937                break;
4938        case BMAP_RIGHT_CONTIG:
4939                /*
4940                 * Deleting the last part of the extent.
4941                 */
4942                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4943                got->br_blockcount = got->br_blockcount - del->br_blockcount;
4944                da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4945                                got->br_blockcount), da_old);
4946                got->br_startblock = nullstartblock((int)da_new);
4947                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4948                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4949                break;
4950        case 0:
4951                /*
4952                 * Deleting the middle of the extent.
4953                 *
4954                 * Distribute the original indlen reservation across the two new
4955                 * extents.  Steal blocks from the deleted extent if necessary.
4956                 * Stealing blocks simply fudges the fdblocks accounting below.
4957                 * Warn if either of the new indlen reservations is zero as this
4958                 * can lead to delalloc problems.
4959                 */
4960                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4961
4962                got->br_blockcount = del->br_startoff - got->br_startoff;
4963                got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4964
4965                new.br_blockcount = got_endoff - del_endoff;
4966                new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4967
4968                WARN_ON_ONCE(!got_indlen || !new_indlen);
4969                stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4970                                                       del->br_blockcount);
4971
4972                got->br_startblock = nullstartblock((int)got_indlen);
4973                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4974                trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
4975
4976                new.br_startoff = del_endoff;
4977                new.br_state = got->br_state;
4978                new.br_startblock = nullstartblock((int)new_indlen);
4979
4980                ++*idx;
4981                xfs_iext_insert(ip, *idx, 1, &new, state);
4982
4983                da_new = got_indlen + new_indlen - stolen;
4984                del->br_blockcount -= stolen;
4985                break;
4986        }
4987
4988        ASSERT(da_old >= da_new);
4989        da_diff = da_old - da_new;
4990        if (!isrt)
4991                da_diff += del->br_blockcount;
4992        if (da_diff)
4993                xfs_mod_fdblocks(mp, da_diff, false);
4994        return error;
4995}
4996
4997void
4998xfs_bmap_del_extent_cow(
4999        struct xfs_inode        *ip,
5000        xfs_extnum_t            *idx,
5001        struct xfs_bmbt_irec    *got,
5002        struct xfs_bmbt_irec    *del)
5003{
5004        struct xfs_mount        *mp = ip->i_mount;
5005        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
5006        struct xfs_bmbt_irec    new;
5007        xfs_fileoff_t           del_endoff, got_endoff;
5008        int                     state = BMAP_COWFORK;
5009
5010        XFS_STATS_INC(mp, xs_del_exlist);
5011
5012        del_endoff = del->br_startoff + del->br_blockcount;
5013        got_endoff = got->br_startoff + got->br_blockcount;
5014
5015        ASSERT(*idx >= 0);
5016        ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
5017        ASSERT(del->br_blockcount > 0);
5018        ASSERT(got->br_startoff <= del->br_startoff);
5019        ASSERT(got_endoff >= del_endoff);
5020        ASSERT(!isnullstartblock(got->br_startblock));
5021
5022        if (got->br_startoff == del->br_startoff)
5023                state |= BMAP_LEFT_CONTIG;
5024        if (got_endoff == del_endoff)
5025                state |= BMAP_RIGHT_CONTIG;
5026
5027        switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
5028        case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
5029                /*
5030                 * Matches the whole extent.  Delete the entry.
5031                 */
5032                xfs_iext_remove(ip, *idx, 1, state);
5033                --*idx;
5034                break;
5035        case BMAP_LEFT_CONTIG:
5036                /*
5037                 * Deleting the first part of the extent.
5038                 */
5039                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5040                got->br_startoff = del_endoff;
5041                got->br_blockcount -= del->br_blockcount;
5042                got->br_startblock = del->br_startblock + del->br_blockcount;
5043                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5044                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5045                break;
5046        case BMAP_RIGHT_CONTIG:
5047                /*
5048                 * Deleting the last part of the extent.
5049                 */
5050                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5051                got->br_blockcount -= del->br_blockcount;
5052                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5053                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5054                break;
5055        case 0:
5056                /*
5057                 * Deleting the middle of the extent.
5058                 */
5059                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5060                got->br_blockcount = del->br_startoff - got->br_startoff;
5061                xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5062                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5063
5064                new.br_startoff = del_endoff;
5065                new.br_blockcount = got_endoff - del_endoff;
5066                new.br_state = got->br_state;
5067                new.br_startblock = del->br_startblock + del->br_blockcount;
5068
5069                ++*idx;
5070                xfs_iext_insert(ip, *idx, 1, &new, state);
5071                break;
5072        }
5073}
5074
5075/*
5076 * Called by xfs_bmapi to update file extent records and the btree
5077 * after removing space (or undoing a delayed allocation).
5078 */
5079STATIC int                              /* error */
5080xfs_bmap_del_extent(
5081        xfs_inode_t             *ip,    /* incore inode pointer */
5082        xfs_trans_t             *tp,    /* current transaction pointer */
5083        xfs_extnum_t            *idx,   /* extent number to update/delete */
5084        struct xfs_defer_ops    *dfops, /* list of extents to be freed */
5085        xfs_btree_cur_t         *cur,   /* if null, not a btree */
5086        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
5087        int                     *logflagsp, /* inode logging flags */
5088        int                     whichfork, /* data or attr fork */
5089        int                     bflags) /* bmapi flags */
5090{
5091        xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
5092        xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
5093        xfs_fsblock_t           del_endblock=0; /* first block past del */
5094        xfs_fileoff_t           del_endoff;     /* first offset past del */
5095        int                     delay;  /* current block is delayed allocated */
5096        int                     do_fx;  /* free extent at end of routine */
5097        xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
5098        int                     error;  /* error return value */
5099        int                     flags;  /* inode logging flags */
5100        xfs_bmbt_irec_t         got;    /* current extent entry */
5101        xfs_fileoff_t           got_endoff;     /* first offset past got */
5102        int                     i;      /* temp state */
5103        xfs_ifork_t             *ifp;   /* inode fork pointer */
5104        xfs_mount_t             *mp;    /* mount structure */
5105        xfs_filblks_t           nblks;  /* quota/sb block count */
5106        xfs_bmbt_irec_t         new;    /* new record to be inserted */
5107        /* REFERENCED */
5108        uint                    qfield; /* quota field to update */
5109        xfs_filblks_t           temp;   /* for indirect length calculations */
5110        xfs_filblks_t           temp2;  /* for indirect length calculations */
5111        int                     state = 0;
5112
5113        mp = ip->i_mount;
5114        XFS_STATS_INC(mp, xs_del_exlist);
5115
5116        if (whichfork == XFS_ATTR_FORK)
5117                state |= BMAP_ATTRFORK;
5118        else if (whichfork == XFS_COW_FORK)
5119                state |= BMAP_COWFORK;
5120
5121        ifp = XFS_IFORK_PTR(ip, whichfork);
5122        ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
5123                (uint)sizeof(xfs_bmbt_rec_t)));
5124        ASSERT(del->br_blockcount > 0);
5125        ep = xfs_iext_get_ext(ifp, *idx);
5126        xfs_bmbt_get_all(ep, &got);
5127        ASSERT(got.br_startoff <= del->br_startoff);
5128        del_endoff = del->br_startoff + del->br_blockcount;
5129        got_endoff = got.br_startoff + got.br_blockcount;
5130        ASSERT(got_endoff >= del_endoff);
5131        delay = isnullstartblock(got.br_startblock);
5132        ASSERT(isnullstartblock(del->br_startblock) == delay);
5133        flags = 0;
5134        qfield = 0;
5135        error = 0;
5136        /*
5137         * If deleting a real allocation, must free up the disk space.
5138         */
5139        if (!delay) {
5140                flags = XFS_ILOG_CORE;
5141                /*
5142                 * Realtime allocation.  Free it and record di_nblocks update.
5143                 */
5144                if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5145                        xfs_fsblock_t   bno;
5146                        xfs_filblks_t   len;
5147
5148                        ASSERT(do_mod(del->br_blockcount,
5149                                      mp->m_sb.sb_rextsize) == 0);
5150                        ASSERT(do_mod(del->br_startblock,
5151                                      mp->m_sb.sb_rextsize) == 0);
5152                        bno = del->br_startblock;
5153                        len = del->br_blockcount;
5154                        do_div(bno, mp->m_sb.sb_rextsize);
5155                        do_div(len, mp->m_sb.sb_rextsize);
5156                        error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5157                        if (error)
5158                                goto done;
5159                        do_fx = 0;
5160                        nblks = len * mp->m_sb.sb_rextsize;
5161                        qfield = XFS_TRANS_DQ_RTBCOUNT;
5162                }
5163                /*
5164                 * Ordinary allocation.
5165                 */
5166                else {
5167                        do_fx = 1;
5168                        nblks = del->br_blockcount;
5169                        qfield = XFS_TRANS_DQ_BCOUNT;
5170                }
5171                /*
5172                 * Set up del_endblock and cur for later.
5173                 */
5174                del_endblock = del->br_startblock + del->br_blockcount;
5175                if (cur) {
5176                        if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5177                                        got.br_startblock, got.br_blockcount,
5178                                        &i)))
5179                                goto done;
5180                        XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5181                }
5182                da_old = da_new = 0;
5183        } else {
5184                da_old = startblockval(got.br_startblock);
5185                da_new = 0;
5186                nblks = 0;
5187                do_fx = 0;
5188        }
5189
5190        /*
5191         * Set flag value to use in switch statement.
5192         * Left-contig is 2, right-contig is 1.
5193         */
5194        switch (((got.br_startoff == del->br_startoff) << 1) |
5195                (got_endoff == del_endoff)) {
5196        case 3:
5197                /*
5198                 * Matches the whole extent.  Delete the entry.
5199                 */
5200                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5201                xfs_iext_remove(ip, *idx, 1,
5202                                whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
5203                --*idx;
5204                if (delay)
5205                        break;
5206
5207                XFS_IFORK_NEXT_SET(ip, whichfork,
5208                        XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5209                flags |= XFS_ILOG_CORE;
5210                if (!cur) {
5211                        flags |= xfs_ilog_fext(whichfork);
5212                        break;
5213                }
5214                if ((error = xfs_btree_delete(cur, &i)))
5215                        goto done;
5216                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5217                break;
5218
5219        case 2:
5220                /*
5221                 * Deleting the first part of the extent.
5222                 */
5223                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5224                xfs_bmbt_set_startoff(ep, del_endoff);
5225                temp = got.br_blockcount - del->br_blockcount;
5226                xfs_bmbt_set_blockcount(ep, temp);
5227                if (delay) {
5228                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5229                                da_old);
5230                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5231                        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5232                        da_new = temp;
5233                        break;
5234                }
5235                xfs_bmbt_set_startblock(ep, del_endblock);
5236                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5237                if (!cur) {
5238                        flags |= xfs_ilog_fext(whichfork);
5239                        break;
5240                }
5241                if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
5242                                got.br_blockcount - del->br_blockcount,
5243                                got.br_state)))
5244                        goto done;
5245                break;
5246
5247        case 1:
5248                /*
5249                 * Deleting the last part of the extent.
5250                 */
5251                temp = got.br_blockcount - del->br_blockcount;
5252                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5253                xfs_bmbt_set_blockcount(ep, temp);
5254                if (delay) {
5255                        temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5256                                da_old);
5257                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5258                        trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5259                        da_new = temp;
5260                        break;
5261                }
5262                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5263                if (!cur) {
5264                        flags |= xfs_ilog_fext(whichfork);
5265                        break;
5266                }
5267                if ((error = xfs_bmbt_update(cur, got.br_startoff,
5268                                got.br_startblock,
5269                                got.br_blockcount - del->br_blockcount,
5270                                got.br_state)))
5271                        goto done;
5272                break;
5273
5274        case 0:
5275                /*
5276                 * Deleting the middle of the extent.
5277                 */
5278                temp = del->br_startoff - got.br_startoff;
5279                trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5280                xfs_bmbt_set_blockcount(ep, temp);
5281                new.br_startoff = del_endoff;
5282                temp2 = got_endoff - del_endoff;
5283                new.br_blockcount = temp2;
5284                new.br_state = got.br_state;
5285                if (!delay) {
5286                        new.br_startblock = del_endblock;
5287                        flags |= XFS_ILOG_CORE;
5288                        if (cur) {
5289                                if ((error = xfs_bmbt_update(cur,
5290                                                got.br_startoff,
5291                                                got.br_startblock, temp,
5292                                                got.br_state)))
5293                                        goto done;
5294                                if ((error = xfs_btree_increment(cur, 0, &i)))
5295                                        goto done;
5296                                cur->bc_rec.b = new;
5297                                error = xfs_btree_insert(cur, &i);
5298                                if (error && error != -ENOSPC)
5299                                        goto done;
5300                                /*
5301                                 * If get no-space back from btree insert,
5302                                 * it tried a split, and we have a zero
5303                                 * block reservation.
5304                                 * Fix up our state and return the error.
5305                                 */
5306                                if (error == -ENOSPC) {
5307                                        /*
5308                                         * Reset the cursor, don't trust
5309                                         * it after any insert operation.
5310                                         */
5311                                        if ((error = xfs_bmbt_lookup_eq(cur,
5312                                                        got.br_startoff,
5313                                                        got.br_startblock,
5314                                                        temp, &i)))
5315                                                goto done;
5316                                        XFS_WANT_CORRUPTED_GOTO(mp,
5317                                                                i == 1, done);
5318                                        /*
5319                                         * Update the btree record back
5320                                         * to the original value.
5321                                         */
5322                                        if ((error = xfs_bmbt_update(cur,
5323                                                        got.br_startoff,
5324                                                        got.br_startblock,
5325                                                        got.br_blockcount,
5326                                                        got.br_state)))
5327                                                goto done;
5328                                        /*
5329                                         * Reset the extent record back
5330                                         * to the original value.
5331                                         */
5332                                        xfs_bmbt_set_blockcount(ep,
5333                                                got.br_blockcount);
5334                                        flags = 0;
5335                                        error = -ENOSPC;
5336                                        goto done;
5337                                }
5338                                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5339                        } else
5340                                flags |= xfs_ilog_fext(whichfork);
5341                        XFS_IFORK_NEXT_SET(ip, whichfork,
5342                                XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5343                } else {
5344                        xfs_filblks_t   stolen;
5345                        ASSERT(whichfork == XFS_DATA_FORK);
5346
5347                        /*
5348                         * Distribute the original indlen reservation across the
5349                         * two new extents. Steal blocks from the deleted extent
5350                         * if necessary. Stealing blocks simply fudges the
5351                         * fdblocks accounting in xfs_bunmapi().
5352                         */
5353                        temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
5354                        temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
5355                        stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
5356                                                       del->br_blockcount);
5357                        da_new = temp + temp2 - stolen;
5358                        del->br_blockcount -= stolen;
5359
5360                        /*
5361                         * Set the reservation for each extent. Warn if either
5362                         * is zero as this can lead to delalloc problems.
5363                         */
5364                        WARN_ON_ONCE(!temp || !temp2);
5365                        xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5366                        new.br_startblock = nullstartblock((int)temp2);
5367                }
5368                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5369                xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5370                ++*idx;
5371                break;
5372        }
5373
5374        /* remove reverse mapping */
5375        if (!delay) {
5376                error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5377                if (error)
5378                        goto done;
5379        }
5380
5381        /*
5382         * If we need to, add to list of extents to delete.
5383         */
5384        if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5385                if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5386                        error = xfs_refcount_decrease_extent(mp, dfops, del);
5387                        if (error)
5388                                goto done;
5389                } else
5390                        xfs_bmap_add_free(mp, dfops, del->br_startblock,
5391                                        del->br_blockcount, NULL);
5392        }
5393
5394        /*
5395         * Adjust inode # blocks in the file.
5396         */
5397        if (nblks)
5398                ip->i_d.di_nblocks -= nblks;
5399        /*
5400         * Adjust quota data.
5401         */
5402        if (qfield && !(bflags & XFS_BMAPI_REMAP))
5403                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5404
5405        /*
5406         * Account for change in delayed indirect blocks.
5407         * Nothing to do for disk quota accounting here.
5408         */
5409        ASSERT(da_old >= da_new);
5410        if (da_old > da_new)
5411                xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5412done:
5413        *logflagsp = flags;
5414        return error;
5415}
5416
5417/*
5418 * Unmap (remove) blocks from a file.
5419 * If nexts is nonzero then the number of extents to remove is limited to
5420 * that value.  If not all extents in the block range can be removed then
5421 * *done is set.
5422 */
5423int                                             /* error */
5424__xfs_bunmapi(
5425        xfs_trans_t             *tp,            /* transaction pointer */
5426        struct xfs_inode        *ip,            /* incore inode */
5427        xfs_fileoff_t           bno,            /* starting offset to unmap */
5428        xfs_filblks_t           *rlen,          /* i/o: amount remaining */
5429        int                     flags,          /* misc flags */
5430        xfs_extnum_t            nexts,          /* number of extents max */
5431        xfs_fsblock_t           *firstblock,    /* first allocated block
5432                                                   controls a.g. for allocs */
5433        struct xfs_defer_ops    *dfops)         /* i/o: deferred updates */
5434{
5435        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
5436        xfs_bmbt_irec_t         del;            /* extent being deleted */
5437        int                     eof;            /* is deleting at eof */
5438        xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
5439        int                     error;          /* error return value */
5440        xfs_extnum_t            extno;          /* extent number in list */
5441        xfs_bmbt_irec_t         got;            /* current extent record */
5442        xfs_ifork_t             *ifp;           /* inode fork pointer */
5443        int                     isrt;           /* freeing in rt area */
5444        xfs_extnum_t            lastx;          /* last extent index used */
5445        int                     logflags;       /* transaction logging flags */
5446        xfs_extlen_t            mod;            /* rt extent offset */
5447        xfs_mount_t             *mp;            /* mount structure */
5448        xfs_extnum_t            nextents;       /* number of file extents */
5449        xfs_bmbt_irec_t         prev;           /* previous extent record */
5450        xfs_fileoff_t           start;          /* first file offset deleted */
5451        int                     tmp_logflags;   /* partial logging flags */
5452        int                     wasdel;         /* was a delayed alloc extent */
5453        int                     whichfork;      /* data or attribute fork */
5454        xfs_fsblock_t           sum;
5455        xfs_filblks_t           len = *rlen;    /* length to unmap in file */
5456
5457        trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5458
5459        whichfork = xfs_bmapi_whichfork(flags);
5460        ASSERT(whichfork != XFS_COW_FORK);
5461        ifp = XFS_IFORK_PTR(ip, whichfork);
5462        if (unlikely(
5463            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5464            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5465                XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5466                                 ip->i_mount);
5467                return -EFSCORRUPTED;
5468        }
5469        mp = ip->i_mount;
5470        if (XFS_FORCED_SHUTDOWN(mp))
5471                return -EIO;
5472
5473        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5474        ASSERT(len > 0);
5475        ASSERT(nexts >= 0);
5476
5477        if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5478            (error = xfs_iread_extents(tp, ip, whichfork)))
5479                return error;
5480        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5481        if (nextents == 0) {
5482                *rlen = 0;
5483                return 0;
5484        }
5485        XFS_STATS_INC(mp, xs_blk_unmap);
5486        isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5487        start = bno;
5488        bno = start + len - 1;
5489        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5490                &prev);
5491
5492        /*
5493         * Check to see if the given block number is past the end of the
5494         * file, back up to the last block if so...
5495         */
5496        if (eof) {
5497                ep = xfs_iext_get_ext(ifp, --lastx);
5498                xfs_bmbt_get_all(ep, &got);
5499                bno = got.br_startoff + got.br_blockcount - 1;
5500        }
5501        logflags = 0;
5502        if (ifp->if_flags & XFS_IFBROOT) {
5503                ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5504                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5505                cur->bc_private.b.firstblock = *firstblock;
5506                cur->bc_private.b.dfops = dfops;
5507                cur->bc_private.b.flags = 0;
5508        } else
5509                cur = NULL;
5510
5511        if (isrt) {
5512                /*
5513                 * Synchronize by locking the bitmap inode.
5514                 */
5515                xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5516                xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5517                xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5518                xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5519        }
5520
5521        extno = 0;
5522        while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5523               (nexts == 0 || extno < nexts)) {
5524                /*
5525                 * Is the found extent after a hole in which bno lives?
5526                 * Just back up to the previous extent, if so.
5527                 */
5528                if (got.br_startoff > bno) {
5529                        if (--lastx < 0)
5530                                break;
5531                        ep = xfs_iext_get_ext(ifp, lastx);
5532                        xfs_bmbt_get_all(ep, &got);
5533                }
5534                /*
5535                 * Is the last block of this extent before the range
5536                 * we're supposed to delete?  If so, we're done.
5537                 */
5538                bno = XFS_FILEOFF_MIN(bno,
5539                        got.br_startoff + got.br_blockcount - 1);
5540                if (bno < start)
5541                        break;
5542                /*
5543                 * Then deal with the (possibly delayed) allocated space
5544                 * we found.
5545                 */
5546                ASSERT(ep != NULL);
5547                del = got;
5548                wasdel = isnullstartblock(del.br_startblock);
5549                if (got.br_startoff < start) {
5550                        del.br_startoff = start;
5551                        del.br_blockcount -= start - got.br_startoff;
5552                        if (!wasdel)
5553                                del.br_startblock += start - got.br_startoff;
5554                }
5555                if (del.br_startoff + del.br_blockcount > bno + 1)
5556                        del.br_blockcount = bno + 1 - del.br_startoff;
5557                sum = del.br_startblock + del.br_blockcount;
5558                if (isrt &&
5559                    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5560                        /*
5561                         * Realtime extent not lined up at the end.
5562                         * The extent could have been split into written
5563                         * and unwritten pieces, or we could just be
5564                         * unmapping part of it.  But we can't really
5565                         * get rid of part of a realtime extent.
5566                         */
5567                        if (del.br_state == XFS_EXT_UNWRITTEN ||
5568                            !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5569                                /*
5570                                 * This piece is unwritten, or we're not
5571                                 * using unwritten extents.  Skip over it.
5572                                 */
5573                                ASSERT(bno >= mod);
5574                                bno -= mod > del.br_blockcount ?
5575                                        del.br_blockcount : mod;
5576                                if (bno < got.br_startoff) {
5577                                        if (--lastx >= 0)
5578                                                xfs_bmbt_get_all(xfs_iext_get_ext(
5579                                                        ifp, lastx), &got);
5580                                }
5581                                continue;
5582                        }
5583                        /*
5584                         * It's written, turn it unwritten.
5585                         * This is better than zeroing it.
5586                         */
5587                        ASSERT(del.br_state == XFS_EXT_NORM);
5588                        ASSERT(tp->t_blk_res > 0);
5589                        /*
5590                         * If this spans a realtime extent boundary,
5591                         * chop it back to the start of the one we end at.
5592                         */
5593                        if (del.br_blockcount > mod) {
5594                                del.br_startoff += del.br_blockcount - mod;
5595                                del.br_startblock += del.br_blockcount - mod;
5596                                del.br_blockcount = mod;
5597                        }
5598                        del.br_state = XFS_EXT_UNWRITTEN;
5599                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5600                                        &lastx, &cur, &del, firstblock, dfops,
5601                                        &logflags);
5602                        if (error)
5603                                goto error0;
5604                        goto nodelete;
5605                }
5606                if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5607                        /*
5608                         * Realtime extent is lined up at the end but not
5609                         * at the front.  We'll get rid of full extents if
5610                         * we can.
5611                         */
5612                        mod = mp->m_sb.sb_rextsize - mod;
5613                        if (del.br_blockcount > mod) {
5614                                del.br_blockcount -= mod;
5615                                del.br_startoff += mod;
5616                                del.br_startblock += mod;
5617                        } else if ((del.br_startoff == start &&
5618                                    (del.br_state == XFS_EXT_UNWRITTEN ||
5619                                     tp->t_blk_res == 0)) ||
5620                                   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5621                                /*
5622                                 * Can't make it unwritten.  There isn't
5623                                 * a full extent here so just skip it.
5624                                 */
5625                                ASSERT(bno >= del.br_blockcount);
5626                                bno -= del.br_blockcount;
5627                                if (got.br_startoff > bno) {
5628                                        if (--lastx >= 0) {
5629                                                ep = xfs_iext_get_ext(ifp,
5630                                                                      lastx);
5631                                                xfs_bmbt_get_all(ep, &got);
5632                                        }
5633                                }
5634                                continue;
5635                        } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5636                                /*
5637                                 * This one is already unwritten.
5638                                 * It must have a written left neighbor.
5639                                 * Unwrite the killed part of that one and
5640                                 * try again.
5641                                 */
5642                                ASSERT(lastx > 0);
5643                                xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5644                                                lastx - 1), &prev);
5645                                ASSERT(prev.br_state == XFS_EXT_NORM);
5646                                ASSERT(!isnullstartblock(prev.br_startblock));
5647                                ASSERT(del.br_startblock ==
5648                                       prev.br_startblock + prev.br_blockcount);
5649                                if (prev.br_startoff < start) {
5650                                        mod = start - prev.br_startoff;
5651                                        prev.br_blockcount -= mod;
5652                                        prev.br_startblock += mod;
5653                                        prev.br_startoff = start;
5654                                }
5655                                prev.br_state = XFS_EXT_UNWRITTEN;
5656                                lastx--;
5657                                error = xfs_bmap_add_extent_unwritten_real(tp,
5658                                                ip, &lastx, &cur, &prev,
5659                                                firstblock, dfops, &logflags);
5660                                if (error)
5661                                        goto error0;
5662                                goto nodelete;
5663                        } else {
5664                                ASSERT(del.br_state == XFS_EXT_NORM);
5665                                del.br_state = XFS_EXT_UNWRITTEN;
5666                                error = xfs_bmap_add_extent_unwritten_real(tp,
5667                                                ip, &lastx, &cur, &del,
5668                                                firstblock, dfops, &logflags);
5669                                if (error)
5670                                        goto error0;
5671                                goto nodelete;
5672                        }
5673                }
5674
5675                /*
5676                 * If it's the case where the directory code is running
5677                 * with no block reservation, and the deleted block is in
5678                 * the middle of its extent, and the resulting insert
5679                 * of an extent would cause transformation to btree format,
5680                 * then reject it.  The calling code will then swap
5681                 * blocks around instead.
5682                 * We have to do this now, rather than waiting for the
5683                 * conversion to btree format, since the transaction
5684                 * will be dirty.
5685                 */
5686                if (!wasdel && tp->t_blk_res == 0 &&
5687                    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5688                    XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5689                        XFS_IFORK_MAXEXT(ip, whichfork) &&
5690                    del.br_startoff > got.br_startoff &&
5691                    del.br_startoff + del.br_blockcount <
5692                    got.br_startoff + got.br_blockcount) {
5693                        error = -ENOSPC;
5694                        goto error0;
5695                }
5696
5697                /*
5698                 * Unreserve quota and update realtime free space, if
5699                 * appropriate. If delayed allocation, update the inode delalloc
5700                 * counter now and wait to update the sb counters as
5701                 * xfs_bmap_del_extent() might need to borrow some blocks.
5702                 */
5703                if (wasdel) {
5704                        ASSERT(startblockval(del.br_startblock) > 0);
5705                        if (isrt) {
5706                                xfs_filblks_t rtexts;
5707
5708                                rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5709                                do_div(rtexts, mp->m_sb.sb_rextsize);
5710                                xfs_mod_frextents(mp, (int64_t)rtexts);
5711                                (void)xfs_trans_reserve_quota_nblks(NULL,
5712                                        ip, -((long)del.br_blockcount), 0,
5713                                        XFS_QMOPT_RES_RTBLKS);
5714                        } else {
5715                                (void)xfs_trans_reserve_quota_nblks(NULL,
5716                                        ip, -((long)del.br_blockcount), 0,
5717                                        XFS_QMOPT_RES_REGBLKS);
5718                        }
5719                        ip->i_delayed_blks -= del.br_blockcount;
5720                        if (cur)
5721                                cur->bc_private.b.flags |=
5722                                        XFS_BTCUR_BPRV_WASDEL;
5723                } else if (cur)
5724                        cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5725
5726                error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
5727                                &tmp_logflags, whichfork, flags);
5728                logflags |= tmp_logflags;
5729                if (error)
5730                        goto error0;
5731
5732                if (!isrt && wasdel)
5733                        xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
5734
5735                bno = del.br_startoff - 1;
5736nodelete:
5737                /*
5738                 * If not done go on to the next (previous) record.
5739                 */
5740                if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5741                        if (lastx >= 0) {
5742                                ep = xfs_iext_get_ext(ifp, lastx);
5743                                if (xfs_bmbt_get_startoff(ep) > bno) {
5744                                        if (--lastx >= 0)
5745                                                ep = xfs_iext_get_ext(ifp,
5746                                                                      lastx);
5747                                }
5748                                xfs_bmbt_get_all(ep, &got);
5749                        }
5750                        extno++;
5751                }
5752        }
5753        if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0)
5754                *rlen = 0;
5755        else
5756                *rlen = bno - start + 1;
5757
5758        /*
5759         * Convert to a btree if necessary.
5760         */
5761        if (xfs_bmap_needs_btree(ip, whichfork)) {
5762                ASSERT(cur == NULL);
5763                error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5764                        &cur, 0, &tmp_logflags, whichfork);
5765                logflags |= tmp_logflags;
5766                if (error)
5767                        goto error0;
5768        }
5769        /*
5770         * transform from btree to extents, give it cur
5771         */
5772        else if (xfs_bmap_wants_extents(ip, whichfork)) {
5773                ASSERT(cur != NULL);
5774                error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5775                        whichfork);
5776                logflags |= tmp_logflags;
5777                if (error)
5778                        goto error0;
5779        }
5780        /*
5781         * transform from extents to local?
5782         */
5783        error = 0;
5784error0:
5785        /*
5786         * Log everything.  Do this after conversion, there's no point in
5787         * logging the extent records if we've converted to btree format.
5788         */
5789        if ((logflags & xfs_ilog_fext(whichfork)) &&
5790            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5791                logflags &= ~xfs_ilog_fext(whichfork);
5792        else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5793                 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5794                logflags &= ~xfs_ilog_fbroot(whichfork);
5795        /*
5796         * Log inode even in the error case, if the transaction
5797         * is dirty we'll need to shut down the filesystem.
5798         */
5799        if (logflags)
5800                xfs_trans_log_inode(tp, ip, logflags);
5801        if (cur) {
5802                if (!error) {
5803                        *firstblock = cur->bc_private.b.firstblock;
5804                        cur->bc_private.b.allocated = 0;
5805                }
5806                xfs_btree_del_cursor(cur,
5807                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5808        }
5809        return error;
5810}
5811
5812/* Unmap a range of a file. */
5813int
5814xfs_bunmapi(
5815        xfs_trans_t             *tp,
5816        struct xfs_inode        *ip,
5817        xfs_fileoff_t           bno,
5818        xfs_filblks_t           len,
5819        int                     flags,
5820        xfs_extnum_t            nexts,
5821        xfs_fsblock_t           *firstblock,
5822        struct xfs_defer_ops    *dfops,
5823        int                     *done)
5824{
5825        int                     error;
5826
5827        error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
5828                        dfops);
5829        *done = (len == 0);
5830        return error;
5831}
5832
5833/*
5834 * Determine whether an extent shift can be accomplished by a merge with the
5835 * extent that precedes the target hole of the shift.
5836 */
5837STATIC bool
5838xfs_bmse_can_merge(
5839        struct xfs_bmbt_irec    *left,  /* preceding extent */
5840        struct xfs_bmbt_irec    *got,   /* current extent to shift */
5841        xfs_fileoff_t           shift)  /* shift fsb */
5842{
5843        xfs_fileoff_t           startoff;
5844
5845        startoff = got->br_startoff - shift;
5846
5847        /*
5848         * The extent, once shifted, must be adjacent in-file and on-disk with
5849         * the preceding extent.
5850         */
5851        if ((left->br_startoff + left->br_blockcount != startoff) ||
5852            (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5853            (left->br_state != got->br_state) ||
5854            (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5855                return false;
5856
5857        return true;
5858}
5859
5860/*
5861 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5862 * hole in the file. If an extent shift would result in the extent being fully
5863 * adjacent to the extent that currently precedes the hole, we can merge with
5864 * the preceding extent rather than do the shift.
5865 *
5866 * This function assumes the caller has verified a shift-by-merge is possible
5867 * with the provided extents via xfs_bmse_can_merge().
5868 */
5869STATIC int
5870xfs_bmse_merge(
5871        struct xfs_inode                *ip,
5872        int                             whichfork,
5873        xfs_fileoff_t                   shift,          /* shift fsb */
5874        int                             current_ext,    /* idx of gotp */
5875        struct xfs_bmbt_rec_host        *gotp,          /* extent to shift */
5876        struct xfs_bmbt_rec_host        *leftp,         /* preceding extent */
5877        struct xfs_btree_cur            *cur,
5878        int                             *logflags)      /* output */
5879{
5880        struct xfs_bmbt_irec            got;
5881        struct xfs_bmbt_irec            left;
5882        xfs_filblks_t                   blockcount;
5883        int                             error, i;
5884        struct xfs_mount                *mp = ip->i_mount;
5885
5886        xfs_bmbt_get_all(gotp, &got);
5887        xfs_bmbt_get_all(leftp, &left);
5888        blockcount = left.br_blockcount + got.br_blockcount;
5889
5890        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5891        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5892        ASSERT(xfs_bmse_can_merge(&left, &got, shift));
5893
5894        /*
5895         * Merge the in-core extents. Note that the host record pointers and
5896         * current_ext index are invalid once the extent has been removed via
5897         * xfs_iext_remove().
5898         */
5899        xfs_bmbt_set_blockcount(leftp, blockcount);
5900        xfs_iext_remove(ip, current_ext, 1, 0);
5901
5902        /*
5903         * Update the on-disk extent count, the btree if necessary and log the
5904         * inode.
5905         */
5906        XFS_IFORK_NEXT_SET(ip, whichfork,
5907                           XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5908        *logflags |= XFS_ILOG_CORE;
5909        if (!cur) {
5910                *logflags |= XFS_ILOG_DEXT;
5911                return 0;
5912        }
5913
5914        /* lookup and remove the extent to merge */
5915        error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
5916                                   got.br_blockcount, &i);
5917        if (error)
5918                return error;
5919        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5920
5921        error = xfs_btree_delete(cur, &i);
5922        if (error)
5923                return error;
5924        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5925
5926        /* lookup and update size of the previous extent */
5927        error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
5928                                   left.br_blockcount, &i);
5929        if (error)
5930                return error;
5931        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5932
5933        left.br_blockcount = blockcount;
5934
5935        return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
5936                               left.br_blockcount, left.br_state);
5937}
5938
5939/*
5940 * Shift a single extent.
5941 */
5942STATIC int
5943xfs_bmse_shift_one(
5944        struct xfs_inode                *ip,
5945        int                             whichfork,
5946        xfs_fileoff_t                   offset_shift_fsb,
5947        int                             *current_ext,
5948        struct xfs_bmbt_rec_host        *gotp,
5949        struct xfs_btree_cur            *cur,
5950        int                             *logflags,
5951        enum shift_direction            direction,
5952        struct xfs_defer_ops            *dfops)
5953{
5954        struct xfs_ifork                *ifp;
5955        struct xfs_mount                *mp;
5956        xfs_fileoff_t                   startoff;
5957        struct xfs_bmbt_rec_host        *adj_irecp;
5958        struct xfs_bmbt_irec            got;
5959        struct xfs_bmbt_irec            adj_irec;
5960        int                             error;
5961        int                             i;
5962        int                             total_extents;
5963
5964        mp = ip->i_mount;
5965        ifp = XFS_IFORK_PTR(ip, whichfork);
5966        total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5967
5968        xfs_bmbt_get_all(gotp, &got);
5969
5970        /* delalloc extents should be prevented by caller */
5971        XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5972
5973        if (direction == SHIFT_LEFT) {
5974                startoff = got.br_startoff - offset_shift_fsb;
5975
5976                /*
5977                 * Check for merge if we've got an extent to the left,
5978                 * otherwise make sure there's enough room at the start
5979                 * of the file for the shift.
5980                 */
5981                if (!*current_ext) {
5982                        if (got.br_startoff < offset_shift_fsb)
5983                                return -EINVAL;
5984                        goto update_current_ext;
5985                }
5986                /*
5987                 * grab the left extent and check for a large
5988                 * enough hole.
5989                 */
5990                adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5991                xfs_bmbt_get_all(adj_irecp, &adj_irec);
5992
5993                if (startoff <
5994                    adj_irec.br_startoff + adj_irec.br_blockcount)
5995                        return -EINVAL;
5996
5997                /* check whether to merge the extent or shift it down */
5998                if (xfs_bmse_can_merge(&adj_irec, &got,
5999                                       offset_shift_fsb)) {
6000                        error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
6001                                               *current_ext, gotp, adj_irecp,
6002                                               cur, logflags);
6003                        if (error)
6004                                return error;
6005                        adj_irec = got;
6006                        goto update_rmap;
6007                }
6008        } else {
6009                startoff = got.br_startoff + offset_shift_fsb;
6010                /* nothing to move if this is the last extent */
6011                if (*current_ext >= (total_extents - 1))
6012                        goto update_current_ext;
6013                /*
6014                 * If this is not the last extent in the file, make sure there
6015                 * is enough room between current extent and next extent for
6016                 * accommodating the shift.
6017                 */
6018                adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
6019                xfs_bmbt_get_all(adj_irecp, &adj_irec);
6020                if (startoff + got.br_blockcount > adj_irec.br_startoff)
6021                        return -EINVAL;
6022                /*
6023                 * Unlike a left shift (which involves a hole punch),
6024                 * a right shift does not modify extent neighbors
6025                 * in any way. We should never find mergeable extents
6026                 * in this scenario. Check anyways and warn if we
6027                 * encounter two extents that could be one.
6028                 */
6029                if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
6030                        WARN_ON_ONCE(1);
6031        }
6032        /*
6033         * Increment the extent index for the next iteration, update the start
6034         * offset of the in-core extent and update the btree if applicable.
6035         */
6036update_current_ext:
6037        if (direction == SHIFT_LEFT)
6038                (*current_ext)++;
6039        else
6040                (*current_ext)--;
6041        xfs_bmbt_set_startoff(gotp, startoff);
6042        *logflags |= XFS_ILOG_CORE;
6043        adj_irec = got;
6044        if (!cur) {
6045                *logflags |= XFS_ILOG_DEXT;
6046                goto update_rmap;
6047        }
6048
6049        error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
6050                                   got.br_blockcount, &i);
6051        if (error)
6052                return error;
6053        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
6054
6055        got.br_startoff = startoff;
6056        error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
6057                        got.br_blockcount, got.br_state);
6058        if (error)
6059                return error;
6060
6061update_rmap:
6062        /* update reverse mapping */
6063        error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
6064        if (error)
6065                return error;
6066        adj_irec.br_startoff = startoff;
6067        return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
6068}
6069
6070/*
6071 * Shift extent records to the left/right to cover/create a hole.
6072 *
6073 * The maximum number of extents to be shifted in a single operation is
6074 * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
6075 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
6076 * is the length by which each extent is shifted. If there is no hole to shift
6077 * the extents into, this will be considered invalid operation and we abort
6078 * immediately.
6079 */
6080int
6081xfs_bmap_shift_extents(
6082        struct xfs_trans        *tp,
6083        struct xfs_inode        *ip,
6084        xfs_fileoff_t           *next_fsb,
6085        xfs_fileoff_t           offset_shift_fsb,
6086        int                     *done,
6087        xfs_fileoff_t           stop_fsb,
6088        xfs_fsblock_t           *firstblock,
6089        struct xfs_defer_ops    *dfops,
6090        enum shift_direction    direction,
6091        int                     num_exts)
6092{
6093        struct xfs_btree_cur            *cur = NULL;
6094        struct xfs_bmbt_rec_host        *gotp;
6095        struct xfs_bmbt_irec            got;
6096        struct xfs_mount                *mp = ip->i_mount;
6097        struct xfs_ifork                *ifp;
6098        xfs_extnum_t                    nexts = 0;
6099        xfs_extnum_t                    current_ext;
6100        xfs_extnum_t                    total_extents;
6101        xfs_extnum_t                    stop_extent;
6102        int                             error = 0;
6103        int                             whichfork = XFS_DATA_FORK;
6104        int                             logflags = 0;
6105
6106        if (unlikely(XFS_TEST_ERROR(
6107            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
6108             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
6109             mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
6110                XFS_ERROR_REPORT("xfs_bmap_shift_extents",
6111                                 XFS_ERRLEVEL_LOW, mp);
6112                return -EFSCORRUPTED;
6113        }
6114
6115        if (XFS_FORCED_SHUTDOWN(mp))
6116                return -EIO;
6117
6118        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
6119        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6120        ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
6121        ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
6122
6123        ifp = XFS_IFORK_PTR(ip, whichfork);
6124        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6125                /* Read in all the extents */
6126                error = xfs_iread_extents(tp, ip, whichfork);
6127                if (error)
6128                        return error;
6129        }
6130
6131        if (ifp->if_flags & XFS_IFBROOT) {
6132                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6133                cur->bc_private.b.firstblock = *firstblock;
6134                cur->bc_private.b.dfops = dfops;
6135                cur->bc_private.b.flags = 0;
6136        }
6137
6138        /*
6139         * There may be delalloc extents in the data fork before the range we
6140         * are collapsing out, so we cannot use the count of real extents here.
6141         * Instead we have to calculate it from the incore fork.
6142         */
6143        total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
6144        if (total_extents == 0) {
6145                *done = 1;
6146                goto del_cursor;
6147        }
6148
6149        /*
6150         * In case of first right shift, we need to initialize next_fsb
6151         */
6152        if (*next_fsb == NULLFSBLOCK) {
6153                gotp = xfs_iext_get_ext(ifp, total_extents - 1);
6154                xfs_bmbt_get_all(gotp, &got);
6155                *next_fsb = got.br_startoff;
6156                if (stop_fsb > *next_fsb) {
6157                        *done = 1;
6158                        goto del_cursor;
6159                }
6160        }
6161
6162        /* Lookup the extent index at which we have to stop */
6163        if (direction == SHIFT_RIGHT) {
6164                gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
6165                /* Make stop_extent exclusive of shift range */
6166                stop_extent--;
6167        } else
6168                stop_extent = total_extents;
6169
6170        /*
6171         * Look up the extent index for the fsb where we start shifting. We can
6172         * henceforth iterate with current_ext as extent list changes are locked
6173         * out via ilock.
6174         *
6175         * gotp can be null in 2 cases: 1) if there are no extents or 2)
6176         * *next_fsb lies in a hole beyond which there are no extents. Either
6177         * way, we are done.
6178         */
6179        gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
6180        if (!gotp) {
6181                *done = 1;
6182                goto del_cursor;
6183        }
6184
6185        /* some sanity checking before we finally start shifting extents */
6186        if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
6187             (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
6188                error = -EIO;
6189                goto del_cursor;
6190        }
6191
6192        while (nexts++ < num_exts) {
6193                error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
6194                                           &current_ext, gotp, cur, &logflags,
6195                                           direction, dfops);
6196                if (error)
6197                        goto del_cursor;
6198                /*
6199                 * If there was an extent merge during the shift, the extent
6200                 * count can change. Update the total and grade the next record.
6201                 */
6202                if (direction == SHIFT_LEFT) {
6203                        total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
6204                        stop_extent = total_extents;
6205                }
6206
6207                if (current_ext == stop_extent) {
6208                        *done = 1;
6209                        *next_fsb = NULLFSBLOCK;
6210                        break;
6211                }
6212                gotp = xfs_iext_get_ext(ifp, current_ext);
6213        }
6214
6215        if (!*done) {
6216                xfs_bmbt_get_all(gotp, &got);
6217                *next_fsb = got.br_startoff;
6218        }
6219
6220del_cursor:
6221        if (cur)
6222                xfs_btree_del_cursor(cur,
6223                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
6224
6225        if (logflags)
6226                xfs_trans_log_inode(tp, ip, logflags);
6227
6228        return error;
6229}
6230
/*
 * Splits an extent into two extents at block split_fsb such that split_fsb
 * becomes the first block of the current_ext. @current_ext is the target
 * extent to be split. @split_fsb is the block at which the extent is split.
 * If split_fsb lies in a hole or is the first block of an extent, just
 * return 0.
 */
6237STATIC int
6238xfs_bmap_split_extent_at(
6239        struct xfs_trans        *tp,
6240        struct xfs_inode        *ip,
6241        xfs_fileoff_t           split_fsb,
6242        xfs_fsblock_t           *firstfsb,
6243        struct xfs_defer_ops    *dfops)
6244{
6245        int                             whichfork = XFS_DATA_FORK;
6246        struct xfs_btree_cur            *cur = NULL;
6247        struct xfs_bmbt_rec_host        *gotp;
6248        struct xfs_bmbt_irec            got;
6249        struct xfs_bmbt_irec            new; /* split extent */
6250        struct xfs_mount                *mp = ip->i_mount;
6251        struct xfs_ifork                *ifp;
6252        xfs_fsblock_t                   gotblkcnt; /* new block count for got */
6253        xfs_extnum_t                    current_ext;
6254        int                             error = 0;
6255        int                             logflags = 0;
6256        int                             i = 0;
6257
6258        if (unlikely(XFS_TEST_ERROR(
6259            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
6260             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
6261             mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
6262                XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
6263                                 XFS_ERRLEVEL_LOW, mp);
6264                return -EFSCORRUPTED;
6265        }
6266
6267        if (XFS_FORCED_SHUTDOWN(mp))
6268                return -EIO;
6269
6270        ifp = XFS_IFORK_PTR(ip, whichfork);
6271        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6272                /* Read in all the extents */
6273                error = xfs_iread_extents(tp, ip, whichfork);
6274                if (error)
6275                        return error;
6276        }
6277
6278        /*
6279         * gotp can be null in 2 cases: 1) if there are no extents
6280         * or 2) split_fsb lies in a hole beyond which there are
6281         * no extents. Either way, we are done.
6282         */
6283        gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
6284        if (!gotp)
6285                return 0;
6286
6287        xfs_bmbt_get_all(gotp, &got);
6288
6289        /*
6290         * Check split_fsb lies in a hole or the start boundary offset
6291         * of the extent.
6292         */
6293        if (got.br_startoff >= split_fsb)
6294                return 0;
6295
6296        gotblkcnt = split_fsb - got.br_startoff;
6297        new.br_startoff = split_fsb;
6298        new.br_startblock = got.br_startblock + gotblkcnt;
6299        new.br_blockcount = got.br_blockcount - gotblkcnt;
6300        new.br_state = got.br_state;
6301
6302        if (ifp->if_flags & XFS_IFBROOT) {
6303                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
6304                cur->bc_private.b.firstblock = *firstfsb;
6305                cur->bc_private.b.dfops = dfops;
6306                cur->bc_private.b.flags = 0;
6307                error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
6308                                got.br_startblock,
6309                                got.br_blockcount,
6310                                &i);
6311                if (error)
6312                        goto del_cursor;
6313                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
6314        }
6315
6316        xfs_bmbt_set_blockcount(gotp, gotblkcnt);
6317        got.br_blockcount = gotblkcnt;
6318
6319        logflags = XFS_ILOG_CORE;
6320        if (cur) {
6321                error = xfs_bmbt_update(cur, got.br_startoff,
6322                                got.br_startblock,
6323                                got.br_blockcount,
6324                                got.br_state);
6325                if (error)
6326                        goto del_cursor;
6327        } else
6328                logflags |= XFS_ILOG_DEXT;
6329
6330        /* Add new extent */
6331        current_ext++;
6332        xfs_iext_insert(ip, current_ext, 1, &new, 0);
6333        XFS_IFORK_NEXT_SET(ip, whichfork,
6334                           XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
6335
6336        if (cur) {
6337                error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
6338                                new.br_startblock, new.br_blockcount,
6339                                &i);
6340                if (error)
6341                        goto del_cursor;
6342                XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
6343                cur->bc_rec.b.br_state = new.br_state;
6344
6345                error = xfs_btree_insert(cur, &i);
6346                if (error)
6347                        goto del_cursor;
6348                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
6349        }
6350
6351        /*
6352         * Convert to a btree if necessary.
6353         */
6354        if (xfs_bmap_needs_btree(ip, whichfork)) {
6355                int tmp_logflags; /* partial log flag return val */
6356
6357                ASSERT(cur == NULL);
6358                error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
6359                                &cur, 0, &tmp_logflags, whichfork);
6360                logflags |= tmp_logflags;
6361        }
6362
6363del_cursor:
6364        if (cur) {
6365                cur->bc_private.b.allocated = 0;
6366                xfs_btree_del_cursor(cur,
6367                                error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
6368        }
6369
6370        if (logflags)
6371                xfs_trans_log_inode(tp, ip, logflags);
6372        return error;
6373}
6374
6375int
6376xfs_bmap_split_extent(
6377        struct xfs_inode        *ip,
6378        xfs_fileoff_t           split_fsb)
6379{
6380        struct xfs_mount        *mp = ip->i_mount;
6381        struct xfs_trans        *tp;
6382        struct xfs_defer_ops    dfops;
6383        xfs_fsblock_t           firstfsb;
6384        int                     error;
6385
6386        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
6387                        XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
6388        if (error)
6389                return error;
6390
6391        xfs_ilock(ip, XFS_ILOCK_EXCL);
6392        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
6393
6394        xfs_defer_init(&dfops, &firstfsb);
6395
6396        error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
6397                        &firstfsb, &dfops);
6398        if (error)
6399                goto out;
6400
6401        error = xfs_defer_finish(&tp, &dfops, NULL);
6402        if (error)
6403                goto out;
6404
6405        return xfs_trans_commit(tp);
6406
6407out:
6408        xfs_defer_cancel(&dfops);
6409        xfs_trans_cancel(tp);
6410        return error;
6411}
6412
6413/* Deferred mapping is only for real extents in the data fork. */
6414static bool
6415xfs_bmap_is_update_needed(
6416        struct xfs_bmbt_irec    *bmap)
6417{
6418        return  bmap->br_startblock != HOLESTARTBLOCK &&
6419                bmap->br_startblock != DELAYSTARTBLOCK;
6420}
6421
6422/* Record a bmap intent. */
6423static int
6424__xfs_bmap_add(
6425        struct xfs_mount                *mp,
6426        struct xfs_defer_ops            *dfops,
6427        enum xfs_bmap_intent_type       type,
6428        struct xfs_inode                *ip,
6429        int                             whichfork,
6430        struct xfs_bmbt_irec            *bmap)
6431{
6432        int                             error;
6433        struct xfs_bmap_intent          *bi;
6434
6435        trace_xfs_bmap_defer(mp,
6436                        XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
6437                        type,
6438                        XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
6439                        ip->i_ino, whichfork,
6440                        bmap->br_startoff,
6441                        bmap->br_blockcount,
6442                        bmap->br_state);
6443
6444        bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
6445        INIT_LIST_HEAD(&bi->bi_list);
6446        bi->bi_type = type;
6447        bi->bi_owner = ip;
6448        bi->bi_whichfork = whichfork;
6449        bi->bi_bmap = *bmap;
6450
6451        error = xfs_defer_join(dfops, bi->bi_owner);
6452        if (error) {
6453                kmem_free(bi);
6454                return error;
6455        }
6456
6457        xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6458        return 0;
6459}
6460
6461/* Map an extent into a file. */
6462int
6463xfs_bmap_map_extent(
6464        struct xfs_mount        *mp,
6465        struct xfs_defer_ops    *dfops,
6466        struct xfs_inode        *ip,
6467        struct xfs_bmbt_irec    *PREV)
6468{
6469        if (!xfs_bmap_is_update_needed(PREV))
6470                return 0;
6471
6472        return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
6473                        XFS_DATA_FORK, PREV);
6474}
6475
6476/* Unmap an extent out of a file. */
6477int
6478xfs_bmap_unmap_extent(
6479        struct xfs_mount        *mp,
6480        struct xfs_defer_ops    *dfops,
6481        struct xfs_inode        *ip,
6482        struct xfs_bmbt_irec    *PREV)
6483{
6484        if (!xfs_bmap_is_update_needed(PREV))
6485                return 0;
6486
6487        return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
6488                        XFS_DATA_FORK, PREV);
6489}
6490
6491/*
6492 * Process one of the deferred bmap operations.  We pass back the
6493 * btree cursor to maintain our lock on the bmapbt between calls.
6494 */
6495int
6496xfs_bmap_finish_one(
6497        struct xfs_trans                *tp,
6498        struct xfs_defer_ops            *dfops,
6499        struct xfs_inode                *ip,
6500        enum xfs_bmap_intent_type       type,
6501        int                             whichfork,
6502        xfs_fileoff_t                   startoff,
6503        xfs_fsblock_t                   startblock,
6504        xfs_filblks_t                   blockcount,
6505        xfs_exntst_t                    state)
6506{
6507        struct xfs_bmbt_irec            bmap;
6508        int                             nimaps = 1;
6509        xfs_fsblock_t                   firstfsb;
6510        int                             flags = XFS_BMAPI_REMAP;
6511        int                             done;
6512        int                             error = 0;
6513
6514        bmap.br_startblock = startblock;
6515        bmap.br_startoff = startoff;
6516        bmap.br_blockcount = blockcount;
6517        bmap.br_state = state;
6518
6519        trace_xfs_bmap_deferred(tp->t_mountp,
6520                        XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6521                        XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6522                        ip->i_ino, whichfork, startoff, blockcount, state);
6523
6524        if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
6525                return -EFSCORRUPTED;
6526        if (whichfork == XFS_ATTR_FORK)
6527                flags |= XFS_BMAPI_ATTRFORK;
6528
6529        if (XFS_TEST_ERROR(false, tp->t_mountp,
6530                        XFS_ERRTAG_BMAP_FINISH_ONE,
6531                        XFS_RANDOM_BMAP_FINISH_ONE))
6532                return -EIO;
6533
6534        switch (type) {
6535        case XFS_BMAP_MAP:
6536                firstfsb = bmap.br_startblock;
6537                error = xfs_bmapi_write(tp, ip, bmap.br_startoff,
6538                                        bmap.br_blockcount, flags, &firstfsb,
6539                                        bmap.br_blockcount, &bmap, &nimaps,
6540                                        dfops);
6541                break;
6542        case XFS_BMAP_UNMAP:
6543                error = xfs_bunmapi(tp, ip, bmap.br_startoff,
6544                                bmap.br_blockcount, flags, 1, &firstfsb,
6545                                dfops, &done);
6546                ASSERT(done);
6547                break;
6548        default:
6549                ASSERT(0);
6550                error = -EFSCORRUPTED;
6551        }
6552
6553        return error;
6554}
6555