linux/fs/xfs/libxfs/xfs_refcount.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2016 Oracle.  All Rights Reserved.
   3 *
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it would be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write the Free Software Foundation,
  18 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19 */
  20#include "xfs.h"
  21#include "xfs_fs.h"
  22#include "xfs_shared.h"
  23#include "xfs_format.h"
  24#include "xfs_log_format.h"
  25#include "xfs_trans_resv.h"
  26#include "xfs_sb.h"
  27#include "xfs_mount.h"
  28#include "xfs_defer.h"
  29#include "xfs_btree.h"
  30#include "xfs_bmap.h"
  31#include "xfs_refcount_btree.h"
  32#include "xfs_alloc.h"
  33#include "xfs_errortag.h"
  34#include "xfs_error.h"
  35#include "xfs_trace.h"
  36#include "xfs_cksum.h"
  37#include "xfs_trans.h"
  38#include "xfs_bit.h"
  39#include "xfs_refcount.h"
  40#include "xfs_rmap.h"
  41
  42/* Allowable refcount adjustment amounts. */
  43enum xfs_refc_adjust_op {
  44        XFS_REFCOUNT_ADJUST_INCREASE    = 1,
  45        XFS_REFCOUNT_ADJUST_DECREASE    = -1,
  46        XFS_REFCOUNT_ADJUST_COW_ALLOC   = 0,
  47        XFS_REFCOUNT_ADJUST_COW_FREE    = -1,
  48};
  49
  50STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
  51                xfs_agblock_t agbno, xfs_extlen_t aglen,
  52                struct xfs_defer_ops *dfops);
  53STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
  54                xfs_agblock_t agbno, xfs_extlen_t aglen,
  55                struct xfs_defer_ops *dfops);
  56
  57/*
  58 * Look up the first record less than or equal to [bno, len] in the btree
  59 * given by cur.
  60 */
  61int
  62xfs_refcount_lookup_le(
  63        struct xfs_btree_cur    *cur,
  64        xfs_agblock_t           bno,
  65        int                     *stat)
  66{
  67        trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
  68                        XFS_LOOKUP_LE);
  69        cur->bc_rec.rc.rc_startblock = bno;
  70        cur->bc_rec.rc.rc_blockcount = 0;
  71        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  72}
  73
  74/*
  75 * Look up the first record greater than or equal to [bno, len] in the btree
  76 * given by cur.
  77 */
  78int
  79xfs_refcount_lookup_ge(
  80        struct xfs_btree_cur    *cur,
  81        xfs_agblock_t           bno,
  82        int                     *stat)
  83{
  84        trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
  85                        XFS_LOOKUP_GE);
  86        cur->bc_rec.rc.rc_startblock = bno;
  87        cur->bc_rec.rc.rc_blockcount = 0;
  88        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  89}
  90
  91/* Convert on-disk record to in-core format. */
  92static inline void
  93xfs_refcount_btrec_to_irec(
  94        union xfs_btree_rec             *rec,
  95        struct xfs_refcount_irec        *irec)
  96{
  97        irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
  98        irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
  99        irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
 100}
 101
 102/*
 103 * Get the data from the pointed-to record.
 104 */
 105int
 106xfs_refcount_get_rec(
 107        struct xfs_btree_cur            *cur,
 108        struct xfs_refcount_irec        *irec,
 109        int                             *stat)
 110{
 111        union xfs_btree_rec             *rec;
 112        int                             error;
 113
 114        error = xfs_btree_get_rec(cur, &rec, stat);
 115        if (!error && *stat == 1) {
 116                xfs_refcount_btrec_to_irec(rec, irec);
 117                trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno,
 118                                irec);
 119        }
 120        return error;
 121}
 122
 123/*
 124 * Update the record referred to by cur to the value given
 125 * by [bno, len, refcount].
 126 * This either works (return 0) or gets an EFSCORRUPTED error.
 127 */
 128STATIC int
 129xfs_refcount_update(
 130        struct xfs_btree_cur            *cur,
 131        struct xfs_refcount_irec        *irec)
 132{
 133        union xfs_btree_rec     rec;
 134        int                     error;
 135
 136        trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
 137        rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
 138        rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
 139        rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
 140        error = xfs_btree_update(cur, &rec);
 141        if (error)
 142                trace_xfs_refcount_update_error(cur->bc_mp,
 143                                cur->bc_private.a.agno, error, _RET_IP_);
 144        return error;
 145}
 146
 147/*
 148 * Insert the record referred to by cur to the value given
 149 * by [bno, len, refcount].
 150 * This either works (return 0) or gets an EFSCORRUPTED error.
 151 */
 152STATIC int
 153xfs_refcount_insert(
 154        struct xfs_btree_cur            *cur,
 155        struct xfs_refcount_irec        *irec,
 156        int                             *i)
 157{
 158        int                             error;
 159
 160        trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
 161        cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
 162        cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
 163        cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
 164        error = xfs_btree_insert(cur, i);
 165        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
 166out_error:
 167        if (error)
 168                trace_xfs_refcount_insert_error(cur->bc_mp,
 169                                cur->bc_private.a.agno, error, _RET_IP_);
 170        return error;
 171}
 172
 173/*
 174 * Remove the record referred to by cur, then set the pointer to the spot
 175 * where the record could be re-inserted, in case we want to increment or
 176 * decrement the cursor.
 177 * This either works (return 0) or gets an EFSCORRUPTED error.
 178 */
 179STATIC int
 180xfs_refcount_delete(
 181        struct xfs_btree_cur    *cur,
 182        int                     *i)
 183{
 184        struct xfs_refcount_irec        irec;
 185        int                     found_rec;
 186        int                     error;
 187
 188        error = xfs_refcount_get_rec(cur, &irec, &found_rec);
 189        if (error)
 190                goto out_error;
 191        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 192        trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
 193        error = xfs_btree_delete(cur, i);
 194        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
 195        if (error)
 196                goto out_error;
 197        error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
 198out_error:
 199        if (error)
 200                trace_xfs_refcount_delete_error(cur->bc_mp,
 201                                cur->bc_private.a.agno, error, _RET_IP_);
 202        return error;
 203}
 204
 205/*
 206 * Adjusting the Reference Count
 207 *
 208 * As stated elsewhere, the reference count btree (refcbt) stores
 209 * >1 reference counts for extents of physical blocks.  In this
 210 * operation, we're either raising or lowering the reference count of
 211 * some subrange stored in the tree:
 212 *
 213 *      <------ adjustment range ------>
 214 * ----+   +---+-----+ +--+--------+---------
 215 *  2  |   | 3 |  4  | |17|   55   |   10
 216 * ----+   +---+-----+ +--+--------+---------
 217 * X axis is physical blocks number;
 218 * reference counts are the numbers inside the rectangles
 219 *
 220 * The first thing we need to do is to ensure that there are no
 221 * refcount extents crossing either boundary of the range to be
 222 * adjusted.  For any extent that does cross a boundary, split it into
 223 * two extents so that we can increment the refcount of one of the
 224 * pieces later:
 225 *
 226 *      <------ adjustment range ------>
 227 * ----+   +---+-----+ +--+--------+----+----
 228 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 229 * ----+   +---+-----+ +--+--------+----+----
 230 *
 231 * For this next step, let's assume that all the physical blocks in
 232 * the adjustment range are mapped to a file and are therefore in use
 233 * at least once.  Therefore, we can infer that any gap in the
 234 * refcount tree within the adjustment range represents a physical
 235 * extent with refcount == 1:
 236 *
 237 *      <------ adjustment range ------>
 238 * ----+---+---+-----+-+--+--------+----+----
 239 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 240 * ----+---+---+-----+-+--+--------+----+----
 241 *      ^
 242 *
 243 * For each extent that falls within the interval range, figure out
 244 * which extent is to the left or the right of that extent.  Now we
 245 * have a left, current, and right extent.  If the new reference count
 246 * of the center extent enables us to merge left, center, and right
 247 * into one record covering all three, do so.  If the center extent is
 248 * at the left end of the range, abuts the left extent, and its new
 249 * reference count matches the left extent's record, then merge them.
 250 * If the center extent is at the right end of the range, abuts the
 251 * right extent, and the reference counts match, merge those.  In the
 252 * example, we can left merge (assuming an increment operation):
 253 *
 254 *      <------ adjustment range ------>
 255 * --------+---+-----+-+--+--------+----+----
 256 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 257 * --------+---+-----+-+--+--------+----+----
 258 *          ^
 259 *
 260 * For all other extents within the range, adjust the reference count
 261 * or delete it if the refcount falls below 2.  If we were
 262 * incrementing, the end result looks like this:
 263 *
 264 *      <------ adjustment range ------>
 265 * --------+---+-----+-+--+--------+----+----
 266 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 267 * --------+---+-----+-+--+--------+----+----
 268 *
 269 * The result of a decrement operation looks as such:
 270 *
 271 *      <------ adjustment range ------>
 272 * ----+   +---+       +--+--------+----+----
 273 *  2  |   | 2 |       |16|   54   |  9 | 10
 274 * ----+   +---+       +--+--------+----+----
 275 *      DDDD    111111DD
 276 *
 277 * The blocks marked "D" are freed; the blocks marked "1" are only
 278 * referenced once and therefore the record is removed from the
 279 * refcount btree.
 280 */
 281
 282/* Next block after this extent. */
 283static inline xfs_agblock_t
 284xfs_refc_next(
 285        struct xfs_refcount_irec        *rc)
 286{
 287        return rc->rc_startblock + rc->rc_blockcount;
 288}
 289
 290/*
 291 * Split a refcount extent that crosses agbno.
 292 */
 293STATIC int
 294xfs_refcount_split_extent(
 295        struct xfs_btree_cur            *cur,
 296        xfs_agblock_t                   agbno,
 297        bool                            *shape_changed)
 298{
 299        struct xfs_refcount_irec        rcext, tmp;
 300        int                             found_rec;
 301        int                             error;
 302
 303        *shape_changed = false;
 304        error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
 305        if (error)
 306                goto out_error;
 307        if (!found_rec)
 308                return 0;
 309
 310        error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
 311        if (error)
 312                goto out_error;
 313        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 314        if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
 315                return 0;
 316
 317        *shape_changed = true;
 318        trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
 319                        &rcext, agbno);
 320
 321        /* Establish the right extent. */
 322        tmp = rcext;
 323        tmp.rc_startblock = agbno;
 324        tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
 325        error = xfs_refcount_update(cur, &tmp);
 326        if (error)
 327                goto out_error;
 328
 329        /* Insert the left extent. */
 330        tmp = rcext;
 331        tmp.rc_blockcount = agbno - rcext.rc_startblock;
 332        error = xfs_refcount_insert(cur, &tmp, &found_rec);
 333        if (error)
 334                goto out_error;
 335        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 336        return error;
 337
 338out_error:
 339        trace_xfs_refcount_split_extent_error(cur->bc_mp,
 340                        cur->bc_private.a.agno, error, _RET_IP_);
 341        return error;
 342}
 343
 344/*
 345 * Merge the left, center, and right extents.
 346 */
 347STATIC int
 348xfs_refcount_merge_center_extents(
 349        struct xfs_btree_cur            *cur,
 350        struct xfs_refcount_irec        *left,
 351        struct xfs_refcount_irec        *center,
 352        struct xfs_refcount_irec        *right,
 353        unsigned long long              extlen,
 354        xfs_agblock_t                   *agbno,
 355        xfs_extlen_t                    *aglen)
 356{
 357        int                             error;
 358        int                             found_rec;
 359
 360        trace_xfs_refcount_merge_center_extents(cur->bc_mp,
 361                        cur->bc_private.a.agno, left, center, right);
 362
 363        /*
 364         * Make sure the center and right extents are not in the btree.
 365         * If the center extent was synthesized, the first delete call
 366         * removes the right extent and we skip the second deletion.
 367         * If center and right were in the btree, then the first delete
 368         * call removes the center and the second one removes the right
 369         * extent.
 370         */
 371        error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
 372                        &found_rec);
 373        if (error)
 374                goto out_error;
 375        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 376
 377        error = xfs_refcount_delete(cur, &found_rec);
 378        if (error)
 379                goto out_error;
 380        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 381
 382        if (center->rc_refcount > 1) {
 383                error = xfs_refcount_delete(cur, &found_rec);
 384                if (error)
 385                        goto out_error;
 386                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 387                                out_error);
 388        }
 389
 390        /* Enlarge the left extent. */
 391        error = xfs_refcount_lookup_le(cur, left->rc_startblock,
 392                        &found_rec);
 393        if (error)
 394                goto out_error;
 395        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 396
 397        left->rc_blockcount = extlen;
 398        error = xfs_refcount_update(cur, left);
 399        if (error)
 400                goto out_error;
 401
 402        *aglen = 0;
 403        return error;
 404
 405out_error:
 406        trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
 407                        cur->bc_private.a.agno, error, _RET_IP_);
 408        return error;
 409}
 410
 411/*
 412 * Merge with the left extent.
 413 */
 414STATIC int
 415xfs_refcount_merge_left_extent(
 416        struct xfs_btree_cur            *cur,
 417        struct xfs_refcount_irec        *left,
 418        struct xfs_refcount_irec        *cleft,
 419        xfs_agblock_t                   *agbno,
 420        xfs_extlen_t                    *aglen)
 421{
 422        int                             error;
 423        int                             found_rec;
 424
 425        trace_xfs_refcount_merge_left_extent(cur->bc_mp,
 426                        cur->bc_private.a.agno, left, cleft);
 427
 428        /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
 429        if (cleft->rc_refcount > 1) {
 430                error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
 431                                &found_rec);
 432                if (error)
 433                        goto out_error;
 434                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 435                                out_error);
 436
 437                error = xfs_refcount_delete(cur, &found_rec);
 438                if (error)
 439                        goto out_error;
 440                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 441                                out_error);
 442        }
 443
 444        /* Enlarge the left extent. */
 445        error = xfs_refcount_lookup_le(cur, left->rc_startblock,
 446                        &found_rec);
 447        if (error)
 448                goto out_error;
 449        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 450
 451        left->rc_blockcount += cleft->rc_blockcount;
 452        error = xfs_refcount_update(cur, left);
 453        if (error)
 454                goto out_error;
 455
 456        *agbno += cleft->rc_blockcount;
 457        *aglen -= cleft->rc_blockcount;
 458        return error;
 459
 460out_error:
 461        trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
 462                        cur->bc_private.a.agno, error, _RET_IP_);
 463        return error;
 464}
 465
 466/*
 467 * Merge with the right extent.
 468 */
 469STATIC int
 470xfs_refcount_merge_right_extent(
 471        struct xfs_btree_cur            *cur,
 472        struct xfs_refcount_irec        *right,
 473        struct xfs_refcount_irec        *cright,
 474        xfs_agblock_t                   *agbno,
 475        xfs_extlen_t                    *aglen)
 476{
 477        int                             error;
 478        int                             found_rec;
 479
 480        trace_xfs_refcount_merge_right_extent(cur->bc_mp,
 481                        cur->bc_private.a.agno, cright, right);
 482
 483        /*
 484         * If the extent ending at agbno+aglen (cright) wasn't synthesized,
 485         * remove it.
 486         */
 487        if (cright->rc_refcount > 1) {
 488                error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
 489                        &found_rec);
 490                if (error)
 491                        goto out_error;
 492                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 493                                out_error);
 494
 495                error = xfs_refcount_delete(cur, &found_rec);
 496                if (error)
 497                        goto out_error;
 498                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 499                                out_error);
 500        }
 501
 502        /* Enlarge the right extent. */
 503        error = xfs_refcount_lookup_le(cur, right->rc_startblock,
 504                        &found_rec);
 505        if (error)
 506                goto out_error;
 507        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 508
 509        right->rc_startblock -= cright->rc_blockcount;
 510        right->rc_blockcount += cright->rc_blockcount;
 511        error = xfs_refcount_update(cur, right);
 512        if (error)
 513                goto out_error;
 514
 515        *aglen -= cright->rc_blockcount;
 516        return error;
 517
 518out_error:
 519        trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
 520                        cur->bc_private.a.agno, error, _RET_IP_);
 521        return error;
 522}
 523
 524#define XFS_FIND_RCEXT_SHARED   1
 525#define XFS_FIND_RCEXT_COW      2
 526/*
 527 * Find the left extent and the one after it (cleft).  This function assumes
 528 * that we've already split any extent crossing agbno.
 529 */
 530STATIC int
 531xfs_refcount_find_left_extents(
 532        struct xfs_btree_cur            *cur,
 533        struct xfs_refcount_irec        *left,
 534        struct xfs_refcount_irec        *cleft,
 535        xfs_agblock_t                   agbno,
 536        xfs_extlen_t                    aglen,
 537        int                             flags)
 538{
 539        struct xfs_refcount_irec        tmp;
 540        int                             error;
 541        int                             found_rec;
 542
 543        left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
 544        error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
 545        if (error)
 546                goto out_error;
 547        if (!found_rec)
 548                return 0;
 549
 550        error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
 551        if (error)
 552                goto out_error;
 553        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 554
 555        if (xfs_refc_next(&tmp) != agbno)
 556                return 0;
 557        if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
 558                return 0;
 559        if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
 560                return 0;
 561        /* We have a left extent; retrieve (or invent) the next right one */
 562        *left = tmp;
 563
 564        error = xfs_btree_increment(cur, 0, &found_rec);
 565        if (error)
 566                goto out_error;
 567        if (found_rec) {
 568                error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
 569                if (error)
 570                        goto out_error;
 571                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 572                                out_error);
 573
 574                /* if tmp starts at the end of our range, just use that */
 575                if (tmp.rc_startblock == agbno)
 576                        *cleft = tmp;
 577                else {
 578                        /*
 579                         * There's a gap in the refcntbt at the start of the
 580                         * range we're interested in (refcount == 1) so
 581                         * synthesize the implied extent and pass it back.
 582                         * We assume here that the agbno/aglen range was
 583                         * passed in from a data fork extent mapping and
 584                         * therefore is allocated to exactly one owner.
 585                         */
 586                        cleft->rc_startblock = agbno;
 587                        cleft->rc_blockcount = min(aglen,
 588                                        tmp.rc_startblock - agbno);
 589                        cleft->rc_refcount = 1;
 590                }
 591        } else {
 592                /*
 593                 * No extents, so pretend that there's one covering the whole
 594                 * range.
 595                 */
 596                cleft->rc_startblock = agbno;
 597                cleft->rc_blockcount = aglen;
 598                cleft->rc_refcount = 1;
 599        }
 600        trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
 601                        left, cleft, agbno);
 602        return error;
 603
 604out_error:
 605        trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
 606                        cur->bc_private.a.agno, error, _RET_IP_);
 607        return error;
 608}
 609
 610/*
 611 * Find the right extent and the one before it (cright).  This function
 612 * assumes that we've already split any extents crossing agbno + aglen.
 613 */
 614STATIC int
 615xfs_refcount_find_right_extents(
 616        struct xfs_btree_cur            *cur,
 617        struct xfs_refcount_irec        *right,
 618        struct xfs_refcount_irec        *cright,
 619        xfs_agblock_t                   agbno,
 620        xfs_extlen_t                    aglen,
 621        int                             flags)
 622{
 623        struct xfs_refcount_irec        tmp;
 624        int                             error;
 625        int                             found_rec;
 626
 627        right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
 628        error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
 629        if (error)
 630                goto out_error;
 631        if (!found_rec)
 632                return 0;
 633
 634        error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
 635        if (error)
 636                goto out_error;
 637        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
 638
 639        if (tmp.rc_startblock != agbno + aglen)
 640                return 0;
 641        if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
 642                return 0;
 643        if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
 644                return 0;
 645        /* We have a right extent; retrieve (or invent) the next left one */
 646        *right = tmp;
 647
 648        error = xfs_btree_decrement(cur, 0, &found_rec);
 649        if (error)
 650                goto out_error;
 651        if (found_rec) {
 652                error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
 653                if (error)
 654                        goto out_error;
 655                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
 656                                out_error);
 657
 658                /* if tmp ends at the end of our range, just use that */
 659                if (xfs_refc_next(&tmp) == agbno + aglen)
 660                        *cright = tmp;
 661                else {
 662                        /*
 663                         * There's a gap in the refcntbt at the end of the
 664                         * range we're interested in (refcount == 1) so
 665                         * create the implied extent and pass it back.
 666                         * We assume here that the agbno/aglen range was
 667                         * passed in from a data fork extent mapping and
 668                         * therefore is allocated to exactly one owner.
 669                         */
 670                        cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
 671                        cright->rc_blockcount = right->rc_startblock -
 672                                        cright->rc_startblock;
 673                        cright->rc_refcount = 1;
 674                }
 675        } else {
 676                /*
 677                 * No extents, so pretend that there's one covering the whole
 678                 * range.
 679                 */
 680                cright->rc_startblock = agbno;
 681                cright->rc_blockcount = aglen;
 682                cright->rc_refcount = 1;
 683        }
 684        trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
 685                        cright, right, agbno + aglen);
 686        return error;
 687
 688out_error:
 689        trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
 690                        cur->bc_private.a.agno, error, _RET_IP_);
 691        return error;
 692}
 693
 694/* Is this extent valid? */
 695static inline bool
 696xfs_refc_valid(
 697        struct xfs_refcount_irec        *rc)
 698{
 699        return rc->rc_startblock != NULLAGBLOCK;
 700}
 701
 702/*
 703 * Try to merge with any extents on the boundaries of the adjustment range.
 704 */
 705STATIC int
 706xfs_refcount_merge_extents(
 707        struct xfs_btree_cur    *cur,
 708        xfs_agblock_t           *agbno,
 709        xfs_extlen_t            *aglen,
 710        enum xfs_refc_adjust_op adjust,
 711        int                     flags,
 712        bool                    *shape_changed)
 713{
 714        struct xfs_refcount_irec        left = {0}, cleft = {0};
 715        struct xfs_refcount_irec        cright = {0}, right = {0};
 716        int                             error;
 717        unsigned long long              ulen;
 718        bool                            cequal;
 719
 720        *shape_changed = false;
 721        /*
 722         * Find the extent just below agbno [left], just above agbno [cleft],
 723         * just below (agbno + aglen) [cright], and just above (agbno + aglen)
 724         * [right].
 725         */
 726        error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
 727                        *aglen, flags);
 728        if (error)
 729                return error;
 730        error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
 731                        *aglen, flags);
 732        if (error)
 733                return error;
 734
 735        /* No left or right extent to merge; exit. */
 736        if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
 737                return 0;
 738
 739        cequal = (cleft.rc_startblock == cright.rc_startblock) &&
 740                 (cleft.rc_blockcount == cright.rc_blockcount);
 741
 742        /* Try to merge left, cleft, and right.  cleft must == cright. */
 743        ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
 744                        right.rc_blockcount;
 745        if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
 746            xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
 747            left.rc_refcount == cleft.rc_refcount + adjust &&
 748            right.rc_refcount == cleft.rc_refcount + adjust &&
 749            ulen < MAXREFCEXTLEN) {
 750                *shape_changed = true;
 751                return xfs_refcount_merge_center_extents(cur, &left, &cleft,
 752                                &right, ulen, agbno, aglen);
 753        }
 754
 755        /* Try to merge left and cleft. */
 756        ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
 757        if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
 758            left.rc_refcount == cleft.rc_refcount + adjust &&
 759            ulen < MAXREFCEXTLEN) {
 760                *shape_changed = true;
 761                error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
 762                                agbno, aglen);
 763                if (error)
 764                        return error;
 765
 766                /*
 767                 * If we just merged left + cleft and cleft == cright,
 768                 * we no longer have a cright to merge with right.  We're done.
 769                 */
 770                if (cequal)
 771                        return 0;
 772        }
 773
 774        /* Try to merge cright and right. */
 775        ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
 776        if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
 777            right.rc_refcount == cright.rc_refcount + adjust &&
 778            ulen < MAXREFCEXTLEN) {
 779                *shape_changed = true;
 780                return xfs_refcount_merge_right_extent(cur, &right, &cright,
 781                                agbno, aglen);
 782        }
 783
 784        return error;
 785}
 786
 787/*
 788 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 789 * true incorrectly is a shutdown FS; the penalty for guessing false
 790 * incorrectly is more transaction rolls than might be necessary.
 791 * Be conservative here.
 792 */
 793static bool
 794xfs_refcount_still_have_space(
 795        struct xfs_btree_cur            *cur)
 796{
 797        unsigned long                   overhead;
 798
 799        overhead = cur->bc_private.a.priv.refc.shape_changes *
 800                        xfs_allocfree_log_count(cur->bc_mp, 1);
 801        overhead *= cur->bc_mp->m_sb.sb_blocksize;
 802
 803        /*
 804         * Only allow 2 refcount extent updates per transaction if the
 805         * refcount continue update "error" has been injected.
 806         */
 807        if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
 808            XFS_TEST_ERROR(false, cur->bc_mp,
 809                        XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
 810                return false;
 811
 812        if (cur->bc_private.a.priv.refc.nr_ops == 0)
 813                return true;
 814        else if (overhead > cur->bc_tp->t_log_res)
 815                return false;
 816        return  cur->bc_tp->t_log_res - overhead >
 817                cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
 818}
 819
 820/*
 821 * Adjust the refcounts of middle extents.  At this point we should have
 822 * split extents that crossed the adjustment range; merged with adjacent
 823 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 824 * all we have to do is update the extents inside [agbno, agbno + aglen].
 825 */
 826STATIC int
 827xfs_refcount_adjust_extents(
 828        struct xfs_btree_cur    *cur,
 829        xfs_agblock_t           *agbno,
 830        xfs_extlen_t            *aglen,
 831        enum xfs_refc_adjust_op adj,
 832        struct xfs_defer_ops    *dfops,
 833        struct xfs_owner_info   *oinfo)
 834{
 835        struct xfs_refcount_irec        ext, tmp;
 836        int                             error;
 837        int                             found_rec, found_tmp;
 838        xfs_fsblock_t                   fsbno;
 839
 840        /* Merging did all the work already. */
 841        if (*aglen == 0)
 842                return 0;
 843
 844        error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
 845        if (error)
 846                goto out_error;
 847
 848        while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
 849                error = xfs_refcount_get_rec(cur, &ext, &found_rec);
 850                if (error)
 851                        goto out_error;
 852                if (!found_rec) {
 853                        ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
 854                        ext.rc_blockcount = 0;
 855                        ext.rc_refcount = 0;
 856                }
 857
 858                /*
 859                 * Deal with a hole in the refcount tree; if a file maps to
 860                 * these blocks and there's no refcountbt record, pretend that
 861                 * there is one with refcount == 1.
 862                 */
 863                if (ext.rc_startblock != *agbno) {
 864                        tmp.rc_startblock = *agbno;
 865                        tmp.rc_blockcount = min(*aglen,
 866                                        ext.rc_startblock - *agbno);
 867                        tmp.rc_refcount = 1 + adj;
 868                        trace_xfs_refcount_modify_extent(cur->bc_mp,
 869                                        cur->bc_private.a.agno, &tmp);
 870
 871                        /*
 872                         * Either cover the hole (increment) or
 873                         * delete the range (decrement).
 874                         */
 875                        if (tmp.rc_refcount) {
 876                                error = xfs_refcount_insert(cur, &tmp,
 877                                                &found_tmp);
 878                                if (error)
 879                                        goto out_error;
 880                                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
 881                                                found_tmp == 1, out_error);
 882                                cur->bc_private.a.priv.refc.nr_ops++;
 883                        } else {
 884                                fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
 885                                                cur->bc_private.a.agno,
 886                                                tmp.rc_startblock);
 887                                xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
 888                                                tmp.rc_blockcount, oinfo);
 889                        }
 890
 891                        (*agbno) += tmp.rc_blockcount;
 892                        (*aglen) -= tmp.rc_blockcount;
 893
 894                        error = xfs_refcount_lookup_ge(cur, *agbno,
 895                                        &found_rec);
 896                        if (error)
 897                                goto out_error;
 898                }
 899
 900                /* Stop if there's nothing left to modify */
 901                if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
 902                        break;
 903
 904                /*
 905                 * Adjust the reference count and either update the tree
 906                 * (incr) or free the blocks (decr).
 907                 */
 908                if (ext.rc_refcount == MAXREFCOUNT)
 909                        goto skip;
 910                ext.rc_refcount += adj;
 911                trace_xfs_refcount_modify_extent(cur->bc_mp,
 912                                cur->bc_private.a.agno, &ext);
 913                if (ext.rc_refcount > 1) {
 914                        error = xfs_refcount_update(cur, &ext);
 915                        if (error)
 916                                goto out_error;
 917                        cur->bc_private.a.priv.refc.nr_ops++;
 918                } else if (ext.rc_refcount == 1) {
 919                        error = xfs_refcount_delete(cur, &found_rec);
 920                        if (error)
 921                                goto out_error;
 922                        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
 923                                        found_rec == 1, out_error);
 924                        cur->bc_private.a.priv.refc.nr_ops++;
 925                        goto advloop;
 926                } else {
 927                        fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
 928                                        cur->bc_private.a.agno,
 929                                        ext.rc_startblock);
 930                        xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
 931                                        ext.rc_blockcount, oinfo);
 932                }
 933
 934skip:
 935                error = xfs_btree_increment(cur, 0, &found_rec);
 936                if (error)
 937                        goto out_error;
 938
 939advloop:
 940                (*agbno) += ext.rc_blockcount;
 941                (*aglen) -= ext.rc_blockcount;
 942        }
 943
 944        return error;
 945out_error:
 946        trace_xfs_refcount_modify_extent_error(cur->bc_mp,
 947                        cur->bc_private.a.agno, error, _RET_IP_);
 948        return error;
 949}
 950
 951/* Adjust the reference count of a range of AG blocks. */
 952STATIC int
 953xfs_refcount_adjust(
 954        struct xfs_btree_cur    *cur,
 955        xfs_agblock_t           agbno,
 956        xfs_extlen_t            aglen,
 957        xfs_agblock_t           *new_agbno,
 958        xfs_extlen_t            *new_aglen,
 959        enum xfs_refc_adjust_op adj,
 960        struct xfs_defer_ops    *dfops,
 961        struct xfs_owner_info   *oinfo)
 962{
 963        bool                    shape_changed;
 964        int                     shape_changes = 0;
 965        int                     error;
 966
 967        *new_agbno = agbno;
 968        *new_aglen = aglen;
 969        if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
 970                trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
 971                                agbno, aglen);
 972        else
 973                trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
 974                                agbno, aglen);
 975
 976        /*
 977         * Ensure that no rcextents cross the boundary of the adjustment range.
 978         */
 979        error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
 980        if (error)
 981                goto out_error;
 982        if (shape_changed)
 983                shape_changes++;
 984
 985        error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
 986        if (error)
 987                goto out_error;
 988        if (shape_changed)
 989                shape_changes++;
 990
 991        /*
 992         * Try to merge with the left or right extents of the range.
 993         */
 994        error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
 995                        XFS_FIND_RCEXT_SHARED, &shape_changed);
 996        if (error)
 997                goto out_error;
 998        if (shape_changed)
 999                shape_changes++;
1000        if (shape_changes)
1001                cur->bc_private.a.priv.refc.shape_changes++;
1002
1003        /* Now that we've taken care of the ends, adjust the middle extents */
1004        error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
1005                        adj, dfops, oinfo);
1006        if (error)
1007                goto out_error;
1008
1009        return 0;
1010
1011out_error:
1012        trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
1013                        error, _RET_IP_);
1014        return error;
1015}
1016
1017/* Clean up after calling xfs_refcount_finish_one. */
1018void
1019xfs_refcount_finish_one_cleanup(
1020        struct xfs_trans        *tp,
1021        struct xfs_btree_cur    *rcur,
1022        int                     error)
1023{
1024        struct xfs_buf          *agbp;
1025
1026        if (rcur == NULL)
1027                return;
1028        agbp = rcur->bc_private.a.agbp;
1029        xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1030        if (error)
1031                xfs_trans_brelse(tp, agbp);
1032}
1033
1034/*
1035 * Process one of the deferred refcount operations.  We pass back the
1036 * btree cursor to maintain our lock on the btree between calls.
1037 * This saves time and eliminates a buffer deadlock between the
1038 * superblock and the AGF because we'll always grab them in the same
1039 * order.
1040 */
1041int
1042xfs_refcount_finish_one(
1043        struct xfs_trans                *tp,
1044        struct xfs_defer_ops            *dfops,
1045        enum xfs_refcount_intent_type   type,
1046        xfs_fsblock_t                   startblock,
1047        xfs_extlen_t                    blockcount,
1048        xfs_fsblock_t                   *new_fsb,
1049        xfs_extlen_t                    *new_len,
1050        struct xfs_btree_cur            **pcur)
1051{
1052        struct xfs_mount                *mp = tp->t_mountp;
1053        struct xfs_btree_cur            *rcur;
1054        struct xfs_buf                  *agbp = NULL;
1055        int                             error = 0;
1056        xfs_agnumber_t                  agno;
1057        xfs_agblock_t                   bno;
1058        xfs_agblock_t                   new_agbno;
1059        unsigned long                   nr_ops = 0;
1060        int                             shape_changes = 0;
1061
1062        agno = XFS_FSB_TO_AGNO(mp, startblock);
1063        ASSERT(agno != NULLAGNUMBER);
1064        bno = XFS_FSB_TO_AGBNO(mp, startblock);
1065
1066        trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
1067                        type, XFS_FSB_TO_AGBNO(mp, startblock),
1068                        blockcount);
1069
1070        if (XFS_TEST_ERROR(false, mp,
1071                        XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1072                return -EIO;
1073
1074        /*
1075         * If we haven't gotten a cursor or the cursor AG doesn't match
1076         * the startblock, get one now.
1077         */
1078        rcur = *pcur;
1079        if (rcur != NULL && rcur->bc_private.a.agno != agno) {
1080                nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
1081                shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
1082                xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1083                rcur = NULL;
1084                *pcur = NULL;
1085        }
1086        if (rcur == NULL) {
1087                error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
1088                                XFS_ALLOC_FLAG_FREEING, &agbp);
1089                if (error)
1090                        return error;
1091                if (!agbp)
1092                        return -EFSCORRUPTED;
1093
1094                rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
1095                if (!rcur) {
1096                        error = -ENOMEM;
1097                        goto out_cur;
1098                }
1099                rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
1100                rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
1101        }
1102        *pcur = rcur;
1103
1104        switch (type) {
1105        case XFS_REFCOUNT_INCREASE:
1106                error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1107                        new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
1108                *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
1109                break;
1110        case XFS_REFCOUNT_DECREASE:
1111                error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
1112                        new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
1113                *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
1114                break;
1115        case XFS_REFCOUNT_ALLOC_COW:
1116                *new_fsb = startblock + blockcount;
1117                *new_len = 0;
1118                error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
1119                break;
1120        case XFS_REFCOUNT_FREE_COW:
1121                *new_fsb = startblock + blockcount;
1122                *new_len = 0;
1123                error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
1124                break;
1125        default:
1126                ASSERT(0);
1127                error = -EFSCORRUPTED;
1128        }
1129        if (!error && *new_len > 0)
1130                trace_xfs_refcount_finish_one_leftover(mp, agno, type,
1131                                bno, blockcount, new_agbno, *new_len);
1132        return error;
1133
1134out_cur:
1135        xfs_trans_brelse(tp, agbp);
1136
1137        return error;
1138}
1139
1140/*
1141 * Record a refcount intent for later processing.
1142 */
1143static int
1144__xfs_refcount_add(
1145        struct xfs_mount                *mp,
1146        struct xfs_defer_ops            *dfops,
1147        enum xfs_refcount_intent_type   type,
1148        xfs_fsblock_t                   startblock,
1149        xfs_extlen_t                    blockcount)
1150{
1151        struct xfs_refcount_intent      *ri;
1152
1153        trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
1154                        type, XFS_FSB_TO_AGBNO(mp, startblock),
1155                        blockcount);
1156
1157        ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
1158                        KM_SLEEP | KM_NOFS);
1159        INIT_LIST_HEAD(&ri->ri_list);
1160        ri->ri_type = type;
1161        ri->ri_startblock = startblock;
1162        ri->ri_blockcount = blockcount;
1163
1164        xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1165        return 0;
1166}
1167
1168/*
1169 * Increase the reference count of the blocks backing a file's extent.
1170 */
1171int
1172xfs_refcount_increase_extent(
1173        struct xfs_mount                *mp,
1174        struct xfs_defer_ops            *dfops,
1175        struct xfs_bmbt_irec            *PREV)
1176{
1177        if (!xfs_sb_version_hasreflink(&mp->m_sb))
1178                return 0;
1179
1180        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
1181                        PREV->br_startblock, PREV->br_blockcount);
1182}
1183
1184/*
1185 * Decrease the reference count of the blocks backing a file's extent.
1186 */
1187int
1188xfs_refcount_decrease_extent(
1189        struct xfs_mount                *mp,
1190        struct xfs_defer_ops            *dfops,
1191        struct xfs_bmbt_irec            *PREV)
1192{
1193        if (!xfs_sb_version_hasreflink(&mp->m_sb))
1194                return 0;
1195
1196        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
1197                        PREV->br_startblock, PREV->br_blockcount);
1198}
1199
1200/*
1201 * Given an AG extent, find the lowest-numbered run of shared blocks
1202 * within that range and return the range in fbno/flen.  If
1203 * find_end_of_shared is set, return the longest contiguous extent of
1204 * shared blocks; if not, just return the first extent we find.  If no
1205 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1206 * and 0, respectively.
1207 */
1208int
1209xfs_refcount_find_shared(
1210        struct xfs_btree_cur            *cur,
1211        xfs_agblock_t                   agbno,
1212        xfs_extlen_t                    aglen,
1213        xfs_agblock_t                   *fbno,
1214        xfs_extlen_t                    *flen,
1215        bool                            find_end_of_shared)
1216{
1217        struct xfs_refcount_irec        tmp;
1218        int                             i;
1219        int                             have;
1220        int                             error;
1221
1222        trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
1223                        agbno, aglen);
1224
1225        /* By default, skip the whole range */
1226        *fbno = NULLAGBLOCK;
1227        *flen = 0;
1228
1229        /* Try to find a refcount extent that crosses the start */
1230        error = xfs_refcount_lookup_le(cur, agbno, &have);
1231        if (error)
1232                goto out_error;
1233        if (!have) {
1234                /* No left extent, look at the next one */
1235                error = xfs_btree_increment(cur, 0, &have);
1236                if (error)
1237                        goto out_error;
1238                if (!have)
1239                        goto done;
1240        }
1241        error = xfs_refcount_get_rec(cur, &tmp, &i);
1242        if (error)
1243                goto out_error;
1244        XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1245
1246        /* If the extent ends before the start, look at the next one */
1247        if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1248                error = xfs_btree_increment(cur, 0, &have);
1249                if (error)
1250                        goto out_error;
1251                if (!have)
1252                        goto done;
1253                error = xfs_refcount_get_rec(cur, &tmp, &i);
1254                if (error)
1255                        goto out_error;
1256                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1257        }
1258
1259        /* If the extent starts after the range we want, bail out */
1260        if (tmp.rc_startblock >= agbno + aglen)
1261                goto done;
1262
1263        /* We found the start of a shared extent! */
1264        if (tmp.rc_startblock < agbno) {
1265                tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1266                tmp.rc_startblock = agbno;
1267        }
1268
1269        *fbno = tmp.rc_startblock;
1270        *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1271        if (!find_end_of_shared)
1272                goto done;
1273
1274        /* Otherwise, find the end of this shared extent */
1275        while (*fbno + *flen < agbno + aglen) {
1276                error = xfs_btree_increment(cur, 0, &have);
1277                if (error)
1278                        goto out_error;
1279                if (!have)
1280                        break;
1281                error = xfs_refcount_get_rec(cur, &tmp, &i);
1282                if (error)
1283                        goto out_error;
1284                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
1285                if (tmp.rc_startblock >= agbno + aglen ||
1286                    tmp.rc_startblock != *fbno + *flen)
1287                        break;
1288                *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1289        }
1290
1291done:
1292        trace_xfs_refcount_find_shared_result(cur->bc_mp,
1293                        cur->bc_private.a.agno, *fbno, *flen);
1294
1295out_error:
1296        if (error)
1297                trace_xfs_refcount_find_shared_error(cur->bc_mp,
1298                                cur->bc_private.a.agno, error, _RET_IP_);
1299        return error;
1300}
1301
1302/*
1303 * Recovering CoW Blocks After a Crash
1304 *
1305 * Due to the way that the copy on write mechanism works, there's a window of
1306 * opportunity in which we can lose track of allocated blocks during a crash.
1307 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1308 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
1309 * no longer in the free space btree but are not otherwise recorded anywhere
1310 * until the write completes and the blocks are mapped into the file.  A crash
1311 * in between allocation and remapping results in the replacement blocks being
1312 * lost.  This situation is exacerbated by the CoW extent size hint because
1313 * allocations can hang around for long time.
1314 *
1315 * However, there is a place where we can record these allocations before they
1316 * become mappings -- the reference count btree.  The btree does not record
1317 * extents with refcount == 1, so we can record allocations with a refcount of
1318 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
1319 * no conflict with shared block records.  These mappings should be created
1320 * when we allocate blocks to the CoW fork and deleted when they're removed
1321 * from the CoW fork.
1322 *
1323 * Minor nit: records for in-progress CoW allocations and records for shared
1324 * extents must never be merged, to preserve the property that (except for CoW
1325 * allocations) there are no refcount btree entries with refcount == 1.  The
1326 * only time this could potentially happen is when unsharing a block that's
1327 * adjacent to CoW allocations, so we must be careful to avoid this.
1328 *
1329 * At mount time we recover lost CoW allocations by searching the refcount
1330 * btree for these refcount == 1 mappings.  These represent CoW allocations
1331 * that were in progress at the time the filesystem went down, so we can free
1332 * them to get the space back.
1333 *
1334 * This mechanism is superior to creating EFIs for unmapped CoW extents for
1335 * several reasons -- first, EFIs pin the tail of the log and would have to be
1336 * periodically relogged to avoid filling up the log.  Second, CoW completions
1337 * will have to file an EFD and create new EFIs for whatever remains in the
1338 * CoW fork; this partially takes care of (1) but extent-size reservations
1339 * will have to periodically relog even if there's no writeout in progress.
1340 * This can happen if the CoW extent size hint is set, which you really want.
1341 * Third, EFIs cannot currently be automatically relogged into newer
1342 * transactions to advance the log tail.  Fourth, stuffing the log full of
1343 * EFIs places an upper bound on the number of CoW allocations that can be
1344 * held filesystem-wide at any given time.  Recording them in the refcount
1345 * btree doesn't require us to maintain any state in memory and doesn't pin
1346 * the log.
1347 */
1348/*
1349 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
1350 * in that they're not referenced anywhere else in the filesystem, so we
1351 * stash them in the refcount btree with a refcount of 1 until either file
1352 * remapping (or CoW cancellation) happens.
1353 */
1354STATIC int
1355xfs_refcount_adjust_cow_extents(
1356        struct xfs_btree_cur    *cur,
1357        xfs_agblock_t           agbno,
1358        xfs_extlen_t            aglen,
1359        enum xfs_refc_adjust_op adj,
1360        struct xfs_defer_ops    *dfops,
1361        struct xfs_owner_info   *oinfo)
1362{
1363        struct xfs_refcount_irec        ext, tmp;
1364        int                             error;
1365        int                             found_rec, found_tmp;
1366
1367        if (aglen == 0)
1368                return 0;
1369
1370        /* Find any overlapping refcount records */
1371        error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
1372        if (error)
1373                goto out_error;
1374        error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1375        if (error)
1376                goto out_error;
1377        if (!found_rec) {
1378                ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
1379                                XFS_REFC_COW_START;
1380                ext.rc_blockcount = 0;
1381                ext.rc_refcount = 0;
1382        }
1383
1384        switch (adj) {
1385        case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1386                /* Adding a CoW reservation, there should be nothing here. */
1387                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1388                                ext.rc_startblock >= agbno + aglen, out_error);
1389
1390                tmp.rc_startblock = agbno;
1391                tmp.rc_blockcount = aglen;
1392                tmp.rc_refcount = 1;
1393                trace_xfs_refcount_modify_extent(cur->bc_mp,
1394                                cur->bc_private.a.agno, &tmp);
1395
1396                error = xfs_refcount_insert(cur, &tmp,
1397                                &found_tmp);
1398                if (error)
1399                        goto out_error;
1400                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1401                                found_tmp == 1, out_error);
1402                break;
1403        case XFS_REFCOUNT_ADJUST_COW_FREE:
1404                /* Removing a CoW reservation, there should be one extent. */
1405                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1406                        ext.rc_startblock == agbno, out_error);
1407                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1408                        ext.rc_blockcount == aglen, out_error);
1409                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1410                        ext.rc_refcount == 1, out_error);
1411
1412                ext.rc_refcount = 0;
1413                trace_xfs_refcount_modify_extent(cur->bc_mp,
1414                                cur->bc_private.a.agno, &ext);
1415                error = xfs_refcount_delete(cur, &found_rec);
1416                if (error)
1417                        goto out_error;
1418                XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
1419                                found_rec == 1, out_error);
1420                break;
1421        default:
1422                ASSERT(0);
1423        }
1424
1425        return error;
1426out_error:
1427        trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1428                        cur->bc_private.a.agno, error, _RET_IP_);
1429        return error;
1430}
1431
1432/*
1433 * Add or remove refcount btree entries for CoW reservations.
1434 */
1435STATIC int
1436xfs_refcount_adjust_cow(
1437        struct xfs_btree_cur    *cur,
1438        xfs_agblock_t           agbno,
1439        xfs_extlen_t            aglen,
1440        enum xfs_refc_adjust_op adj,
1441        struct xfs_defer_ops    *dfops)
1442{
1443        bool                    shape_changed;
1444        int                     error;
1445
1446        agbno += XFS_REFC_COW_START;
1447
1448        /*
1449         * Ensure that no rcextents cross the boundary of the adjustment range.
1450         */
1451        error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
1452        if (error)
1453                goto out_error;
1454
1455        error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
1456        if (error)
1457                goto out_error;
1458
1459        /*
1460         * Try to merge with the left or right extents of the range.
1461         */
1462        error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
1463                        XFS_FIND_RCEXT_COW, &shape_changed);
1464        if (error)
1465                goto out_error;
1466
1467        /* Now that we've taken care of the ends, adjust the middle extents */
1468        error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj,
1469                        dfops, NULL);
1470        if (error)
1471                goto out_error;
1472
1473        return 0;
1474
1475out_error:
1476        trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
1477                        error, _RET_IP_);
1478        return error;
1479}
1480
1481/*
1482 * Record a CoW allocation in the refcount btree.
1483 */
1484STATIC int
1485__xfs_refcount_cow_alloc(
1486        struct xfs_btree_cur    *rcur,
1487        xfs_agblock_t           agbno,
1488        xfs_extlen_t            aglen,
1489        struct xfs_defer_ops    *dfops)
1490{
1491        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1492                        agbno, aglen);
1493
1494        /* Add refcount btree reservation */
1495        return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1496                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
1497}
1498
1499/*
1500 * Remove a CoW allocation from the refcount btree.
1501 */
1502STATIC int
1503__xfs_refcount_cow_free(
1504        struct xfs_btree_cur    *rcur,
1505        xfs_agblock_t           agbno,
1506        xfs_extlen_t            aglen,
1507        struct xfs_defer_ops    *dfops)
1508{
1509        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1510                        agbno, aglen);
1511
1512        /* Remove refcount btree reservation */
1513        return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1514                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
1515}
1516
1517/* Record a CoW staging extent in the refcount btree. */
1518int
1519xfs_refcount_alloc_cow_extent(
1520        struct xfs_mount                *mp,
1521        struct xfs_defer_ops            *dfops,
1522        xfs_fsblock_t                   fsb,
1523        xfs_extlen_t                    len)
1524{
1525        int                             error;
1526
1527        if (!xfs_sb_version_hasreflink(&mp->m_sb))
1528                return 0;
1529
1530        error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
1531                        fsb, len);
1532        if (error)
1533                return error;
1534
1535        /* Add rmap entry */
1536        return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1537                        XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1538}
1539
1540/* Forget a CoW staging event in the refcount btree. */
1541int
1542xfs_refcount_free_cow_extent(
1543        struct xfs_mount                *mp,
1544        struct xfs_defer_ops            *dfops,
1545        xfs_fsblock_t                   fsb,
1546        xfs_extlen_t                    len)
1547{
1548        int                             error;
1549
1550        if (!xfs_sb_version_hasreflink(&mp->m_sb))
1551                return 0;
1552
1553        /* Remove rmap entry */
1554        error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1555                        XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1556        if (error)
1557                return error;
1558
1559        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1560                        fsb, len);
1561}
1562
1563struct xfs_refcount_recovery {
1564        struct list_head                rr_list;
1565        struct xfs_refcount_irec        rr_rrec;
1566};
1567
1568/* Stuff an extent on the recovery list. */
1569STATIC int
1570xfs_refcount_recover_extent(
1571        struct xfs_btree_cur            *cur,
1572        union xfs_btree_rec             *rec,
1573        void                            *priv)
1574{
1575        struct list_head                *debris = priv;
1576        struct xfs_refcount_recovery    *rr;
1577
1578        if (be32_to_cpu(rec->refc.rc_refcount) != 1)
1579                return -EFSCORRUPTED;
1580
1581        rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
1582        xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1583        list_add_tail(&rr->rr_list, debris);
1584
1585        return 0;
1586}
1587
1588/* Find and remove leftover CoW reservations. */
1589int
1590xfs_refcount_recover_cow_leftovers(
1591        struct xfs_mount                *mp,
1592        xfs_agnumber_t                  agno)
1593{
1594        struct xfs_trans                *tp;
1595        struct xfs_btree_cur            *cur;
1596        struct xfs_buf                  *agbp;
1597        struct xfs_refcount_recovery    *rr, *n;
1598        struct list_head                debris;
1599        union xfs_btree_irec            low;
1600        union xfs_btree_irec            high;
1601        struct xfs_defer_ops            dfops;
1602        xfs_fsblock_t                   fsb;
1603        xfs_agblock_t                   agbno;
1604        int                             error;
1605
1606        if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
1607                return -EOPNOTSUPP;
1608
1609        INIT_LIST_HEAD(&debris);
1610
1611        /*
1612         * In this first part, we use an empty transaction to gather up
1613         * all the leftover CoW extents so that we can subsequently
1614         * delete them.  The empty transaction is used to avoid
1615         * a buffer lock deadlock if there happens to be a loop in the
1616         * refcountbt because we're allowed to re-grab a buffer that is
1617         * already attached to our transaction.  When we're done
1618         * recording the CoW debris we cancel the (empty) transaction
1619         * and everything goes away cleanly.
1620         */
1621        error = xfs_trans_alloc_empty(mp, &tp);
1622        if (error)
1623                return error;
1624
1625        error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
1626        if (error)
1627                goto out_trans;
1628        if (!agbp) {
1629                error = -ENOMEM;
1630                goto out_trans;
1631        }
1632        cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
1633
1634        /* Find all the leftover CoW staging extents. */
1635        memset(&low, 0, sizeof(low));
1636        memset(&high, 0, sizeof(high));
1637        low.rc.rc_startblock = XFS_REFC_COW_START;
1638        high.rc.rc_startblock = -1U;
1639        error = xfs_btree_query_range(cur, &low, &high,
1640                        xfs_refcount_recover_extent, &debris);
1641        if (error)
1642                goto out_cursor;
1643        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1644        xfs_trans_brelse(tp, agbp);
1645        xfs_trans_cancel(tp);
1646
1647        /* Now iterate the list to free the leftovers */
1648        list_for_each_entry_safe(rr, n, &debris, rr_list) {
1649                /* Set up transaction. */
1650                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1651                if (error)
1652                        goto out_free;
1653
1654                trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
1655
1656                /* Free the orphan record */
1657                xfs_defer_init(&dfops, &fsb);
1658                agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
1659                fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
1660                error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
1661                                rr->rr_rrec.rc_blockcount);
1662                if (error)
1663                        goto out_defer;
1664
1665                /* Free the block. */
1666                xfs_bmap_add_free(mp, &dfops, fsb,
1667                                rr->rr_rrec.rc_blockcount, NULL);
1668
1669                error = xfs_defer_finish(&tp, &dfops);
1670                if (error)
1671                        goto out_defer;
1672
1673                error = xfs_trans_commit(tp);
1674                if (error)
1675                        goto out_free;
1676
1677                list_del(&rr->rr_list);
1678                kmem_free(rr);
1679        }
1680
1681        return error;
1682out_defer:
1683        xfs_defer_cancel(&dfops);
1684out_trans:
1685        xfs_trans_cancel(tp);
1686out_free:
1687        /* Free the leftover list */
1688        list_for_each_entry_safe(rr, n, &debris, rr_list) {
1689                list_del(&rr->rr_list);
1690                kmem_free(rr);
1691        }
1692        return error;
1693
1694out_cursor:
1695        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1696        xfs_trans_brelse(tp, agbp);
1697        goto out_trans;
1698}
1699
1700/* Is there a record covering a given extent? */
1701int
1702xfs_refcount_has_record(
1703        struct xfs_btree_cur    *cur,
1704        xfs_agblock_t           bno,
1705        xfs_extlen_t            len,
1706        bool                    *exists)
1707{
1708        union xfs_btree_irec    low;
1709        union xfs_btree_irec    high;
1710
1711        memset(&low, 0, sizeof(low));
1712        low.rc.rc_startblock = bno;
1713        memset(&high, 0xFF, sizeof(high));
1714        high.rc.rc_startblock = bno + len - 1;
1715
1716        return xfs_btree_has_record(cur, &low, &high, exists);
1717}
1718