linux/fs/xfs/xfs_trans_buf.c
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.
 */
STATIC struct xfs_buf *
xfs_trans_buf_item_match(
        struct xfs_trans        *tp,
        struct xfs_buftarg      *target,
        struct xfs_buf_map      *map,
        int                     nmaps)
{
        struct xfs_log_item_desc *lidp;
        struct xfs_buf_log_item *blip;
        int                     len = 0;
        int                     i;

        for (i = 0; i < nmaps; i++)
                len += map[i].bm_len;

        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
                blip = (struct xfs_buf_log_item *)lidp->lid_item;
                if (blip->bli_item.li_type == XFS_LI_BUF &&
                    blip->bli_buf->b_target == target &&
                    XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
                    blip->bli_buf->b_length == len) {
                        ASSERT(blip->bli_buf->b_map_count == nmaps);
                        return blip->bli_buf;
                }
        }

        return NULL;
}

/*
 * Add the locked buffer to the transaction.
 *
 * The buffer must be locked, and it cannot be associated with any
 * transaction.
 *
 * If the buffer does not yet have a buf log item associated with it,
 * then allocate one for it.  Then add the buf item to the transaction.
 */
STATIC void
_xfs_trans_bjoin(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp,
        int                     reset_recur)
{
        struct xfs_buf_log_item *bip;

        ASSERT(bp->b_transp == NULL);

        /*
         * The xfs_buf_log_item pointer is stored in b_fspriv.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, tp->t_mountp);
        bip = bp->b_fspriv;
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
        if (reset_recur)
                bip->bli_recur = 0;

        /*
         * Take a reference for this transaction on the buf item.
         */
        atomic_inc(&bip->bli_refcount);

        /*
         * Get a log_item_desc to point at the new item.
         */
        xfs_trans_add_item(tp, &bip->bli_item);

        /*
         * Initialize b_transp so we can find the buffer with
         * xfs_trans_buf_item_match() in xfs_trans_get_buf() and friends.
         */
        bp->b_transp = tp;
}

void
xfs_trans_bjoin(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp)
{
        _xfs_trans_bjoin(tp, bp, 0);
        trace_xfs_trans_bjoin(bp->b_fspriv);
}
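
/*
 * Usage sketch (an editor's illustration, not part of the original
 * file): xfs_trans_bjoin() hands an already-locked buffer, obtained
 * outside transaction context, over to a transaction.  Assuming the
 * usual xfs_buf_read() wrapper and a valid transaction "tp":
 *
 *      bp = xfs_buf_read(mp->m_ddev_targp, blkno, numblks, 0, ops);
 *      if (bp && !bp->b_error)
 *              xfs_trans_bjoin(tp, bp);
 *
 * From here on the buffer is released via xfs_trans_brelse() or at
 * commit time, not with a bare xfs_buf_relse().
 */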

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * get_buf() call.
 */
struct xfs_buf *
xfs_trans_get_buf_map(
        struct xfs_trans        *tp,
        struct xfs_buftarg      *target,
        struct xfs_buf_map      *map,
        int                     nmaps,
        xfs_buf_flags_t         flags)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;

        if (!tp)
                return xfs_buf_get_map(target, map, nmaps, flags);

        /*
         * If we find the buffer in the cache with this transaction
         * pointer in its b_transp field, then we know we already
         * have it locked.  In this case we just increment the lock
         * recursion count and return the buffer to the caller.
         */
        bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
        if (bp != NULL) {
                ASSERT(xfs_buf_islocked(bp));
                if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
                        xfs_buf_stale(bp);
                        XFS_BUF_DONE(bp);
                }

                ASSERT(bp->b_transp == tp);
                bip = bp->b_fspriv;
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
                trace_xfs_trans_get_buf_recur(bip);
                return bp;
        }

        bp = xfs_buf_get_map(target, map, nmaps, flags);
        if (bp == NULL)
                return NULL;

        ASSERT(!bp->b_error);

        _xfs_trans_bjoin(tp, bp, 1);
        trace_xfs_trans_get_buf(bp->b_fspriv);
        return bp;
}
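
/*
 * Usage sketch (illustrative, assuming the single-map
 * xfs_trans_get_buf() wrapper from xfs_trans.h): grabbing a buffer for
 * a freshly allocated block, where reading from disk is unwanted:
 *
 *      bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
 *                             XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0);
 *      if (!bp)
 *              return XFS_ERROR(ENOMEM);
 *      memset(bp->b_addr, 0, BBTOB(bp->b_length));
 *      xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
 */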

/*
 * Get and lock the superblock buffer of this file system for the
 * given transaction.
 *
 * We don't need the buffer matching done by xfs_trans_buf_item_match()
 * here, because the superblock buffer is a private buffer which we
 * keep a pointer to in the mount structure.
 */
xfs_buf_t *
xfs_trans_getsb(xfs_trans_t     *tp,
                struct xfs_mount *mp,
                int             flags)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;

        /*
         * Default to just trying to lock the superblock buffer
         * if tp is NULL.
         */
        if (tp == NULL)
                return xfs_getsb(mp, flags);

        /*
         * If the superblock buffer already has this transaction
         * pointer in its b_transp field, then we know we already
         * have it locked.  In this case we just increment the lock
         * recursion count and return the buffer to the caller.
         */
        bp = mp->m_sb_bp;
        if (bp->b_transp == tp) {
                bip = bp->b_fspriv;
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
                trace_xfs_trans_getsb_recur(bip);
                return bp;
        }

        bp = xfs_getsb(mp, flags);
        if (bp == NULL)
                return NULL;

        _xfs_trans_bjoin(tp, bp, 1);
        trace_xfs_trans_getsb(bp->b_fspriv);
        return bp;
}
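
/*
 * Usage sketch (illustrative, not from this file): code that updates
 * on-disk superblock fields locks the sb buffer through the
 * transaction and then logs the changed byte range:
 *
 *      bp = xfs_trans_getsb(tp, mp, 0);
 *      ... modify the superblock image at bp->b_addr ...
 *      xfs_trans_log_buf(tp, bp, first, last);
 */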

#ifdef DEBUG
xfs_buftarg_t *xfs_error_target;
int     xfs_do_error;
int     xfs_req_num;
int     xfs_error_mod = 33;
#endif

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it has not yet been
 * read in, read it from disk.  If it is already locked
 * within the transaction and already read in, just increment its
 * lock recursion count and return a pointer to it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * read_buf() call.
 */
int
xfs_trans_read_buf_map(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
        struct xfs_buftarg      *target,
        struct xfs_buf_map      *map,
        int                     nmaps,
        xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp,
        const struct xfs_buf_ops *ops)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;
        int                     error;

        *bpp = NULL;
        if (!tp) {
                bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
                if (!bp)
                        return (flags & XBF_TRYLOCK) ?
                                        EAGAIN : XFS_ERROR(ENOMEM);

                if (bp->b_error) {
                        error = bp->b_error;
                        xfs_buf_ioerror_alert(bp, __func__);
                        XFS_BUF_UNDONE(bp);
                        xfs_buf_stale(bp);
                        xfs_buf_relse(bp);
                        return error;
                }
#ifdef DEBUG
                if (xfs_do_error) {
                        if (xfs_error_target == target) {
                                if (((xfs_req_num++) % xfs_error_mod) == 0) {
                                        xfs_buf_relse(bp);
                                        xfs_debug(mp, "Returning error!");
                                        return XFS_ERROR(EIO);
                                }
                        }
                }
#endif
                if (XFS_FORCED_SHUTDOWN(mp))
                        goto shutdown_abort;
                *bpp = bp;
                return 0;
        }

        /*
         * If we find the buffer in the cache with this transaction
         * pointer in its b_transp field, then we know we already
         * have it locked.  If it is already read in we just increment
         * the lock recursion count and return the buffer to the caller.
         * If the buffer is not yet read in, then we read it in, increment
         * the lock recursion count, and return it to the caller.
         */
        bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
        if (bp != NULL) {
                ASSERT(xfs_buf_islocked(bp));
                ASSERT(bp->b_transp == tp);
                ASSERT(bp->b_fspriv != NULL);
                ASSERT(!bp->b_error);
                if (!XFS_BUF_ISDONE(bp)) {
                        trace_xfs_trans_read_buf_io(bp, _RET_IP_);
                        ASSERT(!XFS_BUF_ISASYNC(bp));
                        ASSERT(bp->b_iodone == NULL);
                        XFS_BUF_READ(bp);
                        bp->b_ops = ops;

                        /*
                         * XXX(hch): clean up the error handling here to be less
                         * of a mess..
                         */
                        if (XFS_FORCED_SHUTDOWN(mp)) {
                                trace_xfs_bdstrat_shut(bp, _RET_IP_);
                                xfs_bioerror_relse(bp);
                        } else {
                                xfs_buf_iorequest(bp);
                        }

                        error = xfs_buf_iowait(bp);
                        if (error) {
                                xfs_buf_ioerror_alert(bp, __func__);
                                xfs_buf_relse(bp);
                                /*
                                 * We can gracefully recover from most read
                                 * errors. Ones we can't are those that happen
                                 * after the transaction's already dirty.
                                 */
                                if (tp->t_flags & XFS_TRANS_DIRTY)
                                        xfs_force_shutdown(tp->t_mountp,
                                                        SHUTDOWN_META_IO_ERROR);
                                return error;
                        }
                }
                /*
                 * We never locked this buf ourselves, so we shouldn't
                 * brelse it either.  Just get out.
                 */
                if (XFS_FORCED_SHUTDOWN(mp)) {
                        trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
                        *bpp = NULL;
                        return XFS_ERROR(EIO);
                }

                bip = bp->b_fspriv;
                bip->bli_recur++;

                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                trace_xfs_trans_read_buf_recur(bip);
                *bpp = bp;
                return 0;
        }

        bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
        if (bp == NULL) {
                *bpp = NULL;
                return (flags & XBF_TRYLOCK) ?
                                        0 : XFS_ERROR(ENOMEM);
        }
        if (bp->b_error) {
                error = bp->b_error;
                xfs_buf_stale(bp);
                XFS_BUF_DONE(bp);
                xfs_buf_ioerror_alert(bp, __func__);
                if (tp->t_flags & XFS_TRANS_DIRTY)
                        xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
                xfs_buf_relse(bp);
                return error;
        }
#ifdef DEBUG
        if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
                if (xfs_error_target == target) {
                        if (((xfs_req_num++) % xfs_error_mod) == 0) {
                                xfs_force_shutdown(tp->t_mountp,
                                                   SHUTDOWN_META_IO_ERROR);
                                xfs_buf_relse(bp);
                                xfs_debug(mp, "Returning trans error!");
                                return XFS_ERROR(EIO);
                        }
                }
        }
#endif
        if (XFS_FORCED_SHUTDOWN(mp))
                goto shutdown_abort;

        _xfs_trans_bjoin(tp, bp, 1);
        trace_xfs_trans_read_buf(bp->b_fspriv);

        *bpp = bp;
        return 0;

shutdown_abort:
        trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
        xfs_buf_relse(bp);
        *bpp = NULL;
        return XFS_ERROR(EIO);
}
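
/*
 * Usage sketch (illustrative, assuming the single-map
 * xfs_trans_read_buf() wrapper from xfs_trans.h): the common
 * read-modify-log cycle inside a transaction:
 *
 *      error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
 *                                 numblks, 0, &bp, ops);
 *      if (error)
 *              return error;
 *      ... modify the buffer contents ...
 *      xfs_trans_log_buf(tp, bp, first, last);
 *
 * On success the buffer is locked and joined to the transaction; if it
 * turns out nothing needs changing, hand it back with
 * xfs_trans_brelse(tp, bp) instead.
 */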

/*
 * Release the buffer bp which was previously acquired with one of the
 * xfs_trans_... buffer allocation routines if the buffer has not
 * been modified within this transaction.  If the buffer has been
 * modified within this transaction, decrement the recursion count
 * but do not release the buffer even if the count goes to 0.  If the
 * buffer is not modified within the transaction, decrement the
 * recursion count and release the buffer if the recursion count
 * goes to 0.
 *
 * If the buffer is to be released and it was not modified before
 * this transaction began, then free the buf_log_item associated with it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * brelse() call.
 */
void
xfs_trans_brelse(xfs_trans_t    *tp,
                 xfs_buf_t      *bp)
{
        xfs_buf_log_item_t      *bip;

        /*
         * Default to a normal brelse() call if the tp is NULL.
         */
        if (tp == NULL) {
                ASSERT(bp->b_transp == NULL);
                xfs_buf_relse(bp);
                return;
        }

        ASSERT(bp->b_transp == tp);
        bip = bp->b_fspriv;
        ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        trace_xfs_trans_brelse(bip);

        /*
         * If the release is just for a recursive lock,
         * then decrement the count and return.
         */
        if (bip->bli_recur > 0) {
                bip->bli_recur--;
                return;
        }

        /*
         * If the buffer is dirty within this transaction, we can't
         * release it until we commit.
         */
        if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
                return;

        /*
         * If the buffer has been invalidated, then we can't release
         * it until the transaction commits to disk unless it is re-dirtied
         * as part of this transaction.  This prevents us from pulling
         * the item from the AIL before we should.
         */
        if (bip->bli_flags & XFS_BLI_STALE)
                return;

        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));

        /*
         * Free up the log item descriptor tracking the released item.
         */
        xfs_trans_del_item(&bip->bli_item);

        /*
         * Clear the hold flag in the buf log item if it is set.
         * We wouldn't want the next user of the buffer to
         * get confused.
         */
        if (bip->bli_flags & XFS_BLI_HOLD)
                bip->bli_flags &= ~XFS_BLI_HOLD;

        /*
         * Drop our reference to the buf log item.
         */
        atomic_dec(&bip->bli_refcount);

        /*
         * If the buf item is not tracking data in the log, then
         * we must free it before releasing the buffer back to the
         * free pool.  Before releasing the buffer to the free pool,
         * clear the transaction pointer in b_transp to dissolve
         * its relation to this transaction.
         */
        if (!xfs_buf_item_dirty(bip)) {
/***
                ASSERT(bp->b_pincount == 0);
***/
                ASSERT(atomic_read(&bip->bli_refcount) == 0);
                ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
                ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
                xfs_buf_item_relse(bp);
        }

        bp->b_transp = NULL;
        xfs_buf_relse(bp);
}
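
/*
 * Usage sketch (illustrative, not from this file): xfs_trans_brelse()
 * lets a lookup bail out of a buffer it only examined, e.g. while
 * walking a btree (interesting() stands in for caller logic):
 *
 *      error = xfs_trans_read_buf(mp, tp, target, blkno, numblks,
 *                                 0, &bp, ops);
 *      if (error)
 *              return error;
 *      if (!interesting(bp))
 *              xfs_trans_brelse(tp, bp);
 *
 * Since nothing was logged, the buffer is unlocked and disassociated
 * from the transaction immediately.
 */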

/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * iop_unlock() routine is called.  The buffer must already be locked
 * and associated with the given transaction.
 */
/* ARGSUSED */
void
xfs_trans_bhold(xfs_trans_t     *tp,
                xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_HOLD;
        trace_xfs_trans_bhold(bip);
}

/*
 * Cancel the previous buffer hold request made on this buffer
 * for this transaction.
 */
void
xfs_trans_bhold_release(xfs_trans_t     *tp,
                        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        ASSERT(bip->bli_flags & XFS_BLI_HOLD);

        bip->bli_flags &= ~XFS_BLI_HOLD;
        trace_xfs_trans_bhold_release(bip);
}
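
/*
 * Usage sketch (illustrative): XFS_BLI_HOLD is the mechanism behind
 * "rolling" a buffer from one transaction to the next, keeping it
 * locked across the commit:
 *
 *      xfs_trans_bhold(tp, bp);
 *      error = xfs_trans_commit(tp, 0);
 *      ...
 *      xfs_trans_bjoin(new_tp, bp);
 *
 * The commit unlocks everything else in tp but leaves bp locked so the
 * follow-up transaction can join it.
 */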

/*
 * This is called to mark bytes first through last inclusive of the given
 * buffer as needing to be logged when the transaction is committed.
 * The buffer must already be associated with the given transaction.
 *
 * First and last are numbers relative to the beginning of this buffer,
 * so the first byte in the buffer is numbered 0 regardless of the
 * buffer's block number.
 */
void
xfs_trans_log_buf(xfs_trans_t   *tp,
                  xfs_buf_t     *bp,
                  uint          first,
                  uint          last)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(first <= last && last < BBTOB(bp->b_length));
        ASSERT(bp->b_iodone == NULL ||
               bp->b_iodone == xfs_buf_iodone_callbacks);

        /*
         * Mark the buffer as needing to be written out eventually,
         * and set its iodone function to remove the buffer's buf log
         * item from the AIL and free it when the buffer is flushed
         * to disk.  See xfs_buf_attach_iodone() for more details
         * on li_cb and xfs_buf_iodone_callbacks().
         * If we end up aborting this transaction, we trap this buffer
         * inside the b_bdstrat callback so that this won't get written to
         * disk.
         */
        XFS_BUF_DONE(bp);

        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        bp->b_iodone = xfs_buf_iodone_callbacks;
        bip->bli_item.li_cb = xfs_buf_iodone;

        trace_xfs_trans_log_buf(bip);

        /*
         * If we invalidated the buffer within this transaction, then
         * cancel the invalidation now that we're dirtying the buffer
         * again.  There are no races with the code in xfs_buf_item_unpin(),
         * because we have a reference to the buffer this entire time.
         */
        if (bip->bli_flags & XFS_BLI_STALE) {
                bip->bli_flags &= ~XFS_BLI_STALE;
                ASSERT(XFS_BUF_ISSTALE(bp));
                XFS_BUF_UNSTALE(bp);
                bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
        }

        tp->t_flags |= XFS_TRANS_DIRTY;
        bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;

        /*
         * For an ordered buffer we are not logging any dirty range, but
         * it still needs to be marked dirty and marked as logged.
         */
        bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
        if (!(bip->bli_flags & XFS_BLI_ORDERED))
                xfs_buf_item_log(bip, first, last);
}
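
/*
 * Usage sketch (illustrative): log only the byte range that actually
 * changed; first and last are inclusive offsets within the buffer:
 *
 *      memcpy((char *)bp->b_addr + offset, data, len);
 *      xfs_trans_log_buf(tp, bp, offset, offset + len - 1);
 *
 * Smaller ranges keep the logged region, and hence log traffic, down.
 */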

/*
 * Invalidate a buffer that is being used within a transaction.
 *
 * Typically this is because the blocks in the buffer are being freed, so we
 * need to prevent it from being written out when we're done.  Allowing it
 * to be written again might overwrite data in the free blocks if they are
 * reallocated to a file.
 *
 * We prevent the buffer from being written out by marking it stale.  We can't
 * get rid of the buf log item at this point because the buffer may still be
 * pinned by another transaction.  If that is the case, then we'll wait until
 * the buffer is committed to disk for the last time (we can tell by the ref
 * count) and free it in xfs_buf_item_unpin().  Until that happens we will
 * keep the buffer locked so that the buffer and buf log item are not reused.
 *
 * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log
 * the buf item.  This will be used at recovery time to determine that copies
 * of the buffer in the log before this should not be replayed.
 *
 * We mark the item descriptor and the transaction dirty so that we'll hold
 * the buffer until after the commit.
 *
 * Since we're invalidating the buffer, we also clear the state about which
 * parts of the buffer have been logged.  We also clear the flag indicating
 * that this is an inode buffer since the data in the buffer will no longer
 * be valid.
 *
 * We set the stale bit in the buffer as well since we're getting rid of it.
 */
void
xfs_trans_binval(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;
        int                     i;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        trace_xfs_trans_binval(bip);

        if (bip->bli_flags & XFS_BLI_STALE) {
                /*
                 * If the buffer is already invalidated, then
                 * just return.
                 */
                ASSERT(XFS_BUF_ISSTALE(bp));
                ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
                ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
                ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
                ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
                ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
                ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
                return;
        }

        xfs_buf_stale(bp);

        bip->bli_flags |= XFS_BLI_STALE;
        bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
        bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
        bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
        bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
        for (i = 0; i < bip->bli_format_count; i++) {
                memset(bip->bli_formats[i].blf_data_map, 0,
                       (bip->bli_formats[i].blf_map_size * sizeof(uint)));
        }
        bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
        tp->t_flags |= XFS_TRANS_DIRTY;
}
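
/*
 * Usage sketch (illustrative, not from this file): paths that free
 * metadata blocks invalidate any buffer covering them so stale
 * contents are never written back over reallocated blocks:
 *
 *      bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, daddr, numblks, 0);
 *      if (bp)
 *              xfs_trans_binval(tp, bp);
 */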

/*
 * This call is used to indicate that the buffer contains on-disk inodes which
 * must be handled specially during recovery.  They require special handling
 * because only the di_next_unlinked from the inodes in the buffer should be
 * recovered.  The rest of the data in the buffer is logged via the inodes
 * themselves.
 *
 * All we do is set the XFS_BLI_INODE_BUF flag in the item's flags so it can
 * be transferred to the buffer's log format structure so that we'll know
 * what to do at recovery time.
 */
void
xfs_trans_inode_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_INODE_BUF;
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}

/*
 * This call is used to indicate that the buffer is going to be staled
 * and was an inode buffer.  This means it gets special processing
 * during unpin: any inodes associated with the buffer should be
 * removed from the AIL.  There is also special processing during
 * recovery: any replay of the inodes in the buffer needs to be
 * prevented, as the buffer may have been reused.
 */
void
xfs_trans_stale_inode_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_STALE_INODE;
        bip->bli_item.li_cb = xfs_buf_iodone;
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}

/*
 * Mark the buffer as being one which contains newly allocated
 * inodes.  We need to make sure that even if this buffer is
 * relogged as an 'inode buf' we still recover all of the inode
 * images in the face of a crash.  This works in coordination with
 * xfs_buf_item_committed() to ensure that the buffer remains in the
 * AIL at its original location even after it has been relogged.
 */
/* ARGSUSED */
void
xfs_trans_inode_alloc_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}
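
/*
 * Usage sketch (illustrative, modeled loosely on inode cluster
 * initialization): after writing fresh inode cores into a cluster
 * buffer, the caller marks it so recovery replays the full images:
 *
 *      xfs_trans_inode_alloc_buf(tp, fbuf);
 *      xfs_trans_log_buf(tp, fbuf, 0, BBTOB(fbuf->b_length) - 1);
 */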

/*
 * Mark the buffer as ordered for this transaction.  This means
 * that the contents of the buffer are not recorded in the transaction
 * but the buffer is tracked in the AIL as though it were.  This allows
 * us to record logical changes in transactions rather than the physical
 * changes we make to the buffer without changing writeback ordering
 * constraints of metadata buffers.
 */
void
xfs_trans_ordered_buf(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp)
{
        struct xfs_buf_log_item *bip = bp->b_fspriv;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_ORDERED;
        trace_xfs_buf_item_ordered(bip);
}

/*
 * Set the type of the buffer for log recovery so that recovery can
 * correctly identify the buffer and hence attach the correct buffer ops
 * to it after replay.
 */
void
xfs_trans_buf_set_type(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp,
        enum xfs_blft           type)
{
        struct xfs_buf_log_item *bip = bp->b_fspriv;

        if (!tp)
                return;

        ASSERT(bp->b_transp == tp);
        ASSERT(bip != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        xfs_blft_to_flags(&bip->__bli_format, type);
}

void
xfs_trans_buf_copy_type(
        struct xfs_buf          *dst_bp,
        struct xfs_buf          *src_bp)
{
        struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
        struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
        enum xfs_blft           type;

        type = xfs_blft_from_flags(&sbip->__bli_format);
        xfs_blft_to_flags(&dbip->__bli_format, type);
}
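
/*
 * Usage sketch (illustrative): a caller that has just formatted a new
 * btree block would tag the buffer so recovery can attach matching
 * verifiers later:
 *
 *      xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
 */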

/*
 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
 * dquots. However, unlike in inode buffer recovery, dquot buffers get
 * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
 * The only thing that makes dquot buffers different from regular
 * buffers is that we must not replay dquot bufs when recovering
 * if a _corresponding_ quotaoff has happened. We also have to distinguish
 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
 * can be turned off independently.
 */
/* ARGSUSED */
void
xfs_trans_dquot_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp,
        uint            type)
{
        struct xfs_buf_log_item *bip = bp->b_fspriv;

        ASSERT(type == XFS_BLF_UDQUOT_BUF ||
               type == XFS_BLF_PDQUOT_BUF ||
               type == XFS_BLF_GDQUOT_BUF);

        bip->__bli_format.blf_flags |= type;

        switch (type) {
        case XFS_BLF_UDQUOT_BUF:
                type = XFS_BLFT_UDQUOT_BUF;
                break;
        case XFS_BLF_PDQUOT_BUF:
                type = XFS_BLFT_PDQUOT_BUF;
                break;
        case XFS_BLF_GDQUOT_BUF:
                type = XFS_BLFT_GDQUOT_BUF;
                break;
        default:
                type = XFS_BLFT_UNKNOWN_BUF;
                break;
        }

        xfs_trans_buf_set_type(tp, bp, type);
}
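
/*
 * Usage sketch (illustrative, not from this file): the quota code would
 * tag a freshly initialized dquot cluster buffer with the flag matching
 * its dquot type, then log it:
 *
 *      xfs_trans_dquot_buf(tp, bp, XFS_BLF_UDQUOT_BUF);
 *      xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
 */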