linux/fs/xfs/xfs_bmap_item.c
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_bmap_item.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_trace.h"

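/*
 * The BUI (bmap update intent) and BUD (bmap update done) log items are
 * logged as a pair: the intent describes a deferred mapping or unmapping
 * of an extent in an inode's block map, and the done item is logged once
 * that change has been committed.  If recovery finds a BUI in the log
 * without a matching BUD, xfs_bui_recover() below replays the operation.
 */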
kmem_zone_t	*xfs_bui_zone;
kmem_zone_t	*xfs_bud_zone;

static inline struct xfs_bui_log_item *BUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_bui_log_item, bui_item);
}

void
xfs_bui_item_free(
	struct xfs_bui_log_item	*buip)
{
	kmem_zone_free(xfs_bui_zone, buip);
}

STATIC void
xfs_bui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_bui_log_item	*buip = BUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given bui log item. We use only 1 iovec, and we point that
 * at the bui_log_format structure embedded in the bui item.
 * It is at this point that we assert that all of the extent
 * slots in the bui item have been filled.
 */
STATIC void
xfs_bui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_bui_log_item	*buip = BUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&buip->bui_next_extent) ==
			buip->bui_format.bui_nextents);

	buip->bui_format.bui_type = XFS_LI_BUI;
	buip->bui_format.bui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format,
			xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
}

/*
 * Pinning has no meaning for a bui item, so just return.
 */
STATIC void
xfs_bui_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * The unpin operation is the last place a BUI is manipulated in the log. It is
 * either inserted in the AIL or aborted in the event of a log I/O error. In
 * either case, the BUI transaction has been successfully committed to make it
 * this far. Therefore, we expect whoever committed the BUI to either construct
 * and commit the BUD or drop the BUD's reference in the event of error. Simply
 * drop the log's BUI reference now that the log is done with it.
 */
STATIC void
xfs_bui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_bui_log_item	*buip = BUI_ITEM(lip);

	xfs_bui_release(buip);
}

/*
 * BUI items have no locking or pushing.  However, since BUIs are pulled from
 * the AIL when their corresponding BUDs are committed to disk, their situation
 * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
 * will eventually flush the log.  This should help in getting the BUI out of
 * the AIL.
 */
STATIC uint
xfs_bui_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The BUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a BUD isn't going to be
 * constructed and thus we free the BUI here directly.
 */
STATIC void
xfs_bui_item_unlock(
	struct xfs_log_item	*lip)
{
	if (lip->li_flags & XFS_LI_ABORTED)
		xfs_bui_item_free(BUI_ITEM(lip));
}

/*
 * The BUI is logged only once and cannot be moved in the log, so simply return
 * the lsn at which it's been logged.
 */
STATIC xfs_lsn_t
xfs_bui_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	return lsn;
}

/*
 * The BUI dependency tracking op doesn't do squat.  It can't because it
 * doesn't know where the mapping update is coming from.  The dependency
 * tracking has to be handled by the "enclosing" metadata object.  For
 * example, for inodes, the inode is locked throughout the mapping change,
 * so the dependency should be recorded there.
 */
STATIC void
xfs_bui_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all bui log items.
 */
static const struct xfs_item_ops xfs_bui_item_ops = {
	.iop_size	= xfs_bui_item_size,
	.iop_format	= xfs_bui_item_format,
	.iop_pin	= xfs_bui_item_pin,
	.iop_unpin	= xfs_bui_item_unpin,
	.iop_unlock	= xfs_bui_item_unlock,
	.iop_committed	= xfs_bui_item_committed,
	.iop_push	= xfs_bui_item_push,
	.iop_committing = xfs_bui_item_committing,
};

/*
 * Allocate and initialize a bui item with room for XFS_BUI_MAX_FAST_EXTENTS
 * extents.
 */
struct xfs_bui_log_item *
xfs_bui_init(
	struct xfs_mount		*mp)
{
	struct xfs_bui_log_item		*buip;

	buip = kmem_zone_zalloc(xfs_bui_zone, KM_SLEEP);

	xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops);
	buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS;
	buip->bui_format.bui_id = (uintptr_t)(void *)buip;
	atomic_set(&buip->bui_next_extent, 0);
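	/*
	 * The BUI starts with two references: one is dropped when the item is
	 * unpinned after the log write (or by its creator if the transaction
	 * aborts), the other when the corresponding BUD is committed or
	 * aborted.
	 */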
	atomic_set(&buip->bui_refcount, 2);

	return buip;
}

/*
 * Freeing the BUI requires that we remove it from the AIL if it has already
 * been placed there. However, the BUI may not yet have been placed in the AIL
 * when xfs_bui_release() is called from BUD processing, due to the ordering of
 * committed vs. unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the BUI.
 */
void
xfs_bui_release(
	struct xfs_bui_log_item	*buip)
{
	if (atomic_dec_and_test(&buip->bui_refcount)) {
		xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_bui_item_free(buip);
	}
}

static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_bud_log_item, bud_item);
}

STATIC void
xfs_bud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_bud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given bud log item. We use only 1 iovec, and we point that
 * at the bud_log_format structure embedded in the bud item.
 */
STATIC void
xfs_bud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	budp->bud_format.bud_type = XFS_LI_BUD;
	budp->bud_format.bud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format,
			sizeof(struct xfs_bud_log_format));
}

/*
 * Pinning has no meaning for a bud item, so just return.
 */
STATIC void
xfs_bud_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * Since pinning has no meaning for a bud item, unpinning does
 * not either.
 */
STATIC void
xfs_bud_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
}

/*
 * There isn't much you can do to push on a bud item.  It is simply stuck
 * waiting for the log to be flushed to disk.
 */
STATIC uint
xfs_bud_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The BUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the BUI and free the
 * BUD.
 */
STATIC void
xfs_bud_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);

	if (lip->li_flags & XFS_LI_ABORTED) {
		xfs_bui_release(budp->bud_buip);
		kmem_zone_free(xfs_bud_zone, budp);
	}
}

/*
 * When the bud item is committed to disk, all we need to do is delete our
 * reference to our partner bui item and then free ourselves. Since we're
 * freeing ourselves we must return -1 to keep the transaction code from
 * further referencing this item.
 */
STATIC xfs_lsn_t
xfs_bud_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);

	/*
	 * Drop the BUI reference regardless of whether the BUD has been
	 * aborted. Once the BUD transaction is constructed, it is the sole
	 * responsibility of the BUD to release the BUI (even if the BUI is
	 * aborted due to log I/O error).
	 */
	xfs_bui_release(budp->bud_buip);
	kmem_zone_free(xfs_bud_zone, budp);

	return (xfs_lsn_t)-1;
}

/*
 * The BUD dependency tracking op doesn't do squat.  It can't because it
 * doesn't know where the mapping update is coming from.  The dependency
 * tracking has to be handled by the "enclosing" metadata object.  For
 * example, for inodes, the inode is locked throughout the mapping change,
 * so the dependency should be recorded there.
 */
STATIC void
xfs_bud_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all bud log items.
 */
static const struct xfs_item_ops xfs_bud_item_ops = {
	.iop_size	= xfs_bud_item_size,
	.iop_format	= xfs_bud_item_format,
	.iop_pin	= xfs_bud_item_pin,
	.iop_unpin	= xfs_bud_item_unpin,
	.iop_unlock	= xfs_bud_item_unlock,
	.iop_committed	= xfs_bud_item_committed,
	.iop_push	= xfs_bud_item_push,
	.iop_committing = xfs_bud_item_committing,
};

/*
 * Allocate and initialize a bud item, linking it to the given bui item.
 */
struct xfs_bud_log_item *
xfs_bud_init(
	struct xfs_mount		*mp,
	struct xfs_bui_log_item		*buip)
{
	struct xfs_bud_log_item	*budp;

	budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
	xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
	budp->bud_buip = buip;
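	/* Record the intent's id so log recovery can match this BUD to its BUI. */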
	budp->bud_format.bud_bui_id = buip->bui_format.bui_id;

	return budp;
}

/*
 * Process a bmap update intent item that was recovered from the log.
 * We need to update some inode's bmbt.
 */
int
xfs_bui_recover(
	struct xfs_mount		*mp,
	struct xfs_bui_log_item		*buip)
{
	int				error = 0;
	unsigned int			bui_type;
	struct xfs_map_extent		*bmap;
	xfs_fsblock_t			startblock_fsb;
	xfs_fsblock_t			inode_fsb;
	bool				op_ok;
	struct xfs_bud_log_item		*budp;
	enum xfs_bmap_intent_type	type;
	int				whichfork;
	xfs_exntst_t			state;
	struct xfs_trans		*tp;
	struct xfs_inode		*ip = NULL;
	struct xfs_defer_ops		dfops;
	xfs_fsblock_t			firstfsb;

	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));

	/* Only one mapping operation per BUI... */
	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
		xfs_bui_release(buip);
		return -EIO;
	}

	/*
	 * First check the validity of the extent described by the
	 * BUI.  If anything is bad, then toss the BUI.
	 */
	bmap = &buip->bui_format.bui_extents[0];
	startblock_fsb = XFS_BB_TO_FSB(mp,
			   XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
	inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
			XFS_INO_TO_FSB(mp, bmap->me_owner)));
	switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
	case XFS_BMAP_MAP:
	case XFS_BMAP_UNMAP:
		op_ok = true;
		break;
	default:
		op_ok = false;
		break;
	}
	if (!op_ok || startblock_fsb == 0 ||
	    bmap->me_len == 0 ||
	    inode_fsb == 0 ||
	    startblock_fsb >= mp->m_sb.sb_dblocks ||
	    bmap->me_len >= mp->m_sb.sb_agblocks ||
	    inode_fsb >= mp->m_sb.sb_dblocks ||
	    (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
		/*
		 * This will pull the BUI from the AIL and
		 * free the memory associated with it.
		 */
		set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
		xfs_bui_release(buip);
		return -EIO;
	}

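	/*
	 * Allocate a transaction to re-run the mapping change and attach a
	 * new BUD to it so the recovered intent can be marked done.
	 */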
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error)
		return error;
	budp = xfs_trans_get_bud(tp, buip);

	/* Grab the inode. */
	error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
	if (error)
		goto err_inode;

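	/*
	 * If the inode was unlinked, flag it so that it is not reaped by
	 * inactivation while log recovery is still in progress.
	 */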
	if (VFS_I(ip)->i_nlink == 0)
		xfs_iflags_set(ip, XFS_IRECOVERY);
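	/*
	 * Set up a deferred-operations list to collect any further work
	 * (e.g. rmap updates) generated while finishing this mapping change.
	 */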
	xfs_defer_init(&dfops, &firstfsb);

	/* Process deferred bmap item. */
	state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
			XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
	whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
			XFS_ATTR_FORK : XFS_DATA_FORK;
	bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
	switch (bui_type) {
	case XFS_BMAP_MAP:
	case XFS_BMAP_UNMAP:
		type = bui_type;
		break;
	default:
		error = -EFSCORRUPTED;
		goto err_dfops;
	}
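	/*
	 * Join the inode and replay the recorded mapping change; this also
	 * logs the change against the BUD so the intent is marked complete.
	 */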
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
			ip, whichfork, bmap->me_startoff,
			bmap->me_startblock, bmap->me_len,
			state);
	if (error)
		goto err_dfops;

	/* Finish deferred ops, commit, and release the inode. */
	error = xfs_defer_finish(&tp, &dfops, NULL);
	if (error)
		goto err_dfops;

	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	IRELE(ip);

	return error;

err_dfops:
	xfs_defer_cancel(&dfops);
err_inode:
	xfs_trans_cancel(tp);
	if (ip) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		IRELE(ip);
	}
	return error;
}