linux/fs/xfs/scrub/common.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/btree.h"
#include "scrub/repair.h"
#include "scrub/health.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions is used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */
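
/*
 * For example, a scrubber might wrap a btree lookup like this (a
 * hypothetical sketch, not a caller in this file):
 *
 *      error = xfs_alloc_lookup_le(sc->sa.bno_cur, agbno, len, &has_rec);
 *      if (!xchk_process_error(sc, agno, agbno, &error))
 *              return error;
 *
 * For ENOMEM and friends, the nonzero *error goes back to the scrub
 * dispatcher; for EFSBADCRC/EFSCORRUPTED, OFLAG_CORRUPT is set, *error
 * becomes zero, and the scrubber returns that zero.
 */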

/* Check for operational errors. */
static bool
__xchk_process_error(
        struct xfs_scrub        *sc,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno,
        int                     *error,
        __u32                   errflag,
        void                    *ret_ip)
{
        switch (*error) {
        case 0:
                return true;
        case -EDEADLOCK:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
        case -EFSBADCRC:
        case -EFSCORRUPTED:
                /* Note the badness but don't abort. */
                sc->sm->sm_flags |= errflag;
                *error = 0;
                /* fall through */
        default:
                trace_xchk_op_error(sc, agno, bno, *error,
                                ret_ip);
                break;
        }
        return false;
}

bool
xchk_process_error(
        struct xfs_scrub        *sc,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno,
        int                     *error)
{
        return __xchk_process_error(sc, agno, bno, error,
                        XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_xref_process_error(
        struct xfs_scrub        *sc,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno,
        int                     *error)
{
        return __xchk_process_error(sc, agno, bno, error,
                        XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
__xchk_fblock_process_error(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset,
        int                     *error,
        __u32                   errflag,
        void                    *ret_ip)
{
        switch (*error) {
        case 0:
                return true;
        case -EDEADLOCK:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
        case -EFSBADCRC:
        case -EFSCORRUPTED:
                /* Note the badness but don't abort. */
                sc->sm->sm_flags |= errflag;
                *error = 0;
                /* fall through */
        default:
                trace_xchk_file_op_error(sc, whichfork, offset, *error,
                                ret_ip);
                break;
        }
        return false;
}

bool
xchk_fblock_process_error(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset,
        int                     *error)
{
        return __xchk_fblock_process_error(sc, whichfork, offset, error,
                        XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_fblock_xref_process_error(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset,
        int                     *error)
{
        return __xchk_fblock_process_error(sc, whichfork, offset, error,
                        XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions is used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */
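
/*
 * For example, an AGF scrubber comparing an on-disk field against the
 * scrub context might do (a hypothetical sketch):
 *
 *      if (be32_to_cpu(agf->agf_seqno) != sc->sm->sm_agno)
 *              xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 *
 * No error code changes hands; the report reaches userspace via
 * sc->sm->sm_flags.
 */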

/* Record a block which could be optimized. */
void
xchk_block_set_preen(
        struct xfs_scrub        *sc,
        struct xfs_buf          *bp)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
        trace_xchk_block_preen(sc, bp->b_bn, __return_address);
}

/* Record an inode which could be optimized. */
void
xchk_ino_set_preen(
        struct xfs_scrub        *sc,
        xfs_ino_t               ino)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
        trace_xchk_ino_preen(sc, ino, __return_address);
}

/* Record something being wrong with the filesystem. */
void
xchk_set_corrupt(
        struct xfs_scrub        *sc)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        trace_xchk_fs_error(sc, 0, __return_address);
}

/* Record a corrupt block. */
void
xchk_block_set_corrupt(
        struct xfs_scrub        *sc,
        struct xfs_buf          *bp)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        trace_xchk_block_error(sc, bp->b_bn, __return_address);
}

/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
        struct xfs_scrub        *sc,
        struct xfs_buf          *bp)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
        trace_xchk_block_error(sc, bp->b_bn, __return_address);
}

/* Record a corrupt inode. */
void
xchk_ino_set_corrupt(
        struct xfs_scrub        *sc,
        xfs_ino_t               ino)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
        struct xfs_scrub        *sc,
        xfs_ino_t               ino)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
        trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
        trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/*
 * Warn about an inode that needs administrative review but is not
 * incorrect.
 */
void
xchk_ino_set_warning(
        struct xfs_scrub        *sc,
        xfs_ino_t               ino)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
        trace_xchk_ino_warning(sc, ino, __return_address);
}

/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           offset)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
        trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}

/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
        struct xfs_scrub        *sc)
{
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
        trace_xchk_incomplete(sc, __return_address);
}

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xchk_rmap_ownedby_info {
        const struct xfs_owner_info     *oinfo;
        xfs_filblks_t                   *blocks;
};

STATIC int
xchk_count_rmap_ownedby_irec(
        struct xfs_btree_cur            *cur,
        struct xfs_rmap_irec            *rec,
        void                            *priv)
{
        struct xchk_rmap_ownedby_info   *sroi = priv;
        bool                            irec_attr;
        bool                            oinfo_attr;

        irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
        oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

        if (rec->rm_owner != sroi->oinfo->oi_owner)
                return 0;

        if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
                (*sroi->blocks) += rec->rm_blockcount;

        return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
xchk_count_rmap_ownedby_ag(
        struct xfs_scrub                *sc,
        struct xfs_btree_cur            *cur,
        const struct xfs_owner_info     *oinfo,
        xfs_filblks_t                   *blocks)
{
        struct xchk_rmap_ownedby_info   sroi = {
                .oinfo                  = oinfo,
                .blocks                 = blocks,
        };

        *blocks = 0;
        return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
                        &sroi);
}
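
/*
 * A cross-referencer could compare this count against its own idea of
 * how many blocks an owner holds, e.g. (a hypothetical sketch):
 *
 *      error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
 *                      &XFS_RMAP_OINFO_FS, &blocks);
 *      if (!xchk_process_error(sc, agno, 0, &error))
 *              return error;
 *      if (blocks != expected)
 *              xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 */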

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
        struct xfs_scrub        *sc,
        unsigned int            type)
{
        /* Return all AG header read failures when scanning btrees. */
        if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
            sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
            sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
                return true;
        /*
         * If we're scanning a given type of AG header, we only want to
         * see read failures from that specific header.  We'd like the
         * other headers to cross-check it, but this isn't required.
         */
        if (sc->sm->sm_type == type)
                return true;
        return false;
}

/*
 * Grab all the headers for an AG.
 *
 * The headers should be released by xchk_ag_free, but as a fail
 * safe we attach all the buffers we grab to the scrub transaction so
 * they'll all be freed when we cancel it.
 */
int
xchk_ag_read_headers(
        struct xfs_scrub        *sc,
        xfs_agnumber_t          agno,
        struct xfs_buf          **agi,
        struct xfs_buf          **agf,
        struct xfs_buf          **agfl)
{
        struct xfs_mount        *mp = sc->mp;
        int                     error;

        error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
        if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
                goto out;

        error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
        if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
                goto out;

        error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
        if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
                goto out;
        error = 0;
out:
        return error;
}

/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
        struct xchk_ag          *sa)
{
        if (sa->refc_cur)
                xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
        if (sa->rmap_cur)
                xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
        if (sa->fino_cur)
                xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
        if (sa->ino_cur)
                xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
        if (sa->cnt_cur)
                xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
        if (sa->bno_cur)
                xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

        sa->refc_cur = NULL;
        sa->rmap_cur = NULL;
        sa->fino_cur = NULL;
        sa->ino_cur = NULL;
        sa->bno_cur = NULL;
        sa->cnt_cur = NULL;
}

/* Initialize all the btree cursors for an AG. */
int
xchk_ag_btcur_init(
        struct xfs_scrub        *sc,
        struct xchk_ag          *sa)
{
        struct xfs_mount        *mp = sc->mp;
        xfs_agnumber_t          agno = sa->agno;

        xchk_perag_get(sc->mp, sa);
        if (sa->agf_bp &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
                /* Set up a bnobt cursor for cross-referencing. */
                sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno, XFS_BTNUM_BNO);
                if (!sa->bno_cur)
                        goto err;
        }

        if (sa->agf_bp &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
                /* Set up a cntbt cursor for cross-referencing. */
                sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno, XFS_BTNUM_CNT);
                if (!sa->cnt_cur)
                        goto err;
        }

        /* Set up an inobt cursor for cross-referencing. */
        if (sa->agi_bp &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
                sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
                                        agno, XFS_BTNUM_INO);
                if (!sa->ino_cur)
                        goto err;
        }

        /* Set up a finobt cursor for cross-referencing. */
        if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
                sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
                                agno, XFS_BTNUM_FINO);
                if (!sa->fino_cur)
                        goto err;
        }

        /* Set up a rmapbt cursor for cross-referencing. */
        if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
                sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno);
                if (!sa->rmap_cur)
                        goto err;
        }

        /* Set up a refcountbt cursor for cross-referencing. */
        if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) &&
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
                sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
                                sa->agf_bp, agno);
                if (!sa->refc_cur)
                        goto err;
        }

        return 0;
err:
        return -ENOMEM;
}

/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
        struct xfs_scrub        *sc,
        struct xchk_ag          *sa)
{
        xchk_ag_btcur_free(sa);
        if (sa->agfl_bp) {
                xfs_trans_brelse(sc->tp, sa->agfl_bp);
                sa->agfl_bp = NULL;
        }
        if (sa->agf_bp) {
                xfs_trans_brelse(sc->tp, sa->agf_bp);
                sa->agf_bp = NULL;
        }
        if (sa->agi_bp) {
                xfs_trans_brelse(sc->tp, sa->agi_bp);
                sa->agi_bp = NULL;
        }
        if (sa->pag) {
                xfs_perag_put(sa->pag);
                sa->pag = NULL;
        }
        sa->agno = NULLAGNUMBER;
}

/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 */
int
xchk_ag_init(
        struct xfs_scrub        *sc,
        xfs_agnumber_t          agno,
        struct xchk_ag          *sa)
{
        int                     error;

        sa->agno = agno;
        error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
                        &sa->agf_bp, &sa->agfl_bp);
        if (error)
                return error;

        return xchk_ag_btcur_init(sc, sa);
}
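
/*
 * Seen from a caller, the usual lifecycle is (a hypothetical sketch;
 * teardown normally happens in the scrub core):
 *
 *      error = xchk_ag_init(sc, agno, &sc->sa);
 *      if (!xchk_process_error(sc, agno, 0, &error))
 *              return error;
 *      (cross-reference against sc->sa.bno_cur, sc->sa.rmap_cur, ...)
 *      xchk_ag_free(sc, &sc->sa);
 */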

/*
 * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
 * xchk_ag will release it for us.
 */
void
xchk_perag_get(
        struct xfs_mount        *mp,
        struct xchk_ag          *sa)
{
        if (!sa->pag)
                sa->pag = xfs_perag_get(mp, sa->agno);
}

/* Per-scrubber setup functions */

/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xchk_trans_alloc(
        struct xfs_scrub        *sc,
        uint                    resblks)
{
        if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
                return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
                                resblks, 0, 0, &sc->tp);

        return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

/* Set us up with a transaction and an empty context. */
int
xchk_setup_fs(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip)
{
        uint                    resblks;

        resblks = xrep_calc_ag_resblks(sc);
        return xchk_trans_alloc(sc, resblks);
}

/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip,
        bool                    force_log)
{
        struct xfs_mount        *mp = sc->mp;
        int                     error;

        /*
         * If the caller asks us to checkpoint the log, do so.  This
         * expensive operation should be performed infrequently and only
         * as a last resort.  Any caller that sets force_log should
         * document why they need to do so.
         */
        if (force_log) {
                error = xchk_checkpoint_log(mp);
                if (error)
                        return error;
        }

        error = xchk_setup_fs(sc, ip);
        if (error)
                return error;

        return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}

/* Push everything out of the log onto disk. */
int
xchk_checkpoint_log(
        struct xfs_mount        *mp)
{
        int                     error;

        error = xfs_log_force(mp, XFS_LOG_SYNC);
        if (error)
                return error;
        xfs_ail_push_all_sync(mp->m_ail);
        return 0;
}

/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
xchk_get_inode(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip_in)
{
        struct xfs_imap         imap;
        struct xfs_mount        *mp = sc->mp;
        struct xfs_inode        *ip = NULL;
        int                     error;

        /* We want to scan the inode we already had opened. */
        if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
                sc->ip = ip_in;
                return 0;
        }

        /* Look up the inode, see if the generation number matches. */
        if (xfs_internal_inum(mp, sc->sm->sm_ino))
                return -ENOENT;
        error = xfs_iget(mp, NULL, sc->sm->sm_ino,
                        XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
        switch (error) {
        case -ENOENT:
                /* Inode doesn't exist, just bail out. */
                return error;
        case 0:
                /* Got an inode, continue. */
                break;
        case -EINVAL:
                /*
                 * -EINVAL with IGET_UNTRUSTED could mean one of several
                 * things: userspace gave us an inode number that doesn't
                 * correspond to fs space, or doesn't have an inobt entry;
                 * or it could simply mean that the inode buffer failed the
                 * read verifiers.
                 *
                 * Try just the inode mapping lookup -- if it succeeds, then
                 * the inode buffer verifier failed and something needs fixing.
                 * Otherwise, we really couldn't find it so tell userspace
                 * that it no longer exists.
                 */
                error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
                                XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
                if (error)
                        return -ENOENT;
                error = -EFSCORRUPTED;
                /* fall through */
        default:
                trace_xchk_op_error(sc,
                                XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
                                XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
                                error, __return_address);
                return error;
        }
        if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
                xfs_irele(ip);
                return -ENOENT;
        }

        sc->ip = ip;
        return 0;
}

/* Set us up to scrub a file's contents. */
int
xchk_setup_inode_contents(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip,
        unsigned int            resblks)
{
        int                     error;

        error = xchk_get_inode(sc, ip);
        if (error)
                return error;

        /* Got the inode, lock it and we're ready to go. */
        sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
        xfs_ilock(sc->ip, sc->ilock_flags);
        error = xchk_trans_alloc(sc, resblks);
        if (error)
                goto out;
        sc->ilock_flags |= XFS_ILOCK_EXCL;
        xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
        /* scrub teardown will unlock and release the inode for us */
        return error;
}

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xchk_should_check_xref(
        struct xfs_scrub        *sc,
        int                     *error,
        struct xfs_btree_cur    **curpp)
{
        /* No point in xref if we already know we're corrupt. */
        if (xchk_skip_xref(sc->sm))
                return false;

        if (*error == 0)
                return true;

        if (curpp) {
                /* If we've already given up on xref, just bail out. */
                if (!*curpp)
                        return false;

                /* xref error, delete cursor and bail out. */
                xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
                *curpp = NULL;
        }

        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
        trace_xchk_xref_error(sc, *error, __return_address);

        /*
         * Errors encountered during cross-referencing with another
         * data structure should not cause this scrubber to abort.
         */
        *error = 0;
        return false;
}
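
/*
 * A typical cross-reference check follows this pattern (a sketch
 * modeled on xchk_xref_is_used_space() in scrub/alloc.c):
 *
 *      error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len,
 *                      &is_freesp);
 *      if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 *              return;
 *      if (is_freesp)
 *              xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 */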

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
xchk_buffer_recheck(
        struct xfs_scrub        *sc,
        struct xfs_buf          *bp)
{
        xfs_failaddr_t          fa;

        if (bp->b_ops == NULL) {
                xchk_block_set_corrupt(sc, bp);
                return;
        }
        if (bp->b_ops->verify_struct == NULL) {
                xchk_set_incomplete(sc);
                return;
        }
        fa = bp->b_ops->verify_struct(bp);
        if (!fa)
                return;
        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        trace_xchk_block_error(sc, bp->b_bn, fa);
}

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xchk_metadata_inode_forks(
        struct xfs_scrub        *sc)
{
        __u32                   smtype;
        bool                    shared;
        int                     error;

        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return 0;

        /* Metadata inodes don't live on the rt device. */
        if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
                xchk_ino_set_corrupt(sc, sc->ip->i_ino);
                return 0;
        }

        /* They should never participate in reflink. */
        if (xfs_is_reflink_inode(sc->ip)) {
                xchk_ino_set_corrupt(sc, sc->ip->i_ino);
                return 0;
        }

        /* They also should never have extended attributes. */
        if (xfs_inode_hasattr(sc->ip)) {
                xchk_ino_set_corrupt(sc, sc->ip->i_ino);
                return 0;
        }

        /* Invoke the data fork scrubber. */
        smtype = sc->sm->sm_type;
        sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
        error = xchk_bmap_data(sc);
        sc->sm->sm_type = smtype;
        if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
                return error;

        /* Look for incorrect shared blocks. */
        if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
                error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
                                &shared);
                if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
                                &error))
                        return error;
                if (shared)
                        xchk_ino_set_corrupt(sc, sc->ip->i_ino);
        }

        return error;
}

/*
 * Try to lock an inode in violation of the usual locking order rules.  For
 * example, trying to get the IOLOCK while in transaction context, or just
 * plain breaking AG-order or inode-order inode locking rules.  Either way,
 * the only way to avoid an ABBA deadlock is to use trylock and back off if
 * we can't.
 */
int
xchk_ilock_inverted(
        struct xfs_inode        *ip,
        uint                    lock_mode)
{
        int                     i;

        for (i = 0; i < 20; i++) {
                if (xfs_ilock_nowait(ip, lock_mode))
                        return 0;
                delay(1);
        }
        return -EDEADLOCK;
}
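
/*
 * Callers treat -EDEADLOCK from this function like any other
 * operational error (a hypothetical sketch):
 *
 *      error = xchk_ilock_inverted(ip, XFS_IOLOCK_SHARED);
 *      if (error)
 *              goto out_rele;
 *
 * whereupon the scrub core can back out and retry, or give up.
 */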

/* Pause background reaping of resources. */
void
xchk_stop_reaping(
        struct xfs_scrub        *sc)
{
        sc->flags |= XCHK_REAPING_DISABLED;
        xfs_stop_block_reaping(sc->mp);
}

/* Restart background reaping of resources. */
void
xchk_start_reaping(
        struct xfs_scrub        *sc)
{
        xfs_start_block_reaping(sc->mp);
        sc->flags &= ~XCHK_REAPING_DISABLED;
}