linux/fs/xfs/scrub/dabtree.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_log_format.h"
  13#include "xfs_trans.h"
  14#include "xfs_inode.h"
  15#include "xfs_dir2.h"
  16#include "xfs_dir2_priv.h"
  17#include "xfs_attr_leaf.h"
  18#include "scrub/scrub.h"
  19#include "scrub/common.h"
  20#include "scrub/trace.h"
  21#include "scrub/dabtree.h"
  22
  23/* Directory/Attribute Btree */
  24
  25/*
  26 * Check for da btree operation errors.  See the section about handling
  27 * operational errors in common.c.
  28 */
  29bool
  30xchk_da_process_error(
  31        struct xchk_da_btree    *ds,
  32        int                     level,
  33        int                     *error)
  34{
  35        struct xfs_scrub        *sc = ds->sc;
  36
  37        if (*error == 0)
  38                return true;
  39
  40        switch (*error) {
  41        case -EDEADLOCK:
  42                /* Used to restart an op with deadlock avoidance. */
  43                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
  44                break;
  45        case -EFSBADCRC:
  46        case -EFSCORRUPTED:
  47                /* Note the badness but don't abort. */
  48                sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  49                *error = 0;
  50                fallthrough;
  51        default:
  52                trace_xchk_file_op_error(sc, ds->dargs.whichfork,
  53                                xfs_dir2_da_to_db(ds->dargs.geo,
  54                                        ds->state->path.blk[level].blkno),
  55                                *error, __return_address);
  56                break;
  57        }
  58        return false;
  59}
  60
  61/*
  62 * Check for da btree corruption.  See the section about handling
  63 * operational errors in common.c.
  64 */
  65void
  66xchk_da_set_corrupt(
  67        struct xchk_da_btree    *ds,
  68        int                     level)
  69{
  70        struct xfs_scrub        *sc = ds->sc;
  71
  72        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  73
  74        trace_xchk_fblock_error(sc, ds->dargs.whichfork,
  75                        xfs_dir2_da_to_db(ds->dargs.geo,
  76                                ds->state->path.blk[level].blkno),
  77                        __return_address);
  78}
  79
  80static struct xfs_da_node_entry *
  81xchk_da_btree_node_entry(
  82        struct xchk_da_btree            *ds,
  83        int                             level)
  84{
  85        struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
  86        struct xfs_da3_icnode_hdr       hdr;
  87
  88        ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
  89
  90        xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
  91        return hdr.btree + blk->index;
  92}
  93
  94/* Scrub a da btree hash (key). */
  95int
  96xchk_da_btree_hash(
  97        struct xchk_da_btree            *ds,
  98        int                             level,
  99        __be32                          *hashp)
 100{
 101        struct xfs_da_node_entry        *entry;
 102        xfs_dahash_t                    hash;
 103        xfs_dahash_t                    parent_hash;
 104
 105        /* Is this hash in order? */
 106        hash = be32_to_cpu(*hashp);
 107        if (hash < ds->hashes[level])
 108                xchk_da_set_corrupt(ds, level);
 109        ds->hashes[level] = hash;
 110
 111        if (level == 0)
 112                return 0;
 113
 114        /* Is this hash no larger than the parent hash? */
 115        entry = xchk_da_btree_node_entry(ds, level - 1);
 116        parent_hash = be32_to_cpu(entry->hashval);
 117        if (parent_hash < hash)
 118                xchk_da_set_corrupt(ds, level);
 119
 120        return 0;
 121}
 122
 123/*
 124 * Check a da btree pointer.  Returns true if it's ok to use this
 125 * pointer.
 126 */
 127STATIC bool
 128xchk_da_btree_ptr_ok(
 129        struct xchk_da_btree    *ds,
 130        int                     level,
 131        xfs_dablk_t             blkno)
 132{
 133        if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
 134                xchk_da_set_corrupt(ds, level);
 135                return false;
 136        }
 137
 138        return true;
 139}
 140
 141/*
 142 * The da btree scrubber can handle leaf1 blocks as a degenerate
 143 * form of leafn blocks.  Since the regular da code doesn't handle
 144 * leaf1, we must multiplex the verifiers.
 145 */
 146static void
 147xchk_da_btree_read_verify(
 148        struct xfs_buf          *bp)
 149{
 150        struct xfs_da_blkinfo   *info = bp->b_addr;
 151
 152        switch (be16_to_cpu(info->magic)) {
 153        case XFS_DIR2_LEAF1_MAGIC:
 154        case XFS_DIR3_LEAF1_MAGIC:
 155                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 156                bp->b_ops->verify_read(bp);
 157                return;
 158        default:
 159                /*
 160                 * xfs_da3_node_buf_ops already know how to handle
 161                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
 162                 */
 163                bp->b_ops = &xfs_da3_node_buf_ops;
 164                bp->b_ops->verify_read(bp);
 165                return;
 166        }
 167}
 168static void
 169xchk_da_btree_write_verify(
 170        struct xfs_buf          *bp)
 171{
 172        struct xfs_da_blkinfo   *info = bp->b_addr;
 173
 174        switch (be16_to_cpu(info->magic)) {
 175        case XFS_DIR2_LEAF1_MAGIC:
 176        case XFS_DIR3_LEAF1_MAGIC:
 177                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 178                bp->b_ops->verify_write(bp);
 179                return;
 180        default:
 181                /*
 182                 * xfs_da3_node_buf_ops already know how to handle
 183                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
 184                 */
 185                bp->b_ops = &xfs_da3_node_buf_ops;
 186                bp->b_ops->verify_write(bp);
 187                return;
 188        }
 189}
 190static void *
 191xchk_da_btree_verify(
 192        struct xfs_buf          *bp)
 193{
 194        struct xfs_da_blkinfo   *info = bp->b_addr;
 195
 196        switch (be16_to_cpu(info->magic)) {
 197        case XFS_DIR2_LEAF1_MAGIC:
 198        case XFS_DIR3_LEAF1_MAGIC:
 199                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 200                return bp->b_ops->verify_struct(bp);
 201        default:
 202                bp->b_ops = &xfs_da3_node_buf_ops;
 203                return bp->b_ops->verify_struct(bp);
 204        }
 205}
 206
 207static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
 208        .name = "xchk_da_btree",
 209        .verify_read = xchk_da_btree_read_verify,
 210        .verify_write = xchk_da_btree_write_verify,
 211        .verify_struct = xchk_da_btree_verify,
 212};
 213
 214/* Check a block's sibling. */
 215STATIC int
 216xchk_da_btree_block_check_sibling(
 217        struct xchk_da_btree    *ds,
 218        int                     level,
 219        int                     direction,
 220        xfs_dablk_t             sibling)
 221{
 222        struct xfs_da_state_path *path = &ds->state->path;
 223        struct xfs_da_state_path *altpath = &ds->state->altpath;
 224        int                     retval;
 225        int                     plevel;
 226        int                     error;
 227
 228        memcpy(altpath, path, sizeof(ds->state->altpath));
 229
 230        /*
 231         * If the pointer is null, we shouldn't be able to move the upper
 232         * level pointer anywhere.
 233         */
 234        if (sibling == 0) {
 235                error = xfs_da3_path_shift(ds->state, altpath, direction,
 236                                false, &retval);
 237                if (error == 0 && retval == 0)
 238                        xchk_da_set_corrupt(ds, level);
 239                error = 0;
 240                goto out;
 241        }
 242
 243        /* Move the alternate cursor one block in the direction given. */
 244        error = xfs_da3_path_shift(ds->state, altpath, direction, false,
 245                        &retval);
 246        if (!xchk_da_process_error(ds, level, &error))
 247                goto out;
 248        if (retval) {
 249                xchk_da_set_corrupt(ds, level);
 250                goto out;
 251        }
 252        if (altpath->blk[level].bp)
 253                xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
 254
 255        /* Compare upper level pointer to sibling pointer. */
 256        if (altpath->blk[level].blkno != sibling)
 257                xchk_da_set_corrupt(ds, level);
 258
 259out:
 260        /* Free all buffers in the altpath that aren't referenced from path. */
 261        for (plevel = 0; plevel < altpath->active; plevel++) {
 262                if (altpath->blk[plevel].bp == NULL ||
 263                    (plevel < path->active &&
 264                     altpath->blk[plevel].bp == path->blk[plevel].bp))
 265                        continue;
 266
 267                xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
 268                altpath->blk[plevel].bp = NULL;
 269        }
 270
 271        return error;
 272}
 273
 274/* Check a block's sibling pointers. */
 275STATIC int
 276xchk_da_btree_block_check_siblings(
 277        struct xchk_da_btree    *ds,
 278        int                     level,
 279        struct xfs_da_blkinfo   *hdr)
 280{
 281        xfs_dablk_t             forw;
 282        xfs_dablk_t             back;
 283        int                     error = 0;
 284
 285        forw = be32_to_cpu(hdr->forw);
 286        back = be32_to_cpu(hdr->back);
 287
 288        /* Top level blocks should not have sibling pointers. */
 289        if (level == 0) {
 290                if (forw != 0 || back != 0)
 291                        xchk_da_set_corrupt(ds, level);
 292                return 0;
 293        }
 294
 295        /*
 296         * Check back (left) and forw (right) pointers.  These functions
 297         * absorb error codes for us.
 298         */
 299        error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
 300        if (error)
 301                goto out;
 302        error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
 303
 304out:
 305        memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
 306        return error;
 307}
 308
 309/* Load a dir/attribute block from a btree. */
 310STATIC int
 311xchk_da_btree_block(
 312        struct xchk_da_btree            *ds,
 313        int                             level,
 314        xfs_dablk_t                     blkno)
 315{
 316        struct xfs_da_state_blk         *blk;
 317        struct xfs_da_intnode           *node;
 318        struct xfs_da_node_entry        *btree;
 319        struct xfs_da3_blkinfo          *hdr3;
 320        struct xfs_da_args              *dargs = &ds->dargs;
 321        struct xfs_inode                *ip = ds->dargs.dp;
 322        xfs_ino_t                       owner;
 323        int                             *pmaxrecs;
 324        struct xfs_da3_icnode_hdr       nodehdr;
 325        int                             error = 0;
 326
 327        blk = &ds->state->path.blk[level];
 328        ds->state->path.active = level + 1;
 329
 330        /* Release old block. */
 331        if (blk->bp) {
 332                xfs_trans_brelse(dargs->trans, blk->bp);
 333                blk->bp = NULL;
 334        }
 335
 336        /* Check the pointer. */
 337        blk->blkno = blkno;
 338        if (!xchk_da_btree_ptr_ok(ds, level, blkno))
 339                goto out_nobuf;
 340
 341        /* Read the buffer. */
 342        error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
 343                        XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
 344                        &xchk_da_btree_buf_ops);
 345        if (!xchk_da_process_error(ds, level, &error))
 346                goto out_nobuf;
 347        if (blk->bp)
 348                xchk_buffer_recheck(ds->sc, blk->bp);
 349
 350        /*
 351         * We didn't find a dir btree root block, which means that
 352         * there's no LEAF1/LEAFN tree (at least not where it's supposed
 353         * to be), so jump out now.
 354         */
 355        if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
 356                        blk->bp == NULL)
 357                goto out_nobuf;
 358
 359        /* It's /not/ ok for attr trees not to have a da btree. */
 360        if (blk->bp == NULL) {
 361                xchk_da_set_corrupt(ds, level);
 362                goto out_nobuf;
 363        }
 364
 365        hdr3 = blk->bp->b_addr;
 366        blk->magic = be16_to_cpu(hdr3->hdr.magic);
 367        pmaxrecs = &ds->maxrecs[level];
 368
 369        /* We only started zeroing the header on v5 filesystems. */
 370        if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
 371                xchk_da_set_corrupt(ds, level);
 372
 373        /* Check the owner. */
 374        if (xfs_has_crc(ip->i_mount)) {
 375                owner = be64_to_cpu(hdr3->owner);
 376                if (owner != ip->i_ino)
 377                        xchk_da_set_corrupt(ds, level);
 378        }
 379
 380        /* Check the siblings. */
 381        error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
 382        if (error)
 383                goto out;
 384
 385        /* Interpret the buffer. */
 386        switch (blk->magic) {
 387        case XFS_ATTR_LEAF_MAGIC:
 388        case XFS_ATTR3_LEAF_MAGIC:
 389                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 390                                XFS_BLFT_ATTR_LEAF_BUF);
 391                blk->magic = XFS_ATTR_LEAF_MAGIC;
 392                blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
 393                if (ds->tree_level != 0)
 394                        xchk_da_set_corrupt(ds, level);
 395                break;
 396        case XFS_DIR2_LEAFN_MAGIC:
 397        case XFS_DIR3_LEAFN_MAGIC:
 398                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 399                                XFS_BLFT_DIR_LEAFN_BUF);
 400                blk->magic = XFS_DIR2_LEAFN_MAGIC;
 401                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 402                if (ds->tree_level != 0)
 403                        xchk_da_set_corrupt(ds, level);
 404                break;
 405        case XFS_DIR2_LEAF1_MAGIC:
 406        case XFS_DIR3_LEAF1_MAGIC:
 407                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 408                                XFS_BLFT_DIR_LEAF1_BUF);
 409                blk->magic = XFS_DIR2_LEAF1_MAGIC;
 410                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 411                if (ds->tree_level != 0)
 412                        xchk_da_set_corrupt(ds, level);
 413                break;
 414        case XFS_DA_NODE_MAGIC:
 415        case XFS_DA3_NODE_MAGIC:
 416                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 417                                XFS_BLFT_DA_NODE_BUF);
 418                blk->magic = XFS_DA_NODE_MAGIC;
 419                node = blk->bp->b_addr;
 420                xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
 421                btree = nodehdr.btree;
 422                *pmaxrecs = nodehdr.count;
 423                blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
 424                if (level == 0) {
 425                        if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
 426                                xchk_da_set_corrupt(ds, level);
 427                                goto out_freebp;
 428                        }
 429                        ds->tree_level = nodehdr.level;
 430                } else {
 431                        if (ds->tree_level != nodehdr.level) {
 432                                xchk_da_set_corrupt(ds, level);
 433                                goto out_freebp;
 434                        }
 435                }
 436
 437                /* XXX: Check hdr3.pad32 once we know how to fix it. */
 438                break;
 439        default:
 440                xchk_da_set_corrupt(ds, level);
 441                goto out_freebp;
 442        }
 443
 444        /*
 445         * If we've been handed a block that is below the dabtree root, does
 446         * its hashval match what the parent block expected to see?
 447         */
 448        if (level > 0) {
 449                struct xfs_da_node_entry        *key;
 450
 451                key = xchk_da_btree_node_entry(ds, level - 1);
 452                if (be32_to_cpu(key->hashval) != blk->hashval) {
 453                        xchk_da_set_corrupt(ds, level);
 454                        goto out_freebp;
 455                }
 456        }
 457
 458out:
 459        return error;
 460out_freebp:
 461        xfs_trans_brelse(dargs->trans, blk->bp);
 462        blk->bp = NULL;
 463out_nobuf:
 464        blk->blkno = 0;
 465        return error;
 466}
 467
 468/* Visit all nodes and leaves of a da btree. */
 469int
 470xchk_da_btree(
 471        struct xfs_scrub                *sc,
 472        int                             whichfork,
 473        xchk_da_btree_rec_fn            scrub_fn,
 474        void                            *private)
 475{
 476        struct xchk_da_btree            ds = {};
 477        struct xfs_mount                *mp = sc->mp;
 478        struct xfs_da_state_blk         *blks;
 479        struct xfs_da_node_entry        *key;
 480        xfs_dablk_t                     blkno;
 481        int                             level;
 482        int                             error;
 483
 484        /* Skip short format data structures; no btree to scan. */
 485        if (!xfs_ifork_has_extents(XFS_IFORK_PTR(sc->ip, whichfork)))
 486                return 0;
 487
 488        /* Set up initial da state. */
 489        ds.dargs.dp = sc->ip;
 490        ds.dargs.whichfork = whichfork;
 491        ds.dargs.trans = sc->tp;
 492        ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
 493        ds.state = xfs_da_state_alloc(&ds.dargs);
 494        ds.sc = sc;
 495        ds.private = private;
 496        if (whichfork == XFS_ATTR_FORK) {
 497                ds.dargs.geo = mp->m_attr_geo;
 498                ds.lowest = 0;
 499                ds.highest = 0;
 500        } else {
 501                ds.dargs.geo = mp->m_dir_geo;
 502                ds.lowest = ds.dargs.geo->leafblk;
 503                ds.highest = ds.dargs.geo->freeblk;
 504        }
 505        blkno = ds.lowest;
 506        level = 0;
 507
 508        /* Find the root of the da tree, if present. */
 509        blks = ds.state->path.blk;
 510        error = xchk_da_btree_block(&ds, level, blkno);
 511        if (error)
 512                goto out_state;
 513        /*
 514         * We didn't find a block at ds.lowest, which means that there's
 515         * no LEAF1/LEAFN tree (at least not where it's supposed to be),
 516         * so jump out now.
 517         */
 518        if (blks[level].bp == NULL)
 519                goto out_state;
 520
 521        blks[level].index = 0;
 522        while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
 523                /* Handle leaf block. */
 524                if (blks[level].magic != XFS_DA_NODE_MAGIC) {
 525                        /* End of leaf, pop back towards the root. */
 526                        if (blks[level].index >= ds.maxrecs[level]) {
 527                                if (level > 0)
 528                                        blks[level - 1].index++;
 529                                ds.tree_level++;
 530                                level--;
 531                                continue;
 532                        }
 533
 534                        /* Dispatch record scrubbing. */
 535                        error = scrub_fn(&ds, level);
 536                        if (error)
 537                                break;
 538                        if (xchk_should_terminate(sc, &error) ||
 539                            (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 540                                break;
 541
 542                        blks[level].index++;
 543                        continue;
 544                }
 545
 546
 547                /* End of node, pop back towards the root. */
 548                if (blks[level].index >= ds.maxrecs[level]) {
 549                        if (level > 0)
 550                                blks[level - 1].index++;
 551                        ds.tree_level++;
 552                        level--;
 553                        continue;
 554                }
 555
 556                /* Hashes in order for scrub? */
 557                key = xchk_da_btree_node_entry(&ds, level);
 558                error = xchk_da_btree_hash(&ds, level, &key->hashval);
 559                if (error)
 560                        goto out;
 561
 562                /* Drill another level deeper. */
 563                blkno = be32_to_cpu(key->before);
 564                level++;
 565                if (level >= XFS_DA_NODE_MAXDEPTH) {
 566                        /* Too deep! */
 567                        xchk_da_set_corrupt(&ds, level - 1);
 568                        break;
 569                }
 570                ds.tree_level--;
 571                error = xchk_da_btree_block(&ds, level, blkno);
 572                if (error)
 573                        goto out;
 574                if (blks[level].bp == NULL)
 575                        goto out;
 576
 577                blks[level].index = 0;
 578        }
 579
 580out:
 581        /* Release all the buffers we're tracking. */
 582        for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
 583                if (blks[level].bp == NULL)
 584                        continue;
 585                xfs_trans_brelse(sc->tp, blks[level].bp);
 586                blks[level].bp = NULL;
 587        }
 588
 589out_state:
 590        xfs_da_state_free(ds.state);
 591        return error;
 592}
 593