linux/fs/xfs/scrub/dabtree.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   3 *
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it would be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write the Free Software Foundation,
  18 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19 */
  20#include "xfs.h"
  21#include "xfs_fs.h"
  22#include "xfs_shared.h"
  23#include "xfs_format.h"
  24#include "xfs_trans_resv.h"
  25#include "xfs_mount.h"
  26#include "xfs_defer.h"
  27#include "xfs_btree.h"
  28#include "xfs_bit.h"
  29#include "xfs_log_format.h"
  30#include "xfs_trans.h"
  31#include "xfs_sb.h"
  32#include "xfs_inode.h"
  33#include "xfs_inode_fork.h"
  34#include "xfs_da_format.h"
  35#include "xfs_da_btree.h"
  36#include "xfs_dir2.h"
  37#include "xfs_dir2_priv.h"
  38#include "xfs_attr_leaf.h"
  39#include "scrub/xfs_scrub.h"
  40#include "scrub/scrub.h"
  41#include "scrub/common.h"
  42#include "scrub/trace.h"
  43#include "scrub/dabtree.h"
  44
  45/* Directory/Attribute Btree */
  46
  47/*
  48 * Check for da btree operation errors.  See the section about handling
  49 * operational errors in common.c.
  50 */
  51bool
  52xfs_scrub_da_process_error(
  53        struct xfs_scrub_da_btree       *ds,
  54        int                             level,
  55        int                             *error)
  56{
  57        struct xfs_scrub_context        *sc = ds->sc;
  58
  59        if (*error == 0)
  60                return true;
  61
  62        switch (*error) {
  63        case -EDEADLOCK:
  64                /* Used to restart an op with deadlock avoidance. */
  65                trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
  66                break;
  67        case -EFSBADCRC:
  68        case -EFSCORRUPTED:
  69                /* Note the badness but don't abort. */
  70                sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  71                *error = 0;
  72                /* fall through */
  73        default:
  74                trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
  75                                xfs_dir2_da_to_db(ds->dargs.geo,
  76                                        ds->state->path.blk[level].blkno),
  77                                *error, __return_address);
  78                break;
  79        }
  80        return false;
  81}
  82
  83/*
  84 * Check for da btree corruption.  See the section about handling
  85 * operational errors in common.c.
  86 */
  87void
  88xfs_scrub_da_set_corrupt(
  89        struct xfs_scrub_da_btree       *ds,
  90        int                             level)
  91{
  92        struct xfs_scrub_context        *sc = ds->sc;
  93
  94        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
  95
  96        trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
  97                        xfs_dir2_da_to_db(ds->dargs.geo,
  98                                ds->state->path.blk[level].blkno),
  99                        __return_address);
 100}
 101
 102/* Find an entry at a certain level in a da btree. */
 103STATIC void *
 104xfs_scrub_da_btree_entry(
 105        struct xfs_scrub_da_btree       *ds,
 106        int                             level,
 107        int                             rec)
 108{
 109        char                            *ents;
 110        struct xfs_da_state_blk         *blk;
 111        void                            *baddr;
 112
 113        /* Dispatch the entry finding function. */
 114        blk = &ds->state->path.blk[level];
 115        baddr = blk->bp->b_addr;
 116        switch (blk->magic) {
 117        case XFS_ATTR_LEAF_MAGIC:
 118        case XFS_ATTR3_LEAF_MAGIC:
 119                ents = (char *)xfs_attr3_leaf_entryp(baddr);
 120                return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
 121        case XFS_DIR2_LEAFN_MAGIC:
 122        case XFS_DIR3_LEAFN_MAGIC:
 123                ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
 124                return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
 125        case XFS_DIR2_LEAF1_MAGIC:
 126        case XFS_DIR3_LEAF1_MAGIC:
 127                ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
 128                return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
 129        case XFS_DA_NODE_MAGIC:
 130        case XFS_DA3_NODE_MAGIC:
 131                ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
 132                return ents + (rec * sizeof(struct xfs_da_node_entry));
 133        }
 134
 135        return NULL;
 136}
 137
 138/* Scrub a da btree hash (key). */
 139int
 140xfs_scrub_da_btree_hash(
 141        struct xfs_scrub_da_btree       *ds,
 142        int                             level,
 143        __be32                          *hashp)
 144{
 145        struct xfs_da_state_blk         *blks;
 146        struct xfs_da_node_entry        *entry;
 147        xfs_dahash_t                    hash;
 148        xfs_dahash_t                    parent_hash;
 149
 150        /* Is this hash in order? */
 151        hash = be32_to_cpu(*hashp);
 152        if (hash < ds->hashes[level])
 153                xfs_scrub_da_set_corrupt(ds, level);
 154        ds->hashes[level] = hash;
 155
 156        if (level == 0)
 157                return 0;
 158
 159        /* Is this hash no larger than the parent hash? */
 160        blks = ds->state->path.blk;
 161        entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
 162        parent_hash = be32_to_cpu(entry->hashval);
 163        if (parent_hash < hash)
 164                xfs_scrub_da_set_corrupt(ds, level);
 165
 166        return 0;
 167}
 168
 169/*
 170 * Check a da btree pointer.  Returns true if it's ok to use this
 171 * pointer.
 172 */
 173STATIC bool
 174xfs_scrub_da_btree_ptr_ok(
 175        struct xfs_scrub_da_btree       *ds,
 176        int                             level,
 177        xfs_dablk_t                     blkno)
 178{
 179        if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
 180                xfs_scrub_da_set_corrupt(ds, level);
 181                return false;
 182        }
 183
 184        return true;
 185}
 186
 187/*
 188 * The da btree scrubber can handle leaf1 blocks as a degenerate
 189 * form of leafn blocks.  Since the regular da code doesn't handle
 190 * leaf1, we must multiplex the verifiers.
 191 */
 192static void
 193xfs_scrub_da_btree_read_verify(
 194        struct xfs_buf          *bp)
 195{
 196        struct xfs_da_blkinfo   *info = bp->b_addr;
 197
 198        switch (be16_to_cpu(info->magic)) {
 199        case XFS_DIR2_LEAF1_MAGIC:
 200        case XFS_DIR3_LEAF1_MAGIC:
 201                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 202                bp->b_ops->verify_read(bp);
 203                return;
 204        default:
 205                /*
 206                 * xfs_da3_node_buf_ops already know how to handle
 207                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
 208                 */
 209                bp->b_ops = &xfs_da3_node_buf_ops;
 210                bp->b_ops->verify_read(bp);
 211                return;
 212        }
 213}
 214static void
 215xfs_scrub_da_btree_write_verify(
 216        struct xfs_buf          *bp)
 217{
 218        struct xfs_da_blkinfo   *info = bp->b_addr;
 219
 220        switch (be16_to_cpu(info->magic)) {
 221        case XFS_DIR2_LEAF1_MAGIC:
 222        case XFS_DIR3_LEAF1_MAGIC:
 223                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 224                bp->b_ops->verify_write(bp);
 225                return;
 226        default:
 227                /*
 228                 * xfs_da3_node_buf_ops already know how to handle
 229                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
 230                 */
 231                bp->b_ops = &xfs_da3_node_buf_ops;
 232                bp->b_ops->verify_write(bp);
 233                return;
 234        }
 235}
 236static void *
 237xfs_scrub_da_btree_verify(
 238        struct xfs_buf          *bp)
 239{
 240        struct xfs_da_blkinfo   *info = bp->b_addr;
 241
 242        switch (be16_to_cpu(info->magic)) {
 243        case XFS_DIR2_LEAF1_MAGIC:
 244        case XFS_DIR3_LEAF1_MAGIC:
 245                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
 246                return bp->b_ops->verify_struct(bp);
 247        default:
 248                bp->b_ops = &xfs_da3_node_buf_ops;
 249                return bp->b_ops->verify_struct(bp);
 250        }
 251}
 252
 253static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
 254        .name = "xfs_scrub_da_btree",
 255        .verify_read = xfs_scrub_da_btree_read_verify,
 256        .verify_write = xfs_scrub_da_btree_write_verify,
 257        .verify_struct = xfs_scrub_da_btree_verify,
 258};
 259
 260/* Check a block's sibling. */
 261STATIC int
 262xfs_scrub_da_btree_block_check_sibling(
 263        struct xfs_scrub_da_btree       *ds,
 264        int                             level,
 265        int                             direction,
 266        xfs_dablk_t                     sibling)
 267{
 268        int                             retval;
 269        int                             error;
 270
 271        memcpy(&ds->state->altpath, &ds->state->path,
 272                        sizeof(ds->state->altpath));
 273
 274        /*
 275         * If the pointer is null, we shouldn't be able to move the upper
 276         * level pointer anywhere.
 277         */
 278        if (sibling == 0) {
 279                error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
 280                                direction, false, &retval);
 281                if (error == 0 && retval == 0)
 282                        xfs_scrub_da_set_corrupt(ds, level);
 283                error = 0;
 284                goto out;
 285        }
 286
 287        /* Move the alternate cursor one block in the direction given. */
 288        error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
 289                        direction, false, &retval);
 290        if (!xfs_scrub_da_process_error(ds, level, &error))
 291                return error;
 292        if (retval) {
 293                xfs_scrub_da_set_corrupt(ds, level);
 294                return error;
 295        }
 296        if (ds->state->altpath.blk[level].bp)
 297                xfs_scrub_buffer_recheck(ds->sc,
 298                                ds->state->altpath.blk[level].bp);
 299
 300        /* Compare upper level pointer to sibling pointer. */
 301        if (ds->state->altpath.blk[level].blkno != sibling)
 302                xfs_scrub_da_set_corrupt(ds, level);
 303        xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
 304out:
 305        return error;
 306}
 307
 308/* Check a block's sibling pointers. */
 309STATIC int
 310xfs_scrub_da_btree_block_check_siblings(
 311        struct xfs_scrub_da_btree       *ds,
 312        int                             level,
 313        struct xfs_da_blkinfo           *hdr)
 314{
 315        xfs_dablk_t                     forw;
 316        xfs_dablk_t                     back;
 317        int                             error = 0;
 318
 319        forw = be32_to_cpu(hdr->forw);
 320        back = be32_to_cpu(hdr->back);
 321
 322        /* Top level blocks should not have sibling pointers. */
 323        if (level == 0) {
 324                if (forw != 0 || back != 0)
 325                        xfs_scrub_da_set_corrupt(ds, level);
 326                return 0;
 327        }
 328
 329        /*
 330         * Check back (left) and forw (right) pointers.  These functions
 331         * absorb error codes for us.
 332         */
 333        error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
 334        if (error)
 335                goto out;
 336        error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
 337
 338out:
 339        memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
 340        return error;
 341}
 342
 343/* Load a dir/attribute block from a btree. */
 344STATIC int
 345xfs_scrub_da_btree_block(
 346        struct xfs_scrub_da_btree       *ds,
 347        int                             level,
 348        xfs_dablk_t                     blkno)
 349{
 350        struct xfs_da_state_blk         *blk;
 351        struct xfs_da_intnode           *node;
 352        struct xfs_da_node_entry        *btree;
 353        struct xfs_da3_blkinfo          *hdr3;
 354        struct xfs_da_args              *dargs = &ds->dargs;
 355        struct xfs_inode                *ip = ds->dargs.dp;
 356        xfs_ino_t                       owner;
 357        int                             *pmaxrecs;
 358        struct xfs_da3_icnode_hdr       nodehdr;
 359        int                             error = 0;
 360
 361        blk = &ds->state->path.blk[level];
 362        ds->state->path.active = level + 1;
 363
 364        /* Release old block. */
 365        if (blk->bp) {
 366                xfs_trans_brelse(dargs->trans, blk->bp);
 367                blk->bp = NULL;
 368        }
 369
 370        /* Check the pointer. */
 371        blk->blkno = blkno;
 372        if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
 373                goto out_nobuf;
 374
 375        /* Read the buffer. */
 376        error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
 377                        &blk->bp, dargs->whichfork,
 378                        &xfs_scrub_da_btree_buf_ops);
 379        if (!xfs_scrub_da_process_error(ds, level, &error))
 380                goto out_nobuf;
 381        if (blk->bp)
 382                xfs_scrub_buffer_recheck(ds->sc, blk->bp);
 383
 384        /*
 385         * We didn't find a dir btree root block, which means that
 386         * there's no LEAF1/LEAFN tree (at least not where it's supposed
 387         * to be), so jump out now.
 388         */
 389        if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
 390                        blk->bp == NULL)
 391                goto out_nobuf;
 392
 393        /* It's /not/ ok for attr trees not to have a da btree. */
 394        if (blk->bp == NULL) {
 395                xfs_scrub_da_set_corrupt(ds, level);
 396                goto out_nobuf;
 397        }
 398
 399        hdr3 = blk->bp->b_addr;
 400        blk->magic = be16_to_cpu(hdr3->hdr.magic);
 401        pmaxrecs = &ds->maxrecs[level];
 402
 403        /* We only started zeroing the header on v5 filesystems. */
 404        if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
 405                xfs_scrub_da_set_corrupt(ds, level);
 406
 407        /* Check the owner. */
 408        if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
 409                owner = be64_to_cpu(hdr3->owner);
 410                if (owner != ip->i_ino)
 411                        xfs_scrub_da_set_corrupt(ds, level);
 412        }
 413
 414        /* Check the siblings. */
 415        error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
 416        if (error)
 417                goto out;
 418
 419        /* Interpret the buffer. */
 420        switch (blk->magic) {
 421        case XFS_ATTR_LEAF_MAGIC:
 422        case XFS_ATTR3_LEAF_MAGIC:
 423                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 424                                XFS_BLFT_ATTR_LEAF_BUF);
 425                blk->magic = XFS_ATTR_LEAF_MAGIC;
 426                blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
 427                if (ds->tree_level != 0)
 428                        xfs_scrub_da_set_corrupt(ds, level);
 429                break;
 430        case XFS_DIR2_LEAFN_MAGIC:
 431        case XFS_DIR3_LEAFN_MAGIC:
 432                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 433                                XFS_BLFT_DIR_LEAFN_BUF);
 434                blk->magic = XFS_DIR2_LEAFN_MAGIC;
 435                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 436                if (ds->tree_level != 0)
 437                        xfs_scrub_da_set_corrupt(ds, level);
 438                break;
 439        case XFS_DIR2_LEAF1_MAGIC:
 440        case XFS_DIR3_LEAF1_MAGIC:
 441                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 442                                XFS_BLFT_DIR_LEAF1_BUF);
 443                blk->magic = XFS_DIR2_LEAF1_MAGIC;
 444                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 445                if (ds->tree_level != 0)
 446                        xfs_scrub_da_set_corrupt(ds, level);
 447                break;
 448        case XFS_DA_NODE_MAGIC:
 449        case XFS_DA3_NODE_MAGIC:
 450                xfs_trans_buf_set_type(dargs->trans, blk->bp,
 451                                XFS_BLFT_DA_NODE_BUF);
 452                blk->magic = XFS_DA_NODE_MAGIC;
 453                node = blk->bp->b_addr;
 454                ip->d_ops->node_hdr_from_disk(&nodehdr, node);
 455                btree = ip->d_ops->node_tree_p(node);
 456                *pmaxrecs = nodehdr.count;
 457                blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
 458                if (level == 0) {
 459                        if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
 460                                xfs_scrub_da_set_corrupt(ds, level);
 461                                goto out_freebp;
 462                        }
 463                        ds->tree_level = nodehdr.level;
 464                } else {
 465                        if (ds->tree_level != nodehdr.level) {
 466                                xfs_scrub_da_set_corrupt(ds, level);
 467                                goto out_freebp;
 468                        }
 469                }
 470
 471                /* XXX: Check hdr3.pad32 once we know how to fix it. */
 472                break;
 473        default:
 474                xfs_scrub_da_set_corrupt(ds, level);
 475                goto out_freebp;
 476        }
 477
 478out:
 479        return error;
 480out_freebp:
 481        xfs_trans_brelse(dargs->trans, blk->bp);
 482        blk->bp = NULL;
 483out_nobuf:
 484        blk->blkno = 0;
 485        return error;
 486}
 487
 488/* Visit all nodes and leaves of a da btree. */
 489int
 490xfs_scrub_da_btree(
 491        struct xfs_scrub_context        *sc,
 492        int                             whichfork,
 493        xfs_scrub_da_btree_rec_fn       scrub_fn,
 494        void                            *private)
 495{
 496        struct xfs_scrub_da_btree       ds = {};
 497        struct xfs_mount                *mp = sc->mp;
 498        struct xfs_da_state_blk         *blks;
 499        struct xfs_da_node_entry        *key;
 500        void                            *rec;
 501        xfs_dablk_t                     blkno;
 502        int                             level;
 503        int                             error;
 504
 505        /* Skip short format data structures; no btree to scan. */
 506        if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 507            XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
 508                return 0;
 509
 510        /* Set up initial da state. */
 511        ds.dargs.dp = sc->ip;
 512        ds.dargs.whichfork = whichfork;
 513        ds.dargs.trans = sc->tp;
 514        ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
 515        ds.state = xfs_da_state_alloc();
 516        ds.state->args = &ds.dargs;
 517        ds.state->mp = mp;
 518        ds.sc = sc;
 519        ds.private = private;
 520        if (whichfork == XFS_ATTR_FORK) {
 521                ds.dargs.geo = mp->m_attr_geo;
 522                ds.lowest = 0;
 523                ds.highest = 0;
 524        } else {
 525                ds.dargs.geo = mp->m_dir_geo;
 526                ds.lowest = ds.dargs.geo->leafblk;
 527                ds.highest = ds.dargs.geo->freeblk;
 528        }
 529        blkno = ds.lowest;
 530        level = 0;
 531
 532        /* Find the root of the da tree, if present. */
 533        blks = ds.state->path.blk;
 534        error = xfs_scrub_da_btree_block(&ds, level, blkno);
 535        if (error)
 536                goto out_state;
 537        /*
 538         * We didn't find a block at ds.lowest, which means that there's
 539         * no LEAF1/LEAFN tree (at least not where it's supposed to be),
 540         * so jump out now.
 541         */
 542        if (blks[level].bp == NULL)
 543                goto out_state;
 544
 545        blks[level].index = 0;
 546        while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
 547                /* Handle leaf block. */
 548                if (blks[level].magic != XFS_DA_NODE_MAGIC) {
 549                        /* End of leaf, pop back towards the root. */
 550                        if (blks[level].index >= ds.maxrecs[level]) {
 551                                if (level > 0)
 552                                        blks[level - 1].index++;
 553                                ds.tree_level++;
 554                                level--;
 555                                continue;
 556                        }
 557
 558                        /* Dispatch record scrubbing. */
 559                        rec = xfs_scrub_da_btree_entry(&ds, level,
 560                                        blks[level].index);
 561                        error = scrub_fn(&ds, level, rec);
 562                        if (error)
 563                                break;
 564                        if (xfs_scrub_should_terminate(sc, &error) ||
 565                            (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 566                                break;
 567
 568                        blks[level].index++;
 569                        continue;
 570                }
 571
 572
 573                /* End of node, pop back towards the root. */
 574                if (blks[level].index >= ds.maxrecs[level]) {
 575                        if (level > 0)
 576                                blks[level - 1].index++;
 577                        ds.tree_level++;
 578                        level--;
 579                        continue;
 580                }
 581
 582                /* Hashes in order for scrub? */
 583                key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
 584                error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
 585                if (error)
 586                        goto out;
 587
 588                /* Drill another level deeper. */
 589                blkno = be32_to_cpu(key->before);
 590                level++;
 591                ds.tree_level--;
 592                error = xfs_scrub_da_btree_block(&ds, level, blkno);
 593                if (error)
 594                        goto out;
 595                if (blks[level].bp == NULL)
 596                        goto out;
 597
 598                blks[level].index = 0;
 599        }
 600
 601out:
 602        /* Release all the buffers we're tracking. */
 603        for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
 604                if (blks[level].bp == NULL)
 605                        continue;
 606                xfs_trans_brelse(sc->tp, blks[level].bp);
 607                blks[level].bp = NULL;
 608        }
 609
 610out_state:
 611        xfs_da_state_free(ds.state);
 612        return error;
 613}
 614