linux/fs/xfs/scrub/refcount.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "xfs_ag.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xchk_setup_ag_refcountbt(
        struct xfs_scrub        *sc)
{
        return xchk_setup_ag_btree(sc, false);
}

/* Reference count btree scrubber. */

/*
 * Confirming Reference Counts via Reverse Mappings
 *
 * We want to count the reverse mappings overlapping a refcount record
 * (bno, len, refcount), allowing for the possibility that some of the
 * overlap may come from smaller adjoining reverse mappings, while some
 * comes from single extents which overlap the range entirely.  The
 * outer loop is as follows:
 *
 * 1. For all reverse mappings overlapping the refcount extent,
 *    a. If a given rmap completely overlaps, mark it as seen.
 *    b. Otherwise, record the fragment (in agbno order) for later
 *       processing.
 *
 * Once we've seen all the rmaps, we know that for all blocks in the
 * refcount record we want to find $refcount owners and we've already
 * visited $seen extents that overlap all the blocks.  Therefore, we
 * need to find ($refcount - $seen) owners for every block in the
 * extent; call that quantity $target_nr.  Proceed as follows:
 *
 * 2. Pull the first $target_nr fragments from the list; all of them
 *    should start at or before the start of the extent.
 *    Call this subset of fragments the working set.
 * 3. Until there are no more unprocessed fragments,
 *    a. Find the shortest fragments in the set and remove them.
 *    b. Note the block number of the end of these fragments.
 *    c. Pull the same number of fragments from the list.  All of these
 *       fragments should start at the block number recorded in the
 *       previous step.
 *    d. Put those fragments in the set.
 * 4. Check that there are $target_nr fragments remaining in the working
 *    set, and that they all end at or beyond the end of the refcount
 *    extent.
 *
 * If the refcount is correct, all the check conditions in the algorithm
 * should always hold true.  If not, the refcount is incorrect.
 */
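
/*
 * For example, suppose the refcount record is (bno 10, len 10,
 * refcount 2) and the rmapbt holds three overlapping mappings:
 *
 *    A: [0, 25)   covers the whole extent, so $seen becomes 1
 *    B: [8, 15)   partial overlap, recorded as a fragment
 *    C: [15, 22)  partial overlap, recorded as a fragment
 *
 * Then $target_nr = 2 - 1 = 1, so we pull one fragment (B, which
 * starts at or before bno 10) into the working set.  B ends at block
 * 15, so the next fragment must start there; C does, so it replaces B
 * in the set.  C ends at block 22, at or beyond the end of the extent
 * (20), so every block in [10, 20) has the expected two owners.
 */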
struct xchk_refcnt_frag {
        struct list_head        list;
        struct xfs_rmap_irec    rm;
};

struct xchk_refcnt_check {
        struct xfs_scrub        *sc;
        struct list_head        fragments;

        /* refcount extent we're examining */
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
        xfs_nlink_t             refcount;

        /* number of owners seen */
        xfs_nlink_t             seen;
};

/*
 * Decide if the given rmap is large enough that we can redeem it
 * towards refcount verification now, or if it's a fragment, in
 * which case we'll hang onto it in the hopes that we'll later
 * discover that we've collected exactly as many fragments as the
 * refcountbt says we should have.
 */
STATIC int
xchk_refcountbt_rmap_check(
        struct xfs_btree_cur            *cur,
        const struct xfs_rmap_irec      *rec,
        void                            *priv)
{
        struct xchk_refcnt_check        *refchk = priv;
        struct xchk_refcnt_frag         *frag;
        xfs_agblock_t                   rm_last;
        xfs_agblock_t                   rc_last;
        int                             error = 0;

        if (xchk_should_terminate(refchk->sc, &error))
                return error;

        rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
        rc_last = refchk->bno + refchk->len - 1;

        /* Confirm that a single-owner refc extent is a CoW stage. */
        if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
                xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
                return 0;
        }

        if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
                /*
                 * The rmap overlaps the refcount record, so we can confirm
                 * one refcount owner seen.
                 */
                refchk->seen++;
        } else {
                /*
                 * This rmap covers only part of the refcount record, so
                 * save the fragment for later processing.  If the rmapbt
                 * is healthy each rmap_irec we see will be in agbno order
                 * so we don't need insertion sort here.
                 */
                frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
                                KM_MAYFAIL);
                if (!frag)
                        return -ENOMEM;
                memcpy(&frag->rm, rec, sizeof(frag->rm));
                list_add_tail(&frag->list, &refchk->fragments);
        }

        return 0;
}
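
/*
 * For instance, if the refcount record covers [10, 20), an rmap for
 * [5, 30) or for exactly [10, 20) counts as one whole owner seen right
 * away, while an rmap for [10, 15) covers only part of the record and
 * is queued as a fragment.
 */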

/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 */
STATIC void
xchk_refcountbt_process_rmap_fragments(
        struct xchk_refcnt_check        *refchk)
{
        struct list_head                worklist;
        struct xchk_refcnt_frag         *frag;
        struct xchk_refcnt_frag         *n;
        xfs_agblock_t                   bno;
        xfs_agblock_t                   rbno;
        xfs_agblock_t                   next_rbno;
        xfs_nlink_t                     nr;
        xfs_nlink_t                     target_nr;

        target_nr = refchk->refcount - refchk->seen;
        if (target_nr == 0)
                return;

        /*
         * There are (refchk->refcount - refchk->seen) references that we
         * haven't found yet.  Pull that many off the fragment list and
         * figure out where the smallest rmap ends (and therefore the
         * next rmap should start).  All the rmaps we pull off should
         * start at or before the beginning of the refcount record's
         * range.
         */
        INIT_LIST_HEAD(&worklist);
        rbno = NULLAGBLOCK;

        /* Make sure the fragments actually /are/ in agbno order. */
        bno = 0;
        list_for_each_entry(frag, &refchk->fragments, list) {
                if (frag->rm.rm_startblock < bno)
                        goto done;
                bno = frag->rm.rm_startblock;
        }

        /*
         * Find all the rmaps that start at or before the refc extent,
         * and put them on the worklist.
         */
        nr = 0;
        list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
                        break;
                bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                if (bno < rbno)
                        rbno = bno;
                list_move_tail(&frag->list, &worklist);
                nr++;
        }

        /*
         * We should have found exactly $target_nr rmap fragments starting
         * at or before the refcount extent.
         */
        if (nr != target_nr)
                goto done;

        while (!list_empty(&refchk->fragments)) {
                /* Discard any fragments ending at rbno from the worklist. */
                nr = 0;
                next_rbno = NULLAGBLOCK;
                list_for_each_entry_safe(frag, n, &worklist, list) {
                        bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                        if (bno != rbno) {
                                if (bno < next_rbno)
                                        next_rbno = bno;
                                continue;
                        }
                        list_del(&frag->list);
                        kmem_free(frag);
                        nr++;
                }

                /* Try to add nr rmaps starting at rbno to the worklist. */
                list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                        bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                        if (frag->rm.rm_startblock != rbno)
                                goto done;
                        list_move_tail(&frag->list, &worklist);
                        if (next_rbno > bno)
                                next_rbno = bno;
                        nr--;
                        if (nr == 0)
                                break;
                }

                /*
                 * If we get here and nr > 0, this means that we added fewer
                 * items to the worklist than we discarded because the fragment
                 * list ran out of items.  Therefore, we cannot maintain the
                 * required refcount.  Something is wrong, so we're done.
                 */
                if (nr)
                        goto done;

                rbno = next_rbno;
        }

        /*
         * Make sure the last extent we processed ends at or beyond
         * the end of the refcount extent.
         */
        if (rbno < refchk->bno + refchk->len)
                goto done;

        /* Actually record us having seen the remaining refcount. */
        refchk->seen = refchk->refcount;
done:
        /* Delete fragments and work list. */
        list_for_each_entry_safe(frag, n, &worklist, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
        list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
}
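
/*
 * Returning to the example above: if owner C's rmap were missing from
 * the rmapbt, processing would stop with the smallest fragment end at
 * block 15, short of the end of the extent at block 20, so $seen would
 * stay below the refcount and the caller would flag a cross-referencing
 * problem.
 */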

/* Use the rmap entries covering this extent to verify the refcount. */
STATIC void
xchk_refcountbt_xref_rmap(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   bno,
        xfs_extlen_t                    len,
        xfs_nlink_t                     refcount)
{
        struct xchk_refcnt_check        refchk = {
                .sc = sc,
                .bno = bno,
                .len = len,
                .refcount = refcount,
                .seen = 0,
        };
        struct xfs_rmap_irec            low;
        struct xfs_rmap_irec            high;
        struct xchk_refcnt_frag         *frag;
        struct xchk_refcnt_frag         *n;
        int                             error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        /*
         * Cross-reference with the rmapbt to confirm the refcount.  The
         * 0xFF fill sets every other key field to its maximum so that
         * rmaps starting anywhere in [bno, bno + len - 1] sort at or
         * below the high key.
         */
        memset(&low, 0, sizeof(low));
        low.rm_startblock = bno;
        memset(&high, 0xFF, sizeof(high));
        high.rm_startblock = bno + len - 1;

        INIT_LIST_HEAD(&refchk.fragments);
        error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
                        &xchk_refcountbt_rmap_check, &refchk);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                goto out_free;

        xchk_refcountbt_process_rmap_fragments(&refchk);
        if (refcount != refchk.seen)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

out_free:
        list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
}

/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
        struct xfs_scrub        *sc,
        xfs_agblock_t           agbno,
        xfs_extlen_t            len,
        xfs_nlink_t             refcount)
{
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return;

        xchk_xref_is_used_space(sc, agbno, len);
        xchk_xref_is_not_inode_chunk(sc, agbno, len);
        xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
}

/* Scrub a refcountbt record. */
STATIC int
xchk_refcountbt_rec(
        struct xchk_btree       *bs,
        const union xfs_btree_rec *rec)
{
        struct xfs_mount        *mp = bs->cur->bc_mp;
        xfs_agblock_t           *cow_blocks = bs->private;
        xfs_agnumber_t          agno = bs->cur->bc_ag.pag->pag_agno;
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
        xfs_nlink_t             refcount;
        bool                    has_cowflag;

        bno = be32_to_cpu(rec->refc.rc_startblock);
        len = be32_to_cpu(rec->refc.rc_blockcount);
        refcount = be32_to_cpu(rec->refc.rc_refcount);

        /* Only CoW records can have refcount == 1. */
        has_cowflag = (bno & XFS_REFC_COW_START);
        if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
        if (has_cowflag)
                (*cow_blocks) += len;

        /* Check the extent. */
        bno &= ~XFS_REFC_COW_START;
        if (bno + len <= bno ||
            !xfs_verify_agbno(mp, agno, bno) ||
            !xfs_verify_agbno(mp, agno, bno + len - 1))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        if (refcount == 0)
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        xchk_refcountbt_xref(bs->sc, bno, len, refcount);

        return 0;
}
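
/*
 * To illustrate the encoding checked above: CoW staging extents are
 * stored with the XFS_REFC_COW_START bit (the high bit of the 32-bit
 * agbno space) set in rc_startblock and always carry rc_refcount == 1,
 * so a staging extent at agbno 100 is stored as
 * rc_startblock == (XFS_REFC_COW_START | 100).
 */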

/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xchk_refcount_xref_rmap(
        struct xfs_scrub        *sc,
        xfs_filblks_t           cow_blocks)
{
        xfs_extlen_t            refcbt_blocks = 0;
        xfs_filblks_t           blocks;
        int                     error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        /* Check that we saw as many refcbt blocks as the rmap knows about. */
        error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
        if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
                return;
        error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
                        &XFS_RMAP_OINFO_REFC, &blocks);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (blocks != refcbt_blocks)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

        /* Check that we saw as many cow blocks as the rmap knows about. */
        error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
                        &XFS_RMAP_OINFO_COW, &blocks);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (blocks != cow_blocks)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}
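
/*
 * The refcount btree's own blocks are recorded in the rmapbt under the
 * XFS_RMAP_OWN_REFC owner, so the two totals compared above must match
 * exactly; a freshly formatted AG, for instance, has a single
 * refcountbt root block and exactly one reverse mapping for it.
 */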

/* Scrub the refcount btree for some AG. */
int
xchk_refcountbt(
        struct xfs_scrub        *sc)
{
        xfs_agblock_t           cow_blocks = 0;
        int                     error;

        error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
                        &XFS_RMAP_OINFO_REFC, &cow_blocks);
        if (error)
                return error;

        xchk_refcount_xref_rmap(sc, cow_blocks);

        return 0;
}

/* xref check that a cow staging extent is marked in the refcountbt. */
void
xchk_xref_is_cow_staging(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   agbno,
        xfs_extlen_t                    len)
{
        struct xfs_refcount_irec        rc;
        bool                            has_cowflag;
        int                             has_refcount;
        int                             error;

        if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
                return;

        /* Find the CoW staging extent. */
        error = xfs_refcount_lookup_le(sc->sa.refc_cur,
                        agbno + XFS_REFC_COW_START, &has_refcount);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (!has_refcount) {
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
                return;
        }

        error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (!has_refcount) {
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
                return;
        }

        /* CoW flag must be set, refcount must be 1. */
        has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
        if (!has_cowflag || rc.rc_refcount != 1)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);

        /* Must be at least as long as what was passed in */
        if (rc.rc_blockcount < len)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}
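
/*
 * For example, a caller checking a CoW staging mapping at agbno 100
 * with length 8 triggers a lookup at or below
 * (XFS_REFC_COW_START + 100); the record that comes back must have the
 * CoW bit set in its startblock, a refcount of exactly 1, and a block
 * count of at least 8.
 */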

/*
 * xref check that the extent is not shared.  Only file data blocks
 * can have multiple owners.
 */
void
xchk_xref_is_not_shared(
        struct xfs_scrub        *sc,
        xfs_agblock_t           agbno,
        xfs_extlen_t            len)
{
        bool                    shared;
        int                     error;

        if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
                return;

        error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (shared)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}