linux/fs/xfs/scrub/parent.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_log_format.h"
  13#include "xfs_inode.h"
  14#include "xfs_icache.h"
  15#include "xfs_dir2.h"
  16#include "xfs_dir2_priv.h"
  17#include "scrub/scrub.h"
  18#include "scrub/common.h"
  19
  20/* Set us up to scrub parents. */
  21int
  22xchk_setup_parent(
  23        struct xfs_scrub        *sc)
  24{
  25        return xchk_setup_inode_contents(sc, 0);
  26}
  27
  28/* Parent pointers */
  29
  30/* Look for an entry in a parent pointing to this inode. */
  31
/*
 * Directory iteration context used while counting how many entries in a
 * candidate parent directory point at the inode being scrubbed.
 */
struct xchk_parent_ctx {
        /* Embedded readdir context; the actor recovers this struct from it. */
        struct dir_context      dc;
        /* Scrub context, consulted by the actor for the termination check. */
        struct xfs_scrub        *sc;
        /* Inode number that each directory entry is compared against. */
        xfs_ino_t               ino;
        /* Number of directory entries seen so far whose inode matches ino. */
        xfs_nlink_t             nlink;
        /* Set by the actor once the scrub has been asked to terminate. */
        bool                    cancelled;
};
  39
  40/* Look for a single entry in a directory pointing to an inode. */
  41STATIC int
  42xchk_parent_actor(
  43        struct dir_context      *dc,
  44        const char              *name,
  45        int                     namelen,
  46        loff_t                  pos,
  47        u64                     ino,
  48        unsigned                type)
  49{
  50        struct xchk_parent_ctx  *spc;
  51        int                     error = 0;
  52
  53        spc = container_of(dc, struct xchk_parent_ctx, dc);
  54        if (spc->ino == ino)
  55                spc->nlink++;
  56
  57        /*
  58         * If we're facing a fatal signal, bail out.  Store the cancellation
  59         * status separately because the VFS readdir code squashes error codes
  60         * into short directory reads.
  61         */
  62        if (xchk_should_terminate(spc->sc, &error))
  63                spc->cancelled = true;
  64
  65        return error;
  66}
  67
  68/* Count the number of dentries in the parent dir that point to this inode. */
  69STATIC int
  70xchk_parent_count_parent_dentries(
  71        struct xfs_scrub        *sc,
  72        struct xfs_inode        *parent,
  73        xfs_nlink_t             *nlink)
  74{
  75        struct xchk_parent_ctx  spc = {
  76                .dc.actor       = xchk_parent_actor,
  77                .ino            = sc->ip->i_ino,
  78                .sc             = sc,
  79        };
  80        size_t                  bufsize;
  81        loff_t                  oldpos;
  82        uint                    lock_mode;
  83        int                     error = 0;
  84
  85        /*
  86         * If there are any blocks, read-ahead block 0 as we're almost
  87         * certain to have the next operation be a read there.  This is
  88         * how we guarantee that the parent's extent map has been loaded,
  89         * if there is one.
  90         */
  91        lock_mode = xfs_ilock_data_map_shared(parent);
  92        if (parent->i_df.if_nextents > 0)
  93                error = xfs_dir3_data_readahead(parent, 0, 0);
  94        xfs_iunlock(parent, lock_mode);
  95        if (error)
  96                return error;
  97
  98        /*
  99         * Iterate the parent dir to confirm that there is
 100         * exactly one entry pointing back to the inode being
 101         * scanned.
 102         */
 103        bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
 104                        parent->i_disk_size);
 105        oldpos = 0;
 106        while (true) {
 107                error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
 108                if (error)
 109                        goto out;
 110                if (spc.cancelled) {
 111                        error = -EAGAIN;
 112                        goto out;
 113                }
 114                if (oldpos == spc.dc.pos)
 115                        break;
 116                oldpos = spc.dc.pos;
 117        }
 118        *nlink = spc.nlink;
 119out:
 120        return error;
 121}
 122
/*
 * Given the inode number of the alleged parent of the inode being
 * scrubbed, try to validate that the parent has exactly one directory
 * entry pointing back to the inode being scrubbed.
 *
 * @sc:        scrub context; sc->ip is the directory whose '..' is checked.
 * @dnum:      inode number that '..' was observed to point to.
 * @try_again: set to true (with a return value of 0) if '..' was found to
 *             point somewhere other than @dnum after the locks were cycled;
 *             false in every other case.
 *
 * Problems with the parent pointer are recorded in the scrub state rather
 * than returned.  The return value is 0 or a negative errno for failures
 * that the error-processing helpers did not absorb.
 *
 * On the slow path this function drops every lock held on sc->ip and later
 * retakes only IOLOCK_EXCL, keeping sc->ilock_flags in step with what is
 * actually held.
 */
STATIC int
xchk_parent_validate(
        struct xfs_scrub        *sc,
        xfs_ino_t               dnum,
        bool                    *try_again)
{
        struct xfs_mount        *mp = sc->mp;
        struct xfs_inode        *dp = NULL;
        xfs_nlink_t             expected_nlink;
        xfs_nlink_t             nlink;
        int                     error = 0;

        *try_again = false;

        /* Corruption has already been flagged; there is nothing to add. */
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                goto out;

        /* '..' must not point to ourselves. */
        if (sc->ip->i_ino == dnum) {
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
                goto out;
        }

        /*
         * If we're an unlinked directory, the parent /won't/ have a link
         * to us.  Otherwise, it should have one link.
         */
        expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

        /*
         * Grab this parent inode.  We release the inode before we
         * cancel the scrub transaction.  Since we don't know a
         * priori that releasing the inode won't trigger eofblocks
         * cleanup (which allocates what would be a nested transaction)
         * if the parent pointer erroneously points to a file, we
         * can't use DONTCACHE here because DONTCACHE inodes can trigger
         * immediate inactive cleanup of the inode.
         *
         * If _iget returns -EINVAL or -ENOENT then the parent inode number is
         * garbage and the directory is corrupt.  If the _iget returns
         * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
         *  cross referencing error.  Any other error is an operational error.
         */
        error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
        if (error == -EINVAL || error == -ENOENT) {
                error = -EFSCORRUPTED;
                xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
                goto out;
        }
        if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
                goto out;
        /* The alleged parent must be a different inode and a directory. */
        if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
                goto out_rele;
        }

        /*
         * We prefer to keep the inode locked while we lock and search
         * its alleged parent for a forward reference.  If we can grab
         * the iolock, validate the pointers and we're done.  We must
         * use nowait here to avoid an ABBA deadlock on the parent and
         * the child inodes.
         */
        if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
                error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
                if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
                                &error))
                        goto out_unlock;
                if (nlink != expected_nlink)
                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
                goto out_unlock;
        }

        /*
         * The game changes if we get here.  We failed to lock the parent,
         * so we're going to try to verify both pointers while only holding
         * one lock so as to avoid deadlocking with something that's actually
         * trying to traverse down the directory tree.
         */
        xfs_iunlock(sc->ip, sc->ilock_flags);
        sc->ilock_flags = 0;
        error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED);
        if (error)
                goto out_rele;

        /* Go looking for our dentry. */
        error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
        if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
                goto out_unlock;

        /* Drop the parent lock, relock this inode. */
        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
        if (error)
                goto out_rele;
        sc->ilock_flags = XFS_IOLOCK_EXCL;

        /*
         * If we're an unlinked directory, the parent /won't/ have a link
         * to us.  Otherwise, it should have one link.  We have to re-set
         * it here because we dropped the lock on sc->ip.
         */
        expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

        /* Look up '..' to see if the inode changed. */
        error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
                goto out_rele;

        /* Drat, parent changed.  Try again! */
        if (dnum != dp->i_ino) {
                xfs_irele(dp);
                *try_again = true;
                return 0;
        }
        xfs_irele(dp);

        /*
         * '..' didn't change, so check that there was only one entry
         * for us in the parent.
         */
        if (nlink != expected_nlink)
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
        return error;

        /* Error unwinding: drop the parent's iolock, then its reference. */
out_unlock:
        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
out_rele:
        xfs_irele(dp);
out:
        return error;
}
 260
 261/* Scrub a parent pointer. */
 262int
 263xchk_parent(
 264        struct xfs_scrub        *sc)
 265{
 266        struct xfs_mount        *mp = sc->mp;
 267        xfs_ino_t               dnum;
 268        bool                    try_again;
 269        int                     tries = 0;
 270        int                     error = 0;
 271
 272        /*
 273         * If we're a directory, check that the '..' link points up to
 274         * a directory that has one entry pointing to us.
 275         */
 276        if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
 277                return -ENOENT;
 278
 279        /* We're not a special inode, are we? */
 280        if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
 281                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 282                goto out;
 283        }
 284
 285        /*
 286         * The VFS grabs a read or write lock via i_rwsem before it reads
 287         * or writes to a directory.  If we've gotten this far we've
 288         * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
 289         * getting a write lock on i_rwsem.  Therefore, it is safe for us
 290         * to drop the ILOCK here in order to do directory lookups.
 291         */
 292        sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
 293        xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
 294
 295        /* Look up '..' */
 296        error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
 297        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 298                goto out;
 299        if (!xfs_verify_dir_ino(mp, dnum)) {
 300                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 301                goto out;
 302        }
 303
 304        /* Is this the root dir?  Then '..' must point to itself. */
 305        if (sc->ip == mp->m_rootip) {
 306                if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
 307                    sc->ip->i_ino != dnum)
 308                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 309                goto out;
 310        }
 311
 312        do {
 313                error = xchk_parent_validate(sc, dnum, &try_again);
 314                if (error)
 315                        goto out;
 316        } while (try_again && ++tries < 20);
 317
 318        /*
 319         * We gave it our best shot but failed, so mark this scrub
 320         * incomplete.  Userspace can decide if it wants to try again.
 321         */
 322        if (try_again && tries == 20)
 323                xchk_set_incomplete(sc);
 324out:
 325        /*
 326         * If we failed to lock the parent inode even after a retry, just mark
 327         * this scrub incomplete and return.
 328         */
 329        if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
 330                error = 0;
 331                xchk_set_incomplete(sc);
 332        }
 333        return error;
 334}
 335