linux/fs/xfs/scrub/parent.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   3 *
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it would be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write the Free Software Foundation,
  18 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19 */
  20#include "xfs.h"
  21#include "xfs_fs.h"
  22#include "xfs_shared.h"
  23#include "xfs_format.h"
  24#include "xfs_trans_resv.h"
  25#include "xfs_mount.h"
  26#include "xfs_defer.h"
  27#include "xfs_btree.h"
  28#include "xfs_bit.h"
  29#include "xfs_log_format.h"
  30#include "xfs_trans.h"
  31#include "xfs_sb.h"
  32#include "xfs_inode.h"
  33#include "xfs_icache.h"
  34#include "xfs_dir2.h"
  35#include "xfs_dir2_priv.h"
  36#include "xfs_ialloc.h"
  37#include "scrub/xfs_scrub.h"
  38#include "scrub/scrub.h"
  39#include "scrub/common.h"
  40#include "scrub/trace.h"
  41
  42/* Set us up to scrub parents. */
  43int
  44xfs_scrub_setup_parent(
  45        struct xfs_scrub_context        *sc,
  46        struct xfs_inode                *ip)
  47{
  48        return xfs_scrub_setup_inode_contents(sc, ip, 0);
  49}
  50
  51/* Parent pointers */
  52
  53/* Look for an entry in a parent pointing to this inode. */
  54
  55struct xfs_scrub_parent_ctx {
  56        struct dir_context              dc;
  57        xfs_ino_t                       ino;
  58        xfs_nlink_t                     nlink;
  59};
  60
  61/* Look for a single entry in a directory pointing to an inode. */
  62STATIC int
  63xfs_scrub_parent_actor(
  64        struct dir_context              *dc,
  65        const char                      *name,
  66        int                             namelen,
  67        loff_t                          pos,
  68        u64                             ino,
  69        unsigned                        type)
  70{
  71        struct xfs_scrub_parent_ctx     *spc;
  72
  73        spc = container_of(dc, struct xfs_scrub_parent_ctx, dc);
  74        if (spc->ino == ino)
  75                spc->nlink++;
  76        return 0;
  77}
  78
  79/* Count the number of dentries in the parent dir that point to this inode. */
  80STATIC int
  81xfs_scrub_parent_count_parent_dentries(
  82        struct xfs_scrub_context        *sc,
  83        struct xfs_inode                *parent,
  84        xfs_nlink_t                     *nlink)
  85{
  86        struct xfs_scrub_parent_ctx     spc = {
  87                .dc.actor = xfs_scrub_parent_actor,
  88                .dc.pos = 0,
  89                .ino = sc->ip->i_ino,
  90                .nlink = 0,
  91        };
  92        size_t                          bufsize;
  93        loff_t                          oldpos;
  94        uint                            lock_mode;
  95        int                             error = 0;
  96
  97        /*
  98         * If there are any blocks, read-ahead block 0 as we're almost
  99         * certain to have the next operation be a read there.  This is
 100         * how we guarantee that the parent's extent map has been loaded,
 101         * if there is one.
 102         */
 103        lock_mode = xfs_ilock_data_map_shared(parent);
 104        if (parent->i_d.di_nextents > 0)
 105                error = xfs_dir3_data_readahead(parent, 0, -1);
 106        xfs_iunlock(parent, lock_mode);
 107        if (error)
 108                return error;
 109
 110        /*
 111         * Iterate the parent dir to confirm that there is
 112         * exactly one entry pointing back to the inode being
 113         * scanned.
 114         */
 115        bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
 116                        parent->i_d.di_size);
 117        oldpos = 0;
 118        while (true) {
 119                error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
 120                if (error)
 121                        goto out;
 122                if (oldpos == spc.dc.pos)
 123                        break;
 124                oldpos = spc.dc.pos;
 125        }
 126        *nlink = spc.nlink;
 127out:
 128        return error;
 129}
 130
 131/*
 132 * Given the inode number of the alleged parent of the inode being
 133 * scrubbed, try to validate that the parent has exactly one directory
 134 * entry pointing back to the inode being scrubbed.
 135 */
 136STATIC int
 137xfs_scrub_parent_validate(
 138        struct xfs_scrub_context        *sc,
 139        xfs_ino_t                       dnum,
 140        bool                            *try_again)
 141{
 142        struct xfs_mount                *mp = sc->mp;
 143        struct xfs_inode                *dp = NULL;
 144        xfs_nlink_t                     expected_nlink;
 145        xfs_nlink_t                     nlink;
 146        int                             error = 0;
 147
 148        *try_again = false;
 149
 150        /* '..' must not point to ourselves. */
 151        if (sc->ip->i_ino == dnum) {
 152                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 153                goto out;
 154        }
 155
 156        /*
 157         * If we're an unlinked directory, the parent /won't/ have a link
 158         * to us.  Otherwise, it should have one link.
 159         */
 160        expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
 161
 162        /*
 163         * Grab this parent inode.  We release the inode before we
 164         * cancel the scrub transaction.  Since we're don't know a
 165         * priori that releasing the inode won't trigger eofblocks
 166         * cleanup (which allocates what would be a nested transaction)
 167         * if the parent pointer erroneously points to a file, we
 168         * can't use DONTCACHE here because DONTCACHE inodes can trigger
 169         * immediate inactive cleanup of the inode.
 170         */
 171        error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
 172        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 173                goto out;
 174        if (dp == sc->ip) {
 175                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 176                goto out_rele;
 177        }
 178
 179        /*
 180         * We prefer to keep the inode locked while we lock and search
 181         * its alleged parent for a forward reference.  If we can grab
 182         * the iolock, validate the pointers and we're done.  We must
 183         * use nowait here to avoid an ABBA deadlock on the parent and
 184         * the child inodes.
 185         */
 186        if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
 187                error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
 188                if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
 189                                &error))
 190                        goto out_unlock;
 191                if (nlink != expected_nlink)
 192                        xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 193                goto out_unlock;
 194        }
 195
 196        /*
 197         * The game changes if we get here.  We failed to lock the parent,
 198         * so we're going to try to verify both pointers while only holding
 199         * one lock so as to avoid deadlocking with something that's actually
 200         * trying to traverse down the directory tree.
 201         */
 202        xfs_iunlock(sc->ip, sc->ilock_flags);
 203        sc->ilock_flags = 0;
 204        xfs_ilock(dp, XFS_IOLOCK_SHARED);
 205
 206        /* Go looking for our dentry. */
 207        error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
 208        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 209                goto out_unlock;
 210
 211        /* Drop the parent lock, relock this inode. */
 212        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 213        sc->ilock_flags = XFS_IOLOCK_EXCL;
 214        xfs_ilock(sc->ip, sc->ilock_flags);
 215
 216        /*
 217         * If we're an unlinked directory, the parent /won't/ have a link
 218         * to us.  Otherwise, it should have one link.  We have to re-set
 219         * it here because we dropped the lock on sc->ip.
 220         */
 221        expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
 222
 223        /* Look up '..' to see if the inode changed. */
 224        error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
 225        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 226                goto out_rele;
 227
 228        /* Drat, parent changed.  Try again! */
 229        if (dnum != dp->i_ino) {
 230                iput(VFS_I(dp));
 231                *try_again = true;
 232                return 0;
 233        }
 234        iput(VFS_I(dp));
 235
 236        /*
 237         * '..' didn't change, so check that there was only one entry
 238         * for us in the parent.
 239         */
 240        if (nlink != expected_nlink)
 241                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 242        return error;
 243
 244out_unlock:
 245        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 246out_rele:
 247        iput(VFS_I(dp));
 248out:
 249        return error;
 250}
 251
 252/* Scrub a parent pointer. */
 253int
 254xfs_scrub_parent(
 255        struct xfs_scrub_context        *sc)
 256{
 257        struct xfs_mount                *mp = sc->mp;
 258        xfs_ino_t                       dnum;
 259        bool                            try_again;
 260        int                             tries = 0;
 261        int                             error = 0;
 262
 263        /*
 264         * If we're a directory, check that the '..' link points up to
 265         * a directory that has one entry pointing to us.
 266         */
 267        if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
 268                return -ENOENT;
 269
 270        /* We're not a special inode, are we? */
 271        if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
 272                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 273                goto out;
 274        }
 275
 276        /*
 277         * The VFS grabs a read or write lock via i_rwsem before it reads
 278         * or writes to a directory.  If we've gotten this far we've
 279         * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
 280         * getting a write lock on i_rwsem.  Therefore, it is safe for us
 281         * to drop the ILOCK here in order to do directory lookups.
 282         */
 283        sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
 284        xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
 285
 286        /* Look up '..' */
 287        error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
 288        if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 289                goto out;
 290        if (!xfs_verify_dir_ino(mp, dnum)) {
 291                xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 292                goto out;
 293        }
 294
 295        /* Is this the root dir?  Then '..' must point to itself. */
 296        if (sc->ip == mp->m_rootip) {
 297                if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
 298                    sc->ip->i_ino != dnum)
 299                        xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 300                goto out;
 301        }
 302
 303        do {
 304                error = xfs_scrub_parent_validate(sc, dnum, &try_again);
 305                if (error)
 306                        goto out;
 307        } while (try_again && ++tries < 20);
 308
 309        /*
 310         * We gave it our best shot but failed, so mark this scrub
 311         * incomplete.  Userspace can decide if it wants to try again.
 312         */
 313        if (try_again && tries == 20)
 314                xfs_scrub_set_incomplete(sc);
 315out:
 316        return error;
 317}
 318