linux/fs/xfs/scrub/inode.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_btree.h"
  13#include "xfs_log_format.h"
  14#include "xfs_inode.h"
  15#include "xfs_ialloc.h"
  16#include "xfs_da_format.h"
  17#include "xfs_reflink.h"
  18#include "xfs_rmap.h"
  19#include "xfs_bmap_util.h"
  20#include "scrub/scrub.h"
  21#include "scrub/common.h"
  22#include "scrub/btree.h"
  23
  24/*
  25 * Grab total control of the inode metadata.  It doesn't matter here if
  26 * the file data is still changing; exclusive access to the metadata is
  27 * the goal.
  28 */
  29int
  30xchk_setup_inode(
  31        struct xfs_scrub        *sc,
  32        struct xfs_inode        *ip)
  33{
  34        int                     error;
  35
  36        /*
  37         * Try to get the inode.  If the verifiers fail, we try again
  38         * in raw mode.
  39         */
  40        error = xchk_get_inode(sc, ip);
  41        switch (error) {
  42        case 0:
  43                break;
  44        case -EFSCORRUPTED:
  45        case -EFSBADCRC:
  46                return xchk_trans_alloc(sc, 0);
  47        default:
  48                return error;
  49        }
  50
  51        /* Got the inode, lock it and we're ready to go. */
  52        sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
  53        xfs_ilock(sc->ip, sc->ilock_flags);
  54        error = xchk_trans_alloc(sc, 0);
  55        if (error)
  56                goto out;
  57        sc->ilock_flags |= XFS_ILOCK_EXCL;
  58        xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
  59
  60out:
  61        /* scrub teardown will unlock and release the inode for us */
  62        return error;
  63}
  64
  65/* Inode core */
  66
  67/* Validate di_extsize hint. */
  68STATIC void
  69xchk_inode_extsize(
  70        struct xfs_scrub        *sc,
  71        struct xfs_dinode       *dip,
  72        xfs_ino_t               ino,
  73        uint16_t                mode,
  74        uint16_t                flags)
  75{
  76        xfs_failaddr_t          fa;
  77
  78        fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
  79                        mode, flags);
  80        if (fa)
  81                xchk_ino_set_corrupt(sc, ino);
  82}
  83
  84/*
  85 * Validate di_cowextsize hint.
  86 *
  87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
  88 * These functions must be kept in sync with each other.
  89 */
  90STATIC void
  91xchk_inode_cowextsize(
  92        struct xfs_scrub        *sc,
  93        struct xfs_dinode       *dip,
  94        xfs_ino_t               ino,
  95        uint16_t                mode,
  96        uint16_t                flags,
  97        uint64_t                flags2)
  98{
  99        xfs_failaddr_t          fa;
 100
 101        fa = xfs_inode_validate_cowextsize(sc->mp,
 102                        be32_to_cpu(dip->di_cowextsize), mode, flags,
 103                        flags2);
 104        if (fa)
 105                xchk_ino_set_corrupt(sc, ino);
 106}
 107
 108/* Make sure the di_flags make sense for the inode. */
 109STATIC void
 110xchk_inode_flags(
 111        struct xfs_scrub        *sc,
 112        struct xfs_dinode       *dip,
 113        xfs_ino_t               ino,
 114        uint16_t                mode,
 115        uint16_t                flags)
 116{
 117        struct xfs_mount        *mp = sc->mp;
 118
 119        /* di_flags are all taken, last bit cannot be used */
 120        if (flags & ~XFS_DIFLAG_ANY)
 121                goto bad;
 122
 123        /* rt flags require rt device */
 124        if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
 125            !mp->m_rtdev_targp)
 126                goto bad;
 127
 128        /* new rt bitmap flag only valid for rbmino */
 129        if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
 130                goto bad;
 131
 132        /* directory-only flags */
 133        if ((flags & (XFS_DIFLAG_RTINHERIT |
 134                     XFS_DIFLAG_EXTSZINHERIT |
 135                     XFS_DIFLAG_PROJINHERIT |
 136                     XFS_DIFLAG_NOSYMLINKS)) &&
 137            !S_ISDIR(mode))
 138                goto bad;
 139
 140        /* file-only flags */
 141        if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
 142            !S_ISREG(mode))
 143                goto bad;
 144
 145        /* filestreams and rt make no sense */
 146        if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
 147                goto bad;
 148
 149        return;
 150bad:
 151        xchk_ino_set_corrupt(sc, ino);
 152}
 153
 154/* Make sure the di_flags2 make sense for the inode. */
 155STATIC void
 156xchk_inode_flags2(
 157        struct xfs_scrub        *sc,
 158        struct xfs_dinode       *dip,
 159        xfs_ino_t               ino,
 160        uint16_t                mode,
 161        uint16_t                flags,
 162        uint64_t                flags2)
 163{
 164        struct xfs_mount        *mp = sc->mp;
 165
 166        /* Unknown di_flags2 could be from a future kernel */
 167        if (flags2 & ~XFS_DIFLAG2_ANY)
 168                xchk_ino_set_warning(sc, ino);
 169
 170        /* reflink flag requires reflink feature */
 171        if ((flags2 & XFS_DIFLAG2_REFLINK) &&
 172            !xfs_sb_version_hasreflink(&mp->m_sb))
 173                goto bad;
 174
 175        /* cowextsize flag is checked w.r.t. mode separately */
 176
 177        /* file/dir-only flags */
 178        if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
 179                goto bad;
 180
 181        /* file-only flags */
 182        if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
 183                goto bad;
 184
 185        /* realtime and reflink make no sense, currently */
 186        if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
 187                goto bad;
 188
 189        /* dax and reflink make no sense, currently */
 190        if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
 191                goto bad;
 192
 193        return;
 194bad:
 195        xchk_ino_set_corrupt(sc, ino);
 196}
 197
 198/* Scrub all the ondisk inode fields. */
 199STATIC void
 200xchk_dinode(
 201        struct xfs_scrub        *sc,
 202        struct xfs_dinode       *dip,
 203        xfs_ino_t               ino)
 204{
 205        struct xfs_mount        *mp = sc->mp;
 206        size_t                  fork_recs;
 207        unsigned long long      isize;
 208        uint64_t                flags2;
 209        uint32_t                nextents;
 210        uint16_t                flags;
 211        uint16_t                mode;
 212
 213        flags = be16_to_cpu(dip->di_flags);
 214        if (dip->di_version >= 3)
 215                flags2 = be64_to_cpu(dip->di_flags2);
 216        else
 217                flags2 = 0;
 218
 219        /* di_mode */
 220        mode = be16_to_cpu(dip->di_mode);
 221        switch (mode & S_IFMT) {
 222        case S_IFLNK:
 223        case S_IFREG:
 224        case S_IFDIR:
 225        case S_IFCHR:
 226        case S_IFBLK:
 227        case S_IFIFO:
 228        case S_IFSOCK:
 229                /* mode is recognized */
 230                break;
 231        default:
 232                xchk_ino_set_corrupt(sc, ino);
 233                break;
 234        }
 235
 236        /* v1/v2 fields */
 237        switch (dip->di_version) {
 238        case 1:
 239                /*
 240                 * We autoconvert v1 inodes into v2 inodes on writeout,
 241                 * so just mark this inode for preening.
 242                 */
 243                xchk_ino_set_preen(sc, ino);
 244                break;
 245        case 2:
 246        case 3:
 247                if (dip->di_onlink != 0)
 248                        xchk_ino_set_corrupt(sc, ino);
 249
 250                if (dip->di_mode == 0 && sc->ip)
 251                        xchk_ino_set_corrupt(sc, ino);
 252
 253                if (dip->di_projid_hi != 0 &&
 254                    !xfs_sb_version_hasprojid32bit(&mp->m_sb))
 255                        xchk_ino_set_corrupt(sc, ino);
 256                break;
 257        default:
 258                xchk_ino_set_corrupt(sc, ino);
 259                return;
 260        }
 261
 262        /*
 263         * di_uid/di_gid -- -1 isn't invalid, but there's no way that
 264         * userspace could have created that.
 265         */
 266        if (dip->di_uid == cpu_to_be32(-1U) ||
 267            dip->di_gid == cpu_to_be32(-1U))
 268                xchk_ino_set_warning(sc, ino);
 269
 270        /* di_format */
 271        switch (dip->di_format) {
 272        case XFS_DINODE_FMT_DEV:
 273                if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
 274                    !S_ISFIFO(mode) && !S_ISSOCK(mode))
 275                        xchk_ino_set_corrupt(sc, ino);
 276                break;
 277        case XFS_DINODE_FMT_LOCAL:
 278                if (!S_ISDIR(mode) && !S_ISLNK(mode))
 279                        xchk_ino_set_corrupt(sc, ino);
 280                break;
 281        case XFS_DINODE_FMT_EXTENTS:
 282                if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
 283                        xchk_ino_set_corrupt(sc, ino);
 284                break;
 285        case XFS_DINODE_FMT_BTREE:
 286                if (!S_ISREG(mode) && !S_ISDIR(mode))
 287                        xchk_ino_set_corrupt(sc, ino);
 288                break;
 289        case XFS_DINODE_FMT_UUID:
 290        default:
 291                xchk_ino_set_corrupt(sc, ino);
 292                break;
 293        }
 294
 295        /* di_[amc]time.nsec */
 296        if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
 297                xchk_ino_set_corrupt(sc, ino);
 298        if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
 299                xchk_ino_set_corrupt(sc, ino);
 300        if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
 301                xchk_ino_set_corrupt(sc, ino);
 302
 303        /*
 304         * di_size.  xfs_dinode_verify checks for things that screw up
 305         * the VFS such as the upper bit being set and zero-length
 306         * symlinks/directories, but we can do more here.
 307         */
 308        isize = be64_to_cpu(dip->di_size);
 309        if (isize & (1ULL << 63))
 310                xchk_ino_set_corrupt(sc, ino);
 311
 312        /* Devices, fifos, and sockets must have zero size */
 313        if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
 314                xchk_ino_set_corrupt(sc, ino);
 315
 316        /* Directories can't be larger than the data section size (32G) */
 317        if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
 318                xchk_ino_set_corrupt(sc, ino);
 319
 320        /* Symlinks can't be larger than SYMLINK_MAXLEN */
 321        if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
 322                xchk_ino_set_corrupt(sc, ino);
 323
 324        /*
 325         * Warn if the running kernel can't handle the kinds of offsets
 326         * needed to deal with the file size.  In other words, if the
 327         * pagecache can't cache all the blocks in this file due to
 328         * overly large offsets, flag the inode for admin review.
 329         */
 330        if (isize >= mp->m_super->s_maxbytes)
 331                xchk_ino_set_warning(sc, ino);
 332
 333        /* di_nblocks */
 334        if (flags2 & XFS_DIFLAG2_REFLINK) {
 335                ; /* nblocks can exceed dblocks */
 336        } else if (flags & XFS_DIFLAG_REALTIME) {
 337                /*
 338                 * nblocks is the sum of data extents (in the rtdev),
 339                 * attr extents (in the datadev), and both forks' bmbt
 340                 * blocks (in the datadev).  This clumsy check is the
 341                 * best we can do without cross-referencing with the
 342                 * inode forks.
 343                 */
 344                if (be64_to_cpu(dip->di_nblocks) >=
 345                    mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
 346                        xchk_ino_set_corrupt(sc, ino);
 347        } else {
 348                if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
 349                        xchk_ino_set_corrupt(sc, ino);
 350        }
 351
 352        xchk_inode_flags(sc, dip, ino, mode, flags);
 353
 354        xchk_inode_extsize(sc, dip, ino, mode, flags);
 355
 356        /* di_nextents */
 357        nextents = be32_to_cpu(dip->di_nextents);
 358        fork_recs =  XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
 359        switch (dip->di_format) {
 360        case XFS_DINODE_FMT_EXTENTS:
 361                if (nextents > fork_recs)
 362                        xchk_ino_set_corrupt(sc, ino);
 363                break;
 364        case XFS_DINODE_FMT_BTREE:
 365                if (nextents <= fork_recs)
 366                        xchk_ino_set_corrupt(sc, ino);
 367                break;
 368        default:
 369                if (nextents != 0)
 370                        xchk_ino_set_corrupt(sc, ino);
 371                break;
 372        }
 373
 374        /* di_forkoff */
 375        if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
 376                xchk_ino_set_corrupt(sc, ino);
 377        if (dip->di_anextents != 0 && dip->di_forkoff == 0)
 378                xchk_ino_set_corrupt(sc, ino);
 379        if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
 380                xchk_ino_set_corrupt(sc, ino);
 381
 382        /* di_aformat */
 383        if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
 384            dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
 385            dip->di_aformat != XFS_DINODE_FMT_BTREE)
 386                xchk_ino_set_corrupt(sc, ino);
 387
 388        /* di_anextents */
 389        nextents = be16_to_cpu(dip->di_anextents);
 390        fork_recs =  XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
 391        switch (dip->di_aformat) {
 392        case XFS_DINODE_FMT_EXTENTS:
 393                if (nextents > fork_recs)
 394                        xchk_ino_set_corrupt(sc, ino);
 395                break;
 396        case XFS_DINODE_FMT_BTREE:
 397                if (nextents <= fork_recs)
 398                        xchk_ino_set_corrupt(sc, ino);
 399                break;
 400        default:
 401                if (nextents != 0)
 402                        xchk_ino_set_corrupt(sc, ino);
 403        }
 404
 405        if (dip->di_version >= 3) {
 406                if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
 407                        xchk_ino_set_corrupt(sc, ino);
 408                xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
 409                xchk_inode_cowextsize(sc, dip, ino, mode, flags,
 410                                flags2);
 411        }
 412}
 413
 414/*
 415 * Make sure the finobt doesn't think this inode is free.
 416 * We don't have to check the inobt ourselves because we got the inode via
 417 * IGET_UNTRUSTED, which checks the inobt for us.
 418 */
 419static void
 420xchk_inode_xref_finobt(
 421        struct xfs_scrub                *sc,
 422        xfs_ino_t                       ino)
 423{
 424        struct xfs_inobt_rec_incore     rec;
 425        xfs_agino_t                     agino;
 426        int                             has_record;
 427        int                             error;
 428
 429        if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
 430                return;
 431
 432        agino = XFS_INO_TO_AGINO(sc->mp, ino);
 433
 434        /*
 435         * Try to get the finobt record.  If we can't get it, then we're
 436         * in good shape.
 437         */
 438        error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
 439                        &has_record);
 440        if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
 441            !has_record)
 442                return;
 443
 444        error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
 445        if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
 446            !has_record)
 447                return;
 448
 449        /*
 450         * Otherwise, make sure this record either doesn't cover this inode,
 451         * or that it does but it's marked present.
 452         */
 453        if (rec.ir_startino > agino ||
 454            rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
 455                return;
 456
 457        if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
 458                xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
 459}
 460
 461/* Cross reference the inode fields with the forks. */
 462STATIC void
 463xchk_inode_xref_bmap(
 464        struct xfs_scrub        *sc,
 465        struct xfs_dinode       *dip)
 466{
 467        xfs_extnum_t            nextents;
 468        xfs_filblks_t           count;
 469        xfs_filblks_t           acount;
 470        int                     error;
 471
 472        if (xchk_skip_xref(sc->sm))
 473                return;
 474
 475        /* Walk all the extents to check nextents/naextents/nblocks. */
 476        error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
 477                        &nextents, &count);
 478        if (!xchk_should_check_xref(sc, &error, NULL))
 479                return;
 480        if (nextents < be32_to_cpu(dip->di_nextents))
 481                xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 482
 483        error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
 484                        &nextents, &acount);
 485        if (!xchk_should_check_xref(sc, &error, NULL))
 486                return;
 487        if (nextents != be16_to_cpu(dip->di_anextents))
 488                xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 489
 490        /* Check nblocks against the inode. */
 491        if (count + acount != be64_to_cpu(dip->di_nblocks))
 492                xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 493}
 494
 495/* Cross-reference with the other btrees. */
 496STATIC void
 497xchk_inode_xref(
 498        struct xfs_scrub        *sc,
 499        xfs_ino_t               ino,
 500        struct xfs_dinode       *dip)
 501{
 502        xfs_agnumber_t          agno;
 503        xfs_agblock_t           agbno;
 504        int                     error;
 505
 506        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 507                return;
 508
 509        agno = XFS_INO_TO_AGNO(sc->mp, ino);
 510        agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
 511
 512        error = xchk_ag_init(sc, agno, &sc->sa);
 513        if (!xchk_xref_process_error(sc, agno, agbno, &error))
 514                return;
 515
 516        xchk_xref_is_used_space(sc, agbno, 1);
 517        xchk_inode_xref_finobt(sc, ino);
 518        xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES);
 519        xchk_xref_is_not_shared(sc, agbno, 1);
 520        xchk_inode_xref_bmap(sc, dip);
 521
 522        xchk_ag_free(sc, &sc->sa);
 523}
 524
 525/*
 526 * If the reflink iflag disagrees with a scan for shared data fork extents,
 527 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
 528 * any shared extents).  We already checked for reflink iflag set on a non
 529 * reflink filesystem.
 530 */
 531static void
 532xchk_inode_check_reflink_iflag(
 533        struct xfs_scrub        *sc,
 534        xfs_ino_t               ino)
 535{
 536        struct xfs_mount        *mp = sc->mp;
 537        bool                    has_shared;
 538        int                     error;
 539
 540        if (!xfs_sb_version_hasreflink(&mp->m_sb))
 541                return;
 542
 543        error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
 544                        &has_shared);
 545        if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
 546                        XFS_INO_TO_AGBNO(mp, ino), &error))
 547                return;
 548        if (xfs_is_reflink_inode(sc->ip) && !has_shared)
 549                xchk_ino_set_preen(sc, ino);
 550        else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
 551                xchk_ino_set_corrupt(sc, ino);
 552}
 553
 554/* Scrub an inode. */
 555int
 556xchk_inode(
 557        struct xfs_scrub        *sc)
 558{
 559        struct xfs_dinode       di;
 560        int                     error = 0;
 561
 562        /*
 563         * If sc->ip is NULL, that means that the setup function called
 564         * xfs_iget to look up the inode.  xfs_iget returned a EFSCORRUPTED
 565         * and a NULL inode, so flag the corruption error and return.
 566         */
 567        if (!sc->ip) {
 568                xchk_ino_set_corrupt(sc, sc->sm->sm_ino);
 569                return 0;
 570        }
 571
 572        /* Scrub the inode core. */
 573        xfs_inode_to_disk(sc->ip, &di, 0);
 574        xchk_dinode(sc, &di, sc->ip->i_ino);
 575        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 576                goto out;
 577
 578        /*
 579         * Look for discrepancies between file's data blocks and the reflink
 580         * iflag.  We already checked the iflag against the file mode when
 581         * we scrubbed the dinode.
 582         */
 583        if (S_ISREG(VFS_I(sc->ip)->i_mode))
 584                xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino);
 585
 586        xchk_inode_xref(sc, sc->ip->i_ino, &di);
 587out:
 588        return error;
 589}
 590