linux/fs/xfs/scrub/dir.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2017 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_log_format.h"
  13#include "xfs_trans.h"
  14#include "xfs_inode.h"
  15#include "xfs_icache.h"
  16#include "xfs_dir2.h"
  17#include "xfs_dir2_priv.h"
  18#include "scrub/scrub.h"
  19#include "scrub/common.h"
  20#include "scrub/dabtree.h"
  21
  22/* Set us up to scrub directories. */
  23int
  24xchk_setup_directory(
  25        struct xfs_scrub        *sc)
  26{
  27        return xchk_setup_inode_contents(sc, 0);
  28}
  29
  30/* Directories */
  31
  32/* Scrub a directory entry. */
  33
  34struct xchk_dir_ctx {
  35        /* VFS fill-directory iterator */
  36        struct dir_context      dir_iter;
  37
  38        struct xfs_scrub        *sc;
  39};
  40
  41/* Check that an inode's mode matches a given DT_ type. */
  42STATIC int
  43xchk_dir_check_ftype(
  44        struct xchk_dir_ctx     *sdc,
  45        xfs_fileoff_t           offset,
  46        xfs_ino_t               inum,
  47        int                     dtype)
  48{
  49        struct xfs_mount        *mp = sdc->sc->mp;
  50        struct xfs_inode        *ip;
  51        int                     ino_dtype;
  52        int                     error = 0;
  53
  54        if (!xfs_has_ftype(mp)) {
  55                if (dtype != DT_UNKNOWN && dtype != DT_DIR)
  56                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
  57                                        offset);
  58                goto out;
  59        }
  60
  61        /*
  62         * Grab the inode pointed to by the dirent.  We release the
  63         * inode before we cancel the scrub transaction.  Since we're
  64         * don't know a priori that releasing the inode won't trigger
  65         * eofblocks cleanup (which allocates what would be a nested
  66         * transaction), we can't use DONTCACHE here because DONTCACHE
  67         * inodes can trigger immediate inactive cleanup of the inode.
  68         *
  69         * If _iget returns -EINVAL or -ENOENT then the child inode number is
  70         * garbage and the directory is corrupt.  If the _iget returns
  71         * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
  72         *  cross referencing error.  Any other error is an operational error.
  73         */
  74        error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
  75        if (error == -EINVAL || error == -ENOENT) {
  76                error = -EFSCORRUPTED;
  77                xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error);
  78                goto out;
  79        }
  80        if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
  81                        &error))
  82                goto out;
  83
  84        /* Convert mode to the DT_* values that dir_emit uses. */
  85        ino_dtype = xfs_dir3_get_dtype(mp,
  86                        xfs_mode_to_ftype(VFS_I(ip)->i_mode));
  87        if (ino_dtype != dtype)
  88                xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
  89        xfs_irele(ip);
  90out:
  91        return error;
  92}
  93
  94/*
  95 * Scrub a single directory entry.
  96 *
  97 * We use the VFS directory iterator (i.e. readdir) to call this
  98 * function for every directory entry in a directory.  Once we're here,
  99 * we check the inode number to make sure it's sane, then we check that
 100 * we can look up this filename.  Finally, we check the ftype.
 101 */
 102STATIC int
 103xchk_dir_actor(
 104        struct dir_context      *dir_iter,
 105        const char              *name,
 106        int                     namelen,
 107        loff_t                  pos,
 108        u64                     ino,
 109        unsigned                type)
 110{
 111        struct xfs_mount        *mp;
 112        struct xfs_inode        *ip;
 113        struct xchk_dir_ctx     *sdc;
 114        struct xfs_name         xname;
 115        xfs_ino_t               lookup_ino;
 116        xfs_dablk_t             offset;
 117        bool                    checked_ftype = false;
 118        int                     error = 0;
 119
 120        sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
 121        ip = sdc->sc->ip;
 122        mp = ip->i_mount;
 123        offset = xfs_dir2_db_to_da(mp->m_dir_geo,
 124                        xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));
 125
 126        if (xchk_should_terminate(sdc->sc, &error))
 127                return error;
 128
 129        /* Does this inode number make sense? */
 130        if (!xfs_verify_dir_ino(mp, ino)) {
 131                xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 132                goto out;
 133        }
 134
 135        /* Does this name make sense? */
 136        if (!xfs_dir2_namecheck(name, namelen)) {
 137                xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 138                goto out;
 139        }
 140
 141        if (!strncmp(".", name, namelen)) {
 142                /* If this is "." then check that the inum matches the dir. */
 143                if (xfs_has_ftype(mp) && type != DT_DIR)
 144                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 145                                        offset);
 146                checked_ftype = true;
 147                if (ino != ip->i_ino)
 148                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 149                                        offset);
 150        } else if (!strncmp("..", name, namelen)) {
 151                /*
 152                 * If this is ".." in the root inode, check that the inum
 153                 * matches this dir.
 154                 */
 155                if (xfs_has_ftype(mp) && type != DT_DIR)
 156                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 157                                        offset);
 158                checked_ftype = true;
 159                if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
 160                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 161                                        offset);
 162        }
 163
 164        /* Verify that we can look up this name by hash. */
 165        xname.name = name;
 166        xname.len = namelen;
 167        xname.type = XFS_DIR3_FT_UNKNOWN;
 168
 169        error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
 170        /* ENOENT means the hash lookup failed and the dir is corrupt */
 171        if (error == -ENOENT)
 172                error = -EFSCORRUPTED;
 173        if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
 174                        &error))
 175                goto out;
 176        if (lookup_ino != ino) {
 177                xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 178                goto out;
 179        }
 180
 181        /* Verify the file type.  This function absorbs error codes. */
 182        if (!checked_ftype) {
 183                error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
 184                if (error)
 185                        goto out;
 186        }
 187out:
 188        /*
 189         * A negative error code returned here is supposed to cause the
 190         * dir_emit caller (xfs_readdir) to abort the directory iteration
 191         * and return zero to xchk_directory.
 192         */
 193        if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 194                return -EFSCORRUPTED;
 195        return error;
 196}
 197
 198/* Scrub a directory btree record. */
 199STATIC int
 200xchk_dir_rec(
 201        struct xchk_da_btree            *ds,
 202        int                             level)
 203{
 204        struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
 205        struct xfs_mount                *mp = ds->state->mp;
 206        struct xfs_inode                *dp = ds->dargs.dp;
 207        struct xfs_da_geometry          *geo = mp->m_dir_geo;
 208        struct xfs_dir2_data_entry      *dent;
 209        struct xfs_buf                  *bp;
 210        struct xfs_dir2_leaf_entry      *ent;
 211        unsigned int                    end;
 212        unsigned int                    iter_off;
 213        xfs_ino_t                       ino;
 214        xfs_dablk_t                     rec_bno;
 215        xfs_dir2_db_t                   db;
 216        xfs_dir2_data_aoff_t            off;
 217        xfs_dir2_dataptr_t              ptr;
 218        xfs_dahash_t                    calc_hash;
 219        xfs_dahash_t                    hash;
 220        struct xfs_dir3_icleaf_hdr      hdr;
 221        unsigned int                    tag;
 222        int                             error;
 223
 224        ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC ||
 225               blk->magic == XFS_DIR2_LEAFN_MAGIC);
 226
 227        xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr);
 228        ent = hdr.ents + blk->index;
 229
 230        /* Check the hash of the entry. */
 231        error = xchk_da_btree_hash(ds, level, &ent->hashval);
 232        if (error)
 233                goto out;
 234
 235        /* Valid hash pointer? */
 236        ptr = be32_to_cpu(ent->address);
 237        if (ptr == 0)
 238                return 0;
 239
 240        /* Find the directory entry's location. */
 241        db = xfs_dir2_dataptr_to_db(geo, ptr);
 242        off = xfs_dir2_dataptr_to_off(geo, ptr);
 243        rec_bno = xfs_dir2_db_to_da(geo, db);
 244
 245        if (rec_bno >= geo->leafblk) {
 246                xchk_da_set_corrupt(ds, level);
 247                goto out;
 248        }
 249        error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno,
 250                        XFS_DABUF_MAP_HOLE_OK, &bp);
 251        if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
 252                        &error))
 253                goto out;
 254        if (!bp) {
 255                xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 256                goto out;
 257        }
 258        xchk_buffer_recheck(ds->sc, bp);
 259
 260        if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 261                goto out_relse;
 262
 263        dent = bp->b_addr + off;
 264
 265        /* Make sure we got a real directory entry. */
 266        iter_off = geo->data_entry_offset;
 267        end = xfs_dir3_data_end_offset(geo, bp->b_addr);
 268        if (!end) {
 269                xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 270                goto out_relse;
 271        }
 272        for (;;) {
 273                struct xfs_dir2_data_entry      *dep = bp->b_addr + iter_off;
 274                struct xfs_dir2_data_unused     *dup = bp->b_addr + iter_off;
 275
 276                if (iter_off >= end) {
 277                        xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 278                        goto out_relse;
 279                }
 280
 281                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 282                        iter_off += be16_to_cpu(dup->length);
 283                        continue;
 284                }
 285                if (dep == dent)
 286                        break;
 287                iter_off += xfs_dir2_data_entsize(mp, dep->namelen);
 288        }
 289
 290        /* Retrieve the entry, sanity check it, and compare hashes. */
 291        ino = be64_to_cpu(dent->inumber);
 292        hash = be32_to_cpu(ent->hashval);
 293        tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent));
 294        if (!xfs_verify_dir_ino(mp, ino) || tag != off)
 295                xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 296        if (dent->namelen == 0) {
 297                xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 298                goto out_relse;
 299        }
 300        calc_hash = xfs_da_hashname(dent->name, dent->namelen);
 301        if (calc_hash != hash)
 302                xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 303
 304out_relse:
 305        xfs_trans_brelse(ds->dargs.trans, bp);
 306out:
 307        return error;
 308}
 309
 310/*
 311 * Is this unused entry either in the bestfree or smaller than all of
 312 * them?  We've already checked that the bestfrees are sorted longest to
 313 * shortest, and that there aren't any bogus entries.
 314 */
 315STATIC void
 316xchk_directory_check_free_entry(
 317        struct xfs_scrub                *sc,
 318        xfs_dablk_t                     lblk,
 319        struct xfs_dir2_data_free       *bf,
 320        struct xfs_dir2_data_unused     *dup)
 321{
 322        struct xfs_dir2_data_free       *dfp;
 323        unsigned int                    dup_length;
 324
 325        dup_length = be16_to_cpu(dup->length);
 326
 327        /* Unused entry is shorter than any of the bestfrees */
 328        if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
 329                return;
 330
 331        for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
 332                if (dup_length == be16_to_cpu(dfp->length))
 333                        return;
 334
 335        /* Unused entry should be in the bestfrees but wasn't found. */
 336        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 337}
 338
 339/* Check free space info in a directory data block. */
 340STATIC int
 341xchk_directory_data_bestfree(
 342        struct xfs_scrub                *sc,
 343        xfs_dablk_t                     lblk,
 344        bool                            is_block)
 345{
 346        struct xfs_dir2_data_unused     *dup;
 347        struct xfs_dir2_data_free       *dfp;
 348        struct xfs_buf                  *bp;
 349        struct xfs_dir2_data_free       *bf;
 350        struct xfs_mount                *mp = sc->mp;
 351        u16                             tag;
 352        unsigned int                    nr_bestfrees = 0;
 353        unsigned int                    nr_frees = 0;
 354        unsigned int                    smallest_bestfree;
 355        int                             newlen;
 356        unsigned int                    offset;
 357        unsigned int                    end;
 358        int                             error;
 359
 360        if (is_block) {
 361                /* dir block format */
 362                if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
 363                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 364                error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
 365        } else {
 366                /* dir data format */
 367                error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, 0, &bp);
 368        }
 369        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 370                goto out;
 371        xchk_buffer_recheck(sc, bp);
 372
 373        /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
 374
 375        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 376                goto out_buf;
 377
 378        /* Do the bestfrees correspond to actual free space? */
 379        bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
 380        smallest_bestfree = UINT_MAX;
 381        for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
 382                offset = be16_to_cpu(dfp->offset);
 383                if (offset == 0)
 384                        continue;
 385                if (offset >= mp->m_dir_geo->blksize) {
 386                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 387                        goto out_buf;
 388                }
 389                dup = bp->b_addr + offset;
 390                tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
 391
 392                /* bestfree doesn't match the entry it points at? */
 393                if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
 394                    be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
 395                    tag != offset) {
 396                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 397                        goto out_buf;
 398                }
 399
 400                /* bestfree records should be ordered largest to smallest */
 401                if (smallest_bestfree < be16_to_cpu(dfp->length)) {
 402                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 403                        goto out_buf;
 404                }
 405
 406                smallest_bestfree = be16_to_cpu(dfp->length);
 407                nr_bestfrees++;
 408        }
 409
 410        /* Make sure the bestfrees are actually the best free spaces. */
 411        offset = mp->m_dir_geo->data_entry_offset;
 412        end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr);
 413
 414        /* Iterate the entries, stopping when we hit or go past the end. */
 415        while (offset < end) {
 416                dup = bp->b_addr + offset;
 417
 418                /* Skip real entries */
 419                if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
 420                        struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
 421
 422                        newlen = xfs_dir2_data_entsize(mp, dep->namelen);
 423                        if (newlen <= 0) {
 424                                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 425                                                lblk);
 426                                goto out_buf;
 427                        }
 428                        offset += newlen;
 429                        continue;
 430                }
 431
 432                /* Spot check this free entry */
 433                tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
 434                if (tag != offset) {
 435                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 436                        goto out_buf;
 437                }
 438
 439                /*
 440                 * Either this entry is a bestfree or it's smaller than
 441                 * any of the bestfrees.
 442                 */
 443                xchk_directory_check_free_entry(sc, lblk, bf, dup);
 444                if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 445                        goto out_buf;
 446
 447                /* Move on. */
 448                newlen = be16_to_cpu(dup->length);
 449                if (newlen <= 0) {
 450                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 451                        goto out_buf;
 452                }
 453                offset += newlen;
 454                if (offset <= end)
 455                        nr_frees++;
 456        }
 457
 458        /* We're required to fill all the space. */
 459        if (offset != end)
 460                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 461
 462        /* Did we see at least as many free slots as there are bestfrees? */
 463        if (nr_frees < nr_bestfrees)
 464                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 465out_buf:
 466        xfs_trans_brelse(sc->tp, bp);
 467out:
 468        return error;
 469}
 470
 471/*
 472 * Does the free space length in the free space index block ($len) match
 473 * the longest length in the directory data block's bestfree array?
 474 * Assume that we've already checked that the data block's bestfree
 475 * array is in order.
 476 */
 477STATIC void
 478xchk_directory_check_freesp(
 479        struct xfs_scrub                *sc,
 480        xfs_dablk_t                     lblk,
 481        struct xfs_buf                  *dbp,
 482        unsigned int                    len)
 483{
 484        struct xfs_dir2_data_free       *dfp;
 485
 486        dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr);
 487
 488        if (len != be16_to_cpu(dfp->length))
 489                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 490
 491        if (len > 0 && be16_to_cpu(dfp->offset) == 0)
 492                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 493}
 494
 495/* Check free space info in a directory leaf1 block. */
 496STATIC int
 497xchk_directory_leaf1_bestfree(
 498        struct xfs_scrub                *sc,
 499        struct xfs_da_args              *args,
 500        xfs_dir2_db_t                   last_data_db,
 501        xfs_dablk_t                     lblk)
 502{
 503        struct xfs_dir3_icleaf_hdr      leafhdr;
 504        struct xfs_dir2_leaf_tail       *ltp;
 505        struct xfs_dir2_leaf            *leaf;
 506        struct xfs_buf                  *dbp;
 507        struct xfs_buf                  *bp;
 508        struct xfs_da_geometry          *geo = sc->mp->m_dir_geo;
 509        __be16                          *bestp;
 510        __u16                           best;
 511        __u32                           hash;
 512        __u32                           lasthash = 0;
 513        __u32                           bestcount;
 514        unsigned int                    stale = 0;
 515        int                             i;
 516        int                             error;
 517
 518        /* Read the free space block. */
 519        error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, &bp);
 520        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 521                return error;
 522        xchk_buffer_recheck(sc, bp);
 523
 524        leaf = bp->b_addr;
 525        xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf);
 526        ltp = xfs_dir2_leaf_tail_p(geo, leaf);
 527        bestcount = be32_to_cpu(ltp->bestcount);
 528        bestp = xfs_dir2_leaf_bests_p(ltp);
 529
 530        if (xfs_has_crc(sc->mp)) {
 531                struct xfs_dir3_leaf_hdr        *hdr3 = bp->b_addr;
 532
 533                if (hdr3->pad != cpu_to_be32(0))
 534                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 535        }
 536
 537        /*
 538         * There must be enough bestfree slots to cover all the directory data
 539         * blocks that we scanned.  It is possible for there to be a hole
 540         * between the last data block and i_disk_size.  This seems like an
 541         * oversight to the scrub author, but as we have been writing out
 542         * directories like this (and xfs_repair doesn't mind them) for years,
 543         * that's what we have to check.
 544         */
 545        if (bestcount != last_data_db + 1) {
 546                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 547                goto out;
 548        }
 549
 550        /* Is the leaf count even remotely sane? */
 551        if (leafhdr.count > geo->leaf_max_ents) {
 552                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 553                goto out;
 554        }
 555
 556        /* Leaves and bests don't overlap in leaf format. */
 557        if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) {
 558                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 559                goto out;
 560        }
 561
 562        /* Check hash value order, count stale entries.  */
 563        for (i = 0; i < leafhdr.count; i++) {
 564                hash = be32_to_cpu(leafhdr.ents[i].hashval);
 565                if (i > 0 && lasthash > hash)
 566                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 567                lasthash = hash;
 568                if (leafhdr.ents[i].address ==
 569                    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 570                        stale++;
 571        }
 572        if (leafhdr.stale != stale)
 573                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 574        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 575                goto out;
 576
 577        /* Check all the bestfree entries. */
 578        for (i = 0; i < bestcount; i++, bestp++) {
 579                best = be16_to_cpu(*bestp);
 580                error = xfs_dir3_data_read(sc->tp, sc->ip,
 581                                xfs_dir2_db_to_da(args->geo, i),
 582                                XFS_DABUF_MAP_HOLE_OK,
 583                                &dbp);
 584                if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 585                                &error))
 586                        break;
 587
 588                if (!dbp) {
 589                        if (best != NULLDATAOFF) {
 590                                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 591                                                lblk);
 592                                break;
 593                        }
 594                        continue;
 595                }
 596
 597                if (best == NULLDATAOFF)
 598                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 599                else
 600                        xchk_directory_check_freesp(sc, lblk, dbp, best);
 601                xfs_trans_brelse(sc->tp, dbp);
 602                if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 603                        break;
 604        }
 605out:
 606        xfs_trans_brelse(sc->tp, bp);
 607        return error;
 608}
 609
 610/* Check free space info in a directory freespace block. */
 611STATIC int
 612xchk_directory_free_bestfree(
 613        struct xfs_scrub                *sc,
 614        struct xfs_da_args              *args,
 615        xfs_dablk_t                     lblk)
 616{
 617        struct xfs_dir3_icfree_hdr      freehdr;
 618        struct xfs_buf                  *dbp;
 619        struct xfs_buf                  *bp;
 620        __u16                           best;
 621        unsigned int                    stale = 0;
 622        int                             i;
 623        int                             error;
 624
 625        /* Read the free space block */
 626        error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
 627        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 628                return error;
 629        xchk_buffer_recheck(sc, bp);
 630
 631        if (xfs_has_crc(sc->mp)) {
 632                struct xfs_dir3_free_hdr        *hdr3 = bp->b_addr;
 633
 634                if (hdr3->pad != cpu_to_be32(0))
 635                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 636        }
 637
 638        /* Check all the entries. */
 639        xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr);
 640        for (i = 0; i < freehdr.nvalid; i++) {
 641                best = be16_to_cpu(freehdr.bests[i]);
 642                if (best == NULLDATAOFF) {
 643                        stale++;
 644                        continue;
 645                }
 646                error = xfs_dir3_data_read(sc->tp, sc->ip,
 647                                (freehdr.firstdb + i) * args->geo->fsbcount,
 648                                0, &dbp);
 649                if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 650                                &error))
 651                        goto out;
 652                xchk_directory_check_freesp(sc, lblk, dbp, best);
 653                xfs_trans_brelse(sc->tp, dbp);
 654        }
 655
 656        if (freehdr.nused + stale != freehdr.nvalid)
 657                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 658out:
 659        xfs_trans_brelse(sc->tp, bp);
 660        return error;
 661}
 662
 663/* Check free space information in directories. */
 664STATIC int
 665xchk_directory_blocks(
 666        struct xfs_scrub        *sc)
 667{
 668        struct xfs_bmbt_irec    got;
 669        struct xfs_da_args      args;
 670        struct xfs_ifork        *ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
 671        struct xfs_mount        *mp = sc->mp;
 672        xfs_fileoff_t           leaf_lblk;
 673        xfs_fileoff_t           free_lblk;
 674        xfs_fileoff_t           lblk;
 675        struct xfs_iext_cursor  icur;
 676        xfs_dablk_t             dabno;
 677        xfs_dir2_db_t           last_data_db = 0;
 678        bool                    found;
 679        int                     is_block = 0;
 680        int                     error;
 681
 682        /* Ignore local format directories. */
 683        if (ifp->if_format != XFS_DINODE_FMT_EXTENTS &&
 684            ifp->if_format != XFS_DINODE_FMT_BTREE)
 685                return 0;
 686
 687        lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
 688        leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
 689        free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
 690
 691        /* Is this a block dir? */
 692        args.dp = sc->ip;
 693        args.geo = mp->m_dir_geo;
 694        args.trans = sc->tp;
 695        error = xfs_dir2_isblock(&args, &is_block);
 696        if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 697                goto out;
 698
 699        /* Iterate all the data extents in the directory... */
 700        found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
 701        while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
 702                /* No more data blocks... */
 703                if (got.br_startoff >= leaf_lblk)
 704                        break;
 705
 706                /*
 707                 * Check each data block's bestfree data.
 708                 *
 709                 * Iterate all the fsbcount-aligned block offsets in
 710                 * this directory.  The directory block reading code is
 711                 * smart enough to do its own bmap lookups to handle
 712                 * discontiguous directory blocks.  When we're done
 713                 * with the extent record, re-query the bmap at the
 714                 * next fsbcount-aligned offset to avoid redundant
 715                 * block checks.
 716                 */
 717                for (lblk = roundup((xfs_dablk_t)got.br_startoff,
 718                                args.geo->fsbcount);
 719                     lblk < got.br_startoff + got.br_blockcount;
 720                     lblk += args.geo->fsbcount) {
 721                        last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
 722                        error = xchk_directory_data_bestfree(sc, lblk,
 723                                        is_block);
 724                        if (error)
 725                                goto out;
 726                }
 727                dabno = got.br_startoff + got.br_blockcount;
 728                lblk = roundup(dabno, args.geo->fsbcount);
 729                found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
 730        }
 731
 732        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 733                goto out;
 734
 735        /* Look for a leaf1 block, which has free info. */
 736        if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
 737            got.br_startoff == leaf_lblk &&
 738            got.br_blockcount == args.geo->fsbcount &&
 739            !xfs_iext_next_extent(ifp, &icur, &got)) {
 740                if (is_block) {
 741                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 742                        goto out;
 743                }
 744                error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
 745                                leaf_lblk);
 746                if (error)
 747                        goto out;
 748        }
 749
 750        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 751                goto out;
 752
 753        /* Scan for free blocks */
 754        lblk = free_lblk;
 755        found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
 756        while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
 757                /*
 758                 * Dirs can't have blocks mapped above 2^32.
 759                 * Single-block dirs shouldn't even be here.
 760                 */
 761                lblk = got.br_startoff;
 762                if (lblk & ~0xFFFFFFFFULL) {
 763                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 764                        goto out;
 765                }
 766                if (is_block) {
 767                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 768                        goto out;
 769                }
 770
 771                /*
 772                 * Check each dir free block's bestfree data.
 773                 *
 774                 * Iterate all the fsbcount-aligned block offsets in
 775                 * this directory.  The directory block reading code is
 776                 * smart enough to do its own bmap lookups to handle
 777                 * discontiguous directory blocks.  When we're done
 778                 * with the extent record, re-query the bmap at the
 779                 * next fsbcount-aligned offset to avoid redundant
 780                 * block checks.
 781                 */
 782                for (lblk = roundup((xfs_dablk_t)got.br_startoff,
 783                                args.geo->fsbcount);
 784                     lblk < got.br_startoff + got.br_blockcount;
 785                     lblk += args.geo->fsbcount) {
 786                        error = xchk_directory_free_bestfree(sc, &args,
 787                                        lblk);
 788                        if (error)
 789                                goto out;
 790                }
 791                dabno = got.br_startoff + got.br_blockcount;
 792                lblk = roundup(dabno, args.geo->fsbcount);
 793                found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
 794        }
 795out:
 796        return error;
 797}
 798
 799/* Scrub a whole directory. */
 800int
 801xchk_directory(
 802        struct xfs_scrub        *sc)
 803{
 804        struct xchk_dir_ctx     sdc = {
 805                .dir_iter.actor = xchk_dir_actor,
 806                .dir_iter.pos = 0,
 807                .sc = sc,
 808        };
 809        size_t                  bufsize;
 810        loff_t                  oldpos;
 811        int                     error = 0;
 812
 813        if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
 814                return -ENOENT;
 815
 816        /* Plausible size? */
 817        if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
 818                xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 819                goto out;
 820        }
 821
 822        /* Check directory tree structure */
 823        error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL);
 824        if (error)
 825                return error;
 826
 827        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 828                return error;
 829
 830        /* Check the freespace. */
 831        error = xchk_directory_blocks(sc);
 832        if (error)
 833                return error;
 834
 835        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 836                return error;
 837
 838        /*
 839         * Check that every dirent we see can also be looked up by hash.
 840         * Userspace usually asks for a 32k buffer, so we will too.
 841         */
 842        bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
 843                        sc->ip->i_disk_size);
 844
 845        /*
 846         * Look up every name in this directory by hash.
 847         *
 848         * Use the xfs_readdir function to call xchk_dir_actor on
 849         * every directory entry in this directory.  In _actor, we check
 850         * the name, inode number, and ftype (if applicable) of the
 851         * entry.  xfs_readdir uses the VFS filldir functions to provide
 852         * iteration context.
 853         *
 854         * The VFS grabs a read or write lock via i_rwsem before it reads
 855         * or writes to a directory.  If we've gotten this far we've
 856         * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
 857         * getting a write lock on i_rwsem.  Therefore, it is safe for us
 858         * to drop the ILOCK here in order to reuse the _readdir and
 859         * _dir_lookup routines, which do their own ILOCK locking.
 860         */
 861        oldpos = 0;
 862        sc->ilock_flags &= ~XFS_ILOCK_EXCL;
 863        xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
 864        while (true) {
 865                error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize);
 866                if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
 867                                &error))
 868                        goto out;
 869                if (oldpos == sdc.dir_iter.pos)
 870                        break;
 871                oldpos = sdc.dir_iter.pos;
 872        }
 873
 874out:
 875        return error;
 876}
 877