linux/fs/xfs/xfs_health.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2019 Oracle.  All Rights Reserved.
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_inode.h"
  14#include "xfs_trace.h"
  15#include "xfs_health.h"
  16#include "xfs_ag.h"
  17
  18/*
  19 * Warn about metadata corruption that we detected but haven't fixed, and
  20 * make sure we're not sitting on anything that would get in the way of
  21 * recovery.
  22 */
  23void
  24xfs_health_unmount(
  25        struct xfs_mount        *mp)
  26{
  27        struct xfs_perag        *pag;
  28        xfs_agnumber_t          agno;
  29        unsigned int            sick = 0;
  30        unsigned int            checked = 0;
  31        bool                    warn = false;
  32
  33        if (xfs_is_shutdown(mp))
  34                return;
  35
  36        /* Measure AG corruption levels. */
  37        for_each_perag(mp, agno, pag) {
  38                xfs_ag_measure_sickness(pag, &sick, &checked);
  39                if (sick) {
  40                        trace_xfs_ag_unfixed_corruption(mp, agno, sick);
  41                        warn = true;
  42                }
  43        }
  44
  45        /* Measure realtime volume corruption levels. */
  46        xfs_rt_measure_sickness(mp, &sick, &checked);
  47        if (sick) {
  48                trace_xfs_rt_unfixed_corruption(mp, sick);
  49                warn = true;
  50        }
  51
  52        /*
  53         * Measure fs corruption and keep the sample around for the warning.
  54         * See the note below for why we exempt FS_COUNTERS.
  55         */
  56        xfs_fs_measure_sickness(mp, &sick, &checked);
  57        if (sick & ~XFS_SICK_FS_COUNTERS) {
  58                trace_xfs_fs_unfixed_corruption(mp, sick);
  59                warn = true;
  60        }
  61
  62        if (warn) {
  63                xfs_warn(mp,
  64"Uncorrected metadata errors detected; please run xfs_repair.");
  65
  66                /*
  67                 * We discovered uncorrected metadata problems at some point
  68                 * during this filesystem mount and have advised the
  69                 * administrator to run repair once the unmount completes.
  70                 *
  71                 * However, we must be careful -- when FSCOUNTERS are flagged
  72                 * unhealthy, the unmount procedure omits writing the clean
  73                 * unmount record to the log so that the next mount will run
  74                 * recovery and recompute the summary counters.  In other
  75                 * words, we leave a dirty log to get the counters fixed.
  76                 *
  77                 * Unfortunately, xfs_repair cannot recover dirty logs, so if
  78                 * there were filesystem problems, FSCOUNTERS was flagged, and
  79                 * the administrator takes our advice to run xfs_repair,
  80                 * they'll have to zap the log before repairing structures.
  81                 * We don't really want to encourage this, so we mark the
  82                 * FSCOUNTERS healthy so that a subsequent repair run won't see
  83                 * a dirty log.
  84                 */
  85                if (sick & XFS_SICK_FS_COUNTERS)
  86                        xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
  87        }
  88}
  89
  90/* Mark unhealthy per-fs metadata. */
  91void
  92xfs_fs_mark_sick(
  93        struct xfs_mount        *mp,
  94        unsigned int            mask)
  95{
  96        ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
  97        trace_xfs_fs_mark_sick(mp, mask);
  98
  99        spin_lock(&mp->m_sb_lock);
 100        mp->m_fs_sick |= mask;
 101        mp->m_fs_checked |= mask;
 102        spin_unlock(&mp->m_sb_lock);
 103}
 104
 105/* Mark a per-fs metadata healed. */
 106void
 107xfs_fs_mark_healthy(
 108        struct xfs_mount        *mp,
 109        unsigned int            mask)
 110{
 111        ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
 112        trace_xfs_fs_mark_healthy(mp, mask);
 113
 114        spin_lock(&mp->m_sb_lock);
 115        mp->m_fs_sick &= ~mask;
 116        mp->m_fs_checked |= mask;
 117        spin_unlock(&mp->m_sb_lock);
 118}
 119
 120/* Sample which per-fs metadata are unhealthy. */
 121void
 122xfs_fs_measure_sickness(
 123        struct xfs_mount        *mp,
 124        unsigned int            *sick,
 125        unsigned int            *checked)
 126{
 127        spin_lock(&mp->m_sb_lock);
 128        *sick = mp->m_fs_sick;
 129        *checked = mp->m_fs_checked;
 130        spin_unlock(&mp->m_sb_lock);
 131}
 132
 133/* Mark unhealthy realtime metadata. */
 134void
 135xfs_rt_mark_sick(
 136        struct xfs_mount        *mp,
 137        unsigned int            mask)
 138{
 139        ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
 140        trace_xfs_rt_mark_sick(mp, mask);
 141
 142        spin_lock(&mp->m_sb_lock);
 143        mp->m_rt_sick |= mask;
 144        mp->m_rt_checked |= mask;
 145        spin_unlock(&mp->m_sb_lock);
 146}
 147
 148/* Mark a realtime metadata healed. */
 149void
 150xfs_rt_mark_healthy(
 151        struct xfs_mount        *mp,
 152        unsigned int            mask)
 153{
 154        ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
 155        trace_xfs_rt_mark_healthy(mp, mask);
 156
 157        spin_lock(&mp->m_sb_lock);
 158        mp->m_rt_sick &= ~mask;
 159        mp->m_rt_checked |= mask;
 160        spin_unlock(&mp->m_sb_lock);
 161}
 162
 163/* Sample which realtime metadata are unhealthy. */
 164void
 165xfs_rt_measure_sickness(
 166        struct xfs_mount        *mp,
 167        unsigned int            *sick,
 168        unsigned int            *checked)
 169{
 170        spin_lock(&mp->m_sb_lock);
 171        *sick = mp->m_rt_sick;
 172        *checked = mp->m_rt_checked;
 173        spin_unlock(&mp->m_sb_lock);
 174}
 175
 176/* Mark unhealthy per-ag metadata. */
 177void
 178xfs_ag_mark_sick(
 179        struct xfs_perag        *pag,
 180        unsigned int            mask)
 181{
 182        ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
 183        trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
 184
 185        spin_lock(&pag->pag_state_lock);
 186        pag->pag_sick |= mask;
 187        pag->pag_checked |= mask;
 188        spin_unlock(&pag->pag_state_lock);
 189}
 190
 191/* Mark per-ag metadata ok. */
 192void
 193xfs_ag_mark_healthy(
 194        struct xfs_perag        *pag,
 195        unsigned int            mask)
 196{
 197        ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
 198        trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
 199
 200        spin_lock(&pag->pag_state_lock);
 201        pag->pag_sick &= ~mask;
 202        pag->pag_checked |= mask;
 203        spin_unlock(&pag->pag_state_lock);
 204}
 205
 206/* Sample which per-ag metadata are unhealthy. */
 207void
 208xfs_ag_measure_sickness(
 209        struct xfs_perag        *pag,
 210        unsigned int            *sick,
 211        unsigned int            *checked)
 212{
 213        spin_lock(&pag->pag_state_lock);
 214        *sick = pag->pag_sick;
 215        *checked = pag->pag_checked;
 216        spin_unlock(&pag->pag_state_lock);
 217}
 218
 219/* Mark the unhealthy parts of an inode. */
 220void
 221xfs_inode_mark_sick(
 222        struct xfs_inode        *ip,
 223        unsigned int            mask)
 224{
 225        ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
 226        trace_xfs_inode_mark_sick(ip, mask);
 227
 228        spin_lock(&ip->i_flags_lock);
 229        ip->i_sick |= mask;
 230        ip->i_checked |= mask;
 231        spin_unlock(&ip->i_flags_lock);
 232
 233        /*
 234         * Keep this inode around so we don't lose the sickness report.  Scrub
 235         * grabs inodes with DONTCACHE assuming that most inode are ok, which
 236         * is not the case here.
 237         */
 238        spin_lock(&VFS_I(ip)->i_lock);
 239        VFS_I(ip)->i_state &= ~I_DONTCACHE;
 240        spin_unlock(&VFS_I(ip)->i_lock);
 241}
 242
 243/* Mark parts of an inode healed. */
 244void
 245xfs_inode_mark_healthy(
 246        struct xfs_inode        *ip,
 247        unsigned int            mask)
 248{
 249        ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
 250        trace_xfs_inode_mark_healthy(ip, mask);
 251
 252        spin_lock(&ip->i_flags_lock);
 253        ip->i_sick &= ~mask;
 254        ip->i_checked |= mask;
 255        spin_unlock(&ip->i_flags_lock);
 256}
 257
 258/* Sample which parts of an inode are unhealthy. */
 259void
 260xfs_inode_measure_sickness(
 261        struct xfs_inode        *ip,
 262        unsigned int            *sick,
 263        unsigned int            *checked)
 264{
 265        spin_lock(&ip->i_flags_lock);
 266        *sick = ip->i_sick;
 267        *checked = ip->i_checked;
 268        spin_unlock(&ip->i_flags_lock);
 269}
 270
 271/* Mappings between internal sick masks and ioctl sick masks. */
 272
 273struct ioctl_sick_map {
 274        unsigned int            sick_mask;
 275        unsigned int            ioctl_mask;
 276};
 277
 278static const struct ioctl_sick_map fs_map[] = {
 279        { XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS},
 280        { XFS_SICK_FS_UQUOTA,   XFS_FSOP_GEOM_SICK_UQUOTA },
 281        { XFS_SICK_FS_GQUOTA,   XFS_FSOP_GEOM_SICK_GQUOTA },
 282        { XFS_SICK_FS_PQUOTA,   XFS_FSOP_GEOM_SICK_PQUOTA },
 283        { 0, 0 },
 284};
 285
 286static const struct ioctl_sick_map rt_map[] = {
 287        { XFS_SICK_RT_BITMAP,   XFS_FSOP_GEOM_SICK_RT_BITMAP },
 288        { XFS_SICK_RT_SUMMARY,  XFS_FSOP_GEOM_SICK_RT_SUMMARY },
 289        { 0, 0 },
 290};
 291
 292static inline void
 293xfgeo_health_tick(
 294        struct xfs_fsop_geom            *geo,
 295        unsigned int                    sick,
 296        unsigned int                    checked,
 297        const struct ioctl_sick_map     *m)
 298{
 299        if (checked & m->sick_mask)
 300                geo->checked |= m->ioctl_mask;
 301        if (sick & m->sick_mask)
 302                geo->sick |= m->ioctl_mask;
 303}
 304
 305/* Fill out fs geometry health info. */
 306void
 307xfs_fsop_geom_health(
 308        struct xfs_mount                *mp,
 309        struct xfs_fsop_geom            *geo)
 310{
 311        const struct ioctl_sick_map     *m;
 312        unsigned int                    sick;
 313        unsigned int                    checked;
 314
 315        geo->sick = 0;
 316        geo->checked = 0;
 317
 318        xfs_fs_measure_sickness(mp, &sick, &checked);
 319        for (m = fs_map; m->sick_mask; m++)
 320                xfgeo_health_tick(geo, sick, checked, m);
 321
 322        xfs_rt_measure_sickness(mp, &sick, &checked);
 323        for (m = rt_map; m->sick_mask; m++)
 324                xfgeo_health_tick(geo, sick, checked, m);
 325}
 326
 327static const struct ioctl_sick_map ag_map[] = {
 328        { XFS_SICK_AG_SB,       XFS_AG_GEOM_SICK_SB },
 329        { XFS_SICK_AG_AGF,      XFS_AG_GEOM_SICK_AGF },
 330        { XFS_SICK_AG_AGFL,     XFS_AG_GEOM_SICK_AGFL },
 331        { XFS_SICK_AG_AGI,      XFS_AG_GEOM_SICK_AGI },
 332        { XFS_SICK_AG_BNOBT,    XFS_AG_GEOM_SICK_BNOBT },
 333        { XFS_SICK_AG_CNTBT,    XFS_AG_GEOM_SICK_CNTBT },
 334        { XFS_SICK_AG_INOBT,    XFS_AG_GEOM_SICK_INOBT },
 335        { XFS_SICK_AG_FINOBT,   XFS_AG_GEOM_SICK_FINOBT },
 336        { XFS_SICK_AG_RMAPBT,   XFS_AG_GEOM_SICK_RMAPBT },
 337        { XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT },
 338        { 0, 0 },
 339};
 340
 341/* Fill out ag geometry health info. */
 342void
 343xfs_ag_geom_health(
 344        struct xfs_perag                *pag,
 345        struct xfs_ag_geometry          *ageo)
 346{
 347        const struct ioctl_sick_map     *m;
 348        unsigned int                    sick;
 349        unsigned int                    checked;
 350
 351        ageo->ag_sick = 0;
 352        ageo->ag_checked = 0;
 353
 354        xfs_ag_measure_sickness(pag, &sick, &checked);
 355        for (m = ag_map; m->sick_mask; m++) {
 356                if (checked & m->sick_mask)
 357                        ageo->ag_checked |= m->ioctl_mask;
 358                if (sick & m->sick_mask)
 359                        ageo->ag_sick |= m->ioctl_mask;
 360        }
 361}
 362
 363static const struct ioctl_sick_map ino_map[] = {
 364        { XFS_SICK_INO_CORE,    XFS_BS_SICK_INODE },
 365        { XFS_SICK_INO_BMBTD,   XFS_BS_SICK_BMBTD },
 366        { XFS_SICK_INO_BMBTA,   XFS_BS_SICK_BMBTA },
 367        { XFS_SICK_INO_BMBTC,   XFS_BS_SICK_BMBTC },
 368        { XFS_SICK_INO_DIR,     XFS_BS_SICK_DIR },
 369        { XFS_SICK_INO_XATTR,   XFS_BS_SICK_XATTR },
 370        { XFS_SICK_INO_SYMLINK, XFS_BS_SICK_SYMLINK },
 371        { XFS_SICK_INO_PARENT,  XFS_BS_SICK_PARENT },
 372        { 0, 0 },
 373};
 374
 375/* Fill out bulkstat health info. */
 376void
 377xfs_bulkstat_health(
 378        struct xfs_inode                *ip,
 379        struct xfs_bulkstat             *bs)
 380{
 381        const struct ioctl_sick_map     *m;
 382        unsigned int                    sick;
 383        unsigned int                    checked;
 384
 385        bs->bs_sick = 0;
 386        bs->bs_checked = 0;
 387
 388        xfs_inode_measure_sickness(ip, &sick, &checked);
 389        for (m = ino_map; m->sick_mask; m++) {
 390                if (checked & m->sick_mask)
 391                        bs->bs_checked |= m->ioctl_mask;
 392                if (sick & m->sick_mask)
 393                        bs->bs_sick |= m->ioctl_mask;
 394        }
 395}
 396