linux/arch/x86/kernel/cpu/mcheck/mce-severity.c
<<
>>
Prefs
   1/*
   2 * MCE grading rules.
   3 * Copyright 2008, 2009 Intel Corporation.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License
   7 * as published by the Free Software Foundation; version 2
   8 * of the License.
   9 *
  10 * Author: Andi Kleen
  11 */
  12#include <linux/kernel.h>
  13#include <linux/seq_file.h>
  14#include <linux/init.h>
  15#include <linux/debugfs.h>
  16#include <asm/mce.h>
  17
  18#include "mce-internal.h"
  19
  20/*
  21 * Grade an mce by severity. In general the most severe ones are processed
  22 * first. Since there are quite a lot of combinations test the bits in a
  23 * table-driven way. The rules are simply processed in order, first
  24 * match wins.
  25 *
  26 * Note this is only used for machine check exceptions, the corrected
  27 * errors use much simpler rules. The exceptions still check for the corrected
  28 * errors, but only to leave them alone for the CMCI handler (except for
  29 * panic situations)
  30 */
  31
  32enum context { IN_KERNEL = 1, IN_USER = 2 };
  33enum ser { SER_REQUIRED = 1, NO_SER = 2 };
  34
   /*
    * One grading rule. mce_severity() walks severities[] from the top and
    * returns the grade of the first entry whose conditions all hold, so the
    * order of the entries is significant. Fields that an entry does not
    * initialise are zero, and a zero mask/mcgmask/ser/context places no
    * constraint on the match.
    */
   35static struct severity {
   36        u64 mask;               /* bits of m->status that are examined */
   37        u64 result;             /* required value of (m->status & mask) */
   38        unsigned char sev;      /* grade returned when the rule matches */
   39        unsigned char mcgmask;  /* bits of m->mcgstatus that are examined */
   40        unsigned char mcgres;   /* required value of (m->mcgstatus & mcgmask) */
   41        unsigned char ser;      /* SER_REQUIRED, NO_SER, or 0 for either */
   42        unsigned char context;  /* IN_KERNEL, IN_USER, or 0 for either */
   43        unsigned char covered;  /* set to 1 once the rule has matched; shown and reset via debugfs */
   44        char *msg;              /* description handed back through mce_severity()'s msg argument */
   45} severities[] = {
   /*
    * Shorthand used to write the table entries:
    *   MCESEV(s, m, ...) - entry with grade MCE_<s>_SEVERITY and message m;
    *                       the remaining arguments are further designated
    *                       initialisers built from the helpers below
    *   KERNEL / USER     - restrict the rule to one execution context
    *   SER / NOSER       - rule applies only with / without mca_cfg.ser
    *   BITCLR(x)         - every status bit in x must be clear
    *   BITSET(x)         - every status bit in x must be set
    *   MASK(x, y)        - (status & x) must equal y
    *   MCGMASK(x, y)     - (mcgstatus & x) must equal y
    */
   46#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
   47#define  KERNEL         .context = IN_KERNEL
   48#define  USER           .context = IN_USER
   49#define  SER            .ser = SER_REQUIRED
   50#define  NOSER          .ser = NO_SER
   51#define  BITCLR(x)      .mask = x, .result = 0
   52#define  BITSET(x)      .mask = x, .result = x
   53#define  MCGMASK(x, y)  .mcgmask = x, .mcgres = y
   54#define  MASK(x, y)     .mask = x, .result = y
   55#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
   56#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
   57#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
   58
   59        MCESEV(
   60                NO, "Invalid",
   61                BITCLR(MCI_STATUS_VAL)
   62                ),
   63        MCESEV(
   64                NO, "Not enabled",
   65                BITCLR(MCI_STATUS_EN)
   66                ),
   67        MCESEV(
   68                PANIC, "Processor context corrupt",
   69                BITSET(MCI_STATUS_PCC)
   70                ),
   71        /* When MCIP is not set something is very confused */
   72        MCESEV(
   73                PANIC, "MCIP not set in MCA handler",
   74                MCGMASK(MCG_STATUS_MCIP, 0)
   75                ),
   76        /* Neither return nor error IP -- no chance to recover -> PANIC */
   77        MCESEV(
   78                PANIC, "Neither restart nor error IP",
   79                MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
   80                ),
   81        MCESEV(
   82                PANIC, "In kernel and no restart IP",
   83                KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
   84                ),
   85        MCESEV(
   86                KEEP, "Corrected error",
   87                NOSER, BITCLR(MCI_STATUS_UC)
   88                ),
   89
   90        /* ignore OVER for UCNA */
   91        MCESEV(
   92                KEEP, "Uncorrected no action required",
   93                SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
   94                ),
   95        MCESEV(
   96                PANIC, "Illegal combination (UCNA with AR=1)",
   97                SER,
   98                MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
   99                ),
  100        MCESEV(
  101                KEEP, "Non signalled machine check",
  102                SER, BITCLR(MCI_STATUS_S)
  103                ),
  104
  105        MCESEV(
  106                PANIC, "Action required with lost events",
  107                SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
  108                ),
  109
  110        /* known AR MCACODs: */
  111#ifdef  CONFIG_MEMORY_FAILURE
  112        MCESEV(
  113                KEEP, "Action required but unaffected thread is continuable",
  114                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
  115                MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
  116                ),
  117        MCESEV(
  118                AR, "Action required: data load error in a user process",
  119                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
  120                USER
  121                ),
  122        MCESEV(
  123                AR, "Action required: instruction fetch error in a user process",
  124                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
  125                USER
  126                ),
  127#endif
  128        MCESEV(
  129                PANIC, "Action required: unknown MCACOD",
  130                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
  131                ),
  132
  133        /* known AO MCACODs: */
  134        MCESEV(
  135                AO, "Action optional: memory scrubbing error",
  136                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
  137                ),
  138        MCESEV(
  139                AO, "Action optional: last level cache writeback error",
  140                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
  141                ),
  142        MCESEV(
  143                SOME, "Action optional: unknown MCACOD",
  144                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
  145                ),
  146        MCESEV(
  147                SOME, "Action optional with lost events",
  148                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
  149                ),
  150
  151        MCESEV(
  152                PANIC, "Overflowed uncorrected",
  153                BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
  154                ),
  155        MCESEV(
  156                UC, "Uncorrected",
  157                BITSET(MCI_STATUS_UC)
  158                ),
  159        MCESEV(
  160                SOME, "No match",
  161                BITSET(0)
  162                )       /* always matches. keep at end */
  163};
 164
 165/*
 166 * If mcgstatus indicated that ip/cs on the stack were
 167 * no good, then "m->cs" will be zero and we will have
 168 * to assume the worst case (IN_KERNEL) as we actually
 169 * have no idea what we were executing when the machine
 170 * check hit.
 171 * If we do have a good "m->cs" (or a faked one in the
 172 * case we were executing in VM86 mode) we can use it to
 173 * distinguish an exception taken in user from from one
 174 * taken in the kernel.
 175 */
 176static int error_context(struct mce *m)
 177{
 178        return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 179}
 180
 181int mce_severity(struct mce *m, int tolerant, char **msg)
 182{
 183        enum context ctx = error_context(m);
 184        struct severity *s;
 185
 186        for (s = severities;; s++) {
 187                if ((m->status & s->mask) != s->result)
 188                        continue;
 189                if ((m->mcgstatus & s->mcgmask) != s->mcgres)
 190                        continue;
 191                if (s->ser == SER_REQUIRED && !mca_cfg.ser)
 192                        continue;
 193                if (s->ser == NO_SER && mca_cfg.ser)
 194                        continue;
 195                if (s->context && ctx != s->context)
 196                        continue;
 197                if (msg)
 198                        *msg = s->msg;
 199                s->covered = 1;
 200                if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
 201                        if (panic_on_oops || tolerant < 1)
 202                                return MCE_PANIC_SEVERITY;
 203                }
 204                return s->sev;
 205        }
 206}
 207
 208#ifdef CONFIG_DEBUG_FS
 209static void *s_start(struct seq_file *f, loff_t *pos)
 210{
 211        if (*pos >= ARRAY_SIZE(severities))
 212                return NULL;
 213        return &severities[*pos];
 214}
 215
 216static void *s_next(struct seq_file *f, void *data, loff_t *pos)
 217{
 218        if (++(*pos) >= ARRAY_SIZE(severities))
 219                return NULL;
 220        return &severities[*pos];
 221}
 222
 223static void s_stop(struct seq_file *f, void *data)
 224{
 225}
 226
 227static int s_show(struct seq_file *f, void *data)
 228{
 229        struct severity *ser = data;
 230        seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
 231        return 0;
 232}
 233
 234static const struct seq_operations severities_seq_ops = {
 235        .start  = s_start,
 236        .next   = s_next,
 237        .stop   = s_stop,
 238        .show   = s_show,
 239};
 240
 241static int severities_coverage_open(struct inode *inode, struct file *file)
 242{
 243        return seq_open(file, &severities_seq_ops);
 244}
 245
 246static ssize_t severities_coverage_write(struct file *file,
 247                                         const char __user *ubuf,
 248                                         size_t count, loff_t *ppos)
 249{
 250        int i;
 251        for (i = 0; i < ARRAY_SIZE(severities); i++)
 252                severities[i].covered = 0;
 253        return count;
 254}
 255
 256static const struct file_operations severities_coverage_fops = {
 257        .open           = severities_coverage_open,
 258        .release        = seq_release,
 259        .read           = seq_read,
 260        .write          = severities_coverage_write,
 261        .llseek         = seq_lseek,
 262};
 263
 264static int __init severities_debugfs_init(void)
 265{
 266        struct dentry *dmce, *fsev;
 267
 268        dmce = mce_get_debugfs_dir();
 269        if (!dmce)
 270                goto err_out;
 271
 272        fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
 273                                   &severities_coverage_fops);
 274        if (!fsev)
 275                goto err_out;
 276
 277        return 0;
 278
 279err_out:
 280        return -ENOMEM;
 281}
 282late_initcall(severities_debugfs_init);
 283#endif /* CONFIG_DEBUG_FS */
 284