linux/arch/powerpc/platforms/pseries/ras.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
   4 */
   5
   6#include <linux/sched.h>
   7#include <linux/interrupt.h>
   8#include <linux/irq.h>
   9#include <linux/of.h>
  10#include <linux/fs.h>
  11#include <linux/reboot.h>
  12#include <linux/irq_work.h>
  13
  14#include <asm/machdep.h>
  15#include <asm/rtas.h>
  16#include <asm/firmware.h>
  17#include <asm/mce.h>
  18
  19#include "pseries.h"
  20
/*
 * Scratch buffer handed to the RTAS "check-exception" call by the RAS
 * interrupt handlers in this file. Each handler holds ras_log_buf_lock
 * while the buffer is being filled and consumed.
 */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

/* RTAS token for "check-exception"; looked up once in init_ras_IRQ(). */
static int ras_check_exception_token;

/*
 * irq_work queued by mce_handle_error() so that the machine check error
 * log is passed to log_error() later, outside the machine check path.
 */
static void mce_process_errlog_event(struct irq_work *work);
static struct irq_work mce_errlog_process_work = {
        .func = mce_process_errlog_event,
};

/* Sensor token/index given to rtas_get_sensor_fast() for the EPOW sensor. */
#define EPOW_SENSOR_TOKEN       9
#define EPOW_SENSOR_INDEX       0

/*
 * EPOW events counter variable: incremented for every non-reset EPOW
 * event and decremented when an EPOW_RESET event is parsed.
 */
static int num_epow_events;

/* Interrupt handlers registered by the init functions below. */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  40
/*
 * RTAS pseries MCE errorlog section.
 *
 * This is the payload of the PSERIES_ELOG_SECT_ID_MCE section of an RTAS
 * error log. Multi-byte fields are big-endian and the layout is packed.
 */
struct pseries_mc_errorlog {
        __be32  fru_id;
        __be32  proc_id;
        /* One of the MC_ERROR_TYPE_* values. */
        u8      error_type;
        /*
         * sub_err_type (1 byte). Bit fields depend on error_type
         *
         *   MSB0
         *   |
         *   V
         *   01234567
         *   XXXXXXXX
         *
         * For error_type == MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Permanent or Transient UE.
         *    X         1: Effective address provided.
         *     X        1: Logical address provided.
         *      XX      2: Reserved.
         *        XXX   3: Type of UE error.
         *
         * For error_type != MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Effective address provided.
         *    XXXXX     5: Reserved.
         *         XX   2: Type of SLB/ERAT/TLB error.
         */
        u8      sub_err_type;
        u8      reserved_1[6];
        /* Only read when the "effective address provided" bit is set. */
        __be64  effective_address;
        /* Only read (UE errors) when the "logical address provided" bit is set. */
        __be64  logical_address;
} __packed;
  74
/*
 * RTAS pseries MCE error types
 *
 * Values found in pseries_mc_errorlog.error_type.
 */
#define MC_ERROR_TYPE_UE                0x00
#define MC_ERROR_TYPE_SLB               0x01
#define MC_ERROR_TYPE_ERAT              0x02
#define MC_ERROR_TYPE_UNKNOWN           0x03
#define MC_ERROR_TYPE_TLB               0x04
#define MC_ERROR_TYPE_D_CACHE           0x05
#define MC_ERROR_TYPE_I_CACHE           0x07

/*
 * RTAS pseries MCE error sub types
 *
 * These are the values returned by rtas_mc_error_sub_type(): the low
 * three bits of sub_err_type for UE errors, the low two bits for
 * SLB/ERAT/TLB errors.
 */
#define MC_ERROR_UE_INDETERMINATE               0
#define MC_ERROR_UE_IFETCH                      1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
#define MC_ERROR_UE_LOAD_STORE                  3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4

/* Masks on sub_err_type for UE errors (MSB0 bits 1 and 2 respectively). */
#define UE_EFFECTIVE_ADDR_PROVIDED              0x40
#define UE_LOGICAL_ADDR_PROVIDED                0x20

/* SLB error sub types */
#define MC_ERROR_SLB_PARITY             0
#define MC_ERROR_SLB_MULTIHIT           1
#define MC_ERROR_SLB_INDETERMINATE      2

/* ERAT error sub types */
#define MC_ERROR_ERAT_PARITY            1
#define MC_ERROR_ERAT_MULTIHIT          2
#define MC_ERROR_ERAT_INDETERMINATE     3

/* TLB error sub types */
#define MC_ERROR_TLB_PARITY             1
#define MC_ERROR_TLB_MULTIHIT           2
#define MC_ERROR_TLB_INDETERMINATE      3
 105
 106static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 107{
 108        switch (mlog->error_type) {
 109        case    MC_ERROR_TYPE_UE:
 110                return (mlog->sub_err_type & 0x07);
 111        case    MC_ERROR_TYPE_SLB:
 112        case    MC_ERROR_TYPE_ERAT:
 113        case    MC_ERROR_TYPE_TLB:
 114                return (mlog->sub_err_type & 0x03);
 115        default:
 116                return 0;
 117        }
 118}
 119
 120/*
 121 * Enable the hotplug interrupt late because processing them may touch other
 122 * devices or systems (e.g. hugepages) that have not been initialized at the
 123 * subsys stage.
 124 */
 125static int __init init_ras_hotplug_IRQ(void)
 126{
 127        struct device_node *np;
 128
 129        /* Hotplug Events */
 130        np = of_find_node_by_path("/event-sources/hot-plug-events");
 131        if (np != NULL) {
 132                if (dlpar_workqueue_init() == 0)
 133                        request_event_sources_irqs(np, ras_hotplug_interrupt,
 134                                                   "RAS_HOTPLUG");
 135                of_node_put(np);
 136        }
 137
 138        return 0;
 139}
 140machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 141
 142/*
 143 * Initialize handlers for the set of interrupts caused by hardware errors
 144 * and power system events.
 145 */
 146static int __init init_ras_IRQ(void)
 147{
 148        struct device_node *np;
 149
 150        ras_check_exception_token = rtas_token("check-exception");
 151
 152        /* Internal Errors */
 153        np = of_find_node_by_path("/event-sources/internal-errors");
 154        if (np != NULL) {
 155                request_event_sources_irqs(np, ras_error_interrupt,
 156                                           "RAS_ERROR");
 157                of_node_put(np);
 158        }
 159
 160        /* EPOW Events */
 161        np = of_find_node_by_path("/event-sources/epow-events");
 162        if (np != NULL) {
 163                request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 164                of_node_put(np);
 165        }
 166
 167        return 0;
 168}
 169machine_subsys_initcall(pseries, init_ras_IRQ);
 170
 171#define EPOW_SHUTDOWN_NORMAL                            1
 172#define EPOW_SHUTDOWN_ON_UPS                            2
 173#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
 174#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4
 175
 176static void handle_system_shutdown(char event_modifier)
 177{
 178        switch (event_modifier) {
 179        case EPOW_SHUTDOWN_NORMAL:
 180                pr_emerg("Power off requested\n");
 181                orderly_poweroff(true);
 182                break;
 183
 184        case EPOW_SHUTDOWN_ON_UPS:
 185                pr_emerg("Loss of system power detected. System is running on"
 186                         " UPS/battery. Check RTAS error log for details\n");
 187                break;
 188
 189        case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
 190                pr_emerg("Loss of system critical functions detected. Check"
 191                         " RTAS error log for details\n");
 192                orderly_poweroff(true);
 193                break;
 194
 195        case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 196                pr_emerg("High ambient temperature detected. Check RTAS"
 197                         " error log for details\n");
 198                orderly_poweroff(true);
 199                break;
 200
 201        default:
 202                pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 203                        event_modifier);
 204        }
 205}
 206
/*
 * Layout of the data carried by the EPOW section
 * (PSERIES_ELOG_SECT_ID_EPOW) of an RTAS error log.
 */
struct epow_errorlog {
        /* Bottom 4 bits hold the EPOW action code (EPOW_RESET, ...). */
        unsigned char sensor_value;
        /* Bottom 4 bits hold the shutdown modifier (EPOW_SHUTDOWN_*). */
        unsigned char event_modifier;
        unsigned char extended_modifier;
        unsigned char reserved;
        unsigned char platform_reason;
};
 214
 215#define EPOW_RESET                      0
 216#define EPOW_WARN_COOLING               1
 217#define EPOW_WARN_POWER                 2
 218#define EPOW_SYSTEM_SHUTDOWN            3
 219#define EPOW_SYSTEM_HALT                4
 220#define EPOW_MAIN_ENCLOSURE             5
 221#define EPOW_POWER_OFF                  7
 222
 223static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 224{
 225        struct pseries_errorlog *pseries_log;
 226        struct epow_errorlog *epow_log;
 227        char action_code;
 228        char modifier;
 229
 230        pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
 231        if (pseries_log == NULL)
 232                return;
 233
 234        epow_log = (struct epow_errorlog *)pseries_log->data;
 235        action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
 236        modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */
 237
 238        switch (action_code) {
 239        case EPOW_RESET:
 240                if (num_epow_events) {
 241                        pr_info("Non critical power/cooling issue cleared\n");
 242                        num_epow_events--;
 243                }
 244                break;
 245
 246        case EPOW_WARN_COOLING:
 247                pr_info("Non-critical cooling issue detected. Check RTAS error"
 248                        " log for details\n");
 249                break;
 250
 251        case EPOW_WARN_POWER:
 252                pr_info("Non-critical power issue detected. Check RTAS error"
 253                        " log for details\n");
 254                break;
 255
 256        case EPOW_SYSTEM_SHUTDOWN:
 257                handle_system_shutdown(modifier);
 258                break;
 259
 260        case EPOW_SYSTEM_HALT:
 261                pr_emerg("Critical power/cooling issue detected. Check RTAS"
 262                         " error log for details. Powering off.\n");
 263                orderly_poweroff(true);
 264                break;
 265
 266        case EPOW_MAIN_ENCLOSURE:
 267        case EPOW_POWER_OFF:
 268                pr_emerg("System about to lose power. Check RTAS error log "
 269                         " for details. Powering off immediately.\n");
 270                emergency_sync();
 271                kernel_power_off();
 272                break;
 273
 274        default:
 275                pr_err("Unknown power/cooling event (action code  = %d)\n",
 276                        action_code);
 277        }
 278
 279        /* Increment epow events counter variable */
 280        if (action_code != EPOW_RESET)
 281                num_epow_events++;
 282}
 283
 284static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 285{
 286        struct pseries_errorlog *pseries_log;
 287        struct pseries_hp_errorlog *hp_elog;
 288
 289        spin_lock(&ras_log_buf_lock);
 290
 291        rtas_call(ras_check_exception_token, 6, 1, NULL,
 292                  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
 293                  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
 294                  rtas_get_error_log_max());
 295
 296        pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
 297                                           PSERIES_ELOG_SECT_ID_HOTPLUG);
 298        hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
 299
 300        /*
 301         * Since PCI hotplug is not currently supported on pseries, put PCI
 302         * hotplug events on the ras_log_buf to be handled by rtas_errd.
 303         */
 304        if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
 305            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
 306            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 307                queue_hotplug_event(hp_elog);
 308        else
 309                log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 310
 311        spin_unlock(&ras_log_buf_lock);
 312        return IRQ_HANDLED;
 313}
 314
 315/* Handle environmental and power warning (EPOW) interrupts. */
 316static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 317{
 318        int state;
 319        int critical;
 320
 321        rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
 322
 323        if (state > 3)
 324                critical = 1;           /* Time Critical */
 325        else
 326                critical = 0;
 327
 328        spin_lock(&ras_log_buf_lock);
 329
 330        rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
 331                  virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
 332                  rtas_get_error_log_max());
 333
 334        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 335
 336        rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
 337
 338        spin_unlock(&ras_log_buf_lock);
 339        return IRQ_HANDLED;
 340}
 341
 342/*
 343 * Handle hardware error interrupts.
 344 *
 345 * RTAS check-exception is called to collect data on the exception.  If
 346 * the error is deemed recoverable, we log a warning and return.
 347 * For nonrecoverable errors, an error is logged and we stop all processing
 348 * as quickly as possible in order to prevent propagation of the failure.
 349 */
 350static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 351{
 352        struct rtas_error_log *rtas_elog;
 353        int status;
 354        int fatal;
 355
 356        spin_lock(&ras_log_buf_lock);
 357
 358        status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 359                           RTAS_VECTOR_EXTERNAL_INTERRUPT,
 360                           virq_to_hw(irq),
 361                           RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
 362                           __pa(&ras_log_buf),
 363                                rtas_get_error_log_max());
 364
 365        rtas_elog = (struct rtas_error_log *)ras_log_buf;
 366
 367        if (status == 0 &&
 368            rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 369                fatal = 1;
 370        else
 371                fatal = 0;
 372
 373        /* format and print the extended information */
 374        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 375
 376        if (fatal) {
 377                pr_emerg("Fatal hardware error detected. Check RTAS error"
 378                         " log for details. Powering off immediately\n");
 379                emergency_sync();
 380                kernel_power_off();
 381        } else {
 382                pr_err("Recoverable hardware error detected\n");
 383        }
 384
 385        spin_unlock(&ras_log_buf_lock);
 386        return IRQ_HANDLED;
 387}
 388
/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 * Minimum size of the buffer is 16 bytes.
 *
 * Evaluates to non-zero when a buffer of at least 16 bytes starting at
 * address A lies entirely inside one of those two regions. A is expanded
 * several times, so it must be an expression without side effects.
 */
#define VALID_FWNMI_BUFFER(A) \
        ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
        (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
 397
 398static inline struct rtas_error_log *fwnmi_get_errlog(void)
 399{
 400        return (struct rtas_error_log *)local_paca->mce_data_buf;
 401}
 402
 403static __be64 *fwnmi_get_savep(struct pt_regs *regs)
 404{
 405        unsigned long savep_ra;
 406
 407        /* Mask top two bits */
 408        savep_ra = regs->gpr[3] & ~(0x3UL << 62);
 409        if (!VALID_FWNMI_BUFFER(savep_ra)) {
 410                printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 411                return NULL;
 412        }
 413
 414        return __va(savep_ra);
 415}
 416
 417/*
 418 * Get the error information for errors coming through the
 419 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 420 * the actual r3 if possible, and a ptr to the error log entry
 421 * will be returned if found.
 422 *
 423 * Use one buffer mce_data_buf per cpu to store RTAS error.
 424 *
 425 * The mce_data_buf does not have any locks or protection around it,
 426 * if a second machine check comes in, or a system reset is done
 427 * before we have logged the error, then we will get corruption in the
 428 * error log.  This is preferable over holding off on calling
 429 * ibm,nmi-interlock which would result in us checkstopping if a
 430 * second machine check did come in.
 431 */
 432static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 433{
 434        struct rtas_error_log *h;
 435        __be64 *savep;
 436
 437        savep = fwnmi_get_savep(regs);
 438        if (!savep)
 439                return NULL;
 440
 441        regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 442
 443        h = (struct rtas_error_log *)&savep[1];
 444        /* Use the per cpu buffer from paca to store rtas error log */
 445        memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 446        if (!rtas_error_extended(h)) {
 447                memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
 448        } else {
 449                int len, error_log_length;
 450
 451                error_log_length = 8 + rtas_error_extended_log_length(h);
 452                len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 453                memcpy(local_paca->mce_data_buf, h, len);
 454        }
 455
 456        return (struct rtas_error_log *)local_paca->mce_data_buf;
 457}
 458
 459/* Call this when done with the data returned by FWNMI_get_errinfo.
 460 * It will release the saved data area for other CPUs in the
 461 * partition to receive FWNMI errors.
 462 */
 463static void fwnmi_release_errinfo(void)
 464{
 465        struct rtas_args rtas_args;
 466        int ret;
 467
 468        /*
 469         * On pseries, the machine check stack is limited to under 4GB, so
 470         * args can be on-stack.
 471         */
 472        rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
 473        ret = be32_to_cpu(rtas_args.rets[0]);
 474        if (ret != 0)
 475                printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
 476}
 477
 478int pSeries_system_reset_exception(struct pt_regs *regs)
 479{
 480#ifdef __LITTLE_ENDIAN__
 481        /*
 482         * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
 483         * to detect the bad SRR1 pattern here. Flip the NIP back to correct
 484         * endian for reporting purposes. Unfortunately the MSR can't be fixed,
 485         * so clear it. It will be missing MSR_RI so we won't try to recover.
 486         */
 487        if ((be64_to_cpu(regs->msr) &
 488                        (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 489                         MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
 490                regs->nip = be64_to_cpu((__be64)regs->nip);
 491                regs->msr = 0;
 492        }
 493#endif
 494
 495        if (fwnmi_active) {
 496                __be64 *savep;
 497
 498                /*
 499                 * Firmware (PowerVM and KVM) saves r3 to a save area like
 500                 * machine check, which is not exactly what PAPR (2.9)
 501                 * suggests but there is no way to detect otherwise, so this
 502                 * is the interface now.
 503                 *
 504                 * System resets do not save any error log or require an
 505                 * "ibm,nmi-interlock" rtas call to release.
 506                 */
 507
 508                savep = fwnmi_get_savep(regs);
 509                if (savep)
 510                        regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 511        }
 512
 513        if (smp_handle_nmi_ipi(regs))
 514                return 1;
 515
 516        return 0; /* need to perform reset */
 517}
 518
 519static int mce_handle_err_realmode(int disposition, u8 error_type)
 520{
 521#ifdef CONFIG_PPC_BOOK3S_64
 522        if (disposition == RTAS_DISP_NOT_RECOVERED) {
 523                switch (error_type) {
 524                case    MC_ERROR_TYPE_ERAT:
 525                        flush_erat();
 526                        disposition = RTAS_DISP_FULLY_RECOVERED;
 527                        break;
 528                case    MC_ERROR_TYPE_SLB:
 529                        /*
 530                         * Store the old slb content in paca before flushing.
 531                         * Print this when we go to virtual mode.
 532                         * There are chances that we may hit MCE again if there
 533                         * is a parity error on the SLB entry we trying to read
 534                         * for saving. Hence limit the slb saving to single
 535                         * level of recursion.
 536                         */
 537                        if (local_paca->in_mce == 1)
 538                                slb_save_contents(local_paca->mce_faulty_slbs);
 539                        flush_and_reload_slb();
 540                        disposition = RTAS_DISP_FULLY_RECOVERED;
 541                        break;
 542                default:
 543                        break;
 544                }
 545        } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 546                /* Platform corrected itself but could be degraded */
 547                pr_err("MCE: limited recovery, system may be degraded\n");
 548                disposition = RTAS_DISP_FULLY_RECOVERED;
 549        }
 550#endif
 551        return disposition;
 552}
 553
 554static int mce_handle_err_virtmode(struct pt_regs *regs,
 555                                   struct rtas_error_log *errp,
 556                                   struct pseries_mc_errorlog *mce_log,
 557                                   int disposition)
 558{
 559        struct mce_error_info mce_err = { 0 };
 560        int initiator = rtas_error_initiator(errp);
 561        int severity = rtas_error_severity(errp);
 562        unsigned long eaddr = 0, paddr = 0;
 563        u8 error_type, err_sub_type;
 564
 565        if (!mce_log)
 566                goto out;
 567
 568        error_type = mce_log->error_type;
 569        err_sub_type = rtas_mc_error_sub_type(mce_log);
 570
 571        if (initiator == RTAS_INITIATOR_UNKNOWN)
 572                mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 573        else if (initiator == RTAS_INITIATOR_CPU)
 574                mce_err.initiator = MCE_INITIATOR_CPU;
 575        else if (initiator == RTAS_INITIATOR_PCI)
 576                mce_err.initiator = MCE_INITIATOR_PCI;
 577        else if (initiator == RTAS_INITIATOR_ISA)
 578                mce_err.initiator = MCE_INITIATOR_ISA;
 579        else if (initiator == RTAS_INITIATOR_MEMORY)
 580                mce_err.initiator = MCE_INITIATOR_MEMORY;
 581        else if (initiator == RTAS_INITIATOR_POWERMGM)
 582                mce_err.initiator = MCE_INITIATOR_POWERMGM;
 583        else
 584                mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 585
 586        if (severity == RTAS_SEVERITY_NO_ERROR)
 587                mce_err.severity = MCE_SEV_NO_ERROR;
 588        else if (severity == RTAS_SEVERITY_EVENT)
 589                mce_err.severity = MCE_SEV_WARNING;
 590        else if (severity == RTAS_SEVERITY_WARNING)
 591                mce_err.severity = MCE_SEV_WARNING;
 592        else if (severity == RTAS_SEVERITY_ERROR_SYNC)
 593                mce_err.severity = MCE_SEV_SEVERE;
 594        else if (severity == RTAS_SEVERITY_ERROR)
 595                mce_err.severity = MCE_SEV_SEVERE;
 596        else if (severity == RTAS_SEVERITY_FATAL)
 597                mce_err.severity = MCE_SEV_FATAL;
 598        else
 599                mce_err.severity = MCE_SEV_FATAL;
 600
 601        if (severity <= RTAS_SEVERITY_ERROR_SYNC)
 602                mce_err.sync_error = true;
 603        else
 604                mce_err.sync_error = false;
 605
 606        mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 607        mce_err.error_class = MCE_ECLASS_UNKNOWN;
 608
 609        switch (error_type) {
 610        case MC_ERROR_TYPE_UE:
 611                mce_err.error_type = MCE_ERROR_TYPE_UE;
 612                mce_common_process_ue(regs, &mce_err);
 613                if (mce_err.ignore_event)
 614                        disposition = RTAS_DISP_FULLY_RECOVERED;
 615                switch (err_sub_type) {
 616                case MC_ERROR_UE_IFETCH:
 617                        mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
 618                        break;
 619                case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
 620                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
 621                        break;
 622                case MC_ERROR_UE_LOAD_STORE:
 623                        mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
 624                        break;
 625                case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
 626                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
 627                        break;
 628                case MC_ERROR_UE_INDETERMINATE:
 629                default:
 630                        mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
 631                        break;
 632                }
 633                if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
 634                        eaddr = be64_to_cpu(mce_log->effective_address);
 635
 636                if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
 637                        paddr = be64_to_cpu(mce_log->logical_address);
 638                } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
 639                        unsigned long pfn;
 640
 641                        pfn = addr_to_pfn(regs, eaddr);
 642                        if (pfn != ULONG_MAX)
 643                                paddr = pfn << PAGE_SHIFT;
 644                }
 645
 646                break;
 647        case MC_ERROR_TYPE_SLB:
 648                mce_err.error_type = MCE_ERROR_TYPE_SLB;
 649                switch (err_sub_type) {
 650                case MC_ERROR_SLB_PARITY:
 651                        mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
 652                        break;
 653                case MC_ERROR_SLB_MULTIHIT:
 654                        mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 655                        break;
 656                case MC_ERROR_SLB_INDETERMINATE:
 657                default:
 658                        mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
 659                        break;
 660                }
 661                if (mce_log->sub_err_type & 0x80)
 662                        eaddr = be64_to_cpu(mce_log->effective_address);
 663                break;
 664        case MC_ERROR_TYPE_ERAT:
 665                mce_err.error_type = MCE_ERROR_TYPE_ERAT;
 666                switch (err_sub_type) {
 667                case MC_ERROR_ERAT_PARITY:
 668                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
 669                        break;
 670                case MC_ERROR_ERAT_MULTIHIT:
 671                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
 672                        break;
 673                case MC_ERROR_ERAT_INDETERMINATE:
 674                default:
 675                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
 676                        break;
 677                }
 678                if (mce_log->sub_err_type & 0x80)
 679                        eaddr = be64_to_cpu(mce_log->effective_address);
 680                break;
 681        case MC_ERROR_TYPE_TLB:
 682                mce_err.error_type = MCE_ERROR_TYPE_TLB;
 683                switch (err_sub_type) {
 684                case MC_ERROR_TLB_PARITY:
 685                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
 686                        break;
 687                case MC_ERROR_TLB_MULTIHIT:
 688                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
 689                        break;
 690                case MC_ERROR_TLB_INDETERMINATE:
 691                default:
 692                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
 693                        break;
 694                }
 695                if (mce_log->sub_err_type & 0x80)
 696                        eaddr = be64_to_cpu(mce_log->effective_address);
 697                break;
 698        case MC_ERROR_TYPE_D_CACHE:
 699                mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 700                break;
 701        case MC_ERROR_TYPE_I_CACHE:
 702                mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 703                break;
 704        case MC_ERROR_TYPE_UNKNOWN:
 705        default:
 706                mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 707                break;
 708        }
 709out:
 710        save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
 711                       &mce_err, regs->nip, eaddr, paddr);
 712        return disposition;
 713}
 714
 715static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 716{
 717        struct pseries_errorlog *pseries_log;
 718        struct pseries_mc_errorlog *mce_log = NULL;
 719        int disposition = rtas_error_disposition(errp);
 720        unsigned long msr;
 721        u8 error_type;
 722
 723        if (!rtas_error_extended(errp))
 724                goto out;
 725
 726        pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 727        if (!pseries_log)
 728                goto out;
 729
 730        mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 731        error_type = mce_log->error_type;
 732
 733        disposition = mce_handle_err_realmode(disposition, error_type);
 734
 735        /*
 736         * Enable translation as we will be accessing per-cpu variables
 737         * in save_mce_event() which may fall outside RMO region, also
 738         * leave it enabled because subsequently we will be queuing work
 739         * to workqueues where again per-cpu variables accessed, besides
 740         * fwnmi_release_errinfo() crashes when called in realmode on
 741         * pseries.
 742         * Note: All the realmode handling like flushing SLB entries for
 743         *       SLB multihit is done by now.
 744         */
 745out:
 746        msr = mfmsr();
 747        mtmsr(msr | MSR_IR | MSR_DR);
 748
 749        disposition = mce_handle_err_virtmode(regs, errp, mce_log,
 750                                              disposition);
 751
 752        /*
 753         * Queue irq work to log this rtas event later.
 754         * irq_work_queue uses per-cpu variables, so do this in virt
 755         * mode as well.
 756         */
 757        irq_work_queue(&mce_errlog_process_work);
 758
 759        mtmsr(msr);
 760
 761        return disposition;
 762}
 763
 764/*
 765 * Process MCE rtas errlog event.
 766 */
 767static void mce_process_errlog_event(struct irq_work *work)
 768{
 769        struct rtas_error_log *err;
 770
 771        err = fwnmi_get_errlog();
 772        log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
 773}
 774
 775/*
 776 * See if we can recover from a machine check exception.
 777 * This is only called on power4 (or above) and only via
 778 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 779 * which provides the error analysis for us.
 780 *
 781 * Return 1 if corrected (or delivered a signal).
 782 * Return 0 if there is nothing we can do.
 783 */
 784static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
 785{
 786        int recovered = 0;
 787
 788        if (!(regs->msr & MSR_RI)) {
 789                /* If MSR_RI isn't set, we cannot recover */
 790                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 791                recovered = 0;
 792        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 793                /* Platform corrected itself */
 794                recovered = 1;
 795        } else if (evt->severity == MCE_SEV_FATAL) {
 796                /* Fatal machine check */
 797                pr_err("Machine check interrupt is fatal\n");
 798                recovered = 0;
 799        }
 800
 801        if (!recovered && evt->sync_error) {
 802                /*
 803                 * Try to kill processes if we get a synchronous machine check
 804                 * (e.g., one caused by execution of this instruction). This
 805                 * will devolve into a panic if we try to kill init or are in
 806                 * an interrupt etc.
 807                 *
 808                 * TODO: Queue up this address for hwpoisioning later.
 809                 * TODO: This is not quite right for d-side machine
 810                 *       checks ->nip is not necessarily the important
 811                 *       address.
 812                 */
 813                if ((user_mode(regs))) {
 814                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 815                        recovered = 1;
 816                } else if (die_will_crash()) {
 817                        /*
 818                         * die() would kill the kernel, so better to go via
 819                         * the platform reboot code that will log the
 820                         * machine check.
 821                         */
 822                        recovered = 0;
 823                } else {
 824                        die_mce("Machine check", regs, SIGBUS);
 825                        recovered = 1;
 826                }
 827        }
 828
 829        return recovered;
 830}
 831
 832/*
 833 * Handle a machine check.
 834 *
 835 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 836 * should be present.  If so the handler which called us tells us if the
 837 * error was recovered (never true if RI=0).
 838 *
 839 * On hardware prior to Power 4 these exceptions were asynchronous which
 840 * means we can't tell exactly where it occurred and so we can't recover.
 841 */
 842int pSeries_machine_check_exception(struct pt_regs *regs)
 843{
 844        struct machine_check_event evt;
 845
 846        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 847                return 0;
 848
 849        /* Print things out */
 850        if (evt.version != MCE_V1) {
 851                pr_err("Machine Check Exception, Unknown event version %d !\n",
 852                       evt.version);
 853                return 0;
 854        }
 855        machine_check_print_event_info(&evt, user_mode(regs), false);
 856
 857        if (recover_mce(regs, &evt))
 858                return 1;
 859
 860        return 0;
 861}
 862
 863long pseries_machine_check_realmode(struct pt_regs *regs)
 864{
 865        struct rtas_error_log *errp;
 866        int disposition;
 867
 868        if (fwnmi_active) {
 869                errp = fwnmi_get_errinfo(regs);
 870                /*
 871                 * Call to fwnmi_release_errinfo() in real mode causes kernel
 872                 * to panic. Hence we will call it as soon as we go into
 873                 * virtual mode.
 874                 */
 875                disposition = mce_handle_error(regs, errp);
 876
 877                fwnmi_release_errinfo();
 878
 879                if (disposition == RTAS_DISP_FULLY_RECOVERED)
 880                        return 1;
 881        }
 882
 883        return 0;
 884}
 885