linux/arch/powerpc/platforms/pseries/ras.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
   4 */
   5
   6#include <linux/sched.h>
   7#include <linux/interrupt.h>
   8#include <linux/irq.h>
   9#include <linux/of.h>
  10#include <linux/fs.h>
  11#include <linux/reboot.h>
  12#include <linux/irq_work.h>
  13
  14#include <asm/machdep.h>
  15#include <asm/rtas.h>
  16#include <asm/firmware.h>
  17#include <asm/mce.h>
  18
  19#include "pseries.h"
  20
/*
 * Buffer handed (by physical address) to the RTAS check-exception call by
 * the RAS interrupt handlers in this file. ras_log_buf_lock is taken around
 * every such call and the processing of the returned log.
 */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

/* RTAS token for "check-exception"; looked up in init_ras_IRQ(). */
static int ras_check_exception_token;

/*
 * irq_work queued by mce_handle_error() so that the machine check error
 * log is passed to log_error() later, from mce_process_errlog_event().
 */
static void mce_process_errlog_event(struct irq_work *work);
static struct irq_work mce_errlog_process_work = {
        .func = mce_process_errlog_event,
};
  30
/* Sensor token and index passed to rtas_get_sensor_fast() for the EPOW state */
#define EPOW_SENSOR_TOKEN       9
#define EPOW_SENSOR_INDEX       0

/*
 * EPOW events counter variable: incremented for every EPOW event other than
 * EPOW_RESET and decremented (while non-zero) on EPOW_RESET, see
 * rtas_parse_epow_errlog().
 */
static int num_epow_events;

/* Interrupt handlers registered by init_ras_hotplug_IRQ() and init_ras_IRQ() */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  40
/*
 * RTAS pseries MCE errorlog section.
 *
 * The address fields are big-endian; the code in this file reads them with
 * be64_to_cpu().
 */
struct pseries_mc_errorlog {
        __be32  fru_id;
        __be32  proc_id;
        /* Compared against the MC_ERROR_TYPE_* values in this file */
        u8      error_type;
        /*
         * sub_err_type (1 byte). The bit field layout depends on error_type
         *
         *   MSB0
         *   |
         *   V
         *   01234567
         *   XXXXXXXX
         *
         * For error_type == MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Permanent or Transient UE.
         *    X         1: Effective address provided.
         *     X        1: Logical address provided.
         *      XX      2: Reserved.
         *        XXX   3: Type of UE error.
         *
         * For error_type != MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Effective address provided.
         *    XXXXX     5: Reserved.
         *         XX   2: Type of SLB/ERAT/TLB error.
         */
        u8      sub_err_type;
        u8      reserved_1[6];
        /* Only meaningful when the matching "provided" bit is set above */
        __be64  effective_address;
        __be64  logical_address;
} __packed;
  74
/* RTAS pseries MCE error types (values of pseries_mc_errorlog.error_type) */
#define MC_ERROR_TYPE_UE                0x00
#define MC_ERROR_TYPE_SLB               0x01
#define MC_ERROR_TYPE_ERAT              0x02
#define MC_ERROR_TYPE_UNKNOWN           0x03
#define MC_ERROR_TYPE_TLB               0x04
#define MC_ERROR_TYPE_D_CACHE           0x05
#define MC_ERROR_TYPE_I_CACHE           0x07

/*
 * RTAS pseries MCE error sub types, i.e. the values that
 * rtas_mc_error_sub_type() extracts from sub_err_type.
 *
 * UE sub types (low three bits of sub_err_type):
 */
#define MC_ERROR_UE_INDETERMINATE               0
#define MC_ERROR_UE_IFETCH                      1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
#define MC_ERROR_UE_LOAD_STORE                  3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4

/* UE flag bits in sub_err_type: which address fields of the log are valid */
#define UE_EFFECTIVE_ADDR_PROVIDED              0x40
#define UE_LOGICAL_ADDR_PROVIDED                0x20

/* SLB sub types (low two bits of sub_err_type) */
#define MC_ERROR_SLB_PARITY             0
#define MC_ERROR_SLB_MULTIHIT           1
#define MC_ERROR_SLB_INDETERMINATE      2

/* ERAT sub types (low two bits of sub_err_type) */
#define MC_ERROR_ERAT_PARITY            1
#define MC_ERROR_ERAT_MULTIHIT          2
#define MC_ERROR_ERAT_INDETERMINATE     3

/* TLB sub types (low two bits of sub_err_type) */
#define MC_ERROR_TLB_PARITY             1
#define MC_ERROR_TLB_MULTIHIT           2
#define MC_ERROR_TLB_INDETERMINATE      3
 105
 106static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 107{
 108        switch (mlog->error_type) {
 109        case    MC_ERROR_TYPE_UE:
 110                return (mlog->sub_err_type & 0x07);
 111        case    MC_ERROR_TYPE_SLB:
 112        case    MC_ERROR_TYPE_ERAT:
 113        case    MC_ERROR_TYPE_TLB:
 114                return (mlog->sub_err_type & 0x03);
 115        default:
 116                return 0;
 117        }
 118}
 119
 120/*
 121 * Enable the hotplug interrupt late because processing them may touch other
 122 * devices or systems (e.g. hugepages) that have not been initialized at the
 123 * subsys stage.
 124 */
 125static int __init init_ras_hotplug_IRQ(void)
 126{
 127        struct device_node *np;
 128
 129        /* Hotplug Events */
 130        np = of_find_node_by_path("/event-sources/hot-plug-events");
 131        if (np != NULL) {
 132                if (dlpar_workqueue_init() == 0)
 133                        request_event_sources_irqs(np, ras_hotplug_interrupt,
 134                                                   "RAS_HOTPLUG");
 135                of_node_put(np);
 136        }
 137
 138        return 0;
 139}
 140machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 141
 142/*
 143 * Initialize handlers for the set of interrupts caused by hardware errors
 144 * and power system events.
 145 */
 146static int __init init_ras_IRQ(void)
 147{
 148        struct device_node *np;
 149
 150        ras_check_exception_token = rtas_token("check-exception");
 151
 152        /* Internal Errors */
 153        np = of_find_node_by_path("/event-sources/internal-errors");
 154        if (np != NULL) {
 155                request_event_sources_irqs(np, ras_error_interrupt,
 156                                           "RAS_ERROR");
 157                of_node_put(np);
 158        }
 159
 160        /* EPOW Events */
 161        np = of_find_node_by_path("/event-sources/epow-events");
 162        if (np != NULL) {
 163                request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 164                of_node_put(np);
 165        }
 166
 167        return 0;
 168}
 169machine_subsys_initcall(pseries, init_ras_IRQ);
 170
 171#define EPOW_SHUTDOWN_NORMAL                            1
 172#define EPOW_SHUTDOWN_ON_UPS                            2
 173#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
 174#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4
 175
 176static void handle_system_shutdown(char event_modifier)
 177{
 178        switch (event_modifier) {
 179        case EPOW_SHUTDOWN_NORMAL:
 180                pr_emerg("Power off requested\n");
 181                orderly_poweroff(true);
 182                break;
 183
 184        case EPOW_SHUTDOWN_ON_UPS:
 185                pr_emerg("Loss of system power detected. System is running on"
 186                         " UPS/battery. Check RTAS error log for details\n");
 187                break;
 188
 189        case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
 190                pr_emerg("Loss of system critical functions detected. Check"
 191                         " RTAS error log for details\n");
 192                orderly_poweroff(true);
 193                break;
 194
 195        case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 196                pr_emerg("High ambient temperature detected. Check RTAS"
 197                         " error log for details\n");
 198                orderly_poweroff(true);
 199                break;
 200
 201        default:
 202                pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 203                        event_modifier);
 204        }
 205}
 206
/*
 * Layout of the EPOW section data as read by rtas_parse_epow_errlog().
 * That function only uses the low four bits of sensor_value (the action
 * code) and of event_modifier.
 */
struct epow_errorlog {
        unsigned char sensor_value;
        unsigned char event_modifier;
        unsigned char extended_modifier;
        unsigned char reserved;
        unsigned char platform_reason;
};

/* EPOW action codes (low four bits of epow_errorlog.sensor_value) */
#define EPOW_RESET                      0
#define EPOW_WARN_COOLING               1
#define EPOW_WARN_POWER                 2
#define EPOW_SYSTEM_SHUTDOWN            3
#define EPOW_SYSTEM_HALT                4
#define EPOW_MAIN_ENCLOSURE             5
#define EPOW_POWER_OFF                  7
 222
 223static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 224{
 225        struct pseries_errorlog *pseries_log;
 226        struct epow_errorlog *epow_log;
 227        char action_code;
 228        char modifier;
 229
 230        pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
 231        if (pseries_log == NULL)
 232                return;
 233
 234        epow_log = (struct epow_errorlog *)pseries_log->data;
 235        action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
 236        modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */
 237
 238        switch (action_code) {
 239        case EPOW_RESET:
 240                if (num_epow_events) {
 241                        pr_info("Non critical power/cooling issue cleared\n");
 242                        num_epow_events--;
 243                }
 244                break;
 245
 246        case EPOW_WARN_COOLING:
 247                pr_info("Non-critical cooling issue detected. Check RTAS error"
 248                        " log for details\n");
 249                break;
 250
 251        case EPOW_WARN_POWER:
 252                pr_info("Non-critical power issue detected. Check RTAS error"
 253                        " log for details\n");
 254                break;
 255
 256        case EPOW_SYSTEM_SHUTDOWN:
 257                handle_system_shutdown(modifier);
 258                break;
 259
 260        case EPOW_SYSTEM_HALT:
 261                pr_emerg("Critical power/cooling issue detected. Check RTAS"
 262                         " error log for details. Powering off.\n");
 263                orderly_poweroff(true);
 264                break;
 265
 266        case EPOW_MAIN_ENCLOSURE:
 267        case EPOW_POWER_OFF:
 268                pr_emerg("System about to lose power. Check RTAS error log "
 269                         " for details. Powering off immediately.\n");
 270                emergency_sync();
 271                kernel_power_off();
 272                break;
 273
 274        default:
 275                pr_err("Unknown power/cooling event (action code  = %d)\n",
 276                        action_code);
 277        }
 278
 279        /* Increment epow events counter variable */
 280        if (action_code != EPOW_RESET)
 281                num_epow_events++;
 282}
 283
 284static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 285{
 286        struct pseries_errorlog *pseries_log;
 287        struct pseries_hp_errorlog *hp_elog;
 288
 289        spin_lock(&ras_log_buf_lock);
 290
 291        rtas_call(ras_check_exception_token, 6, 1, NULL,
 292                  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
 293                  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
 294                  rtas_get_error_log_max());
 295
 296        pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
 297                                           PSERIES_ELOG_SECT_ID_HOTPLUG);
 298        hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
 299
 300        /*
 301         * Since PCI hotplug is not currently supported on pseries, put PCI
 302         * hotplug events on the ras_log_buf to be handled by rtas_errd.
 303         */
 304        if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
 305            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
 306            hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 307                queue_hotplug_event(hp_elog);
 308        else
 309                log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 310
 311        spin_unlock(&ras_log_buf_lock);
 312        return IRQ_HANDLED;
 313}
 314
 315/* Handle environmental and power warning (EPOW) interrupts. */
 316static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 317{
 318        int state;
 319        int critical;
 320
 321        rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
 322
 323        if (state > 3)
 324                critical = 1;           /* Time Critical */
 325        else
 326                critical = 0;
 327
 328        spin_lock(&ras_log_buf_lock);
 329
 330        rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
 331                  virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
 332                  rtas_get_error_log_max());
 333
 334        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 335
 336        rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
 337
 338        spin_unlock(&ras_log_buf_lock);
 339        return IRQ_HANDLED;
 340}
 341
 342/*
 343 * Handle hardware error interrupts.
 344 *
 345 * RTAS check-exception is called to collect data on the exception.  If
 346 * the error is deemed recoverable, we log a warning and return.
 347 * For nonrecoverable errors, an error is logged and we stop all processing
 348 * as quickly as possible in order to prevent propagation of the failure.
 349 */
 350static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 351{
 352        struct rtas_error_log *rtas_elog;
 353        int status;
 354        int fatal;
 355
 356        spin_lock(&ras_log_buf_lock);
 357
 358        status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 359                           RTAS_VECTOR_EXTERNAL_INTERRUPT,
 360                           virq_to_hw(irq),
 361                           RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
 362                           __pa(&ras_log_buf),
 363                                rtas_get_error_log_max());
 364
 365        rtas_elog = (struct rtas_error_log *)ras_log_buf;
 366
 367        if (status == 0 &&
 368            rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 369                fatal = 1;
 370        else
 371                fatal = 0;
 372
 373        /* format and print the extended information */
 374        log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 375
 376        if (fatal) {
 377                pr_emerg("Fatal hardware error detected. Check RTAS error"
 378                         " log for details. Powering off immediately\n");
 379                emergency_sync();
 380                kernel_power_off();
 381        } else {
 382                pr_err("Recoverable hardware error detected\n");
 383        }
 384
 385        spin_unlock(&ras_log_buf_lock);
 386        return IRQ_HANDLED;
 387}
 388
/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 * Minimum size of the buffer is 16 bytes.
 *
 * Evaluates to true when address A lies in either region with at least
 * 16 bytes left before the end of that region. A is expanded several
 * times, so it must not have side effects.
 */
#define VALID_FWNMI_BUFFER(A) \
        ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
        (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
 397
 398static inline struct rtas_error_log *fwnmi_get_errlog(void)
 399{
 400        return (struct rtas_error_log *)local_paca->mce_data_buf;
 401}
 402
 403static __be64 *fwnmi_get_savep(struct pt_regs *regs)
 404{
 405        unsigned long savep_ra;
 406
 407        /* Mask top two bits */
 408        savep_ra = regs->gpr[3] & ~(0x3UL << 62);
 409        if (!VALID_FWNMI_BUFFER(savep_ra)) {
 410                printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 411                return NULL;
 412        }
 413
 414        return __va(savep_ra);
 415}
 416
 417/*
 418 * Get the error information for errors coming through the
 419 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 420 * the actual r3 if possible, and a ptr to the error log entry
 421 * will be returned if found.
 422 *
 423 * Use one buffer mce_data_buf per cpu to store RTAS error.
 424 *
 425 * The mce_data_buf does not have any locks or protection around it,
 426 * if a second machine check comes in, or a system reset is done
 427 * before we have logged the error, then we will get corruption in the
 428 * error log.  This is preferable over holding off on calling
 429 * ibm,nmi-interlock which would result in us checkstopping if a
 430 * second machine check did come in.
 431 */
 432static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 433{
 434        struct rtas_error_log *h;
 435        __be64 *savep;
 436
 437        savep = fwnmi_get_savep(regs);
 438        if (!savep)
 439                return NULL;
 440
 441        regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 442
 443        h = (struct rtas_error_log *)&savep[1];
 444        /* Use the per cpu buffer from paca to store rtas error log */
 445        memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 446        if (!rtas_error_extended(h)) {
 447                memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
 448        } else {
 449                int len, error_log_length;
 450
 451                error_log_length = 8 + rtas_error_extended_log_length(h);
 452                len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 453                memcpy(local_paca->mce_data_buf, h, len);
 454        }
 455
 456        return (struct rtas_error_log *)local_paca->mce_data_buf;
 457}
 458
 459/* Call this when done with the data returned by FWNMI_get_errinfo.
 460 * It will release the saved data area for other CPUs in the
 461 * partition to receive FWNMI errors.
 462 */
 463static void fwnmi_release_errinfo(void)
 464{
 465        struct rtas_args rtas_args;
 466        int ret;
 467
 468        /*
 469         * On pseries, the machine check stack is limited to under 4GB, so
 470         * args can be on-stack.
 471         */
 472        rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
 473        ret = be32_to_cpu(rtas_args.rets[0]);
 474        if (ret != 0)
 475                printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
 476}
 477
 478int pSeries_system_reset_exception(struct pt_regs *regs)
 479{
 480#ifdef __LITTLE_ENDIAN__
 481        /*
 482         * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
 483         * to detect the bad SRR1 pattern here. Flip the NIP back to correct
 484         * endian for reporting purposes. Unfortunately the MSR can't be fixed,
 485         * so clear it. It will be missing MSR_RI so we won't try to recover.
 486         */
 487        if ((be64_to_cpu(regs->msr) &
 488                        (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 489                         MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
 490                regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
 491                regs_set_return_msr(regs, 0);
 492        }
 493#endif
 494
 495        if (fwnmi_active) {
 496                __be64 *savep;
 497
 498                /*
 499                 * Firmware (PowerVM and KVM) saves r3 to a save area like
 500                 * machine check, which is not exactly what PAPR (2.9)
 501                 * suggests but there is no way to detect otherwise, so this
 502                 * is the interface now.
 503                 *
 504                 * System resets do not save any error log or require an
 505                 * "ibm,nmi-interlock" rtas call to release.
 506                 */
 507
 508                savep = fwnmi_get_savep(regs);
 509                if (savep)
 510                        regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
 511        }
 512
 513        if (smp_handle_nmi_ipi(regs))
 514                return 1;
 515
 516        return 0; /* need to perform reset */
 517}
 518
 519static int mce_handle_err_realmode(int disposition, u8 error_type)
 520{
 521#ifdef CONFIG_PPC_BOOK3S_64
 522        if (disposition == RTAS_DISP_NOT_RECOVERED) {
 523                switch (error_type) {
 524                case    MC_ERROR_TYPE_ERAT:
 525                        flush_erat();
 526                        disposition = RTAS_DISP_FULLY_RECOVERED;
 527                        break;
 528                case    MC_ERROR_TYPE_SLB:
 529                        /*
 530                         * Store the old slb content in paca before flushing.
 531                         * Print this when we go to virtual mode.
 532                         * There are chances that we may hit MCE again if there
 533                         * is a parity error on the SLB entry we trying to read
 534                         * for saving. Hence limit the slb saving to single
 535                         * level of recursion.
 536                         */
 537                        if (local_paca->in_mce == 1)
 538                                slb_save_contents(local_paca->mce_faulty_slbs);
 539                        flush_and_reload_slb();
 540                        disposition = RTAS_DISP_FULLY_RECOVERED;
 541                        break;
 542                default:
 543                        break;
 544                }
 545        } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 546                /* Platform corrected itself but could be degraded */
 547                pr_err("MCE: limited recovery, system may be degraded\n");
 548                disposition = RTAS_DISP_FULLY_RECOVERED;
 549        }
 550#endif
 551        return disposition;
 552}
 553
 554static int mce_handle_err_virtmode(struct pt_regs *regs,
 555                                   struct rtas_error_log *errp,
 556                                   struct pseries_mc_errorlog *mce_log,
 557                                   int disposition)
 558{
 559        struct mce_error_info mce_err = { 0 };
 560        int initiator = rtas_error_initiator(errp);
 561        int severity = rtas_error_severity(errp);
 562        unsigned long eaddr = 0, paddr = 0;
 563        u8 error_type, err_sub_type;
 564
 565        if (!mce_log)
 566                goto out;
 567
 568        error_type = mce_log->error_type;
 569        err_sub_type = rtas_mc_error_sub_type(mce_log);
 570
 571        if (initiator == RTAS_INITIATOR_UNKNOWN)
 572                mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 573        else if (initiator == RTAS_INITIATOR_CPU)
 574                mce_err.initiator = MCE_INITIATOR_CPU;
 575        else if (initiator == RTAS_INITIATOR_PCI)
 576                mce_err.initiator = MCE_INITIATOR_PCI;
 577        else if (initiator == RTAS_INITIATOR_ISA)
 578                mce_err.initiator = MCE_INITIATOR_ISA;
 579        else if (initiator == RTAS_INITIATOR_MEMORY)
 580                mce_err.initiator = MCE_INITIATOR_MEMORY;
 581        else if (initiator == RTAS_INITIATOR_POWERMGM)
 582                mce_err.initiator = MCE_INITIATOR_POWERMGM;
 583        else
 584                mce_err.initiator = MCE_INITIATOR_UNKNOWN;
 585
 586        if (severity == RTAS_SEVERITY_NO_ERROR)
 587                mce_err.severity = MCE_SEV_NO_ERROR;
 588        else if (severity == RTAS_SEVERITY_EVENT)
 589                mce_err.severity = MCE_SEV_WARNING;
 590        else if (severity == RTAS_SEVERITY_WARNING)
 591                mce_err.severity = MCE_SEV_WARNING;
 592        else if (severity == RTAS_SEVERITY_ERROR_SYNC)
 593                mce_err.severity = MCE_SEV_SEVERE;
 594        else if (severity == RTAS_SEVERITY_ERROR)
 595                mce_err.severity = MCE_SEV_SEVERE;
 596        else
 597                mce_err.severity = MCE_SEV_FATAL;
 598
 599        if (severity <= RTAS_SEVERITY_ERROR_SYNC)
 600                mce_err.sync_error = true;
 601        else
 602                mce_err.sync_error = false;
 603
 604        mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 605        mce_err.error_class = MCE_ECLASS_UNKNOWN;
 606
 607        switch (error_type) {
 608        case MC_ERROR_TYPE_UE:
 609                mce_err.error_type = MCE_ERROR_TYPE_UE;
 610                mce_common_process_ue(regs, &mce_err);
 611                if (mce_err.ignore_event)
 612                        disposition = RTAS_DISP_FULLY_RECOVERED;
 613                switch (err_sub_type) {
 614                case MC_ERROR_UE_IFETCH:
 615                        mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
 616                        break;
 617                case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
 618                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
 619                        break;
 620                case MC_ERROR_UE_LOAD_STORE:
 621                        mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
 622                        break;
 623                case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
 624                        mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
 625                        break;
 626                case MC_ERROR_UE_INDETERMINATE:
 627                default:
 628                        mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
 629                        break;
 630                }
 631                if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
 632                        eaddr = be64_to_cpu(mce_log->effective_address);
 633
 634                if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
 635                        paddr = be64_to_cpu(mce_log->logical_address);
 636                } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
 637                        unsigned long pfn;
 638
 639                        pfn = addr_to_pfn(regs, eaddr);
 640                        if (pfn != ULONG_MAX)
 641                                paddr = pfn << PAGE_SHIFT;
 642                }
 643
 644                break;
 645        case MC_ERROR_TYPE_SLB:
 646                mce_err.error_type = MCE_ERROR_TYPE_SLB;
 647                switch (err_sub_type) {
 648                case MC_ERROR_SLB_PARITY:
 649                        mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
 650                        break;
 651                case MC_ERROR_SLB_MULTIHIT:
 652                        mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 653                        break;
 654                case MC_ERROR_SLB_INDETERMINATE:
 655                default:
 656                        mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
 657                        break;
 658                }
 659                if (mce_log->sub_err_type & 0x80)
 660                        eaddr = be64_to_cpu(mce_log->effective_address);
 661                break;
 662        case MC_ERROR_TYPE_ERAT:
 663                mce_err.error_type = MCE_ERROR_TYPE_ERAT;
 664                switch (err_sub_type) {
 665                case MC_ERROR_ERAT_PARITY:
 666                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
 667                        break;
 668                case MC_ERROR_ERAT_MULTIHIT:
 669                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
 670                        break;
 671                case MC_ERROR_ERAT_INDETERMINATE:
 672                default:
 673                        mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
 674                        break;
 675                }
 676                if (mce_log->sub_err_type & 0x80)
 677                        eaddr = be64_to_cpu(mce_log->effective_address);
 678                break;
 679        case MC_ERROR_TYPE_TLB:
 680                mce_err.error_type = MCE_ERROR_TYPE_TLB;
 681                switch (err_sub_type) {
 682                case MC_ERROR_TLB_PARITY:
 683                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
 684                        break;
 685                case MC_ERROR_TLB_MULTIHIT:
 686                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
 687                        break;
 688                case MC_ERROR_TLB_INDETERMINATE:
 689                default:
 690                        mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
 691                        break;
 692                }
 693                if (mce_log->sub_err_type & 0x80)
 694                        eaddr = be64_to_cpu(mce_log->effective_address);
 695                break;
 696        case MC_ERROR_TYPE_D_CACHE:
 697                mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 698                break;
 699        case MC_ERROR_TYPE_I_CACHE:
 700                mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
 701                break;
 702        case MC_ERROR_TYPE_UNKNOWN:
 703        default:
 704                mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
 705                break;
 706        }
 707out:
 708        save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
 709                       &mce_err, regs->nip, eaddr, paddr);
 710        return disposition;
 711}
 712
 713static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 714{
 715        struct pseries_errorlog *pseries_log;
 716        struct pseries_mc_errorlog *mce_log = NULL;
 717        int disposition = rtas_error_disposition(errp);
 718        unsigned long msr;
 719        u8 error_type;
 720
 721        if (!rtas_error_extended(errp))
 722                goto out;
 723
 724        pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 725        if (!pseries_log)
 726                goto out;
 727
 728        mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 729        error_type = mce_log->error_type;
 730
 731        disposition = mce_handle_err_realmode(disposition, error_type);
 732
 733        /*
 734         * Enable translation as we will be accessing per-cpu variables
 735         * in save_mce_event() which may fall outside RMO region, also
 736         * leave it enabled because subsequently we will be queuing work
 737         * to workqueues where again per-cpu variables accessed, besides
 738         * fwnmi_release_errinfo() crashes when called in realmode on
 739         * pseries.
 740         * Note: All the realmode handling like flushing SLB entries for
 741         *       SLB multihit is done by now.
 742         */
 743out:
 744        msr = mfmsr();
 745        mtmsr(msr | MSR_IR | MSR_DR);
 746
 747        disposition = mce_handle_err_virtmode(regs, errp, mce_log,
 748                                              disposition);
 749
 750        /*
 751         * Queue irq work to log this rtas event later.
 752         * irq_work_queue uses per-cpu variables, so do this in virt
 753         * mode as well.
 754         */
 755        irq_work_queue(&mce_errlog_process_work);
 756
 757        mtmsr(msr);
 758
 759        return disposition;
 760}
 761
 762/*
 763 * Process MCE rtas errlog event.
 764 */
 765static void mce_process_errlog_event(struct irq_work *work)
 766{
 767        struct rtas_error_log *err;
 768
 769        err = fwnmi_get_errlog();
 770        log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
 771}
 772
 773/*
 774 * See if we can recover from a machine check exception.
 775 * This is only called on power4 (or above) and only via
 776 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 777 * which provides the error analysis for us.
 778 *
 779 * Return 1 if corrected (or delivered a signal).
 780 * Return 0 if there is nothing we can do.
 781 */
 782static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
 783{
 784        int recovered = 0;
 785
 786        if (regs_is_unrecoverable(regs)) {
 787                /* If MSR_RI isn't set, we cannot recover */
 788                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 789                recovered = 0;
 790        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 791                /* Platform corrected itself */
 792                recovered = 1;
 793        } else if (evt->severity == MCE_SEV_FATAL) {
 794                /* Fatal machine check */
 795                pr_err("Machine check interrupt is fatal\n");
 796                recovered = 0;
 797        }
 798
 799        if (!recovered && evt->sync_error) {
 800                /*
 801                 * Try to kill processes if we get a synchronous machine check
 802                 * (e.g., one caused by execution of this instruction). This
 803                 * will devolve into a panic if we try to kill init or are in
 804                 * an interrupt etc.
 805                 *
 806                 * TODO: Queue up this address for hwpoisioning later.
 807                 * TODO: This is not quite right for d-side machine
 808                 *       checks ->nip is not necessarily the important
 809                 *       address.
 810                 */
 811                if ((user_mode(regs))) {
 812                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 813                        recovered = 1;
 814                } else if (die_will_crash()) {
 815                        /*
 816                         * die() would kill the kernel, so better to go via
 817                         * the platform reboot code that will log the
 818                         * machine check.
 819                         */
 820                        recovered = 0;
 821                } else {
 822                        die_mce("Machine check", regs, SIGBUS);
 823                        recovered = 1;
 824                }
 825        }
 826
 827        return recovered;
 828}
 829
 830/*
 831 * Handle a machine check.
 832 *
 833 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 834 * should be present.  If so the handler which called us tells us if the
 835 * error was recovered (never true if RI=0).
 836 *
 837 * On hardware prior to Power 4 these exceptions were asynchronous which
 838 * means we can't tell exactly where it occurred and so we can't recover.
 839 */
 840int pSeries_machine_check_exception(struct pt_regs *regs)
 841{
 842        struct machine_check_event evt;
 843
 844        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 845                return 0;
 846
 847        /* Print things out */
 848        if (evt.version != MCE_V1) {
 849                pr_err("Machine Check Exception, Unknown event version %d !\n",
 850                       evt.version);
 851                return 0;
 852        }
 853        machine_check_print_event_info(&evt, user_mode(regs), false);
 854
 855        if (recover_mce(regs, &evt))
 856                return 1;
 857
 858        return 0;
 859}
 860
 861long pseries_machine_check_realmode(struct pt_regs *regs)
 862{
 863        struct rtas_error_log *errp;
 864        int disposition;
 865
 866        if (fwnmi_active) {
 867                errp = fwnmi_get_errinfo(regs);
 868                /*
 869                 * Call to fwnmi_release_errinfo() in real mode causes kernel
 870                 * to panic. Hence we will call it as soon as we go into
 871                 * virtual mode.
 872                 */
 873                disposition = mce_handle_error(regs, errp);
 874
 875                fwnmi_release_errinfo();
 876
 877                if (disposition == RTAS_DISP_FULLY_RECOVERED)
 878                        return 1;
 879        }
 880
 881        return 0;
 882}
 883