linux/arch/powerpc/kernel/mce.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
                                        mce_ue_event_queue);
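
/*
 * Overview of the three per-cpu buffers above: mce_event is the scratch
 * area filled in real mode by save_mce_event(); mce_event_queue holds
 * events deferred to irq_work for printing; mce_ue_event_queue holds UE
 * events deferred to process context so memory_failure() can run. Each
 * is MAX_MC_EVT deep to cope with nested machine checks.
 */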

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}

/*
 * Decode and save high-level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce;

        /*
         * Return if we don't have enough space to log the MCE event.
         * mce_nest_count may go beyond MAX_MC_EVT, but that's OK: this
         * check stops a buffer overrun, and releasing the event brings
         * the count back down.
         */
        if (index >= MAX_MC_EVT)
                return;

        mce = this_cpu_ptr(&mce_event[index]);

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
        mce->cpu = get_paca()->paca_index;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;
        mce->sync_error = mce_err->sync_error;
        mce->error_class = mce_err->error_class;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        machine_check_ue_event(mce);
                }
        }
        return;
}
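
/*
 * Illustrative sketch (not from the original file): a platform handler
 * that decoded, say, an SLB multihit could record it roughly like this.
 * Passing ULONG_MAX as phys_addr means "physical address unknown":
 *
 *      struct mce_error_info mce_err = { 0 };
 *
 *      mce_err.error_type = MCE_ERROR_TYPE_SLB;
 *      mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 *      save_mce_event(regs, 1, &mce_err, regs->nip, addr, ULONG_MAX);
 */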

/*
 * get_mce_event:
 *      mce     Pointer to machine_check_event structure to be filled.
 *      release Flag to indicate whether to free the event slot or not.
 *              0 = do not release the mce event. Caller will invoke
 *                  release_mce_event() once the event has been consumed.
 *              1 = release the slot.
 *
 *      return  1 = success
 *              0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __this_cpu_read(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = this_cpu_ptr(&mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __this_cpu_dec(mce_nest_count);

        return ret;
}

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}
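
/*
 * Illustrative usage sketch (not from the original file): a platform
 * machine check handler, still in interrupt context, might consume the
 * most recent event and then free its slot:
 *
 *      struct machine_check_event evt;
 *
 *      if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *              // ... inspect evt ...
 *              release_mce_event();
 *      }
 *
 * MCE_EVENT_RELEASE/MCE_EVENT_DONTRELEASE are assumed to be the bool
 * aliases from asm/mce.h (MCE_EVENT_RELEASE is used below).
 */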


/*
 * Queue up the MCE UE event so it can be handled later in process context.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = __this_cpu_inc_return(mce_ue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_ue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

        /* Queue work to process this event later. */
        schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event so it can be handled later via irq_work.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}
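
/*
 * Event flow, for orientation: the real-mode handler records the event
 * with save_mce_event(); machine_check_queue_event() then copies it to
 * mce_event_queue and queues irq_work, so that
 * machine_check_process_queued_event() can print it once interrupts are
 * enabled again. UE events with a known physical address additionally
 * flow through mce_ue_event_queue to machine_process_ue_event() in
 * process context, where memory_failure() can run.
 */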
/*
 * Process pending MCE UE events from the MCE UE event queue. This function
 * runs in process context, from the mce_ue_event_work workqueue item.
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (__this_cpu_read(mce_ue_count) > 0) {
                index = __this_cpu_read(mce_ue_count) - 1;
                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but
                 * oh well.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else
                                pr_warn("Failed to identify the bad address from which the uncorrectable error (UE) was generated\n");
                }
#endif
                __this_cpu_dec(mce_ue_count);
        }
}
/*
 * Process pending MCE events from the MCE event queue. This function runs
 * from irq_work context, queued by machine_check_queue_event().
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                evt = this_cpu_ptr(&mce_event_queue[index]);
                machine_check_print_event_info(evt, false, false);
                __this_cpu_dec(mce_queue_count);
        }
}

void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype, *err_type;
        uint64_t ea = 0, pa = 0;
        int n = 0;
        char dar_str[50];
        char pa_str[50];
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };
        static const char *mc_error_class[] = {
                "Unknown",
                "Hardware error",
                "Probable Hardware error (some chance of software cause)",
                "Software error",
                "Probable Software error (some chance of hardware cause)",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "Warning";
                break;
        case MCE_SEV_SEVERE:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                err_type = "UE";
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                if (evt->u.ue_error.effective_address_provided)
                        ea = evt->u.ue_error.effective_address;
                if (evt->u.ue_error.physical_address_provided)
                        pa = evt->u.ue_error.physical_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                err_type = "SLB";
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                if (evt->u.slb_error.effective_address_provided)
                        ea = evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                err_type = "ERAT";
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                if (evt->u.erat_error.effective_address_provided)
                        ea = evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                err_type = "TLB";
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                if (evt->u.tlb_error.effective_address_provided)
                        ea = evt->u.tlb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_USER:
                err_type = "User";
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                if (evt->u.user_error.effective_address_provided)
                        ea = evt->u.user_error.effective_address;
                break;
        case MCE_ERROR_TYPE_RA:
                err_type = "Real address";
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                if (evt->u.ra_error.effective_address_provided)
                        ea = evt->u.ra_error.effective_address;
                break;
        case MCE_ERROR_TYPE_LINK:
                err_type = "Link";
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                if (evt->u.link_error.effective_address_provided)
                        ea = evt->u.link_error.effective_address;
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                err_type = "Unknown";
                subtype = "";
                break;
        }

        dar_str[0] = pa_str[0] = '\0';
        if (ea && evt->srr0 != ea) {
                /* Load/Store address */
                n = sprintf(dar_str, "DAR: %016llx ", ea);
                if (pa)
                        sprintf(dar_str + n, "paddr: %016llx ", pa);
        } else if (pa) {
                sprintf(pa_str, " paddr: %016llx", pa);
        }

        printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
                level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
                err_type, subtype, dar_str,
                evt->disposition == MCE_DISPOSITION_RECOVERED ?
                "Recovered" : "Not recovered");

        if (in_guest || user_mode) {
                printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
                        level, evt->cpu, current->pid, current->comm,
                        in_guest ? "Guest " : "", evt->srr0, pa_str);
        } else {
                printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
                        level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
        }

        subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
                mc_error_class[evt->error_class] : "Unknown";
        printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
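
/*
 * For reference, an SLB multihit reported through the printks above comes
 * out roughly like this on the console (addresses and symbol illustrative):
 *
 *   MCE: CPU0: machine check (Severe) Host SLB Multihit DAR: 00007fffd0a90000 [Recovered]
 *   MCE: CPU0: NIP: [c000000000123456] some_kernel_function+0x34/0xa0
 *   MCE: CPU0: Probable Hardware error (some chance of software cause)
 */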

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
        long handled = 0;

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);
        return handled;
}
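
/*
 * Illustrative sketch (hypothetical platform code, not from this file):
 * a platform opts in to early machine check handling by filling in the
 * machdep hook, e.g.
 *
 *      define_machine(myplat) {
 *              ...
 *              .machine_check_early    = myplat_machine_check_early,
 *      };
 */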

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();

        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }

 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }
        return 0;
}
__initcall(init_debug_trig_function);
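
/*
 * For reference, the device tree override checked above would appear in a
 * CPU node roughly as follows (hypothetical node; property name and string
 * values are the ones matched in the code above):
 *
 *      cpu@0 {
 *              ...
 *              ibm,hmi-special-triggers = "bit17-tm-suspend-escape";
 *      };
 */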

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;

        hmer &= ~HMER_DEBUG_TRIG;
        /*
         * HMER is a write-AND register: bits written as 0 are cleared and
         * bits written as 1 are left alone, so this clears only the
         * HMER_DEBUG_TRIG bit.
         */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * To avoid problems with soft-disabled interrupts we
                 * only do the emulation if we are coming from
                 * host user space.
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;

                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}

/*
 * Return values:
 *  0 means the HMI was fully handled here
 *  1 means further handling is required once back in virtual mode
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
        int ret;

        __this_cpu_inc(irq_stat.hmi_exceptions);

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}