linux/arch/powerpc/kernel/mce.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/asm-prototypes.h>

#include "setup.h"

static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

static struct irq_work mce_ue_event_irq_work = {
        .func = machine_check_ue_irq_work,
};

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
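
/*
 * Deferral chain, in brief: the real-mode machine check handlers below
 * can neither printk nor sleep, so events are staged through irq_work
 * first. UE events take one further hop onto mce_ue_event_work, an
 * ordinary workqueue item, because memory_failure() may sleep.
 */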

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

int mce_register_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);
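
/*
 * Illustrative consumer sketch (hypothetical my_mce_notify(); the chain
 * hands each notifier a struct machine_check_event * as the data
 * argument, see machine_process_ue_event() below):
 *
 *	static int my_mce_notify(struct notifier_block *nb,
 *				 unsigned long val, void *data)
 *	{
 *		struct machine_check_event *evt = data;
 *
 *		if (evt->error_type == MCE_ERROR_TYPE_UE)
 *			pr_info("UE on CPU %d\n", evt->cpu);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_mce_nb = {
 *		.notifier_call = my_mce_notify,
 *	};
 *
 *	mce_register_notifier(&my_mce_nb);
 */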

static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}

/*
 * Decode and save high-level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = local_paca->mce_info->mce_nest_count++;
        struct machine_check_event *mce;

        mce = &local_paca->mce_info->mce_event[index];
        /*
         * Return if we don't have enough space to log mce event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
        mce->cpu = get_paca()->paca_index;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;
        mce->sync_error = mce_err->sync_error;
        mce->error_class = mce_err->error_class;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);
        if (mce->error_type == MCE_ERROR_TYPE_UE)
                mce->u.ue_error.ignore_event = mce_err->ignore_event;

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        machine_check_ue_event(mce);
                }
        }
        return;
}
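
/*
 * Illustrative caller sketch (the real callers live in the platform
 * machine check code, e.g. mce_power.c; the field values below are
 * examples only, see asm/mce.h for the enums):
 *
 *	struct mce_error_info mce_err = { 0 };
 *
 *	mce_err.error_type = MCE_ERROR_TYPE_UE;
 *	mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
 *	mce_err.severity = MCE_SEV_SEVERE;
 *	mce_err.initiator = MCE_INITIATOR_CPU;
 *	mce_err.sync_error = true;
 *	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 */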

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 = do not release the mce event. Caller will invoke
 *		    release_mce_event() once the event has been consumed.
 *		1 = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform-specific machine check
 * handler routine and by KVM.
 * When we call get_mce_event(), we are still in interrupt context, and
 * preemption cannot occur until the ret_from_except() routine is
 * reached.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = local_paca->mce_info->mce_nest_count - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = &local_paca->mce_info->mce_event[index];
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                local_paca->mce_info->mce_nest_count--;

        return ret;
}

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}
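
/*
 * Illustrative usage (sketch): peek at the current event without freeing
 * the slot, then release it once consumed. MCE_EVENT_RELEASE and
 * MCE_EVENT_DONTRELEASE are the flag names from asm/mce.h.
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... consume evt ...
 *		release_mce_event();
 *	}
 */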

static void machine_check_ue_irq_work(struct irq_work *work)
{
        schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the UE event so that it can be handled later from process
 * context.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = local_paca->mce_info->mce_ue_count++;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                local_paca->mce_info->mce_ue_count--;
                return;
        }
        memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
               evt, sizeof(*evt));

        /* Queue work to process this event later. */
        irq_work_queue(&mce_ue_event_irq_work);
}

/*
 * Queue up the MCE event so that it can be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;
        unsigned long msr;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = local_paca->mce_info->mce_queue_count++;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                local_paca->mce_info->mce_queue_count--;
                return;
        }
        memcpy(&local_paca->mce_info->mce_event_queue[index],
               &evt, sizeof(evt));

        /*
         * Queue irq work to process this event later. Before queuing the
         * work, enable translation for non-radix LPARs, as
         * irq_work_queue() may try to access memory outside the RMO
         * region.
         */
        if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
                msr = mfmsr();
                mtmsr(msr | MSR_IR | MSR_DR);
                irq_work_queue(&mce_event_process_work);
                mtmsr(msr);
        } else {
                irq_work_queue(&mce_event_process_work);
        }
}

void mce_common_process_ue(struct pt_regs *regs,
                           struct mce_error_info *mce_err)
{
        const struct exception_table_entry *entry;

        entry = search_kernel_exception_table(regs->nip);
        if (entry) {
                mce_err->ignore_event = true;
                regs_set_return_ip(regs, extable_fixup(entry));
        }
}
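
/*
 * Example flow (sketch): a UE taken in the middle of copy_from_user()
 * lands with regs->nip inside an extable-covered range, so the fixup
 * above redirects the return address to the access's error path, and
 * ignore_event makes the later reporting stages skip the event (the
 * fixup path reports the failure instead).
 */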

/*
 * Process pending UE events from the per-CPU UE event queue. This
 * function runs from the mce_ue_event_work workqueue, i.e. in process
 * context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (local_paca->mce_info->mce_ue_count > 0) {
                index = local_paca->mce_info->mce_ue_count - 1;
                evt = &local_paca->mce_info->mce_ue_event_queue[index];
                blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but oh well.
                 *
                 * Don't report this machine check because the caller has
                 * asked us to ignore the event: it has a fixup handler
                 * which will do the appropriate error handling and
                 * reporting.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.ignore_event) {
                                local_paca->mce_info->mce_ue_count--;
                                continue;
                        }

                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else {
                                pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
                        }
                }
#endif
                local_paca->mce_info->mce_ue_count--;
        }
}

/*
 * Process pending MCE events from the per-CPU MCE event queue. This
 * function runs from irq_work context, raised by
 * machine_check_queue_event().
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (local_paca->mce_info->mce_queue_count > 0) {
                index = local_paca->mce_info->mce_queue_count - 1;
                evt = &local_paca->mce_info->mce_event_queue[index];

                if (evt->error_type == MCE_ERROR_TYPE_UE &&
                    evt->u.ue_error.ignore_event) {
                        local_paca->mce_info->mce_queue_count--;
                        continue;
                }
                machine_check_print_event_info(evt, false, false);
                local_paca->mce_info->mce_queue_count--;
        }
}

void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype, *err_type, *initiator;
        uint64_t ea = 0, pa = 0;
        int n = 0;
        char dar_str[50];
        char pa_str[50];
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
                "scv invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };
        static const char *mc_error_class[] = {
                "Unknown",
                "Hardware error",
                "Probable Hardware error (some chance of software cause)",
                "Software error",
                "Probable Software error (some chance of hardware cause)",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "Warning";
                break;
        case MCE_SEV_SEVERE:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        switch (evt->initiator) {
        case MCE_INITIATOR_CPU:
                initiator = "CPU";
                break;
        case MCE_INITIATOR_PCI:
                initiator = "PCI";
                break;
        case MCE_INITIATOR_ISA:
                initiator = "ISA";
                break;
        case MCE_INITIATOR_MEMORY:
                initiator = "Memory";
                break;
        case MCE_INITIATOR_POWERMGM:
                initiator = "Power Management";
                break;
        case MCE_INITIATOR_UNKNOWN:
        default:
                initiator = "Unknown";
                break;
        }

        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                err_type = "UE";
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                if (evt->u.ue_error.effective_address_provided)
                        ea = evt->u.ue_error.effective_address;
                if (evt->u.ue_error.physical_address_provided)
                        pa = evt->u.ue_error.physical_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                err_type = "SLB";
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                if (evt->u.slb_error.effective_address_provided)
                        ea = evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                err_type = "ERAT";
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                if (evt->u.erat_error.effective_address_provided)
                        ea = evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                err_type = "TLB";
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                if (evt->u.tlb_error.effective_address_provided)
                        ea = evt->u.tlb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_USER:
                err_type = "User";
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                if (evt->u.user_error.effective_address_provided)
                        ea = evt->u.user_error.effective_address;
                break;
        case MCE_ERROR_TYPE_RA:
                err_type = "Real address";
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                if (evt->u.ra_error.effective_address_provided)
                        ea = evt->u.ra_error.effective_address;
                break;
        case MCE_ERROR_TYPE_LINK:
                err_type = "Link";
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                if (evt->u.link_error.effective_address_provided)
                        ea = evt->u.link_error.effective_address;
                break;
        case MCE_ERROR_TYPE_DCACHE:
                err_type = "D-Cache";
                subtype = "Unknown";
                break;
        case MCE_ERROR_TYPE_ICACHE:
                err_type = "I-Cache";
                subtype = "Unknown";
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                err_type = "Unknown";
                subtype = "";
                break;
        }

        dar_str[0] = pa_str[0] = '\0';
        if (ea && evt->srr0 != ea) {
                /* Load/Store address */
                n = sprintf(dar_str, "DAR: %016llx ", ea);
                if (pa)
                        sprintf(dar_str + n, "paddr: %016llx ", pa);
        } else if (pa) {
                sprintf(pa_str, " paddr: %016llx", pa);
        }

        printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
                level, evt->cpu, sevstr, in_guest ? "Guest" : "",
                err_type, subtype, dar_str,
                evt->disposition == MCE_DISPOSITION_RECOVERED ?
                "Recovered" : "Not recovered");

        if (in_guest || user_mode) {
                printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
                        level, evt->cpu, current->pid, current->comm,
                        in_guest ? "Guest " : "", evt->srr0, pa_str);
        } else {
                printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
                        level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
        }

        printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

        subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
                mc_error_class[evt->error_class] : "Unknown";
        printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_BOOK3S_64
        /* Display faulty slb contents for SLB errors. */
        if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
                slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
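
/*
 * Example of the resulting console output for a recovered kernel-mode UE
 * (format per the printk calls above; all values are illustrative):
 *
 *	MCE: CPU0: machine check (Severe)  UE Load/Store DAR: c00000000f30a108 [Recovered]
 *	MCE: CPU0: NIP: [c000000000123456] some_function+0x26/0x60
 *	MCE: CPU0: Initiator CPU
 *	MCE: CPU0: Hardware error
 */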

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
        long handled = 0;

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);

        return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();

        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }

 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }
        return 0;
}
__initcall(init_debug_trig_function);
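
/*
 * Illustrative device-tree usage (property name and strings as matched
 * above; the node layout is a sketch only):
 *
 *	cpus {
 *		cpu@0 {
 *			ibm,hmi-special-triggers = "bit17-tm-suspend-escape";
 *		};
 *	};
 */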

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;

        hmer &= ~HMER_DEBUG_TRIG;
        /* HMER is a write-AND register */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
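
        /*
         * Worked example of the write-AND semantics used above: the value
         * written to HMER is ANDed into the register, so writing
         * ~HMER_DEBUG_TRIG (all ones except the debug-trigger bit, bit 17
         * per the trigger names matched earlier) clears only that bit and
         * leaves every other pending cause set for the HMEER check below.
         */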

        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * Now to avoid problems with soft-disable we
                 * only do the emulation if we are coming from
                 * host user space
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;

                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}

/*
 * Return values: 0 means the HMI was fully handled here; 1 means
 * further handling is required (see hmi_handle_debugtrig() above).
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
        int ret;

        local_paca->hmi_irqs++;

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}

void __init mce_init(void)
{
        struct mce_info *mce_info;
        u64 limit;
        int i;

        limit = min(ppc64_bolted_size(), ppc64_rma_size);
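        /*
         * The MCE handlers above run in real mode, so keep each CPU's
         * buffer in memory that is addressable there: below the RMA and
         * within the bolted region, whichever is smaller (the limit
         * computed above).
         */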
        for_each_possible_cpu(i) {
                mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
                                                  __alignof__(*mce_info),
                                                  MEMBLOCK_LOW_LIMIT,
                                                  limit, cpu_to_node(i));
                if (!mce_info)
                        goto err;
                paca_ptrs[i]->mce_info = mce_info;
        }
        return;
err:
        panic("Failed to allocate memory for MCE event data\n");
}