linux/arch/powerpc/kernel/mce.c
<<
>>
Prefs
   1/*
   2 * Machine check exception handling.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 *
  18 * Copyright 2013 IBM Corporation
  19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  20 */
  21
  22#undef DEBUG
  23#define pr_fmt(fmt) "mce: " fmt
  24
  25#include <linux/hardirq.h>
  26#include <linux/types.h>
  27#include <linux/ptrace.h>
  28#include <linux/percpu.h>
  29#include <linux/export.h>
  30#include <linux/irq_work.h>
  31
  32#include <asm/machdep.h>
  33#include <asm/mce.h>
  34
  35static DEFINE_PER_CPU(int, mce_nest_count);
  36static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
  37
  38/* Queue for delayed MCE events. */
  39static DEFINE_PER_CPU(int, mce_queue_count);
  40static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
  41
  42/* Queue for delayed MCE UE events. */
  43static DEFINE_PER_CPU(int, mce_ue_count);
  44static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
  45                                        mce_ue_event_queue);
  46
  47static void machine_check_process_queued_event(struct irq_work *work);
  48void machine_check_ue_event(struct machine_check_event *evt);
  49static void machine_process_ue_event(struct work_struct *work);
  50
  51static struct irq_work mce_event_process_work = {
  52        .func = machine_check_process_queued_event,
  53};
  54
  55DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
  56
  57static void mce_set_error_info(struct machine_check_event *mce,
  58                               struct mce_error_info *mce_err)
  59{
  60        mce->error_type = mce_err->error_type;
  61        switch (mce_err->error_type) {
  62        case MCE_ERROR_TYPE_UE:
  63                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
  64                break;
  65        case MCE_ERROR_TYPE_SLB:
  66                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
  67                break;
  68        case MCE_ERROR_TYPE_ERAT:
  69                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
  70                break;
  71        case MCE_ERROR_TYPE_TLB:
  72                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
  73                break;
  74        case MCE_ERROR_TYPE_USER:
  75                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
  76                break;
  77        case MCE_ERROR_TYPE_RA:
  78                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
  79                break;
  80        case MCE_ERROR_TYPE_LINK:
  81                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
  82                break;
  83        case MCE_ERROR_TYPE_UNKNOWN:
  84        default:
  85                break;
  86        }
  87}
  88
  89/*
  90 * Decode and save high level MCE information into per cpu buffer which
  91 * is an array of machine_check_event structure.
  92 */
  93void save_mce_event(struct pt_regs *regs, long handled,
  94                    struct mce_error_info *mce_err,
  95                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
  96{
  97        int index = __this_cpu_inc_return(mce_nest_count) - 1;
  98        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
  99
 100        /*
 101         * Return if we don't have enough space to log mce event.
 102         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
 103         * the check below will stop buffer overrun.
 104         */
 105        if (index >= MAX_MC_EVT)
 106                return;
 107
 108        /* Populate generic machine check info */
 109        mce->version = MCE_V1;
 110        mce->srr0 = nip;
 111        mce->srr1 = regs->msr;
 112        mce->gpr3 = regs->gpr[3];
 113        mce->in_use = 1;
 114
 115        /* Mark it recovered if we have handled it and MSR(RI=1). */
 116        if (handled && (regs->msr & MSR_RI))
 117                mce->disposition = MCE_DISPOSITION_RECOVERED;
 118        else
 119                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
 120
 121        mce->initiator = mce_err->initiator;
 122        mce->severity = mce_err->severity;
 123
 124        /*
 125         * Populate the mce error_type and type-specific error_type.
 126         */
 127        mce_set_error_info(mce, mce_err);
 128
 129        if (!addr)
 130                return;
 131
 132        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
 133                mce->u.tlb_error.effective_address_provided = true;
 134                mce->u.tlb_error.effective_address = addr;
 135        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
 136                mce->u.slb_error.effective_address_provided = true;
 137                mce->u.slb_error.effective_address = addr;
 138        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
 139                mce->u.erat_error.effective_address_provided = true;
 140                mce->u.erat_error.effective_address = addr;
 141        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
 142                mce->u.user_error.effective_address_provided = true;
 143                mce->u.user_error.effective_address = addr;
 144        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
 145                mce->u.ra_error.effective_address_provided = true;
 146                mce->u.ra_error.effective_address = addr;
 147        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
 148                mce->u.link_error.effective_address_provided = true;
 149                mce->u.link_error.effective_address = addr;
 150        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
 151                mce->u.ue_error.effective_address_provided = true;
 152                mce->u.ue_error.effective_address = addr;
 153                if (phys_addr != ULONG_MAX) {
 154                        mce->u.ue_error.physical_address_provided = true;
 155                        mce->u.ue_error.physical_address = phys_addr;
 156                        machine_check_ue_event(mce);
 157                }
 158        }
 159        return;
 160}
 161
 162/*
 163 * get_mce_event:
 164 *      mce     Pointer to machine_check_event structure to be filled.
 165 *      release Flag to indicate whether to free the event slot or not.
 166 *              0 <= do not release the mce event. Caller will invoke
 167 *                   release_mce_event() once event has been consumed.
 168 *              1 <= release the slot.
 169 *
 170 *      return  1 = success
 171 *              0 = failure
 172 *
 173 * get_mce_event() will be called by platform specific machine check
 174 * handle routine and in KVM.
 175 * When we call get_mce_event(), we are still in interrupt context and
 176 * preemption will not be scheduled until ret_from_expect() routine
 177 * is called.
 178 */
 179int get_mce_event(struct machine_check_event *mce, bool release)
 180{
 181        int index = __this_cpu_read(mce_nest_count) - 1;
 182        struct machine_check_event *mc_evt;
 183        int ret = 0;
 184
 185        /* Sanity check */
 186        if (index < 0)
 187                return ret;
 188
 189        /* Check if we have MCE info to process. */
 190        if (index < MAX_MC_EVT) {
 191                mc_evt = this_cpu_ptr(&mce_event[index]);
 192                /* Copy the event structure and release the original */
 193                if (mce)
 194                        *mce = *mc_evt;
 195                if (release)
 196                        mc_evt->in_use = 0;
 197                ret = 1;
 198        }
 199        /* Decrement the count to free the slot. */
 200        if (release)
 201                __this_cpu_dec(mce_nest_count);
 202
 203        return ret;
 204}
 205
 206void release_mce_event(void)
 207{
 208        get_mce_event(NULL, true);
 209}
 210
 211
 212/*
 213 * Queue up the MCE event which then can be handled later.
 214 */
 215void machine_check_ue_event(struct machine_check_event *evt)
 216{
 217        int index;
 218
 219        index = __this_cpu_inc_return(mce_ue_count) - 1;
 220        /* If queue is full, just return for now. */
 221        if (index >= MAX_MC_EVT) {
 222                __this_cpu_dec(mce_ue_count);
 223                return;
 224        }
 225        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
 226
 227        /* Queue work to process this event later. */
 228        schedule_work(&mce_ue_event_work);
 229}
 230
 231/*
 232 * Queue up the MCE event which then can be handled later.
 233 */
 234void machine_check_queue_event(void)
 235{
 236        int index;
 237        struct machine_check_event evt;
 238
 239        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 240                return;
 241
 242        index = __this_cpu_inc_return(mce_queue_count) - 1;
 243        /* If queue is full, just return for now. */
 244        if (index >= MAX_MC_EVT) {
 245                __this_cpu_dec(mce_queue_count);
 246                return;
 247        }
 248        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
 249
 250        /* Queue irq work to process this event later. */
 251        irq_work_queue(&mce_event_process_work);
 252}
 253/*
 254 * process pending MCE event from the mce event queue. This function will be
 255 * called during syscall exit.
 256 */
 257static void machine_process_ue_event(struct work_struct *work)
 258{
 259        int index;
 260        struct machine_check_event *evt;
 261
 262        while (__this_cpu_read(mce_ue_count) > 0) {
 263                index = __this_cpu_read(mce_ue_count) - 1;
 264                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
 265#ifdef CONFIG_MEMORY_FAILURE
 266                /*
 267                 * This should probably queued elsewhere, but
 268                 * oh! well
 269                 */
 270                if (evt->error_type == MCE_ERROR_TYPE_UE) {
 271                        if (evt->u.ue_error.physical_address_provided) {
 272                                unsigned long pfn;
 273
 274                                pfn = evt->u.ue_error.physical_address >>
 275                                        PAGE_SHIFT;
 276                                memory_failure(pfn, 0);
 277                        } else
 278                                pr_warn("Failed to identify bad address from "
 279                                        "where the uncorrectable error (UE) "
 280                                        "was generated\n");
 281                }
 282#endif
 283                __this_cpu_dec(mce_ue_count);
 284        }
 285}
 286/*
 287 * process pending MCE event from the mce event queue. This function will be
 288 * called during syscall exit.
 289 */
 290static void machine_check_process_queued_event(struct irq_work *work)
 291{
 292        int index;
 293        struct machine_check_event *evt;
 294
 295        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 296
 297        /*
 298         * For now just print it to console.
 299         * TODO: log this error event to FSP or nvram.
 300         */
 301        while (__this_cpu_read(mce_queue_count) > 0) {
 302                index = __this_cpu_read(mce_queue_count) - 1;
 303                evt = this_cpu_ptr(&mce_event_queue[index]);
 304                machine_check_print_event_info(evt, false);
 305                __this_cpu_dec(mce_queue_count);
 306        }
 307}
 308
 309void machine_check_print_event_info(struct machine_check_event *evt,
 310                                    bool user_mode)
 311{
 312        const char *level, *sevstr, *subtype;
 313        static const char *mc_ue_types[] = {
 314                "Indeterminate",
 315                "Instruction fetch",
 316                "Page table walk ifetch",
 317                "Load/Store",
 318                "Page table walk Load/Store",
 319        };
 320        static const char *mc_slb_types[] = {
 321                "Indeterminate",
 322                "Parity",
 323                "Multihit",
 324        };
 325        static const char *mc_erat_types[] = {
 326                "Indeterminate",
 327                "Parity",
 328                "Multihit",
 329        };
 330        static const char *mc_tlb_types[] = {
 331                "Indeterminate",
 332                "Parity",
 333                "Multihit",
 334        };
 335        static const char *mc_user_types[] = {
 336                "Indeterminate",
 337                "tlbie(l) invalid",
 338        };
 339        static const char *mc_ra_types[] = {
 340                "Indeterminate",
 341                "Instruction fetch (bad)",
 342                "Instruction fetch (foreign)",
 343                "Page table walk ifetch (bad)",
 344                "Page table walk ifetch (foreign)",
 345                "Load (bad)",
 346                "Store (bad)",
 347                "Page table walk Load/Store (bad)",
 348                "Page table walk Load/Store (foreign)",
 349                "Load/Store (foreign)",
 350        };
 351        static const char *mc_link_types[] = {
 352                "Indeterminate",
 353                "Instruction fetch (timeout)",
 354                "Page table walk ifetch (timeout)",
 355                "Load (timeout)",
 356                "Store (timeout)",
 357                "Page table walk Load/Store (timeout)",
 358        };
 359
 360        /* Print things out */
 361        if (evt->version != MCE_V1) {
 362                pr_err("Machine Check Exception, Unknown event version %d !\n",
 363                       evt->version);
 364                return;
 365        }
 366        switch (evt->severity) {
 367        case MCE_SEV_NO_ERROR:
 368                level = KERN_INFO;
 369                sevstr = "Harmless";
 370                break;
 371        case MCE_SEV_WARNING:
 372                level = KERN_WARNING;
 373                sevstr = "";
 374                break;
 375        case MCE_SEV_ERROR_SYNC:
 376                level = KERN_ERR;
 377                sevstr = "Severe";
 378                break;
 379        case MCE_SEV_FATAL:
 380        default:
 381                level = KERN_ERR;
 382                sevstr = "Fatal";
 383                break;
 384        }
 385
 386        printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
 387               evt->disposition == MCE_DISPOSITION_RECOVERED ?
 388               "Recovered" : "Not recovered");
 389
 390        if (user_mode) {
 391                printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
 392                        evt->srr0, current->pid, current->comm);
 393        } else {
 394                printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
 395                       (void *)evt->srr0);
 396        }
 397
 398        printk("%s  Initiator: %s\n", level,
 399               evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
 400        switch (evt->error_type) {
 401        case MCE_ERROR_TYPE_UE:
 402                subtype = evt->u.ue_error.ue_error_type <
 403                        ARRAY_SIZE(mc_ue_types) ?
 404                        mc_ue_types[evt->u.ue_error.ue_error_type]
 405                        : "Unknown";
 406                printk("%s  Error type: UE [%s]\n", level, subtype);
 407                if (evt->u.ue_error.effective_address_provided)
 408                        printk("%s    Effective address: %016llx\n",
 409                               level, evt->u.ue_error.effective_address);
 410                if (evt->u.ue_error.physical_address_provided)
 411                        printk("%s    Physical address:  %016llx\n",
 412                               level, evt->u.ue_error.physical_address);
 413                break;
 414        case MCE_ERROR_TYPE_SLB:
 415                subtype = evt->u.slb_error.slb_error_type <
 416                        ARRAY_SIZE(mc_slb_types) ?
 417                        mc_slb_types[evt->u.slb_error.slb_error_type]
 418                        : "Unknown";
 419                printk("%s  Error type: SLB [%s]\n", level, subtype);
 420                if (evt->u.slb_error.effective_address_provided)
 421                        printk("%s    Effective address: %016llx\n",
 422                               level, evt->u.slb_error.effective_address);
 423                break;
 424        case MCE_ERROR_TYPE_ERAT:
 425                subtype = evt->u.erat_error.erat_error_type <
 426                        ARRAY_SIZE(mc_erat_types) ?
 427                        mc_erat_types[evt->u.erat_error.erat_error_type]
 428                        : "Unknown";
 429                printk("%s  Error type: ERAT [%s]\n", level, subtype);
 430                if (evt->u.erat_error.effective_address_provided)
 431                        printk("%s    Effective address: %016llx\n",
 432                               level, evt->u.erat_error.effective_address);
 433                break;
 434        case MCE_ERROR_TYPE_TLB:
 435                subtype = evt->u.tlb_error.tlb_error_type <
 436                        ARRAY_SIZE(mc_tlb_types) ?
 437                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
 438                        : "Unknown";
 439                printk("%s  Error type: TLB [%s]\n", level, subtype);
 440                if (evt->u.tlb_error.effective_address_provided)
 441                        printk("%s    Effective address: %016llx\n",
 442                               level, evt->u.tlb_error.effective_address);
 443                break;
 444        case MCE_ERROR_TYPE_USER:
 445                subtype = evt->u.user_error.user_error_type <
 446                        ARRAY_SIZE(mc_user_types) ?
 447                        mc_user_types[evt->u.user_error.user_error_type]
 448                        : "Unknown";
 449                printk("%s  Error type: User [%s]\n", level, subtype);
 450                if (evt->u.user_error.effective_address_provided)
 451                        printk("%s    Effective address: %016llx\n",
 452                               level, evt->u.user_error.effective_address);
 453                break;
 454        case MCE_ERROR_TYPE_RA:
 455                subtype = evt->u.ra_error.ra_error_type <
 456                        ARRAY_SIZE(mc_ra_types) ?
 457                        mc_ra_types[evt->u.ra_error.ra_error_type]
 458                        : "Unknown";
 459                printk("%s  Error type: Real address [%s]\n", level, subtype);
 460                if (evt->u.ra_error.effective_address_provided)
 461                        printk("%s    Effective address: %016llx\n",
 462                               level, evt->u.ra_error.effective_address);
 463                break;
 464        case MCE_ERROR_TYPE_LINK:
 465                subtype = evt->u.link_error.link_error_type <
 466                        ARRAY_SIZE(mc_link_types) ?
 467                        mc_link_types[evt->u.link_error.link_error_type]
 468                        : "Unknown";
 469                printk("%s  Error type: Link [%s]\n", level, subtype);
 470                if (evt->u.link_error.effective_address_provided)
 471                        printk("%s    Effective address: %016llx\n",
 472                               level, evt->u.link_error.effective_address);
 473                break;
 474        default:
 475        case MCE_ERROR_TYPE_UNKNOWN:
 476                printk("%s  Error type: Unknown\n", level);
 477                break;
 478        }
 479}
 480EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 481
 482/*
 483 * This function is called in real mode. Strictly no printk's please.
 484 *
 485 * regs->nip and regs->msr contains srr0 and ssr1.
 486 */
 487long machine_check_early(struct pt_regs *regs)
 488{
 489        long handled = 0;
 490
 491        __this_cpu_inc(irq_stat.mce_exceptions);
 492
 493        if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 494                handled = cur_cpu_spec->machine_check_early(regs);
 495        return handled;
 496}
 497
 498/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
 499static enum {
 500        DTRIG_UNKNOWN,
 501        DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
 502        DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
 503} hmer_debug_trig_function;
 504
 505static int init_debug_trig_function(void)
 506{
 507        int pvr;
 508        struct device_node *cpun;
 509        struct property *prop = NULL;
 510        const char *str;
 511
 512        /* First look in the device tree */
 513        preempt_disable();
 514        cpun = of_get_cpu_node(smp_processor_id(), NULL);
 515        if (cpun) {
 516                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
 517                                            prop, str) {
 518                        if (strcmp(str, "bit17-vector-ci-load") == 0)
 519                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
 520                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
 521                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
 522                }
 523                of_node_put(cpun);
 524        }
 525        preempt_enable();
 526
 527        /* If we found the property, don't look at PVR */
 528        if (prop)
 529                goto out;
 530
 531        pvr = mfspr(SPRN_PVR);
 532        /* Check for POWER9 Nimbus (scale-out) */
 533        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
 534                /* DD2.2 and later */
 535                if ((pvr & 0xfff) >= 0x202)
 536                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
 537                /* DD2.0 and DD2.1 - used for vector CI load emulation */
 538                else if ((pvr & 0xfff) >= 0x200)
 539                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
 540        }
 541
 542 out:
 543        switch (hmer_debug_trig_function) {
 544        case DTRIG_VECTOR_CI:
 545                pr_debug("HMI debug trigger used for vector CI load\n");
 546                break;
 547        case DTRIG_SUSPEND_ESCAPE:
 548                pr_debug("HMI debug trigger used for TM suspend escape\n");
 549                break;
 550        default:
 551                break;
 552        }
 553        return 0;
 554}
 555__initcall(init_debug_trig_function);
 556
 557/*
 558 * Handle HMIs that occur as a result of a debug trigger.
 559 * Return values:
 560 * -1 means this is not a HMI cause that we know about
 561 *  0 means no further handling is required
 562 *  1 means further handling is required
 563 */
 564long hmi_handle_debugtrig(struct pt_regs *regs)
 565{
 566        unsigned long hmer = mfspr(SPRN_HMER);
 567        long ret = 0;
 568
 569        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
 570        if (!((hmer & HMER_DEBUG_TRIG)
 571              && hmer_debug_trig_function != DTRIG_UNKNOWN))
 572                return -1;
 573                
 574        hmer &= ~HMER_DEBUG_TRIG;
 575        /* HMER is a write-AND register */
 576        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
 577
 578        switch (hmer_debug_trig_function) {
 579        case DTRIG_VECTOR_CI:
 580                /*
 581                 * Now to avoid problems with soft-disable we
 582                 * only do the emulation if we are coming from
 583                 * host user space
 584                 */
 585                if (regs && user_mode(regs))
 586                        ret = local_paca->hmi_p9_special_emu = 1;
 587
 588                break;
 589
 590        default:
 591                break;
 592        }
 593
 594        /*
 595         * See if any other HMI causes remain to be handled
 596         */
 597        if (hmer & mfspr(SPRN_HMEER))
 598                return -1;
 599
 600        return ret;
 601}
 602
 603/*
 604 * Return values:
 605 */
 606long hmi_exception_realmode(struct pt_regs *regs)
 607{       
 608        int ret;
 609
 610        __this_cpu_inc(irq_stat.hmi_exceptions);
 611
 612        ret = hmi_handle_debugtrig(regs);
 613        if (ret >= 0)
 614                return ret;
 615
 616        wait_for_subcore_guest_exit();
 617
 618        if (ppc_md.hmi_exception_early)
 619                ppc_md.hmi_exception_early(regs);
 620
 621        wait_for_tb_resync();
 622
 623        return 1;
 624}
 625