linux/arch/powerpc/kernel/traps.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
   4 *  Copyright 2007-2010 Freescale Semiconductor, Inc.
   5 *
   6 *  Modified by Cort Dougan (cort@cs.nmt.edu)
   7 *  and Paul Mackerras (paulus@samba.org)
   8 */
   9
  10/*
  11 * This file handles the architecture-dependent parts of hardware exceptions
  12 */
  13
  14#include <linux/errno.h>
  15#include <linux/sched.h>
  16#include <linux/sched/debug.h>
  17#include <linux/kernel.h>
  18#include <linux/mm.h>
  19#include <linux/pkeys.h>
  20#include <linux/stddef.h>
  21#include <linux/unistd.h>
  22#include <linux/ptrace.h>
  23#include <linux/user.h>
  24#include <linux/interrupt.h>
  25#include <linux/init.h>
  26#include <linux/extable.h>
  27#include <linux/module.h>       /* print_modules */
  28#include <linux/prctl.h>
  29#include <linux/delay.h>
  30#include <linux/kprobes.h>
  31#include <linux/kexec.h>
  32#include <linux/backlight.h>
  33#include <linux/bug.h>
  34#include <linux/kdebug.h>
  35#include <linux/ratelimit.h>
  36#include <linux/context_tracking.h>
  37#include <linux/smp.h>
  38#include <linux/console.h>
  39#include <linux/kmsg_dump.h>
  40
  41#include <asm/emulated_ops.h>
  42#include <linux/uaccess.h>
  43#include <asm/debugfs.h>
  44#include <asm/interrupt.h>
  45#include <asm/io.h>
  46#include <asm/machdep.h>
  47#include <asm/rtas.h>
  48#include <asm/pmc.h>
  49#include <asm/reg.h>
  50#ifdef CONFIG_PMAC_BACKLIGHT
  51#include <asm/backlight.h>
  52#endif
  53#ifdef CONFIG_PPC64
  54#include <asm/firmware.h>
  55#include <asm/processor.h>
  56#endif
  57#include <asm/kexec.h>
  58#include <asm/ppc-opcode.h>
  59#include <asm/rio.h>
  60#include <asm/fadump.h>
  61#include <asm/switch_to.h>
  62#include <asm/tm.h>
  63#include <asm/debug.h>
  64#include <asm/asm-prototypes.h>
  65#include <asm/hmi.h>
  66#include <sysdev/fsl_pci.h>
  67#include <asm/kprobes.h>
  68#include <asm/stacktrace.h>
  69#include <asm/nmi.h>
  70
  71#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
  72int (*__debugger)(struct pt_regs *regs) __read_mostly;
  73int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
  74int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
  75int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
  76int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
  77int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
  78int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;
  79
  80EXPORT_SYMBOL(__debugger);
  81EXPORT_SYMBOL(__debugger_ipi);
  82EXPORT_SYMBOL(__debugger_bpt);
  83EXPORT_SYMBOL(__debugger_sstep);
  84EXPORT_SYMBOL(__debugger_iabr_match);
  85EXPORT_SYMBOL(__debugger_break_match);
  86EXPORT_SYMBOL(__debugger_fault_handler);
  87#endif
  88
  89/* Transactional Memory trap debug */
  90#ifdef TM_DEBUG_SW
  91#define TM_DEBUG(x...) printk(KERN_INFO x)
  92#else
  93#define TM_DEBUG(x...) do { } while(0)
  94#endif
  95
  96static const char *signame(int signr)
  97{
  98        switch (signr) {
  99        case SIGBUS:    return "bus error";
 100        case SIGFPE:    return "floating point exception";
 101        case SIGILL:    return "illegal instruction";
 102        case SIGSEGV:   return "segfault";
 103        case SIGTRAP:   return "unhandled trap";
 104        }
 105
 106        return "unknown signal";
 107}
 108
 109/*
 110 * Trap & Exception support
 111 */
 112
 113#ifdef CONFIG_PMAC_BACKLIGHT
 114static void pmac_backlight_unblank(void)
 115{
 116        mutex_lock(&pmac_backlight_mutex);
 117        if (pmac_backlight) {
 118                struct backlight_properties *props;
 119
 120                props = &pmac_backlight->props;
 121                props->brightness = props->max_brightness;
 122                props->power = FB_BLANK_UNBLANK;
 123                backlight_update_status(pmac_backlight);
 124        }
 125        mutex_unlock(&pmac_backlight_mutex);
 126}
 127#else
 128static inline void pmac_backlight_unblank(void) { }
 129#endif
 130
 131/*
 132 * If oops/die is expected to crash the machine, return true here.
 133 *
 134 * This should not be expected to be 100% accurate, there may be
 135 * notifiers registered or other unexpected conditions that may bring
 136 * down the kernel. Or if the current process in the kernel is holding
 137 * locks or has other critical state, the kernel may become effectively
 138 * unusable anyway.
 139 */
 140bool die_will_crash(void)
 141{
 142        if (should_fadump_crash())
 143                return true;
 144        if (kexec_should_crash(current))
 145                return true;
 146        if (in_interrupt() || panic_on_oops ||
 147                        !current->pid || is_global_init(current))
 148                return true;
 149
 150        return false;
 151}
 152
 153static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 154static int die_owner = -1;
 155static unsigned int die_nest_count;
 156static int die_counter;
 157
 158extern void panic_flush_kmsg_start(void)
 159{
 160        /*
 161         * These are mostly taken from kernel/panic.c, but tries to do
 162         * relatively minimal work. Don't use delay functions (TB may
 163         * be broken), don't crash dump (need to set a firmware log),
 164         * don't run notifiers. We do want to get some information to
 165         * Linux console.
 166         */
 167        console_verbose();
 168        bust_spinlocks(1);
 169}
 170
 171extern void panic_flush_kmsg_end(void)
 172{
 173        printk_safe_flush_on_panic();
 174        kmsg_dump(KMSG_DUMP_PANIC);
 175        bust_spinlocks(0);
 176        debug_locks_off();
 177        console_flush_on_panic(CONSOLE_FLUSH_PENDING);
 178}
 179
 180static unsigned long oops_begin(struct pt_regs *regs)
 181{
 182        int cpu;
 183        unsigned long flags;
 184
 185        oops_enter();
 186
 187        /* racy, but better than risking deadlock. */
 188        raw_local_irq_save(flags);
 189        cpu = smp_processor_id();
 190        if (!arch_spin_trylock(&die_lock)) {
 191                if (cpu == die_owner)
 192                        /* nested oops. should stop eventually */;
 193                else
 194                        arch_spin_lock(&die_lock);
 195        }
 196        die_nest_count++;
 197        die_owner = cpu;
 198        console_verbose();
 199        bust_spinlocks(1);
 200        if (machine_is(powermac))
 201                pmac_backlight_unblank();
 202        return flags;
 203}
 204NOKPROBE_SYMBOL(oops_begin);
 205
 206static void oops_end(unsigned long flags, struct pt_regs *regs,
 207                               int signr)
 208{
 209        bust_spinlocks(0);
 210        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 211        die_nest_count--;
 212        oops_exit();
 213        printk("\n");
 214        if (!die_nest_count) {
 215                /* Nest count reaches zero, release the lock. */
 216                die_owner = -1;
 217                arch_spin_unlock(&die_lock);
 218        }
 219        raw_local_irq_restore(flags);
 220
 221        /*
 222         * system_reset_excption handles debugger, crash dump, panic, for 0x100
 223         */
 224        if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
 225                return;
 226
 227        crash_fadump(regs, "die oops");
 228
 229        if (kexec_should_crash(current))
 230                crash_kexec(regs);
 231
 232        if (!signr)
 233                return;
 234
 235        /*
 236         * While our oops output is serialised by a spinlock, output
 237         * from panic() called below can race and corrupt it. If we
 238         * know we are going to panic, delay for 1 second so we have a
 239         * chance to get clean backtraces from all CPUs that are oopsing.
 240         */
 241        if (in_interrupt() || panic_on_oops || !current->pid ||
 242            is_global_init(current)) {
 243                mdelay(MSEC_PER_SEC);
 244        }
 245
 246        if (panic_on_oops)
 247                panic("Fatal exception");
 248        do_exit(signr);
 249}
 250NOKPROBE_SYMBOL(oops_end);
 251
 252static char *get_mmu_str(void)
 253{
 254        if (early_radix_enabled())
 255                return " MMU=Radix";
 256        if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
 257                return " MMU=Hash";
 258        return "";
 259}
 260
 261static int __die(const char *str, struct pt_regs *regs, long err)
 262{
 263        printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 264
 265        printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
 266               IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
 267               PAGE_SIZE / 1024, get_mmu_str(),
 268               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
 269               IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
 270               IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
 271               debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
 272               IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
 273               ppc_md.name ? ppc_md.name : "");
 274
 275        if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
 276                return 1;
 277
 278        print_modules();
 279        show_regs(regs);
 280
 281        return 0;
 282}
 283NOKPROBE_SYMBOL(__die);
 284
 285void die(const char *str, struct pt_regs *regs, long err)
 286{
 287        unsigned long flags;
 288
 289        /*
 290         * system_reset_excption handles debugger, crash dump, panic, for 0x100
 291         */
 292        if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
 293                if (debugger(regs))
 294                        return;
 295        }
 296
 297        flags = oops_begin(regs);
 298        if (__die(str, regs, err))
 299                err = 0;
 300        oops_end(flags, regs, err);
 301}
 302NOKPROBE_SYMBOL(die);
 303
 304void user_single_step_report(struct pt_regs *regs)
 305{
 306        force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
 307}
 308
 309static void show_signal_msg(int signr, struct pt_regs *regs, int code,
 310                            unsigned long addr)
 311{
 312        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
 313                                      DEFAULT_RATELIMIT_BURST);
 314
 315        if (!show_unhandled_signals)
 316                return;
 317
 318        if (!unhandled_signal(current, signr))
 319                return;
 320
 321        if (!__ratelimit(&rs))
 322                return;
 323
 324        pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
 325                current->comm, current->pid, signame(signr), signr,
 326                addr, regs->nip, regs->link, code);
 327
 328        print_vma_addr(KERN_CONT " in ", regs->nip);
 329
 330        pr_cont("\n");
 331
 332        show_user_instructions(regs);
 333}
 334
 335static bool exception_common(int signr, struct pt_regs *regs, int code,
 336                              unsigned long addr)
 337{
 338        if (!user_mode(regs)) {
 339                die("Exception in kernel mode", regs, signr);
 340                return false;
 341        }
 342
 343        show_signal_msg(signr, regs, code, addr);
 344
 345        if (arch_irqs_disabled())
 346                interrupt_cond_local_irq_enable(regs);
 347
 348        current->thread.trap_nr = code;
 349
 350        return true;
 351}
 352
 353void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
 354{
 355        if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
 356                return;
 357
 358        force_sig_pkuerr((void __user *) addr, key);
 359}
 360
 361void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 362{
 363        if (!exception_common(signr, regs, code, addr))
 364                return;
 365
 366        force_sig_fault(signr, code, (void __user *)addr);
 367}
 368
 369/*
 370 * The interrupt architecture has a quirk in that the HV interrupts excluding
 371 * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
 372 * that an interrupt handler must do is save off a GPR into a scratch register,
 373 * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
 374 * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
 375 * that it is non-reentrant, which leads to random data corruption.
 376 *
 377 * The solution is for NMI interrupts in HV mode to check if they originated
 378 * from these critical HV interrupt regions. If so, then mark them not
 379 * recoverable.
 380 *
 381 * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
 382 * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
 383 * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
 384 * that would work. However any other guest OS that may have the SPRG live
 385 * and MSR[RI]=1 could encounter silent corruption.
 386 *
 387 * Builds that do not support KVM could take this second option to increase
 388 * the recoverability of NMIs.
 389 */
 390void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
 391{
 392#ifdef CONFIG_PPC_POWERNV
 393        unsigned long kbase = (unsigned long)_stext;
 394        unsigned long nip = regs->nip;
 395
 396        if (!(regs->msr & MSR_RI))
 397                return;
 398        if (!(regs->msr & MSR_HV))
 399                return;
 400        if (regs->msr & MSR_PR)
 401                return;
 402
 403        /*
 404         * Now test if the interrupt has hit a range that may be using
 405         * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
 406         * problem ranges all run un-relocated. Test real and virt modes
 407         * at the same time by dropping the high bit of the nip (virt mode
 408         * entry points still have the +0x4000 offset).
 409         */
 410        nip &= ~0xc000000000000000ULL;
 411        if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
 412                goto nonrecoverable;
 413        if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
 414                goto nonrecoverable;
 415        if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
 416                goto nonrecoverable;
 417        if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
 418                goto nonrecoverable;
 419
 420        /* Trampoline code runs un-relocated so subtract kbase. */
 421        if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
 422                        nip < (unsigned long)(end_real_trampolines - kbase))
 423                goto nonrecoverable;
 424        if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
 425                        nip < (unsigned long)(end_virt_trampolines - kbase))
 426                goto nonrecoverable;
 427        return;
 428
 429nonrecoverable:
 430        regs->msr &= ~MSR_RI;
 431#endif
 432}
 433DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
 434{
 435        unsigned long hsrr0, hsrr1;
 436        bool saved_hsrrs = false;
 437
 438        /*
 439         * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
 440         * The system reset interrupt itself may clobber HSRRs (e.g., to call
 441         * OPAL), so save them here and restore them before returning.
 442         *
 443         * Machine checks don't need to save HSRRs, as the real mode handler
 444         * is careful to avoid them, and the regular handler is not delivered
 445         * as an NMI.
 446         */
 447        if (cpu_has_feature(CPU_FTR_HVMODE)) {
 448                hsrr0 = mfspr(SPRN_HSRR0);
 449                hsrr1 = mfspr(SPRN_HSRR1);
 450                saved_hsrrs = true;
 451        }
 452
 453        hv_nmi_check_nonrecoverable(regs);
 454
 455        __this_cpu_inc(irq_stat.sreset_irqs);
 456
 457        /* See if any machine dependent calls */
 458        if (ppc_md.system_reset_exception) {
 459                if (ppc_md.system_reset_exception(regs))
 460                        goto out;
 461        }
 462
 463        if (debugger(regs))
 464                goto out;
 465
 466        kmsg_dump(KMSG_DUMP_OOPS);
 467        /*
 468         * A system reset is a request to dump, so we always send
 469         * it through the crashdump code (if fadump or kdump are
 470         * registered).
 471         */
 472        crash_fadump(regs, "System Reset");
 473
 474        crash_kexec(regs);
 475
 476        /*
 477         * We aren't the primary crash CPU. We need to send it
 478         * to a holding pattern to avoid it ending up in the panic
 479         * code.
 480         */
 481        crash_kexec_secondary(regs);
 482
 483        /*
 484         * No debugger or crash dump registered, print logs then
 485         * panic.
 486         */
 487        die("System Reset", regs, SIGABRT);
 488
 489        mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
 490        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 491        nmi_panic(regs, "System Reset");
 492
 493out:
 494#ifdef CONFIG_PPC_BOOK3S_64
 495        BUG_ON(get_paca()->in_nmi == 0);
 496        if (get_paca()->in_nmi > 1)
 497                die("Unrecoverable nested System Reset", regs, SIGABRT);
 498#endif
 499        /* Must die if the interrupt is not recoverable */
 500        if (!(regs->msr & MSR_RI)) {
 501                /* For the reason explained in die_mce, nmi_exit before die */
 502                nmi_exit();
 503                die("Unrecoverable System Reset", regs, SIGABRT);
 504        }
 505
 506        if (saved_hsrrs) {
 507                mtspr(SPRN_HSRR0, hsrr0);
 508                mtspr(SPRN_HSRR1, hsrr1);
 509        }
 510
 511        /* What should we do here? We could issue a shutdown or hard reset. */
 512
 513        return 0;
 514}
 515
 516/*
 517 * I/O accesses can cause machine checks on powermacs.
 518 * Check if the NIP corresponds to the address of a sync
 519 * instruction for which there is an entry in the exception
 520 * table.
 521 *  -- paulus.
 522 */
 523static inline int check_io_access(struct pt_regs *regs)
 524{
 525#ifdef CONFIG_PPC32
 526        unsigned long msr = regs->msr;
 527        const struct exception_table_entry *entry;
 528        unsigned int *nip = (unsigned int *)regs->nip;
 529
 530        if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
 531            && (entry = search_exception_tables(regs->nip)) != NULL) {
 532                /*
 533                 * Check that it's a sync instruction, or somewhere
 534                 * in the twi; isync; nop sequence that inb/inw/inl uses.
 535                 * As the address is in the exception table
 536                 * we should be able to read the instr there.
 537                 * For the debug message, we look at the preceding
 538                 * load or store.
 539                 */
 540                if (*nip == PPC_INST_NOP)
 541                        nip -= 2;
 542                else if (*nip == PPC_INST_ISYNC)
 543                        --nip;
 544                if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
 545                        unsigned int rb;
 546
 547                        --nip;
 548                        rb = (*nip >> 11) & 0x1f;
 549                        printk(KERN_DEBUG "%s bad port %lx at %p\n",
 550                               (*nip & 0x100)? "OUT to": "IN from",
 551                               regs->gpr[rb] - _IO_BASE, nip);
 552                        regs->msr |= MSR_RI;
 553                        regs->nip = extable_fixup(entry);
 554                        return 1;
 555                }
 556        }
 557#endif /* CONFIG_PPC32 */
 558        return 0;
 559}
 560
 561#ifdef CONFIG_PPC_ADV_DEBUG_REGS
 562/* On 4xx, the reason for the machine check or program exception
 563   is in the ESR. */
 564#define get_reason(regs)        ((regs)->dsisr)
 565#define REASON_FP               ESR_FP
 566#define REASON_ILLEGAL          (ESR_PIL | ESR_PUO)
 567#define REASON_PRIVILEGED       ESR_PPR
 568#define REASON_TRAP             ESR_PTR
 569#define REASON_PREFIXED         0
 570#define REASON_BOUNDARY         0
 571
 572/* single-step stuff */
 573#define single_stepping(regs)   (current->thread.debug.dbcr0 & DBCR0_IC)
 574#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
 575#define clear_br_trace(regs)    do {} while(0)
 576#else
 577/* On non-4xx, the reason for the machine check or program
 578   exception is in the MSR. */
 579#define get_reason(regs)        ((regs)->msr)
 580#define REASON_TM               SRR1_PROGTM
 581#define REASON_FP               SRR1_PROGFPE
 582#define REASON_ILLEGAL          SRR1_PROGILL
 583#define REASON_PRIVILEGED       SRR1_PROGPRIV
 584#define REASON_TRAP             SRR1_PROGTRAP
 585#define REASON_PREFIXED         SRR1_PREFIXED
 586#define REASON_BOUNDARY         SRR1_BOUNDARY
 587
 588#define single_stepping(regs)   ((regs)->msr & MSR_SE)
 589#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
 590#define clear_br_trace(regs)    ((regs)->msr &= ~MSR_BE)
 591#endif
 592
 593#define inst_length(reason)     (((reason) & REASON_PREFIXED) ? 8 : 4)
 594
 595#if defined(CONFIG_E500)
 596int machine_check_e500mc(struct pt_regs *regs)
 597{
 598        unsigned long mcsr = mfspr(SPRN_MCSR);
 599        unsigned long pvr = mfspr(SPRN_PVR);
 600        unsigned long reason = mcsr;
 601        int recoverable = 1;
 602
 603        if (reason & MCSR_LD) {
 604                recoverable = fsl_rio_mcheck_exception(regs);
 605                if (recoverable == 1)
 606                        goto silent_out;
 607        }
 608
 609        printk("Machine check in kernel mode.\n");
 610        printk("Caused by (from MCSR=%lx): ", reason);
 611
 612        if (reason & MCSR_MCP)
 613                pr_cont("Machine Check Signal\n");
 614
 615        if (reason & MCSR_ICPERR) {
 616                pr_cont("Instruction Cache Parity Error\n");
 617
 618                /*
 619                 * This is recoverable by invalidating the i-cache.
 620                 */
 621                mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
 622                while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
 623                        ;
 624
 625                /*
 626                 * This will generally be accompanied by an instruction
 627                 * fetch error report -- only treat MCSR_IF as fatal
 628                 * if it wasn't due to an L1 parity error.
 629                 */
 630                reason &= ~MCSR_IF;
 631        }
 632
 633        if (reason & MCSR_DCPERR_MC) {
 634                pr_cont("Data Cache Parity Error\n");
 635
 636                /*
 637                 * In write shadow mode we auto-recover from the error, but it
 638                 * may still get logged and cause a machine check.  We should
 639                 * only treat the non-write shadow case as non-recoverable.
 640                 */
 641                /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
 642                 * is not implemented but L1 data cache always runs in write
 643                 * shadow mode. Hence on data cache parity errors HW will
 644                 * automatically invalidate the L1 Data Cache.
 645                 */
 646                if (PVR_VER(pvr) != PVR_VER_E6500) {
 647                        if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
 648                                recoverable = 0;
 649                }
 650        }
 651
 652        if (reason & MCSR_L2MMU_MHIT) {
 653                pr_cont("Hit on multiple TLB entries\n");
 654                recoverable = 0;
 655        }
 656
 657        if (reason & MCSR_NMI)
 658                pr_cont("Non-maskable interrupt\n");
 659
 660        if (reason & MCSR_IF) {
 661                pr_cont("Instruction Fetch Error Report\n");
 662                recoverable = 0;
 663        }
 664
 665        if (reason & MCSR_LD) {
 666                pr_cont("Load Error Report\n");
 667                recoverable = 0;
 668        }
 669
 670        if (reason & MCSR_ST) {
 671                pr_cont("Store Error Report\n");
 672                recoverable = 0;
 673        }
 674
 675        if (reason & MCSR_LDG) {
 676                pr_cont("Guarded Load Error Report\n");
 677                recoverable = 0;
 678        }
 679
 680        if (reason & MCSR_TLBSYNC)
 681                pr_cont("Simultaneous tlbsync operations\n");
 682
 683        if (reason & MCSR_BSL2_ERR) {
 684                pr_cont("Level 2 Cache Error\n");
 685                recoverable = 0;
 686        }
 687
 688        if (reason & MCSR_MAV) {
 689                u64 addr;
 690
 691                addr = mfspr(SPRN_MCAR);
 692                addr |= (u64)mfspr(SPRN_MCARU) << 32;
 693
 694                pr_cont("Machine Check %s Address: %#llx\n",
 695                       reason & MCSR_MEA ? "Effective" : "Physical", addr);
 696        }
 697
 698silent_out:
 699        mtspr(SPRN_MCSR, mcsr);
 700        return mfspr(SPRN_MCSR) == 0 && recoverable;
 701}
 702
 703int machine_check_e500(struct pt_regs *regs)
 704{
 705        unsigned long reason = mfspr(SPRN_MCSR);
 706
 707        if (reason & MCSR_BUS_RBERR) {
 708                if (fsl_rio_mcheck_exception(regs))
 709                        return 1;
 710                if (fsl_pci_mcheck_exception(regs))
 711                        return 1;
 712        }
 713
 714        printk("Machine check in kernel mode.\n");
 715        printk("Caused by (from MCSR=%lx): ", reason);
 716
 717        if (reason & MCSR_MCP)
 718                pr_cont("Machine Check Signal\n");
 719        if (reason & MCSR_ICPERR)
 720                pr_cont("Instruction Cache Parity Error\n");
 721        if (reason & MCSR_DCP_PERR)
 722                pr_cont("Data Cache Push Parity Error\n");
 723        if (reason & MCSR_DCPERR)
 724                pr_cont("Data Cache Parity Error\n");
 725        if (reason & MCSR_BUS_IAERR)
 726                pr_cont("Bus - Instruction Address Error\n");
 727        if (reason & MCSR_BUS_RAERR)
 728                pr_cont("Bus - Read Address Error\n");
 729        if (reason & MCSR_BUS_WAERR)
 730                pr_cont("Bus - Write Address Error\n");
 731        if (reason & MCSR_BUS_IBERR)
 732                pr_cont("Bus - Instruction Data Error\n");
 733        if (reason & MCSR_BUS_RBERR)
 734                pr_cont("Bus - Read Data Bus Error\n");
 735        if (reason & MCSR_BUS_WBERR)
 736                pr_cont("Bus - Write Data Bus Error\n");
 737        if (reason & MCSR_BUS_IPERR)
 738                pr_cont("Bus - Instruction Parity Error\n");
 739        if (reason & MCSR_BUS_RPERR)
 740                pr_cont("Bus - Read Parity Error\n");
 741
 742        return 0;
 743}
 744
 745int machine_check_generic(struct pt_regs *regs)
 746{
 747        return 0;
 748}
 749#elif defined(CONFIG_PPC32)
 750int machine_check_generic(struct pt_regs *regs)
 751{
 752        unsigned long reason = regs->msr;
 753
 754        printk("Machine check in kernel mode.\n");
 755        printk("Caused by (from SRR1=%lx): ", reason);
 756        switch (reason & 0x601F0000) {
 757        case 0x80000:
 758                pr_cont("Machine check signal\n");
 759                break;
 760        case 0x40000:
 761        case 0x140000:  /* 7450 MSS error and TEA */
 762                pr_cont("Transfer error ack signal\n");
 763                break;
 764        case 0x20000:
 765                pr_cont("Data parity error signal\n");
 766                break;
 767        case 0x10000:
 768                pr_cont("Address parity error signal\n");
 769                break;
 770        case 0x20000000:
 771                pr_cont("L1 Data Cache error\n");
 772                break;
 773        case 0x40000000:
 774                pr_cont("L1 Instruction Cache error\n");
 775                break;
 776        case 0x00100000:
 777                pr_cont("L2 data cache parity error\n");
 778                break;
 779        default:
 780                pr_cont("Unknown values in msr\n");
 781        }
 782        return 0;
 783}
 784#endif /* everything else */
 785
 786void die_mce(const char *str, struct pt_regs *regs, long err)
 787{
 788        /*
 789         * The machine check wants to kill the interrupted context, but
 790         * do_exit() checks for in_interrupt() and panics in that case, so
 791         * exit the irq/nmi before calling die.
 792         */
 793        if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
 794                irq_exit();
 795        else
 796                nmi_exit();
 797        die(str, regs, err);
 798}
 799
 800/*
 801 * BOOK3S_64 does not call this handler as a non-maskable interrupt
 802 * (it uses its own early real-mode handler to handle the MCE proper
 803 * and then raises irq_work to call this handler when interrupts are
 804 * enabled).
 805 */
 806#ifdef CONFIG_PPC_BOOK3S_64
 807DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
 808#else
 809DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
 810#endif
 811{
 812        int recover = 0;
 813
 814        __this_cpu_inc(irq_stat.mce_exceptions);
 815
 816        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 817
 818        /* See if any machine dependent calls. In theory, we would want
 819         * to call the CPU first, and call the ppc_md. one if the CPU
 820         * one returns a positive number. However there is existing code
 821         * that assumes the board gets a first chance, so let's keep it
 822         * that way for now and fix things later. --BenH.
 823         */
 824        if (ppc_md.machine_check_exception)
 825                recover = ppc_md.machine_check_exception(regs);
 826        else if (cur_cpu_spec->machine_check)
 827                recover = cur_cpu_spec->machine_check(regs);
 828
 829        if (recover > 0)
 830                goto bail;
 831
 832        if (debugger_fault_handler(regs))
 833                goto bail;
 834
 835        if (check_io_access(regs))
 836                goto bail;
 837
 838        die_mce("Machine check", regs, SIGBUS);
 839
 840bail:
 841        /* Must die if the interrupt is not recoverable */
 842        if (!(regs->msr & MSR_RI))
 843                die_mce("Unrecoverable Machine check", regs, SIGBUS);
 844
 845#ifdef CONFIG_PPC_BOOK3S_64
 846        return;
 847#else
 848        return 0;
 849#endif
 850}
 851
 852DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
 853{
 854        die("System Management Interrupt", regs, SIGABRT);
 855}
 856
 857#ifdef CONFIG_VSX
 858static void p9_hmi_special_emu(struct pt_regs *regs)
 859{
 860        unsigned int ra, rb, t, i, sel, instr, rc;
 861        const void __user *addr;
 862        u8 vbuf[16] __aligned(16), *vdst;
 863        unsigned long ea, msr, msr_mask;
 864        bool swap;
 865
 866        if (__get_user(instr, (unsigned int __user *)regs->nip))
 867                return;
 868
 869        /*
 870         * lxvb16x      opcode: 0x7c0006d8
 871         * lxvd2x       opcode: 0x7c000698
 872         * lxvh8x       opcode: 0x7c000658
 873         * lxvw4x       opcode: 0x7c000618
 874         */
 875        if ((instr & 0xfc00073e) != 0x7c000618) {
 876                pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
 877                         " instr=%08x\n",
 878                         smp_processor_id(), current->comm, current->pid,
 879                         regs->nip, instr);
 880                return;
 881        }
 882
 883        /* Grab vector registers into the task struct */
 884        msr = regs->msr; /* Grab msr before we flush the bits */
 885        flush_vsx_to_thread(current);
 886        enable_kernel_altivec();
 887
 888        /*
 889         * Is userspace running with a different endian (this is rare but
 890         * not impossible)
 891         */
 892        swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
 893
 894        /* Decode the instruction */
 895        ra = (instr >> 16) & 0x1f;
 896        rb = (instr >> 11) & 0x1f;
 897        t = (instr >> 21) & 0x1f;
 898        if (instr & 1)
 899                vdst = (u8 *)&current->thread.vr_state.vr[t];
 900        else
 901                vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
 902
 903        /* Grab the vector address */
 904        ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
 905        if (is_32bit_task())
 906                ea &= 0xfffffffful;
 907        addr = (__force const void __user *)ea;
 908
 909        /* Check it */
 910        if (!access_ok(addr, 16)) {
 911                pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
 912                         " instr=%08x addr=%016lx\n",
 913                         smp_processor_id(), current->comm, current->pid,
 914                         regs->nip, instr, (unsigned long)addr);
 915                return;
 916        }
 917
 918        /* Read the vector */
 919        rc = 0;
 920        if ((unsigned long)addr & 0xfUL)
 921                /* unaligned case */
 922                rc = __copy_from_user_inatomic(vbuf, addr, 16);
 923        else
 924                __get_user_atomic_128_aligned(vbuf, addr, rc);
 925        if (rc) {
 926                pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
 927                         " instr=%08x addr=%016lx\n",
 928                         smp_processor_id(), current->comm, current->pid,
 929                         regs->nip, instr, (unsigned long)addr);
 930                return;
 931        }
 932
 933        pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
 934                 " instr=%08x addr=%016lx\n",
 935                 smp_processor_id(), current->comm, current->pid, regs->nip,
 936                 instr, (unsigned long) addr);
 937
 938        /* Grab instruction "selector" */
 939        sel = (instr >> 6) & 3;
 940
 941        /*
 942         * Check to make sure the facility is actually enabled. This
 943         * could happen if we get a false positive hit.
 944         *
 945         * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
 946         * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
 947         */
 948        msr_mask = MSR_VSX;
 949        if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
 950                msr_mask = MSR_VEC;
 951        if (!(msr & msr_mask)) {
 952                pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
 953                         " instr=%08x msr:%016lx\n",
 954                         smp_processor_id(), current->comm, current->pid,
 955                         regs->nip, instr, msr);
 956                return;
 957        }
 958
 959        /* Do logging here before we modify sel based on endian */
 960        switch (sel) {
 961        case 0: /* lxvw4x */
 962                PPC_WARN_EMULATED(lxvw4x, regs);
 963                break;
 964        case 1: /* lxvh8x */
 965                PPC_WARN_EMULATED(lxvh8x, regs);
 966                break;
 967        case 2: /* lxvd2x */
 968                PPC_WARN_EMULATED(lxvd2x, regs);
 969                break;
 970        case 3: /* lxvb16x */
 971                PPC_WARN_EMULATED(lxvb16x, regs);
 972                break;
 973        }
 974
 975#ifdef __LITTLE_ENDIAN__
 976        /*
 977         * An LE kernel stores the vector in the task struct as an LE
 978         * byte array (effectively swapping both the components and
 979         * the content of the components). Those instructions expect
 980         * the components to remain in ascending address order, so we
 981         * swap them back.
 982         *
 983         * If we are running a BE user space, the expectation is that
 984         * of a simple memcpy, so forcing the emulation to look like
 985         * a lxvb16x should do the trick.
 986         */
 987        if (swap)
 988                sel = 3;
 989
 990        switch (sel) {
 991        case 0: /* lxvw4x */
 992                for (i = 0; i < 4; i++)
 993                        ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
 994                break;
 995        case 1: /* lxvh8x */
 996                for (i = 0; i < 8; i++)
 997                        ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
 998                break;
 999        case 2: /* lxvd2x */
1000                for (i = 0; i < 2; i++)
1001                        ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
1002                break;
1003        case 3: /* lxvb16x */
1004                for (i = 0; i < 16; i++)
1005                        vdst[i] = vbuf[15-i];
1006                break;
1007        }
1008#else /* __LITTLE_ENDIAN__ */
1009        /* On a big endian kernel, a BE userspace only needs a memcpy */
1010        if (!swap)
1011                sel = 3;
1012
1013        /* Otherwise, we need to swap the content of the components */
1014        switch (sel) {
1015        case 0: /* lxvw4x */
1016                for (i = 0; i < 4; i++)
1017                        ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
1018                break;
1019        case 1: /* lxvh8x */
1020                for (i = 0; i < 8; i++)
1021                        ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
1022                break;
1023        case 2: /* lxvd2x */
1024                for (i = 0; i < 2; i++)
1025                        ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
1026                break;
1027        case 3: /* lxvb16x */
1028                memcpy(vdst, vbuf, 16);
1029                break;
1030        }
1031#endif /* !__LITTLE_ENDIAN__ */
1032
1033        /* Go to next instruction */
1034        regs->nip += 4;
1035}
1036#endif /* CONFIG_VSX */
1037
1038DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
1039{
1040        struct pt_regs *old_regs;
1041
1042        old_regs = set_irq_regs(regs);
1043
1044#ifdef CONFIG_VSX
1045        /* Real mode flagged P9 special emu is needed */
1046        if (local_paca->hmi_p9_special_emu) {
1047                local_paca->hmi_p9_special_emu = 0;
1048
1049                /*
1050                 * We don't want to take page faults while doing the
1051                 * emulation, we just replay the instruction if necessary.
1052                 */
1053                pagefault_disable();
1054                p9_hmi_special_emu(regs);
1055                pagefault_enable();
1056        }
1057#endif /* CONFIG_VSX */
1058
1059        if (ppc_md.handle_hmi_exception)
1060                ppc_md.handle_hmi_exception(regs);
1061
1062        set_irq_regs(old_regs);
1063}
1064
1065DEFINE_INTERRUPT_HANDLER(unknown_exception)
1066{
1067        printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1068               regs->nip, regs->msr, regs->trap);
1069
1070        _exception(SIGTRAP, regs, TRAP_UNK, 0);
1071}
1072
1073DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
1074{
1075        printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1076               regs->nip, regs->msr, regs->trap);
1077
1078        _exception(SIGTRAP, regs, TRAP_UNK, 0);
1079}
1080
1081DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
1082{
1083        printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1084               regs->nip, regs->msr, regs->trap);
1085
1086        _exception(SIGTRAP, regs, TRAP_UNK, 0);
1087
1088        return 0;
1089}
1090
1091DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
1092{
1093        if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
1094                                        5, SIGTRAP) == NOTIFY_STOP)
1095                return;
1096        if (debugger_iabr_match(regs))
1097                return;
1098        _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1099}
1100
1101DEFINE_INTERRUPT_HANDLER(RunModeException)
1102{
1103        _exception(SIGTRAP, regs, TRAP_UNK, 0);
1104}
1105
1106DEFINE_INTERRUPT_HANDLER(single_step_exception)
1107{
1108        clear_single_step(regs);
1109        clear_br_trace(regs);
1110
1111        if (kprobe_post_handler(regs))
1112                return;
1113
1114        if (notify_die(DIE_SSTEP, "single_step", regs, 5,
1115                                        5, SIGTRAP) == NOTIFY_STOP)
1116                return;
1117        if (debugger_sstep(regs))
1118                return;
1119
1120        _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1121}
1122
1123/*
1124 * After we have successfully emulated an instruction, we have to
1125 * check if the instruction was being single-stepped, and if so,
1126 * pretend we got a single-step exception.  This was pointed out
1127 * by Kumar Gala.  -- paulus
1128 */
1129static void emulate_single_step(struct pt_regs *regs)
1130{
1131        if (single_stepping(regs))
1132                single_step_exception(regs);
1133}
1134
1135static inline int __parse_fpscr(unsigned long fpscr)
1136{
1137        int ret = FPE_FLTUNK;
1138
1139        /* Invalid operation */
1140        if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
1141                ret = FPE_FLTINV;
1142
1143        /* Overflow */
1144        else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
1145                ret = FPE_FLTOVF;
1146
1147        /* Underflow */
1148        else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
1149                ret = FPE_FLTUND;
1150
1151        /* Divide by zero */
1152        else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
1153                ret = FPE_FLTDIV;
1154
1155        /* Inexact result */
1156        else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
1157                ret = FPE_FLTRES;
1158
1159        return ret;
1160}
1161
1162static void parse_fpe(struct pt_regs *regs)
1163{
1164        int code = 0;
1165
1166        flush_fp_to_thread(current);
1167
1168#ifdef CONFIG_PPC_FPU_REGS
1169        code = __parse_fpscr(current->thread.fp_state.fpscr);
1170#endif
1171
1172        _exception(SIGFPE, regs, code, regs->nip);
1173}
1174
1175/*
1176 * Illegal instruction emulation support.  Originally written to
1177 * provide the PVR to user applications using the mfspr rd, PVR.
1178 * Return non-zero if we can't emulate, or -EFAULT if the associated
1179 * memory access caused an access fault.  Return zero on success.
1180 *
1181 * There are a couple of ways to do this, either "decode" the instruction
1182 * or directly match lots of bits.  In this case, matching lots of
1183 * bits is faster and easier.
1184 *
1185 */
1186static int emulate_string_inst(struct pt_regs *regs, u32 instword)
1187{
1188        u8 rT = (instword >> 21) & 0x1f;
1189        u8 rA = (instword >> 16) & 0x1f;
1190        u8 NB_RB = (instword >> 11) & 0x1f;
1191        u32 num_bytes;
1192        unsigned long EA;
1193        int pos = 0;
1194
1195        /* Early out if we are an invalid form of lswx */
1196        if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
1197                if ((rT == rA) || (rT == NB_RB))
1198                        return -EINVAL;
1199
1200        EA = (rA == 0) ? 0 : regs->gpr[rA];
1201
1202        switch (instword & PPC_INST_STRING_MASK) {
1203                case PPC_INST_LSWX:
1204                case PPC_INST_STSWX:
1205                        EA += NB_RB;
1206                        num_bytes = regs->xer & 0x7f;
1207                        break;
1208                case PPC_INST_LSWI:
1209                case PPC_INST_STSWI:
1210                        num_bytes = (NB_RB == 0) ? 32 : NB_RB;
1211                        break;
1212                default:
1213                        return -EINVAL;
1214        }
1215
1216        while (num_bytes != 0)
1217        {
1218                u8 val;
1219                u32 shift = 8 * (3 - (pos & 0x3));
1220
1221                /* if process is 32-bit, clear upper 32 bits of EA */
1222                if ((regs->msr & MSR_64BIT) == 0)
1223                        EA &= 0xFFFFFFFF;
1224
1225                switch ((instword & PPC_INST_STRING_MASK)) {
1226                        case PPC_INST_LSWX:
1227                        case PPC_INST_LSWI:
1228                                if (get_user(val, (u8 __user *)EA))
1229                                        return -EFAULT;
1230                                /* first time updating this reg,
1231                                 * zero it out */
1232                                if (pos == 0)
1233                                        regs->gpr[rT] = 0;
1234                                regs->gpr[rT] |= val << shift;
1235                                break;
1236                        case PPC_INST_STSWI:
1237                        case PPC_INST_STSWX:
1238                                val = regs->gpr[rT] >> shift;
1239                                if (put_user(val, (u8 __user *)EA))
1240                                        return -EFAULT;
1241                                break;
1242                }
1243                /* move EA to next address */
1244                EA += 1;
1245                num_bytes--;
1246
1247                /* manage our position within the register */
1248                if (++pos == 4) {
1249                        pos = 0;
1250                        if (++rT == 32)
1251                                rT = 0;
1252                }
1253        }
1254
1255        return 0;
1256}
1257
1258static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
1259{
1260        u32 ra,rs;
1261        unsigned long tmp;
1262
1263        ra = (instword >> 16) & 0x1f;
1264        rs = (instword >> 21) & 0x1f;
1265
1266        tmp = regs->gpr[rs];
1267        tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
1268        tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
1269        tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
1270        regs->gpr[ra] = tmp;
1271
1272        return 0;
1273}
1274
1275static int emulate_isel(struct pt_regs *regs, u32 instword)
1276{
1277        u8 rT = (instword >> 21) & 0x1f;
1278        u8 rA = (instword >> 16) & 0x1f;
1279        u8 rB = (instword >> 11) & 0x1f;
1280        u8 BC = (instword >> 6) & 0x1f;
1281        u8 bit;
1282        unsigned long tmp;
1283
1284        tmp = (rA == 0) ? 0 : regs->gpr[rA];
1285        bit = (regs->ccr >> (31 - BC)) & 0x1;
1286
1287        regs->gpr[rT] = bit ? tmp : regs->gpr[rB];
1288
1289        return 0;
1290}
1291
1292#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1293static inline bool tm_abort_check(struct pt_regs *regs, int cause)
1294{
1295        /* If we're emulating a load/store in an active transaction, we cannot
1296         * emulate it as the kernel operates in transaction suspended context.
1297         * We need to abort the transaction.  This creates a persistent TM
1298         * abort so tell the user what caused it with a new code.
1299         */
1300        if (MSR_TM_TRANSACTIONAL(regs->msr)) {
1301                tm_enable();
1302                tm_abort(cause);
1303                return true;
1304        }
1305        return false;
1306}
1307#else
1308static inline bool tm_abort_check(struct pt_regs *regs, int reason)
1309{
1310        return false;
1311}
1312#endif
1313
1314static int emulate_instruction(struct pt_regs *regs)
1315{
1316        u32 instword;
1317        u32 rd;
1318
1319        if (!user_mode(regs))
1320                return -EINVAL;
1321
1322        if (get_user(instword, (u32 __user *)(regs->nip)))
1323                return -EFAULT;
1324
1325        /* Emulate the mfspr rD, PVR. */
1326        if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
1327                PPC_WARN_EMULATED(mfpvr, regs);
1328                rd = (instword >> 21) & 0x1f;
1329                regs->gpr[rd] = mfspr(SPRN_PVR);
1330                return 0;
1331        }
1332
1333        /* Emulating the dcba insn is just a no-op.  */
1334        if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
1335                PPC_WARN_EMULATED(dcba, regs);
1336                return 0;
1337        }
1338
1339        /* Emulate the mcrxr insn.  */
1340        if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
1341                int shift = (instword >> 21) & 0x1c;
1342                unsigned long msk = 0xf0000000UL >> shift;
1343
1344                PPC_WARN_EMULATED(mcrxr, regs);
1345                regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
1346                regs->xer &= ~0xf0000000UL;
1347                return 0;
1348        }
1349
1350        /* Emulate load/store string insn. */
1351        if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
1352                if (tm_abort_check(regs,
1353                                   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
1354                        return -EINVAL;
1355                PPC_WARN_EMULATED(string, regs);
1356                return emulate_string_inst(regs, instword);
1357        }
1358
1359        /* Emulate the popcntb (Population Count Bytes) instruction. */
1360        if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
1361                PPC_WARN_EMULATED(popcntb, regs);
1362                return emulate_popcntb_inst(regs, instword);
1363        }
1364
1365        /* Emulate isel (Integer Select) instruction */
1366        if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
1367                PPC_WARN_EMULATED(isel, regs);
1368                return emulate_isel(regs, instword);
1369        }
1370
1371        /* Emulate sync instruction variants */
1372        if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
1373                PPC_WARN_EMULATED(sync, regs);
1374                asm volatile("sync");
1375                return 0;
1376        }
1377
1378#ifdef CONFIG_PPC64
1379        /* Emulate the mfspr rD, DSCR. */
1380        if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
1381                PPC_INST_MFSPR_DSCR_USER) ||
1382             ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
1383                PPC_INST_MFSPR_DSCR)) &&
1384                        cpu_has_feature(CPU_FTR_DSCR)) {
1385                PPC_WARN_EMULATED(mfdscr, regs);
1386                rd = (instword >> 21) & 0x1f;
1387                regs->gpr[rd] = mfspr(SPRN_DSCR);
1388                return 0;
1389        }
1390        /* Emulate the mtspr DSCR, rD. */
1391        if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
1392                PPC_INST_MTSPR_DSCR_USER) ||
1393             ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
1394                PPC_INST_MTSPR_DSCR)) &&
1395                        cpu_has_feature(CPU_FTR_DSCR)) {
1396                PPC_WARN_EMULATED(mtdscr, regs);
1397                rd = (instword >> 21) & 0x1f;
1398                current->thread.dscr = regs->gpr[rd];
1399                current->thread.dscr_inherit = 1;
1400                mtspr(SPRN_DSCR, current->thread.dscr);
1401                return 0;
1402        }
1403#endif
1404
1405        return -EINVAL;
1406}
1407
1408int is_valid_bugaddr(unsigned long addr)
1409{
1410        return is_kernel_addr(addr);
1411}
1412
1413#ifdef CONFIG_MATH_EMULATION
1414static int emulate_math(struct pt_regs *regs)
1415{
1416        int ret;
1417
1418        ret = do_mathemu(regs);
1419        if (ret >= 0)
1420                PPC_WARN_EMULATED(math, regs);
1421
1422        switch (ret) {
1423        case 0:
1424                emulate_single_step(regs);
1425                return 0;
1426        case 1: {
1427                        int code = 0;
1428                        code = __parse_fpscr(current->thread.fp_state.fpscr);
1429                        _exception(SIGFPE, regs, code, regs->nip);
1430                        return 0;
1431                }
1432        case -EFAULT:
1433                _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1434                return 0;
1435        }
1436
1437        return -1;
1438}
1439#else
1440static inline int emulate_math(struct pt_regs *regs) { return -1; }
1441#endif
1442
1443static void do_program_check(struct pt_regs *regs)
1444{
1445        unsigned int reason = get_reason(regs);
1446
1447        /* We can now get here via a FP Unavailable exception if the core
1448         * has no FPU, in that case the reason flags will be 0 */
1449
1450        if (reason & REASON_FP) {
1451                /* IEEE FP exception */
1452                parse_fpe(regs);
1453                return;
1454        }
1455        if (reason & REASON_TRAP) {
1456                unsigned long bugaddr;
1457                /* Debugger is first in line to stop recursive faults in
1458                 * rcu_lock, notify_die, or atomic_notifier_call_chain */
1459                if (debugger_bpt(regs))
1460                        return;
1461
1462                if (kprobe_handler(regs))
1463                        return;
1464
1465                /* trap exception */
1466                if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
1467                                == NOTIFY_STOP)
1468                        return;
1469
1470                bugaddr = regs->nip;
1471                /*
1472                 * Fixup bugaddr for BUG_ON() in real mode
1473                 */
1474                if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
1475                        bugaddr += PAGE_OFFSET;
1476
1477                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
1478                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
1479                        regs->nip += 4;
1480                        return;
1481                }
1482                _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1483                return;
1484        }
1485#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1486        if (reason & REASON_TM) {
1487                /* This is a TM "Bad Thing Exception" program check.
1488                 * This occurs when:
1489                 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
1490                 *    transition in TM states.
1491                 * -  A trechkpt is attempted when transactional.
1492                 * -  A treclaim is attempted when non transactional.
1493                 * -  A tend is illegally attempted.
1494                 * -  writing a TM SPR when transactional.
1495                 *
1496                 * If usermode caused this, it's done something illegal and
1497                 * gets a SIGILL slap on the wrist.  We call it an illegal
1498                 * operand to distinguish from the instruction just being bad
1499                 * (e.g. executing a 'tend' on a CPU without TM!); it's an
1500                 * illegal /placement/ of a valid instruction.
1501                 */
1502                if (user_mode(regs)) {
1503                        _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
1504                        return;
1505                } else {
1506                        printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1507                               "at %lx (msr 0x%lx) tm_scratch=%llx\n",
1508                               regs->nip, regs->msr, get_paca()->tm_scratch);
1509                        die("Unrecoverable exception", regs, SIGABRT);
1510                }
1511        }
1512#endif
1513
1514        /*
1515         * If we took the program check in the kernel skip down to sending a
1516         * SIGILL. The subsequent cases all relate to emulating instructions
1517         * which we should only do for userspace. We also do not want to enable
1518         * interrupts for kernel faults because that might lead to further
1519         * faults, and loose the context of the original exception.
1520         */
1521        if (!user_mode(regs))
1522                goto sigill;
1523
1524        interrupt_cond_local_irq_enable(regs);
1525
        /* (reason & REASON_ILLEGAL) would be the obvious thing here,
         * but there seems to be a hardware bug on the 405GP (RevD)
         * that means ESR is sometimes set incorrectly - either to
         * ESR_DST (!?) or 0.  In the process of chasing this with the
         * hardware people - not sure if it can happen on any illegal
         * instruction or only on FP instructions, whether there is a
         * pattern to occurrences etc. -dgibson 31/Mar/2003
         */
        if (!emulate_math(regs))
                return;

        /* Try to emulate it if we should. */
        if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
                switch (emulate_instruction(regs)) {
                case 0:
                        regs->nip += 4;
                        emulate_single_step(regs);
                        return;
                case -EFAULT:
                        _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
                        return;
                }
        }
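        /*
         * Any other return value from emulate_instruction() (e.g. an
         * unrecognized instruction) falls through to the SIGILL path below.
         */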

sigill:
        if (reason & REASON_PRIVILEGED)
                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
        else
                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
}

DEFINE_INTERRUPT_HANDLER(program_check_exception)
{
        do_program_check(regs);
}

/*
 * This occurs when running in hypervisor mode on POWER6 or later
 * and an illegal instruction is encountered.
 */
DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
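        /*
         * Fake up an illegal-instruction program check: REASON_ILLEGAL
         * corresponds to the SRR1/ESR illegal-instruction bit that
         * do_program_check() tests, so setting it in the saved MSR image
         * routes this interrupt through the same emulation/SIGILL logic
         * as a normal program check.
         */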
        regs->msr |= REASON_ILLEGAL;
        do_program_check(regs);
}

DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
        int sig, code, fixed = 0;
        unsigned long reason;

        interrupt_cond_local_irq_enable(regs);

        reason = get_reason(regs);
        if (reason & REASON_BOUNDARY) {
                sig = SIGBUS;
                code = BUS_ADRALN;
                goto bad;
        }

        if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
                return;

        /* we don't implement logging of alignment exceptions */
        if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
                fixed = fix_alignment(regs);

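        /*
         * fix_alignment() is expected to return 1 if it emulated the
         * access, -EFAULT if the operand address was inaccessible, and
         * 0 if the instruction could not be handled; the cases below
         * mirror that.
         */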
        if (fixed == 1) {
                /* skip over the emulated instruction (8 bytes if prefixed) */
                regs->nip += inst_length(reason);
                emulate_single_step(regs);
                return;
        }

        /* Operand address was bad */
        if (fixed == -EFAULT) {
                sig = SIGSEGV;
                code = SEGV_ACCERR;
        } else {
                sig = SIGBUS;
                code = BUS_ADRALN;
        }
bad:
        if (user_mode(regs))
                _exception(sig, regs, code, regs->dar);
        else
                bad_page_fault(regs, sig);
}

DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
        die("Kernel stack overflow", regs, SIGSEGV);
}

DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
        printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
                          "%lx at %lx\n", regs->trap, regs->nip);
        die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
}

DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
        if (user_mode(regs)) {
                /* A user program has executed an altivec instruction,
                   but this kernel doesn't support altivec. */
                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
                return;
        }

        printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
                        "%lx at %lx\n", regs->trap, regs->nip);
        die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
}

DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
{
        if (user_mode(regs)) {
                /* A user program has executed a VSX instruction,
                   but this kernel doesn't support VSX. */
                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
                return;
        }

        printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
                        "%lx at %lx\n", regs->trap, regs->nip);
        die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
}

#ifdef CONFIG_PPC64
static void tm_unavailable(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
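        /*
         * Userspace takes this fault when MSR[TM] has been cleared by the
         * lazy TM-enable logic: bump load_tm, turn MSR[TM] back on and
         * restore the TM SPRs, then return so the faulting instruction
         * can be retried.
         */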
        if (user_mode(regs)) {
                current->thread.load_tm++;
                regs->msr |= MSR_TM;
                tm_enable();
                tm_restore_sprs(&current->thread);
                return;
        }
#endif
        pr_emerg("Unrecoverable TM Unavailable Exception "
                        "%lx at %lx\n", regs->trap, regs->nip);
        die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
}

DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
{
        static char *facility_strings[] = {
                [FSCR_FP_LG] = "FPU",
                [FSCR_VECVSX_LG] = "VMX/VSX",
                [FSCR_DSCR_LG] = "DSCR",
                [FSCR_PM_LG] = "PMU SPRs",
                [FSCR_BHRB_LG] = "BHRB",
                [FSCR_TM_LG] = "TM",
                [FSCR_EBB_LG] = "EBB",
                [FSCR_TAR_LG] = "TAR",
                [FSCR_MSGP_LG] = "MSGP",
                [FSCR_SCV_LG] = "SCV",
                [FSCR_PREFIX_LG] = "PREFIX",
        };
        char *facility = "unknown";
        u64 value;
        u32 instword, rd;
        u8 status;
        bool hv;

        hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
        if (hv)
                value = mfspr(SPRN_HFSCR);
        else
                value = mfspr(SPRN_FSCR);

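        /* The interrupt cause is encoded in the top byte of the (H)FSCR. */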
        status = value >> 56;
        if ((hv || status >= 2) &&
            (status < ARRAY_SIZE(facility_strings)) &&
            facility_strings[status])
                facility = facility_strings[status];

        /* We should not have taken this interrupt in the kernel */
        if (!user_mode(regs)) {
                pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
                         facility, status, regs->nip);
                die("Unexpected facility unavailable exception", regs, SIGABRT);
        }

        interrupt_cond_local_irq_enable(regs);

        if (status == FSCR_DSCR_LG) {
                /*
                 * The user is accessing the DSCR register using the
                 * problem-state-only SPR number (0x03), either through a
                 * mfspr or a mtspr instruction. If it is a write attempt
                 * through a mtspr, we set the inherit bit and also set the
                 * FSCR DSCR bit, which lets the user read or write the
                 * register directly from then on. If it is a read attempt
                 * through a mfspr, we just emulate the instruction instead.
                 * This path therefore keeps emulating mfspr until the user
                 * has attempted at least one mtspr, matching the behaviour
                 * seen when the DSCR is accessed through the privileged
                 * SPR number (0x11), which is emulated via the illegal
                 * instruction exception. We always leave HFSCR DSCR set.
                 */
                if (get_user(instword, (u32 __user *)(regs->nip))) {
                        pr_err("Failed to fetch the user instruction\n");
                        return;
                }

                /* Write into DSCR (mtspr 0x03, RS) */
                if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
                                == PPC_INST_MTSPR_DSCR_USER) {
                        rd = (instword >> 21) & 0x1f;
                        current->thread.dscr = regs->gpr[rd];
                        current->thread.dscr_inherit = 1;
                        current->thread.fscr |= FSCR_DSCR;
                        mtspr(SPRN_FSCR, current->thread.fscr);
                }

                /* Read from DSCR (mfspr RT, 0x03) */
                if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
                                == PPC_INST_MFSPR_DSCR_USER) {
                        if (emulate_instruction(regs)) {
                                pr_err("DSCR-based mfspr emulation failed\n");
                                return;
                        }
                        regs->nip += 4;
                        emulate_single_step(regs);
                }
                return;
        }
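        /*
         * For illustration only (not kernel code): the path above is
         * reached by plain problem-state DSCR accesses from userspace,
         * e.g. something like:
         *
         *      unsigned long dscr;
         *      asm volatile("mfspr %0,3" : "=r" (dscr)); // emulated above
         *      asm volatile("mtspr 3,%0" : : "r" (dscr)); // sets dscr_inherit
         */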

        if (status == FSCR_TM_LG) {
                /*
                 * If we're here then the hardware is TM aware because it
                 * generated an exception with FSCR_TM set.
                 *
                 * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
                 * told us not to do TM, or the kernel is not built with TM
                 * support.
                 *
                 * If both of those things are true, then userspace can spam the
                 * console by triggering the printk() below just by continually
                 * doing tbegin (or any TM instruction). So in that case just
                 * send the process a SIGILL immediately.
                 */
                if (!cpu_has_feature(CPU_FTR_TM))
                        goto out;

                tm_unavailable(regs);
                return;
        }

        pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
                hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);

out:
        _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
}
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM

DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
{
        /* Note:  This does not handle any kind of FP laziness. */

        TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
                 regs->nip, regs->msr);

        /* We can only have got here if the task started using FP after
         * beginning the transaction.  So, the transactional regs are just a
         * copy of the checkpointed ones.  But, we still need to recheckpoint
         * as we're enabling FP for the process; it will return, abort the
         * transaction, and probably retry but now with FP enabled.  So the
         * checkpointed FP registers need to be loaded.
         */
        tm_reclaim_current(TM_CAUSE_FAC_UNAV);

        /*
         * The reclaim initially saved the (bogus, lazy) FPRs out to
         * ckfp_state, which tm_reclaim_thread() then overwrote with
         * thr->fp_state.
         *
         * At this point, ck{fp,vr}_state contains the exact values we want to
         * recheckpoint.
         */

        /* Enable FP for the task: */
        current->thread.load_fp = 1;

        /*
         * Recheckpoint from the checkpointed ckpt_regs and ck{fp,vr}_state.
         */
        tm_recheckpoint(&current->thread);
}

DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
{
        /* See the comments in fp_unavailable_tm().  This function operates
         * the same way.
         */

        TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx, "
                 "MSR=%lx\n",
                 regs->nip, regs->msr);
        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
        current->thread.load_vec = 1;
        tm_recheckpoint(&current->thread);
        current->thread.used_vr = 1;
}

DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
{
        /* See the comments in fp_unavailable_tm().  This works similarly,
         * though we're loading both FP and VEC registers in here.
         *
         * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
         * regs.  Either way, set MSR_VSX.
         */

        TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx, "
                 "MSR=%lx\n",
                 regs->nip, regs->msr);

        current->thread.used_vsr = 1;

        /* This reclaims FP and/or VR regs if they're already enabled */
        tm_reclaim_current(TM_CAUSE_FAC_UNAV);

        current->thread.load_vec = 1;
        current->thread.load_fp = 1;

        tm_recheckpoint(&current->thread);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */

#ifdef CONFIG_PPC64
DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
        __this_cpu_inc(irq_stat.pmu_irqs);

        perf_irq(regs);

        return 0;
}
#endif

DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
{
        __this_cpu_inc(irq_stat.pmu_irqs);

        perf_irq(regs);
}

DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
{
        /*
         * On 64-bit, if perf interrupts hit in a local_irq_disable
         * (soft-masked) region, we consider them as NMIs. This is required to
         * prevent hash faults on user addresses when reading callchains (and
         * looks better from an irq tracing perspective).
         */
        if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
                performance_monitor_exception_nmi(regs);
        else
                performance_monitor_exception_async(regs);

        return 0;
}

#ifdef CONFIG_PPC_ADV_DEBUG_REGS
static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
{
        int changed = 0;
        /*
         * Determine the cause of the debug event, clear the
         * event flags and send a trap to the handler.
         */
        if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
                dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
                current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
#endif
                do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, 5);
                changed |= 0x01;
        } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
                dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
                do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, 6);
                changed |= 0x01;
        } else if (debug_status & DBSR_IAC1) {
                current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
                dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
                do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, 1);
                changed |= 0x01;
        } else if (debug_status & DBSR_IAC2) {
                current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
                do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, 2);
                changed |= 0x01;
        } else if (debug_status & DBSR_IAC3) {
                current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
                dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
                do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, 3);
                changed |= 0x01;
        } else if (debug_status & DBSR_IAC4) {
                current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
                do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, 4);
                changed |= 0x01;
        }
        /*
         * At the point this routine was called, the MSR(DE) was turned off.
         * Check all other debug flags and see if that bit needs to be turned
         * back on or not.
         */
        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
                               current->thread.debug.dbcr1))
                regs->msr |= MSR_DE;
        else
                /* Make sure the IDM flag is off */
                current->thread.debug.dbcr0 &= ~DBCR0_IDM;

        if (changed & 0x01)
                mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
}

DEFINE_INTERRUPT_HANDLER(DebugException)
{
        unsigned long debug_status = regs->dsisr;

        current->thread.debug.dbsr = debug_status;

        /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
         * on server, it stops on the target of the branch. In order to simulate
         * the server behaviour, we thus restart right away with a single step
         * instead of stopping here when hitting a BT.
         */
        if (debug_status & DBSR_BT) {
                regs->msr &= ~MSR_DE;

                /* Disable BT */
                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
                /* Clear the BT event */
                mtspr(SPRN_DBSR, DBSR_BT);

                /* Do the single step trick only when coming from userspace */
                if (user_mode(regs)) {
                        current->thread.debug.dbcr0 &= ~DBCR0_BT;
                        current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
                        regs->msr |= MSR_DE;
                        return;
                }

                if (kprobe_post_handler(regs))
                        return;

                if (notify_die(DIE_SSTEP, "block_step", regs, 5,
                               5, SIGTRAP) == NOTIFY_STOP) {
                        return;
                }
                if (debugger_sstep(regs))
                        return;
        } else if (debug_status & DBSR_IC) {    /* Instruction complete */
                regs->msr &= ~MSR_DE;

                /* Disable instruction completion */
                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
                /* Clear the instruction completion event */
                mtspr(SPRN_DBSR, DBSR_IC);

                if (kprobe_post_handler(regs))
                        return;

                if (notify_die(DIE_SSTEP, "single_step", regs, 5,
                               5, SIGTRAP) == NOTIFY_STOP) {
                        return;
                }

                if (debugger_sstep(regs))
                        return;

                if (user_mode(regs)) {
                        current->thread.debug.dbcr0 &= ~DBCR0_IC;
                        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
                                               current->thread.debug.dbcr1))
                                regs->msr |= MSR_DE;
                        else
                                /* Make sure the IDM bit is off */
                                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
                }

                _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
        } else {
                handle_debug(regs, debug_status);
        }
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */

#ifdef CONFIG_ALTIVEC
DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
        int err;

        if (!user_mode(regs)) {
                printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
                       " at %lx\n", regs->nip);
                die("Kernel VMX/Altivec assist exception", regs, SIGILL);
        }

        flush_altivec_to_thread(current);

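        /*
         * Flush the live VMX state to the thread_struct first; the
         * emulation below works on current->thread.vr_state rather than
         * on the live registers.
         */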
        PPC_WARN_EMULATED(altivec, regs);
        err = emulate_altivec(regs);
        if (err == 0) {
                regs->nip += 4;         /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }

        if (err == -EFAULT) {
                /* got an error reading the instruction */
                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
        } else {
                /* didn't recognize the instruction */
                /* XXX quick hack for now: set the non-Java bit in the VSCR */
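                /* 0x10000 is the NJ (non-Java mode) bit of the 32-bit VSCR. */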
                printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
                                   "in %s at %lx\n", current->comm, regs->nip);
                current->thread.vr_state.vscr.u[3] |= 0x10000;
        }
}
#endif /* CONFIG_ALTIVEC */

#ifdef CONFIG_FSL_BOOKE
DEFINE_INTERRUPT_HANDLER(CacheLockingException)
{
        unsigned long error_code = regs->dsisr;

        /* We treat cache locking instructions from the user
         * as privileged ops; in the future we could try to do
         * something smarter.
         */
        if (error_code & (ESR_DLK|ESR_ILK))
                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
        return;
}
#endif /* CONFIG_FSL_BOOKE */

#ifdef CONFIG_SPE
DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
{
        extern int do_spe_mathemu(struct pt_regs *regs);
        unsigned long spefscr;
        int fpexc_mode;
        int code = FPE_FLTUNK;
        int err;

        interrupt_cond_local_irq_enable(regs);

        flush_spe_to_thread(current);

        spefscr = current->thread.spefscr;
        fpexc_mode = current->thread.fpexc_mode;

        if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF))
                code = FPE_FLTOVF;
        else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND))
                code = FPE_FLTUND;
        else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
                code = FPE_FLTDIV;
        else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV))
                code = FPE_FLTINV;
        else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
                code = FPE_FLTRES;

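        /*
         * code is only delivered below if do_spe_mathemu() fails with a
         * genuine floating-point exception (the final else case).
         */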
        err = do_spe_mathemu(regs);
        if (err == 0) {
                regs->nip += 4;         /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }

        if (err == -EFAULT) {
                /* got an error reading the instruction */
                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
        } else if (err == -EINVAL) {
                /* didn't recognize the instruction */
                printk(KERN_ERR "unrecognized spe instruction "
                       "in %s at %lx\n", current->comm, regs->nip);
        } else {
                _exception(SIGFPE, regs, code, regs->nip);
        }
}

DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
{
        extern int speround_handler(struct pt_regs *regs);
        int err;

        interrupt_cond_local_irq_enable(regs);

        preempt_disable();
        if (regs->msr & MSR_SPE)
                giveup_spe(current);
        preempt_enable();

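        /*
         * regs->nip points past the instruction that needs rounding; back
         * up so speround_handler() can decode it, then re-advance below
         * once it has been emulated.
         */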
        regs->nip -= 4;
        err = speround_handler(regs);
        if (err == 0) {
                regs->nip += 4;         /* skip emulated instruction */
                emulate_single_step(regs);
                return;
        }

        if (err == -EFAULT) {
                /* got an error reading the instruction */
                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
        } else if (err == -EINVAL) {
                /* didn't recognize the instruction */
                printk(KERN_ERR "unrecognized spe instruction "
                       "in %s at %lx\n", current->comm, regs->nip);
        } else {
                _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
        }
}
#endif

/*
 * We enter here if we get an unrecoverable exception, that is, one
 * that happened at a point where the RI (recoverable interrupt) bit
 * in the MSR is 0.  This indicates that SRR0/1 are live, and that
 * we therefore lost state by taking this exception.
 */
void __noreturn unrecoverable_exception(struct pt_regs *regs)
{
        pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
                 regs->trap, regs->nip, regs->msr);
        die("Unrecoverable exception", regs, SIGABRT);
        /* die() should not return */
        for (;;)
                ;
}

#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
/*
 * Default handler for a Watchdog exception,
 * spins until a reboot occurs
 */
void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
{
        /* Generic WatchdogHandler, implement your own */
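        /* Clearing TCR[WIE] masks further watchdog interrupts. */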
        mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
}

DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
{
        printk(KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
        WatchdogHandler(regs);
        return 0;
}
#endif

/*
 * We enter here if we discover during exception entry that we are
 * running in supervisor mode with a userspace value in the stack pointer.
 */
DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
{
        printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
               regs->gpr[1], regs->nip);
        die("Bad kernel stack pointer", regs, SIGABRT);
}

void __init trap_init(void)
{
}

#ifdef CONFIG_PPC_EMULATED_STATS

#define WARN_EMULATED_SETUP(type)       .type = { .name = #type }

struct ppc_emulated ppc_emulated = {
#ifdef CONFIG_ALTIVEC
        WARN_EMULATED_SETUP(altivec),
#endif
        WARN_EMULATED_SETUP(dcba),
        WARN_EMULATED_SETUP(dcbz),
        WARN_EMULATED_SETUP(fp_pair),
        WARN_EMULATED_SETUP(isel),
        WARN_EMULATED_SETUP(mcrxr),
        WARN_EMULATED_SETUP(mfpvr),
        WARN_EMULATED_SETUP(multiple),
        WARN_EMULATED_SETUP(popcntb),
        WARN_EMULATED_SETUP(spe),
        WARN_EMULATED_SETUP(string),
        WARN_EMULATED_SETUP(sync),
        WARN_EMULATED_SETUP(unaligned),
#ifdef CONFIG_MATH_EMULATION
        WARN_EMULATED_SETUP(math),
#endif
#ifdef CONFIG_VSX
        WARN_EMULATED_SETUP(vsx),
#endif
#ifdef CONFIG_PPC64
        WARN_EMULATED_SETUP(mfdscr),
        WARN_EMULATED_SETUP(mtdscr),
        WARN_EMULATED_SETUP(lq_stq),
        WARN_EMULATED_SETUP(lxvw4x),
        WARN_EMULATED_SETUP(lxvh8x),
        WARN_EMULATED_SETUP(lxvd2x),
        WARN_EMULATED_SETUP(lxvb16x),
#endif
};

u32 ppc_warn_emulated;

void ppc_warn_emulated_print(const char *type)
{
        pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
                            type);
}

static int __init ppc_warn_emulated_init(void)
{
        struct dentry *dir;
        unsigned int i;
        struct ppc_emulated_entry *entries = (void *)&ppc_emulated;

        dir = debugfs_create_dir("emulated_instructions",
                                 powerpc_debugfs_root);

        debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);

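        /*
         * Each emulated-instruction counter is exposed as a writable u32
         * under debugfs .../powerpc/emulated_instructions/; writing a
         * non-zero value to do_warn enables the ratelimited warning
         * printed by ppc_warn_emulated_print() above.
         */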
        for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
                debugfs_create_u32(entries[i].name, 0644, dir,
                                   (u32 *)&entries[i].val.counter);

        return 0;
}

device_initcall(ppc_warn_emulated_init);

#endif /* CONFIG_PPC_EMULATED_STATS */
