linux/arch/powerpc/kernel/traps.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
 *  Copyright 2007-2010 Freescale Semiconductor, Inc.
 *
 *  Modified by Cort Dougan (cort@cs.nmt.edu)
 *  and Paul Mackerras (paulus@samba.org)
 */

/*
 * This file handles the architecture-dependent parts of hardware exceptions
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/extable.h>
#include <linux/module.h>       /* print_modules */
#include <linux/prctl.h>
#include <linux/delay.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/smp.h>
#include <linux/console.h>
#include <linux/kmsg_dump.h>

#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/pmc.h>
#include <asm/reg.h>
#ifdef CONFIG_PMAC_BACKLIGHT
#include <asm/backlight.h>
#endif
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
#include <asm/tm.h>
#endif
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/rio.h>
#include <asm/fadump.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/debug.h>
#include <asm/asm-prototypes.h>
#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
#include <asm/kprobes.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;

EXPORT_SYMBOL(__debugger);
EXPORT_SYMBOL(__debugger_ipi);
EXPORT_SYMBOL(__debugger_bpt);
EXPORT_SYMBOL(__debugger_sstep);
EXPORT_SYMBOL(__debugger_iabr_match);
EXPORT_SYMBOL(__debugger_break_match);
EXPORT_SYMBOL(__debugger_fault_handler);
#endif

/* Transactional Memory trap debug */
#ifdef TM_DEBUG_SW
#define TM_DEBUG(x...) printk(KERN_INFO x)
#else
#define TM_DEBUG(x...) do { } while (0)
#endif

static const char *signame(int signr)
{
        switch (signr) {
        case SIGBUS:    return "bus error";
        case SIGFPE:    return "floating point exception";
        case SIGILL:    return "illegal instruction";
        case SIGSEGV:   return "segfault";
        case SIGTRAP:   return "unhandled trap";
        }

        return "unknown signal";
}

/*
 * Trap & Exception support
 */

#ifdef CONFIG_PMAC_BACKLIGHT
static void pmac_backlight_unblank(void)
{
        mutex_lock(&pmac_backlight_mutex);
        if (pmac_backlight) {
                struct backlight_properties *props;

                props = &pmac_backlight->props;
                props->brightness = props->max_brightness;
                props->power = FB_BLANK_UNBLANK;
                backlight_update_status(pmac_backlight);
        }
        mutex_unlock(&pmac_backlight_mutex);
}
#else
static inline void pmac_backlight_unblank(void) { }
#endif

/*
 * If oops/die is expected to crash the machine, return true here.
 *
 * This should not be expected to be 100% accurate, there may be
 * notifiers registered or other unexpected conditions that may bring
 * down the kernel. Or if the current process in the kernel is holding
 * locks or has other critical state, the kernel may become effectively
 * unusable anyway.
 */
bool die_will_crash(void)
{
        if (should_fadump_crash())
                return true;
        if (kexec_should_crash(current))
                return true;
        if (in_interrupt() || panic_on_oops ||
                        !current->pid || is_global_init(current))
                return true;

        return false;
}
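
/*
 * A minimal usage sketch (hypothetical caller, not from this file;
 * print_extra_diagnostics() is a made-up helper):
 *
 *      if (!die_will_crash())
 *              print_extra_diagnostics();
 */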

static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;

extern void panic_flush_kmsg_start(void)
{
        /*
         * These are mostly taken from kernel/panic.c, but try to do
         * relatively minimal work. Don't use delay functions (TB may
         * be broken), don't crash dump (need to set a firmware log),
         * don't run notifiers. We do want to get some information to
         * Linux console.
         */
        console_verbose();
        bust_spinlocks(1);
}

extern void panic_flush_kmsg_end(void)
{
        printk_safe_flush_on_panic();
        kmsg_dump(KMSG_DUMP_PANIC);
        bust_spinlocks(0);
        debug_locks_off();
        console_flush_on_panic(CONSOLE_FLUSH_PENDING);
}
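
/*
 * Sketch of the assumed pairing (callers live outside this file):
 * platform crash paths bracket their final register/log output with
 * these helpers, roughly:
 *
 *      panic_flush_kmsg_start();
 *      ... print regs and crash reason ...
 *      panic_flush_kmsg_end();
 */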

static unsigned long oops_begin(struct pt_regs *regs)
{
        int cpu;
        unsigned long flags;

        oops_enter();

        /* racy, but better than risking deadlock. */
        raw_local_irq_save(flags);
        cpu = smp_processor_id();
        if (!arch_spin_trylock(&die_lock)) {
                if (cpu == die_owner)
                        /* nested oops. should stop eventually */;
                else
                        arch_spin_lock(&die_lock);
        }
        die_nest_count++;
        die_owner = cpu;
        console_verbose();
        bust_spinlocks(1);
        if (machine_is(powermac))
                pmac_backlight_unblank();
        return flags;
}
NOKPROBE_SYMBOL(oops_begin);

static void oops_end(unsigned long flags, struct pt_regs *regs,
                               int signr)
{
        bust_spinlocks(0);
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
        die_nest_count--;
        oops_exit();
        printk("\n");
        if (!die_nest_count) {
                /* Nest count reaches zero, release the lock. */
                die_owner = -1;
                arch_spin_unlock(&die_lock);
        }
        raw_local_irq_restore(flags);

        /*
         * system_reset_exception handles debugger, crash dump, panic, for 0x100
         */
        if (TRAP(regs) == 0x100)
                return;

        crash_fadump(regs, "die oops");

        if (kexec_should_crash(current))
                crash_kexec(regs);

        if (!signr)
                return;

        /*
         * While our oops output is serialised by a spinlock, output
         * from panic() called below can race and corrupt it. If we
         * know we are going to panic, delay for 1 second so we have a
         * chance to get clean backtraces from all CPUs that are oopsing.
         */
        if (in_interrupt() || panic_on_oops || !current->pid ||
            is_global_init(current)) {
                mdelay(MSEC_PER_SEC);
        }

        if (panic_on_oops)
                panic("Fatal exception");
        do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);
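
/*
 * oops_begin()/oops_end() bracket the oops report; die() below is the
 * canonical user:
 *
 *      flags = oops_begin(regs);
 *      ... print the oops ...
 *      oops_end(flags, regs, signr);
 */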

static char *get_mmu_str(void)
{
        if (early_radix_enabled())
                return " MMU=Radix";
        if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
                return " MMU=Hash";
        return "";
}

static int __die(const char *str, struct pt_regs *regs, long err)
{
        printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);

        printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
               IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
               PAGE_SIZE / 1024, get_mmu_str(),
               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
               IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
               IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
               debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
               IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
               ppc_md.name ? ppc_md.name : "");

        if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
                return 1;

        print_modules();
        show_regs(regs);

        return 0;
}
NOKPROBE_SYMBOL(__die);

void die(const char *str, struct pt_regs *regs, long err)
{
        unsigned long flags;

        /*
         * system_reset_exception handles debugger, crash dump, panic, for 0x100
         */
        if (TRAP(regs) != 0x100) {
                if (debugger(regs))
                        return;
        }

        flags = oops_begin(regs);
        if (__die(str, regs, err))
                err = 0;
        oops_end(flags, regs, err);
}
NOKPROBE_SYMBOL(die);

void user_single_step_report(struct pt_regs *regs)
{
        force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}

static void show_signal_msg(int signr, struct pt_regs *regs, int code,
                            unsigned long addr)
{
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        if (!show_unhandled_signals)
                return;

        if (!unhandled_signal(current, signr))
                return;

        if (!__ratelimit(&rs))
                return;

        pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
                current->comm, current->pid, signame(signr), signr,
                addr, regs->nip, regs->link, code);

        print_vma_addr(KERN_CONT " in ", regs->nip);

        pr_cont("\n");

        show_user_instructions(regs);
}

static bool exception_common(int signr, struct pt_regs *regs, int code,
                              unsigned long addr)
{
        if (!user_mode(regs)) {
                die("Exception in kernel mode", regs, signr);
                return false;
        }

        show_signal_msg(signr, regs, code, addr);

        if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
                local_irq_enable();

        current->thread.trap_nr = code;

        /*
         * Save all the pkey registers AMR/IAMR/UAMOR. E.g. core dumps need
         * to capture the content if the task gets killed.
         */
        thread_pkey_regs_save(&current->thread);

        return true;
}

void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
{
        if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
                return;

        force_sig_pkuerr((void __user *) addr, key);
}

void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
{
        if (!exception_common(signr, regs, code, addr))
                return;

        force_sig_fault(signr, code, (void __user *)addr);
}
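
/*
 * Typical call sites (all seen later in this file), reporting a fault
 * against the current task:
 *
 *      _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 *      _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
 */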

/*
 * The interrupt architecture has a quirk in that the HV interrupts excluding
 * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
 * that an interrupt handler must do is save off a GPR into a scratch register,
 * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
 * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
 * that it is non-reentrant, which leads to random data corruption.
 *
 * The solution is for NMI interrupts in HV mode to check if they originated
 * from these critical HV interrupt regions. If so, then mark them not
 * recoverable.
 *
 * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
 * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
 * guests should always have MSR[RI]=0 when their scratch SPRG is in use, so
 * that would work. However any other guest OS that may have the SPRG live
 * and MSR[RI]=1 could encounter silent corruption.
 *
 * Builds that do not support KVM could take this second option to increase
 * the recoverability of NMIs.
 */
void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_POWERNV
        unsigned long kbase = (unsigned long)_stext;
        unsigned long nip = regs->nip;

        if (!(regs->msr & MSR_RI))
                return;
        if (!(regs->msr & MSR_HV))
                return;
        if (regs->msr & MSR_PR)
                return;

        /*
         * Now test if the interrupt has hit a range that may be using
         * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
         * problem ranges all run un-relocated. Test real and virt modes
         * at the same time by dropping the high bit of the nip (virt mode
         * entry points still have the +0x4000 offset).
         */
        nip &= ~0xc000000000000000ULL;
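
        /*
         * For reference (vector layout as in exceptions-64s.S, listed
         * here as a reading aid): 0x500 is the HV external interrupt,
         * 0x980 the hypervisor decrementer, 0xe00-0xec0 the block of
         * HV storage/doorbell/HMI entries, and 0xf80 the HV facility
         * unavailable interrupt; the 0x4xxx forms are the same entry
         * points with the virt mode +0x4000 offset.
         */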
        if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
                goto nonrecoverable;
        if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
                goto nonrecoverable;
        if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
                goto nonrecoverable;
        if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
                goto nonrecoverable;

        /* Trampoline code runs un-relocated so subtract kbase. */
        if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
                        nip < (unsigned long)(end_real_trampolines - kbase))
                goto nonrecoverable;
        if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
                        nip < (unsigned long)(end_virt_trampolines - kbase))
                goto nonrecoverable;
        return;

nonrecoverable:
        regs->msr &= ~MSR_RI;
#endif
}

void system_reset_exception(struct pt_regs *regs)
{
        unsigned long hsrr0, hsrr1;
        bool nested = in_nmi();
        bool saved_hsrrs = false;

        /*
         * Avoid crashes in case of nested NMI exceptions. Recoverability
         * is determined by RI and in_nmi.
         */
        if (!nested)
                nmi_enter();

        /*
         * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
         * The system reset interrupt itself may clobber HSRRs (e.g., to call
         * OPAL), so save them here and restore them before returning.
         *
         * Machine checks don't need to save HSRRs, as the real mode handler
         * is careful to avoid them, and the regular handler is not delivered
         * as an NMI.
         */
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                hsrr0 = mfspr(SPRN_HSRR0);
                hsrr1 = mfspr(SPRN_HSRR1);
                saved_hsrrs = true;
        }

        hv_nmi_check_nonrecoverable(regs);

        __this_cpu_inc(irq_stat.sreset_irqs);

        /* See if any machine-dependent handler wants it first */
        if (ppc_md.system_reset_exception) {
                if (ppc_md.system_reset_exception(regs))
                        goto out;
        }

        if (debugger(regs))
                goto out;

        kmsg_dump(KMSG_DUMP_OOPS);
        /*
         * A system reset is a request to dump, so we always send
         * it through the crashdump code (if fadump or kdump are
         * registered).
         */
        crash_fadump(regs, "System Reset");

        crash_kexec(regs);

        /*
         * We aren't the primary crash CPU. We need to send it
         * to a holding pattern to avoid it ending up in the panic
         * code.
         */
        crash_kexec_secondary(regs);

        /*
         * No debugger or crash dump registered, print logs then
         * panic.
         */
        die("System Reset", regs, SIGABRT);

        mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
        nmi_panic(regs, "System Reset");

out:
#ifdef CONFIG_PPC_BOOK3S_64
        BUG_ON(get_paca()->in_nmi == 0);
        if (get_paca()->in_nmi > 1)
                nmi_panic(regs, "Unrecoverable nested System Reset");
#endif
        /* Must die if the interrupt is not recoverable */
        if (!(regs->msr & MSR_RI))
                nmi_panic(regs, "Unrecoverable System Reset");

        if (saved_hsrrs) {
                mtspr(SPRN_HSRR0, hsrr0);
                mtspr(SPRN_HSRR1, hsrr1);
        }

        if (!nested)
                nmi_exit();

        /* What should we do here? We could issue a shutdown or hard reset. */
}

/*
 * I/O accesses can cause machine checks on powermacs.
 * Check if the NIP corresponds to the address of a sync
 * instruction for which there is an entry in the exception
 * table.
 * Note that the 601 only takes a machine check on TEA
 * (transfer error ack) signal assertion, and does not
 * set any of the top 16 bits of SRR1.
 *  -- paulus.
 */
static inline int check_io_access(struct pt_regs *regs)
{
#ifdef CONFIG_PPC32
        unsigned long msr = regs->msr;
        const struct exception_table_entry *entry;
        unsigned int *nip = (unsigned int *)regs->nip;

        if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
            && (entry = search_exception_tables(regs->nip)) != NULL) {
                /*
                 * Check that it's a sync instruction, or somewhere
                 * in the twi; isync; nop sequence that inb/inw/inl uses.
                 * As the address is in the exception table
                 * we should be able to read the instr there.
                 * For the debug message, we look at the preceding
                 * load or store.
                 */
                if (*nip == PPC_INST_NOP)
                        nip -= 2;
                else if (*nip == PPC_INST_ISYNC)
                        --nip;
                if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
                        unsigned int rb;

                        --nip;
                        rb = (*nip >> 11) & 0x1f;
                        printk(KERN_DEBUG "%s bad port %lx at %p\n",
                               (*nip & 0x100) ? "OUT to" : "IN from",
                               regs->gpr[rb] - _IO_BASE, nip);
                        regs->msr |= MSR_RI;
                        regs->nip = extable_fixup(entry);
                        return 1;
                }
        }
#endif /* CONFIG_PPC32 */
        return 0;
}

#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* On 4xx, the reason for the machine check or program exception
   is in the ESR. */
#define get_reason(regs)        ((regs)->dsisr)
#define REASON_FP               ESR_FP
#define REASON_ILLEGAL          (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED       ESR_PPR
#define REASON_TRAP             ESR_PTR

/* single-step stuff */
#define single_stepping(regs)   (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
#define clear_br_trace(regs)    do {} while (0)
#else
/* On non-4xx, the reason for the machine check or program
   exception is in the MSR. */
#define get_reason(regs)        ((regs)->msr)
#define REASON_TM               SRR1_PROGTM
#define REASON_FP               SRR1_PROGFPE
#define REASON_ILLEGAL          SRR1_PROGILL
#define REASON_PRIVILEGED       SRR1_PROGPRIV
#define REASON_TRAP             SRR1_PROGTRAP

#define single_stepping(regs)   ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
#define clear_br_trace(regs)    ((regs)->msr &= ~MSR_BE)
#endif
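
/*
 * For reference: with these macros a privileged-instruction program
 * check shows up as REASON_PRIVILEGED whether the reason bits live in
 * the ESR (4xx) or the MSR/SRR1 image, so program_check_exception()
 * below can simply test "reason & REASON_PRIVILEGED" on either family.
 */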

#if defined(CONFIG_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
        unsigned long mcsr = mfspr(SPRN_MCSR);
        unsigned long pvr = mfspr(SPRN_PVR);
        unsigned long reason = mcsr;
        int recoverable = 1;

        if (reason & MCSR_LD) {
                recoverable = fsl_rio_mcheck_exception(regs);
                if (recoverable == 1)
                        goto silent_out;
        }

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");

        if (reason & MCSR_ICPERR) {
                pr_cont("Instruction Cache Parity Error\n");

                /*
                 * This is recoverable by invalidating the i-cache.
                 */
                mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
                while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
                        ;

                /*
                 * This will generally be accompanied by an instruction
                 * fetch error report -- only treat MCSR_IF as fatal
                 * if it wasn't due to an L1 parity error.
                 */
                reason &= ~MCSR_IF;
        }

        if (reason & MCSR_DCPERR_MC) {
                pr_cont("Data Cache Parity Error\n");

                /*
                 * In write shadow mode we auto-recover from the error, but it
                 * may still get logged and cause a machine check.  We should
                 * only treat the non-write shadow case as non-recoverable.
                 */
                /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
                 * is not implemented but L1 data cache always runs in write
                 * shadow mode. Hence on data cache parity errors HW will
                 * automatically invalidate the L1 Data Cache.
                 */
                if (PVR_VER(pvr) != PVR_VER_E6500) {
                        if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
                                recoverable = 0;
                }
        }

        if (reason & MCSR_L2MMU_MHIT) {
                pr_cont("Hit on multiple TLB entries\n");
                recoverable = 0;
        }

        if (reason & MCSR_NMI)
                pr_cont("Non-maskable interrupt\n");

        if (reason & MCSR_IF) {
                pr_cont("Instruction Fetch Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_LD) {
                pr_cont("Load Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_ST) {
                pr_cont("Store Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_LDG) {
                pr_cont("Guarded Load Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_TLBSYNC)
                pr_cont("Simultaneous tlbsync operations\n");

        if (reason & MCSR_BSL2_ERR) {
                pr_cont("Level 2 Cache Error\n");
                recoverable = 0;
        }

        if (reason & MCSR_MAV) {
                u64 addr;

                addr = mfspr(SPRN_MCAR);
                addr |= (u64)mfspr(SPRN_MCARU) << 32;

                pr_cont("Machine Check %s Address: %#llx\n",
                       reason & MCSR_MEA ? "Effective" : "Physical", addr);
        }

silent_out:
        mtspr(SPRN_MCSR, mcsr);
        return mfspr(SPRN_MCSR) == 0 && recoverable;
}

int machine_check_e500(struct pt_regs *regs)
{
        unsigned long reason = mfspr(SPRN_MCSR);

        if (reason & MCSR_BUS_RBERR) {
                if (fsl_rio_mcheck_exception(regs))
                        return 1;
                if (fsl_pci_mcheck_exception(regs))
                        return 1;
        }

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");
        if (reason & MCSR_ICPERR)
                pr_cont("Instruction Cache Parity Error\n");
        if (reason & MCSR_DCP_PERR)
                pr_cont("Data Cache Push Parity Error\n");
        if (reason & MCSR_DCPERR)
                pr_cont("Data Cache Parity Error\n");
        if (reason & MCSR_BUS_IAERR)
                pr_cont("Bus - Instruction Address Error\n");
        if (reason & MCSR_BUS_RAERR)
                pr_cont("Bus - Read Address Error\n");
        if (reason & MCSR_BUS_WAERR)
                pr_cont("Bus - Write Address Error\n");
        if (reason & MCSR_BUS_IBERR)
                pr_cont("Bus - Instruction Data Error\n");
        if (reason & MCSR_BUS_RBERR)
                pr_cont("Bus - Read Data Bus Error\n");
        if (reason & MCSR_BUS_WBERR)
                pr_cont("Bus - Write Data Bus Error\n");
        if (reason & MCSR_BUS_IPERR)
                pr_cont("Bus - Instruction Parity Error\n");
        if (reason & MCSR_BUS_RPERR)
                pr_cont("Bus - Read Parity Error\n");

        return 0;
}

int machine_check_generic(struct pt_regs *regs)
{
        return 0;
}
#elif defined(CONFIG_E200)
int machine_check_e200(struct pt_regs *regs)
{
        unsigned long reason = mfspr(SPRN_MCSR);

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");
        if (reason & MCSR_CP_PERR)
                pr_cont("Cache Push Parity Error\n");
        if (reason & MCSR_CPERR)
                pr_cont("Cache Parity Error\n");
        if (reason & MCSR_EXCP_ERR)
                pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
        if (reason & MCSR_BUS_IRERR)
                pr_cont("Bus - Read Bus Error on instruction fetch\n");
        if (reason & MCSR_BUS_DRERR)
                pr_cont("Bus - Read Bus Error on data load\n");
        if (reason & MCSR_BUS_WRERR)
                pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");

        return 0;
}
#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
        unsigned long reason = regs->msr;

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from SRR1=%lx): ", reason);
        switch (reason & 0x601F0000) {
        case 0x80000:
                pr_cont("Machine check signal\n");
                break;
        case 0:         /* for 601 */
        case 0x40000:
        case 0x140000:  /* 7450 MSS error and TEA */
                pr_cont("Transfer error ack signal\n");
                break;
        case 0x20000:
                pr_cont("Data parity error signal\n");
                break;
        case 0x10000:
                pr_cont("Address parity error signal\n");
                break;
        case 0x20000000:
                pr_cont("L1 Data Cache error\n");
                break;
        case 0x40000000:
                pr_cont("L1 Instruction Cache error\n");
                break;
        case 0x00100000:
                pr_cont("L2 data cache parity error\n");
                break;
        default:
                pr_cont("Unknown values in msr\n");
        }
        return 0;
}
#endif /* everything else */

void machine_check_exception(struct pt_regs *regs)
{
        int recover = 0;
        bool nested = in_nmi();

        if (!nested)
                nmi_enter();

        __this_cpu_inc(irq_stat.mce_exceptions);

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /* See if any machine-dependent calls. In theory, we would want
         * to call the CPU first, and call the ppc_md. one if the CPU
         * one returns a positive number. However there is existing code
         * that assumes the board gets a first chance, so let's keep it
         * that way for now and fix things later. --BenH.
         */
        if (ppc_md.machine_check_exception)
                recover = ppc_md.machine_check_exception(regs);
        else if (cur_cpu_spec->machine_check)
                recover = cur_cpu_spec->machine_check(regs);

        if (recover > 0)
                goto bail;

        if (debugger_fault_handler(regs))
                goto bail;

        if (check_io_access(regs))
                goto bail;

        if (!nested)
                nmi_exit();

        die("Machine check", regs, SIGBUS);

        /* Must die if the interrupt is not recoverable */
        if (!(regs->msr & MSR_RI))
                nmi_panic(regs, "Unrecoverable Machine check");

        return;

bail:
        if (!nested)
                nmi_exit();
}

void SMIException(struct pt_regs *regs)
{
        die("System Management Interrupt", regs, SIGABRT);
}

#ifdef CONFIG_VSX
static void p9_hmi_special_emu(struct pt_regs *regs)
{
        unsigned int ra, rb, t, i, sel, instr, rc;
        const void __user *addr;
        u8 vbuf[16], *vdst;
        unsigned long ea, msr, msr_mask;
        bool swap;

        if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
                return;

        /*
         * lxvb16x      opcode: 0x7c0006d8
         * lxvd2x       opcode: 0x7c000698
         * lxvh8x       opcode: 0x7c000658
         * lxvw4x       opcode: 0x7c000618
         */
        if ((instr & 0xfc00073e) != 0x7c000618) {
                pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
                         " instr=%08x\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr);
                return;
        }

        /* Grab vector registers into the task struct */
        msr = regs->msr; /* Grab msr before we flush the bits */
        flush_vsx_to_thread(current);
        enable_kernel_altivec();

        /*
         * Is userspace running with a different endian (this is rare but
         * not impossible)?
         */
        swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);

        /* Decode the instruction */
        ra = (instr >> 16) & 0x1f;
        rb = (instr >> 11) & 0x1f;
        t = (instr >> 21) & 0x1f;
        if (instr & 1)
                vdst = (u8 *)&current->thread.vr_state.vr[t];
        else
                vdst = (u8 *)&current->thread.fp_state.fpr[t][0];

        /* Grab the vector address */
        ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
        if (is_32bit_task())
                ea &= 0xfffffffful;
        addr = (__force const void __user *)ea;

        /* Check it */
        if (!access_ok(addr, 16)) {
                pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
                         " instr=%08x addr=%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, (unsigned long)addr);
                return;
        }

        /* Read the vector */
        rc = 0;
        if ((unsigned long)addr & 0xfUL)
                /* unaligned case */
                rc = __copy_from_user_inatomic(vbuf, addr, 16);
        else
                __get_user_atomic_128_aligned(vbuf, addr, rc);
        if (rc) {
                pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
                         " instr=%08x addr=%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, (unsigned long)addr);
                return;
        }

        pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
                 " instr=%08x addr=%016lx\n",
                 smp_processor_id(), current->comm, current->pid, regs->nip,
                 instr, (unsigned long) addr);

        /* Grab instruction "selector" */
        sel = (instr >> 6) & 3;

        /*
         * Check to make sure the facility is actually enabled. This
         * could happen if we get a false positive hit.
         *
         * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
         * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
         */
        msr_mask = MSR_VSX;
        if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
                msr_mask = MSR_VEC;
        if (!(msr & msr_mask)) {
                pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
                         " instr=%08x msr:%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, msr);
                return;
        }

        /* Do logging here before we modify sel based on endian */
        switch (sel) {
        case 0: /* lxvw4x */
                PPC_WARN_EMULATED(lxvw4x, regs);
                break;
        case 1: /* lxvh8x */
                PPC_WARN_EMULATED(lxvh8x, regs);
                break;
        case 2: /* lxvd2x */
                PPC_WARN_EMULATED(lxvd2x, regs);
                break;
        case 3: /* lxvb16x */
                PPC_WARN_EMULATED(lxvb16x, regs);
                break;
        }

#ifdef __LITTLE_ENDIAN__
        /*
         * An LE kernel stores the vector in the task struct as an LE
         * byte array (effectively swapping both the components and
         * the content of the components). Those instructions expect
         * the components to remain in ascending address order, so we
         * swap them back.
         *
         * If we are running a BE user space, the expectation is that
         * of a simple memcpy, so forcing the emulation to look like
         * a lxvb16x should do the trick.
         */
        if (swap)
                sel = 3;

        switch (sel) {
        case 0: /* lxvw4x */
                for (i = 0; i < 4; i++)
                        ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
                break;
        case 1: /* lxvh8x */
                for (i = 0; i < 8; i++)
                        ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
                break;
        case 2: /* lxvd2x */
                for (i = 0; i < 2; i++)
                        ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
                break;
        case 3: /* lxvb16x */
                for (i = 0; i < 16; i++)
                        vdst[i] = vbuf[15-i];
                break;
        }
#else /* __LITTLE_ENDIAN__ */
        /* On a big endian kernel, a BE userspace only needs a memcpy */
        if (!swap)
                sel = 3;

        /* Otherwise, we need to swap the content of the components */
        switch (sel) {
        case 0: /* lxvw4x */
                for (i = 0; i < 4; i++)
                        ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
                break;
        case 1: /* lxvh8x */
                for (i = 0; i < 8; i++)
                        ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
                break;
        case 2: /* lxvd2x */
                for (i = 0; i < 2; i++)
                        ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
                break;
        case 3: /* lxvb16x */
                memcpy(vdst, vbuf, 16);
                break;
        }
#endif /* !__LITTLE_ENDIAN__ */

        /* Go to next instruction */
        regs->nip += 4;
}
#endif /* CONFIG_VSX */

void handle_hmi_exception(struct pt_regs *regs)
{
        struct pt_regs *old_regs;

        old_regs = set_irq_regs(regs);
        irq_enter();

#ifdef CONFIG_VSX
        /* Real mode flagged P9 special emu is needed */
        if (local_paca->hmi_p9_special_emu) {
                local_paca->hmi_p9_special_emu = 0;

                /*
                 * We don't want to take page faults while doing the
                 * emulation, we just replay the instruction if necessary.
                 */
                pagefault_disable();
                p9_hmi_special_emu(regs);
                pagefault_enable();
        }
#endif /* CONFIG_VSX */

        if (ppc_md.handle_hmi_exception)
                ppc_md.handle_hmi_exception(regs);

        irq_exit();
        set_irq_regs(old_regs);
}

void unknown_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
               regs->nip, regs->msr, regs->trap);

        _exception(SIGTRAP, regs, TRAP_UNK, 0);

        exception_exit(prev_state);
}

void instruction_breakpoint_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
                                        5, SIGTRAP) == NOTIFY_STOP)
                goto bail;
        if (debugger_iabr_match(regs))
                goto bail;
        _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);

bail:
        exception_exit(prev_state);
}

void RunModeException(struct pt_regs *regs)
{
        _exception(SIGTRAP, regs, TRAP_UNK, 0);
}

void single_step_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        clear_single_step(regs);
        clear_br_trace(regs);

        if (kprobe_post_handler(regs))
                return;

        if (notify_die(DIE_SSTEP, "single_step", regs, 5,
                                        5, SIGTRAP) == NOTIFY_STOP)
                goto bail;
        if (debugger_sstep(regs))
                goto bail;

        _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);

bail:
        exception_exit(prev_state);
}
NOKPROBE_SYMBOL(single_step_exception);

/*
 * After we have successfully emulated an instruction, we have to
 * check if the instruction was being single-stepped, and if so,
 * pretend we got a single-step exception.  This was pointed out
 * by Kumar Gala.  -- paulus
 */
static void emulate_single_step(struct pt_regs *regs)
{
        if (single_stepping(regs))
                single_step_exception(regs);
}

static inline int __parse_fpscr(unsigned long fpscr)
{
        int ret = FPE_FLTUNK;

        /* Invalid operation */
        if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
                ret = FPE_FLTINV;

        /* Overflow */
        else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
                ret = FPE_FLTOVF;

        /* Underflow */
        else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
                ret = FPE_FLTUND;

        /* Divide by zero */
        else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
                ret = FPE_FLTDIV;

        /* Inexact result */
        else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
                ret = FPE_FLTRES;

        return ret;
}

static void parse_fpe(struct pt_regs *regs)
{
        int code = 0;

        flush_fp_to_thread(current);

        code = __parse_fpscr(current->thread.fp_state.fpscr);

        _exception(SIGFPE, regs, code, regs->nip);
}

/*
 * Illegal instruction emulation support.  Originally written to
 * provide the PVR to user applications using the mfspr rd, PVR.
 * Return non-zero if we can't emulate, or -EFAULT if the associated
 * memory access caused an access fault.  Return zero on success.
 *
 * There are a couple of ways to do this, either "decode" the instruction
 * or directly match lots of bits.  In this case, matching lots of
 * bits is faster and easier.
 */
static int emulate_string_inst(struct pt_regs *regs, u32 instword)
{
        u8 rT = (instword >> 21) & 0x1f;
        u8 rA = (instword >> 16) & 0x1f;
        u8 NB_RB = (instword >> 11) & 0x1f;
        u32 num_bytes;
        unsigned long EA;
        int pos = 0;

        /* Early out if we are an invalid form of lswx */
        if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
                if ((rT == rA) || (rT == NB_RB))
                        return -EINVAL;

        EA = (rA == 0) ? 0 : regs->gpr[rA];

        switch (instword & PPC_INST_STRING_MASK) {
                case PPC_INST_LSWX:
                case PPC_INST_STSWX:
                        EA += NB_RB;
                        num_bytes = regs->xer & 0x7f;
                        break;
                case PPC_INST_LSWI:
                case PPC_INST_STSWI:
                        num_bytes = (NB_RB == 0) ? 32 : NB_RB;
                        break;
                default:
                        return -EINVAL;
        }

        while (num_bytes != 0) {
                u8 val;
                u32 shift = 8 * (3 - (pos & 0x3));

                /* if process is 32-bit, clear upper 32 bits of EA */
                if ((regs->msr & MSR_64BIT) == 0)
                        EA &= 0xFFFFFFFF;

                switch ((instword & PPC_INST_STRING_MASK)) {
                        case PPC_INST_LSWX:
                        case PPC_INST_LSWI:
                                if (get_user(val, (u8 __user *)EA))
                                        return -EFAULT;
                                /* first time updating this reg,
                                 * zero it out */
                                if (pos == 0)
                                        regs->gpr[rT] = 0;
                                regs->gpr[rT] |= val << shift;
                                break;
                        case PPC_INST_STSWI:
                        case PPC_INST_STSWX:
                                val = regs->gpr[rT] >> shift;
                                if (put_user(val, (u8 __user *)EA))
                                        return -EFAULT;
                                break;
                }
                /* move EA to next address */
                EA += 1;
                num_bytes--;

                /* manage our position within the register */
                if (++pos == 4) {
                        pos = 0;
                        if (++rT == 32)
                                rT = 0;
                }
        }

        return 0;
}
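
/*
 * Worked example (illustrative values): "lswi r5,r4,6" with EA = r4
 * copies bytes b0..b5 as
 *
 *      r5 = b0<<24 | b1<<16 | b2<<8 | b3
 *      r6 = b4<<24 | b5<<16
 *
 * i.e. each register fills from the most significant byte of its low
 * word down, and a partially filled register is zero-padded, which is
 * what the pos/shift bookkeeping above implements.
 */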

static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
{
        u32 ra, rs;
        unsigned long tmp;

        ra = (instword >> 16) & 0x1f;
        rs = (instword >> 21) & 0x1f;

        tmp = regs->gpr[rs];
        tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
        tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
        tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        regs->gpr[ra] = tmp;

        return 0;
}
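
/*
 * The three masked additions above are the classic SWAR bit count:
 * 2-bit pair sums, then 4-bit nibble sums, then a masked per-byte sum.
 * Stopping at the byte stage gives popcntb semantics: each byte of RA
 * holds the population count of the corresponding byte of RS, e.g. the
 * byte 0xb4 (0b10110100, four bits set) yields the result byte 0x04.
 */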

static int emulate_isel(struct pt_regs *regs, u32 instword)
{
        u8 rT = (instword >> 21) & 0x1f;
        u8 rA = (instword >> 16) & 0x1f;
        u8 rB = (instword >> 11) & 0x1f;
        u8 BC = (instword >> 6) & 0x1f;
        u8 bit;
        unsigned long tmp;

        tmp = (rA == 0) ? 0 : regs->gpr[rA];
        bit = (regs->ccr >> (31 - BC)) & 0x1;

        regs->gpr[rT] = bit ? tmp : regs->gpr[rB];

        return 0;
}
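
/*
 * isel rT,rA,rB,BC selects on CR bit BC: rT = (rA|0) if the bit is
 * set, else rB. For example, "isel r3,r4,r5,2" tests CR0[EQ] (CR bit
 * 2), so after "cmpwi r6,0" it yields r3 = (r6 == 0) ? r4 : r5.
 */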
1306
1307#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1308static inline bool tm_abort_check(struct pt_regs *regs, int cause)
1309{
1310        /* If we're emulating a load/store in an active transaction, we cannot
1311         * emulate it as the kernel operates in transaction suspended context.
1312         * We need to abort the transaction.  This creates a persistent TM
1313         * abort so tell the user what caused it with a new code.
1314         */
1315        if (MSR_TM_TRANSACTIONAL(regs->msr)) {
1316                tm_enable();
1317                tm_abort(cause);
1318                return true;
1319        }
1320        return false;
1321}
1322#else
1323static inline bool tm_abort_check(struct pt_regs *regs, int reason)
1324{
1325        return false;
1326}
1327#endif
1328
1329static int emulate_instruction(struct pt_regs *regs)
1330{
1331        u32 instword;
1332        u32 rd;
1333
1334        if (!user_mode(regs))
1335                return -EINVAL;
1336        CHECK_FULL_REGS(regs);
1337
1338        if (get_user(instword, (u32 __user *)(regs->nip)))
1339                return -EFAULT;
1340
1341        /* Emulate the mfspr rD, PVR. */
1342        if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
1343                PPC_WARN_EMULATED(mfpvr, regs);
1344                rd = (instword >> 21) & 0x1f;
1345                regs->gpr[rd] = mfspr(SPRN_PVR);
1346                return 0;
1347        }
1348
1349        /* Emulating the dcba insn is just a no-op.  */
1350        if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
1351                PPC_WARN_EMULATED(dcba, regs);
1352                return 0;
1353        }
1354
1355        /* Emulate the mcrxr insn.  */
1356        if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
1357                int shift = (instword >> 21) & 0x1c;
1358                unsigned long msk = 0xf0000000UL >> shift;
1359
1360                PPC_WARN_EMULATED(mcrxr, regs);
1361                regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
1362                regs->xer &= ~0xf0000000UL;
1363                return 0;
1364        }
1365
1366        /* Emulate load/store string insn. */
1367        if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
1368                if (tm_abort_check(regs,
1369                                   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
1370                        return -EINVAL;
1371                PPC_WARN_EMULATED(string, regs);
1372                return emulate_string_inst(regs, instword);
1373        }
1374
1375        /* Emulate the popcntb (Population Count Bytes) instruction. */
1376        if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
1377                PPC_WARN_EMULATED(popcntb, regs);
1378                return emulate_popcntb_inst(regs, instword);
1379        }
1380
1381        /* Emulate isel (Integer Select) instruction */
1382        if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
1383                PPC_WARN_EMULATED(isel, regs);
1384                return emulate_isel(regs, instword);
1385        }
1386
1387        /* Emulate sync instruction variants */
1388        if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
1389                PPC_WARN_EMULATED(sync, regs);
1390                asm volatile("sync");
1391                return 0;
1392        }
1393
1394#ifdef CONFIG_PPC64
1395        /* Emulate the mfspr rD, DSCR. */
1396        if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
1397                PPC_INST_MFSPR_DSCR_USER) ||
1398             ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
1399                PPC_INST_MFSPR_DSCR)) &&
1400                        cpu_has_feature(CPU_FTR_DSCR)) {
1401                PPC_WARN_EMULATED(mfdscr, regs);
1402                rd = (instword >> 21) & 0x1f;
1403                regs->gpr[rd] = mfspr(SPRN_DSCR);
1404                return 0;
1405        }
1406        /* Emulate the mtspr DSCR, rD. */
1407        if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
1408                PPC_INST_MTSPR_DSCR_USER) ||
1409             ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
1410                PPC_INST_MTSPR_DSCR)) &&
1411                        cpu_has_feature(CPU_FTR_DSCR)) {
1412                PPC_WARN_EMULATED(mtdscr, regs);
1413                rd = (instword >> 21) & 0x1f;
1414                current->thread.dscr = regs->gpr[rd];
1415                current->thread.dscr_inherit = 1;
1416                mtspr(SPRN_DSCR, current->thread.dscr);
1417                return 0;
1418        }
1419#endif
1420
1421        return -EINVAL;
1422}
1423
1424int is_valid_bugaddr(unsigned long addr)
1425{
1426        return is_kernel_addr(addr);
1427}
1428
1429#ifdef CONFIG_MATH_EMULATION
1430static int emulate_math(struct pt_regs *regs)
1431{
1432        int ret;
1433        extern int do_mathemu(struct pt_regs *regs);
1434
1435        ret = do_mathemu(regs);
1436        if (ret >= 0)
1437                PPC_WARN_EMULATED(math, regs);
1438
1439        switch (ret) {
1440        case 0:
1441                emulate_single_step(regs);
1442                return 0;
1443        case 1: {
1444                        int code = 0;
1445                        code = __parse_fpscr(current->thread.fp_state.fpscr);
1446                        _exception(SIGFPE, regs, code, regs->nip);
1447                        return 0;
1448                }
1449        case -EFAULT:
1450                _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1451                return 0;
1452        }
1453
1454        return -1;
1455}
1456#else
1457static inline int emulate_math(struct pt_regs *regs) { return -1; }
1458#endif
1459
1460void program_check_exception(struct pt_regs *regs)
1461{
1462        enum ctx_state prev_state = exception_enter();
1463        unsigned int reason = get_reason(regs);
1464
1465        /* We can now get here via a FP Unavailable exception if the core
1466         * has no FPU, in that case the reason flags will be 0 */
1467
1468        if (reason & REASON_FP) {
1469                /* IEEE FP exception */
1470                parse_fpe(regs);
1471                goto bail;
1472        }
1473        if (reason & REASON_TRAP) {
1474                unsigned long bugaddr;
1475                /* Debugger is first in line to stop recursive faults in
1476                 * rcu_lock, notify_die, or atomic_notifier_call_chain */
1477                if (debugger_bpt(regs))
1478                        goto bail;
1479
1480                if (kprobe_handler(regs))
1481                        goto bail;
1482
1483                /* trap exception */
1484                if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
1485                                == NOTIFY_STOP)
1486                        goto bail;
1487
1488                bugaddr = regs->nip;
1489                /*
1490                 * Fixup bugaddr for BUG_ON() in real mode
1491                 */
1492                if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
1493                        bugaddr += PAGE_OFFSET;
1494
1495                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
1496                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
1497                        regs->nip += 4;
1498                        goto bail;
1499                }
1500                _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1501                goto bail;
1502        }
1503#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1504        if (reason & REASON_TM) {
1505                /* This is a TM "Bad Thing Exception" program check.
1506                 * This occurs when:
1507                 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
1508                 *    transition in TM states.
1509                 * -  A trechkpt is attempted when transactional.
1510                 * -  A treclaim is attempted when non transactional.
1511                 * -  A tend is illegally attempted.
1512                 * -  writing a TM SPR when transactional.
1513                 *
1514                 * If usermode caused this, it's done something illegal and
1515                 * gets a SIGILL slap on the wrist.  We call it an illegal
1516                 * operand to distinguish from the instruction just being bad
1517                 * (e.g. executing a 'tend' on a CPU without TM!); it's an
1518                 * illegal /placement/ of a valid instruction.
1519                 */
1520                if (user_mode(regs)) {
1521                        _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
1522                        goto bail;
1523                } else {
1524                        printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1525                               "at %lx (msr 0x%lx) tm_scratch=%llx\n",
1526                               regs->nip, regs->msr, get_paca()->tm_scratch);
1527                        die("Unrecoverable exception", regs, SIGABRT);
1528                }
1529        }
1530#endif
1531
1532        /*
1533         * If we took the program check in the kernel skip down to sending a
1534         * SIGILL. The subsequent cases all relate to emulating instructions
1535         * which we should only do for userspace. We also do not want to enable
1536         * interrupts for kernel faults because that might lead to further
1537         * faults, and loose the context of the original exception.
1538         */
1539        if (!user_mode(regs))
1540                goto sigill;
1541
1542        /* We restore the interrupt state now */
1543        if (!arch_irq_disabled_regs(regs))
1544                local_irq_enable();
1545
1546        /* (reason & REASON_ILLEGAL) would be the obvious thing here,
1547         * but there seems to be a hardware bug on the 405GP (RevD)
1548         * that means ESR is sometimes set incorrectly - either to
1549         * ESR_DST (!?) or 0.  This is being chased up with the hardware
1550         * people - it is not yet clear whether it can happen on any illegal
1551         * instruction or only on FP instructions, or whether there is any
1552         * pattern to the occurrences. -dgibson 31/Mar/2003
1553         */
1554        if (!emulate_math(regs))
1555                goto bail;
1556
1557        /* Try to emulate it if we should. */
1558        if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
1559                switch (emulate_instruction(regs)) {
1560                case 0:
1561                        regs->nip += 4;
1562                        emulate_single_step(regs);
1563                        goto bail;
1564                case -EFAULT:
1565                        _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1566                        goto bail;
1567                }
1568        }
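        /*
         * Editor's note: emulate_instruction() (earlier in this file)
         * covers a small set of instructions for userspace.  A typical
         * one is reading the privileged processor version register:
         *
         *	unsigned int pvr;
         *	asm volatile("mfspr %0, 287" : "=r" (pvr));	// SPRN_PVR
         *
         * The mfspr traps as a privileged op, the handler copies
         * mfspr(SPRN_PVR) into rD, and 'regs->nip += 4' above resumes
         * execution at the following instruction.
         */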
1569
1570sigill:
1571        if (reason & REASON_PRIVILEGED)
1572                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
1573        else
1574                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1575
1576bail:
1577        exception_exit(prev_state);
1578}
1579NOKPROBE_SYMBOL(program_check_exception);
1580
1581/*
1582 * This occurs when running in hypervisor mode on POWER6 or later
1583 * and an illegal instruction is encountered.
1584 */
1585void emulation_assist_interrupt(struct pt_regs *regs)
1586{
1587        regs->msr |= REASON_ILLEGAL;
1588        program_check_exception(regs);
1589}
1590NOKPROBE_SYMBOL(emulation_assist_interrupt);
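/*
 * Editor's note: the emulation assist interrupt does not set the program
 * check reason bits in SRR1, so we OR REASON_ILLEGAL into regs->msr (the
 * saved SRR1 image) and reuse the program check path above.
 */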
1591
1592void alignment_exception(struct pt_regs *regs)
1593{
1594        enum ctx_state prev_state = exception_enter();
1595        int sig, code, fixed = 0;
1596
1597        /* We restore the interrupt state now */
1598        if (!arch_irq_disabled_regs(regs))
1599                local_irq_enable();
1600
1601        if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
1602                goto bail;
1603
1604        /* we don't implement logging of alignment exceptions */
1605        if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
1606                fixed = fix_alignment(regs);
1607
1608        if (fixed == 1) {
1609                regs->nip += 4; /* skip over emulated instruction */
1610                emulate_single_step(regs);
1611                goto bail;
1612        }
1613
1614        /* Operand address was bad */
1615        if (fixed == -EFAULT) {
1616                sig = SIGSEGV;
1617                code = SEGV_ACCERR;
1618        } else {
1619                sig = SIGBUS;
1620                code = BUS_ADRALN;
1621        }
1622        if (user_mode(regs))
1623                _exception(sig, regs, code, regs->dar);
1624        else
1625                bad_page_fault(regs, regs->dar, sig);
1626
1627bail:
1628        exception_exit(prev_state);
1629}
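/*
 * Editor's note: the PR_UNALIGN_SIGBUS check above is driven by prctl(2).
 * A hedged userspace sketch of opting out of the fixups:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);	// unaligned -> SIGBUS
 *	prctl(PR_SET_UNALIGN, PR_UNALIGN_NOPRINT);	// default: fix silently
 *
 * With PR_UNALIGN_SIGBUS set, fix_alignment() is skipped and the access
 * falls through to the SIGBUS/BUS_ADRALN delivery above.
 */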
1630
1631void StackOverflow(struct pt_regs *regs)
1632{
1633        pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
1634                current->comm, task_pid_nr(current), regs->gpr[1]);
1635        debugger(regs);
1636        show_regs(regs);
1637        panic("kernel stack overflow");
1638}
1639
1640void kernel_fp_unavailable_exception(struct pt_regs *regs)
1641{
1642        enum ctx_state prev_state = exception_enter();
1643
1644        printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
1645                          "%lx at %lx\n", regs->trap, regs->nip);
1646        die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
1647
1648        exception_exit(prev_state);
1649}
1650
1651void altivec_unavailable_exception(struct pt_regs *regs)
1652{
1653        enum ctx_state prev_state = exception_enter();
1654
1655        if (user_mode(regs)) {
1656                /* A user program has executed an altivec instruction,
1657                   but this kernel doesn't support altivec. */
1658                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1659                goto bail;
1660        }
1661
1662        printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
1663                        "%lx at %lx\n", regs->trap, regs->nip);
1664        die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
1665
1666bail:
1667        exception_exit(prev_state);
1668}
1669
1670void vsx_unavailable_exception(struct pt_regs *regs)
1671{
1672        if (user_mode(regs)) {
1673                /* A user program has executed a VSX instruction,
1674                   but this kernel doesn't support VSX. */
1675                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1676                return;
1677        }
1678
1679        printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
1680                        "%lx at %lx\n", regs->trap, regs->nip);
1681        die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1682}
1683
1684#ifdef CONFIG_PPC64
1685static void tm_unavailable(struct pt_regs *regs)
1686{
1687#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1688        if (user_mode(regs)) {
1689                current->thread.load_tm++;
1690                regs->msr |= MSR_TM;
1691                tm_enable();
1692                tm_restore_sprs(&current->thread);
1693                return;
1694        }
1695#endif
1696        pr_emerg("Unrecoverable TM Unavailable Exception "
1697                        "%lx at %lx\n", regs->trap, regs->nip);
1698        die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
1699}
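/*
 * Editor's note: MSR_TM is managed lazily, like FP/VMX.  The first TM
 * instruction a task issues traps here; we set MSR_TM in the saved regs,
 * enable the facility and restore TFHAR/TEXASR/TFIAR, so the trapped
 * instruction simply retries successfully on return to userspace.
 */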
1700
1701void facility_unavailable_exception(struct pt_regs *regs)
1702{
1703        static char *facility_strings[] = {
1704                [FSCR_FP_LG] = "FPU",
1705                [FSCR_VECVSX_LG] = "VMX/VSX",
1706                [FSCR_DSCR_LG] = "DSCR",
1707                [FSCR_PM_LG] = "PMU SPRs",
1708                [FSCR_BHRB_LG] = "BHRB",
1709                [FSCR_TM_LG] = "TM",
1710                [FSCR_EBB_LG] = "EBB",
1711                [FSCR_TAR_LG] = "TAR",
1712                [FSCR_MSGP_LG] = "MSGP",
1713                [FSCR_SCV_LG] = "SCV",
1714        };
1715        char *facility = "unknown";
1716        u64 value;
1717        u32 instword, rd;
1718        u8 status;
1719        bool hv;
1720
1721        hv = (TRAP(regs) == 0xf80);
1722        if (hv)
1723                value = mfspr(SPRN_HFSCR);
1724        else
1725                value = mfspr(SPRN_FSCR);
1726
1727        status = value >> 56;
1728        if ((hv || status >= 2) &&
1729            (status < ARRAY_SIZE(facility_strings)) &&
1730            facility_strings[status])
1731                facility = facility_strings[status];
1732
1733        /* We should not have taken this interrupt in kernel */
1734        if (!user_mode(regs)) {
1735                pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
1736                         facility, status, regs->nip);
1737                die("Unexpected facility unavailable exception", regs, SIGABRT);
1738        }
1739
1740        /* We restore the interrupt state now */
1741        if (!arch_irq_disabled_regs(regs))
1742                local_irq_enable();
1743
1744        if (status == FSCR_DSCR_LG) {
1745                /*
1746                 * User is accessing the DSCR register using the problem
1747                 * state only SPR number (0x03) either through a mfspr or
1748                 * a mtspr instruction. If it is a write attempt through
1749                 * a mtspr, then we set the inherit bit. This also allows
1750                 * the user to write or read the register directly in the
1751                 * future by setting via the FSCR DSCR bit. But in case it
1752                 * is a read DSCR attempt through a mfspr instruction, we
1753                 * just emulate the instruction instead. This code path will
1754                 * always emulate all the mfspr instructions till the user
1755                 * has attempted at least one mtspr instruction. This way it
1756                 * preserves the same behaviour when the user is accessing
1757                 * the DSCR through privilege level only SPR number (0x11)
1758                 * which is emulated through illegal instruction exception.
1759                 * We always leave HFSCR DSCR set.
1760                 */
1761                if (get_user(instword, (u32 __user *)(regs->nip))) {
1762                        pr_err("Failed to fetch the user instruction\n");
1763                        return;
1764                }
1765
1766                /* Write into DSCR (mtspr 0x03, RS) */
1767                if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
1768                                == PPC_INST_MTSPR_DSCR_USER) {
1769                        rd = (instword >> 21) & 0x1f;
1770                        current->thread.dscr = regs->gpr[rd];
1771                        current->thread.dscr_inherit = 1;
1772                        current->thread.fscr |= FSCR_DSCR;
1773                        mtspr(SPRN_FSCR, current->thread.fscr);
1774                }
1775
1776                /* Read from DSCR (mfspr RT, 0x03) */
1777                if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
1778                                == PPC_INST_MFSPR_DSCR_USER) {
1779                        if (emulate_instruction(regs)) {
1780                                pr_err("DSCR based mfspr emulation failed\n");
1781                                return;
1782                        }
1783                        regs->nip += 4;
1784                        emulate_single_step(regs);
1785                }
1786                return;
1787        }
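        /*
         * Editor's note: a hedged sketch of the userspace access being
         * emulated above; SPR 0x03 is the problem-state DSCR alias:
         *
         *	unsigned long dscr;
         *	asm volatile("mfspr %0, 3" : "=r" (dscr));	// read: emulated
         *	asm volatile("mtspr 3, %0" : : "r" (dscr));	// write: sets
         *							// dscr_inherit
         *
         * The '(instword >> 21) & 0x1f' decode works because both mfspr
         * and mtspr carry the GPR number in bits 21-25 (counting from the
         * LSB) of the 32-bit instruction word.
         */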
1788
1789        if (status == FSCR_TM_LG) {
1790                /*
1791                 * If we're here then the hardware is TM aware because it
1792                 * generated an exception with FSCR_TM set.
1793                 *
1794                 * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
1795                 * told us not to do TM, or the kernel is not built with TM
1796                 * support.
1797                 *
1798                 * If both of those things are true, then userspace can spam the
1799                 * console by triggering the printk() below just by continually
1800                 * doing tbegin (or any TM instruction). So in that case just
1801                 * send the process a SIGILL immediately.
1802                 */
1803                if (!cpu_has_feature(CPU_FTR_TM))
1804                        goto out;
1805
1806                tm_unavailable(regs);
1807                return;
1808        }
1809
1810        pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
1811                hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
1812
1813out:
1814        _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1815}
1816#endif
1817
1818#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1819
1820void fp_unavailable_tm(struct pt_regs *regs)
1821{
1822        /* Note:  This does not handle any kind of FP laziness. */
1823
1824        TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1825                 regs->nip, regs->msr);
1826
1827        /* We can only have got here if the task started using FP after
1828         * beginning the transaction.  So, the transactional regs are just a
1829         * copy of the checkpointed ones.  But, we still need to recheckpoint
1830         * as we're enabling FP for the process; it will return, abort the
1831         * transaction, and probably retry but now with FP enabled.  So the
1832         * checkpointed FP registers need to be loaded.
1833         */
1834        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1835
1836        /*
1837         * Reclaim initially saved the bogus (lazy) FPRs to ckfp_state;
1838         * tm_reclaim_thread() then overwrote them with thr->fp_state.
1839         *
1840         * At this point, ck{fp,vr}_state contains the exact values we want to
1841         * recheckpoint.
1842         */
1843
1844        /* Enable FP for the task: */
1845        current->thread.load_fp = 1;
1846
1847        /*
1848         * Recheckpoint using the checkpointed ckpt_regs and ck{fp,vr}_state.
1849         */
1850        tm_recheckpoint(&current->thread);
1851}
1852
1853void altivec_unavailable_tm(struct pt_regs *regs)
1854{
1855        /* See the comments in fp_unavailable_tm().  This function operates
1856         * the same way.
1857         */
1858
1859        TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
1860                 " MSR=%lx\n",
1861                 regs->nip, regs->msr);
1862        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1863        current->thread.load_vec = 1;
1864        tm_recheckpoint(&current->thread);
1865        current->thread.used_vr = 1;
1866}
1867
1868void vsx_unavailable_tm(struct pt_regs *regs)
1869{
1870        /* See the comments in fp_unavailable_tm().  This works similarly,
1871         * though we're loading both FP and VEC registers in here.
1872         *
1873         * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
1874         * regs.  Either way, set MSR_VSX.
1875         */
1876
1877        TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx,"
1878                 " MSR=%lx\n",
1879                 regs->nip, regs->msr);
1880
1881        current->thread.used_vsr = 1;
1882
1883        /* This reclaims FP and/or VR regs if they're already enabled */
1884        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1885
1886        current->thread.load_vec = 1;
1887        current->thread.load_fp = 1;
1888
1889        tm_recheckpoint(&current->thread);
1890}
1891#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1892
1893void performance_monitor_exception(struct pt_regs *regs)
1894{
1895        __this_cpu_inc(irq_stat.pmu_irqs);
1896
1897        perf_irq(regs);
1898}
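/*
 * Editor's note: perf_irq is a function pointer (see asm/pmc.h) installed
 * by the current PMU owner - normally the perf_event subsystem, via
 * reserve_pmc_hardware() - so this handler only counts the interrupt and
 * dispatches.
 */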
1899
1900#ifdef CONFIG_PPC_ADV_DEBUG_REGS
1901static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
1902{
1903        int changed = 0;
1904        /*
1905         * Determine the cause of the debug event, clear the
1906         * event flags and send a trap to the handler.
1907         */
1908        if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1909                dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
1910#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
1911                current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
1912#endif
1913                do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
1914                             5);
1915                changed |= 0x01;
1916        }  else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
1917                dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
1918                do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
1919                             6);
1920                changed |= 0x01;
1921        }  else if (debug_status & DBSR_IAC1) {
1922                current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
1923                dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
1924                do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
1925                             1);
1926                changed |= 0x01;
1927        }  else if (debug_status & DBSR_IAC2) {
1928                current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
1929                do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
1930                             2);
1931                changed |= 0x01;
1932        }  else if (debug_status & DBSR_IAC3) {
1933                current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
1934                dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
1935                do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
1936                             3);
1937                changed |= 0x01;
1938        }  else if (debug_status & DBSR_IAC4) {
1939                current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
1940                do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
1941                             4);
1942                changed |= 0x01;
1943        }
1944        /*
1945         * By the time this routine is called, MSR[DE] has already been
1946         * cleared.  Check all the other debug flags to see whether that
1947         * bit needs to be turned back on or not.
1948         */
1949        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
1950                               current->thread.debug.dbcr1))
1951                regs->msr |= MSR_DE;
1952        else
1953                /* Make sure the IDM flag is off */
1954                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
1955
1956        if (changed & 0x01)
1957                mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
1958}
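/*
 * Editor's note (hedged sketch): the DAC/IAC events above are typically
 * armed through the ptrace hardware-debug interface, e.g. a debugger
 * placing a write watchpoint:
 *
 *	struct ppc_hw_breakpoint bp = {
 *		.version	= 1,
 *		.trigger_type	= PPC_BREAKPOINT_TRIGGER_WRITE,
 *		.addr_mode	= PPC_BREAKPOINT_MODE_EXACT,
 *		.condition_mode	= PPC_BREAKPOINT_CONDITION_NONE,
 *		.addr		= (__u64)watch_addr,	// hypothetical address
 *	};
 *	ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, &bp);
 *
 * A store to watch_addr then raises DBSR_DAC1W, and handle_debug() above
 * clears the event and delivers SIGTRAP via do_send_trap().
 */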
1959
1960void DebugException(struct pt_regs *regs, unsigned long debug_status)
1961{
1962        current->thread.debug.dbsr = debug_status;
1963
1964        /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
1965         * on server, it stops on the target of the branch. In order to simulate
1966         * the server behaviour, we thus restart right away with a single step
1967         * instead of stopping here when hitting a BT
1968         */
1969        if (debug_status & DBSR_BT) {
1970                regs->msr &= ~MSR_DE;
1971
1972                /* Disable BT */
1973                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
1974                /* Clear the BT event */
1975                mtspr(SPRN_DBSR, DBSR_BT);
1976
1977                /* Do the single step trick only when coming from userspace */
1978                if (user_mode(regs)) {
1979                        current->thread.debug.dbcr0 &= ~DBCR0_BT;
1980                        current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
1981                        regs->msr |= MSR_DE;
1982                        return;
1983                }
1984
1985                if (kprobe_post_handler(regs))
1986                        return;
1987
1988                if (notify_die(DIE_SSTEP, "block_step", regs, 5,
1989                               5, SIGTRAP) == NOTIFY_STOP) {
1990                        return;
1991                }
1992                if (debugger_sstep(regs))
1993                        return;
1994        } else if (debug_status & DBSR_IC) {    /* Instruction complete */
1995                regs->msr &= ~MSR_DE;
1996
1997                /* Disable instruction completion */
1998                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
1999                /* Clear the instruction completion event */
2000                mtspr(SPRN_DBSR, DBSR_IC);
2001
2002                if (kprobe_post_handler(regs))
2003                        return;
2004
2005                if (notify_die(DIE_SSTEP, "single_step", regs, 5,
2006                               5, SIGTRAP) == NOTIFY_STOP) {
2007                        return;
2008                }
2009
2010                if (debugger_sstep(regs))
2011                        return;
2012
2013                if (user_mode(regs)) {
2014                        current->thread.debug.dbcr0 &= ~DBCR0_IC;
2015                        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
2016                                               current->thread.debug.dbcr1))
2017                                regs->msr |= MSR_DE;
2018                        else
2019                                /* Make sure the IDM bit is off */
2020                                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
2021                }
2022
2023                _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
2024        } else
2025                handle_debug(regs, debug_status);
2026}
2027NOKPROBE_SYMBOL(DebugException);
2028#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
2029
2030#if !defined(CONFIG_TAU_INT)
2031void TAUException(struct pt_regs *regs)
2032{
2033        printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx    %s\n",
2034               regs->nip, regs->msr, regs->trap, print_tainted());
2035}
2036#endif /* CONFIG_TAU_INT */
2037
2038#ifdef CONFIG_ALTIVEC
2039void altivec_assist_exception(struct pt_regs *regs)
2040{
2041        int err;
2042
2043        if (!user_mode(regs)) {
2044                printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
2045                       " at %lx\n", regs->nip);
2046                die("Kernel VMX/Altivec assist exception", regs, SIGILL);
2047        }
2048
2049        flush_altivec_to_thread(current);
2050
2051        PPC_WARN_EMULATED(altivec, regs);
2052        err = emulate_altivec(regs);
2053        if (err == 0) {
2054                regs->nip += 4;         /* skip emulated instruction */
2055                emulate_single_step(regs);
2056                return;
2057        }
2058
2059        if (err == -EFAULT) {
2060                /* got an error reading the instruction */
2061                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2062        } else {
2063                /* didn't recognize the instruction */
2064                /* XXX quick hack for now: set the non-Java bit in the VSCR */
2065                printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
2066                                   "in %s at %lx\n", current->comm, regs->nip);
2067                current->thread.vr_state.vscr.u[3] |= 0x10000;
2068        }
2069}
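/*
 * Editor's note: the 0x10000 above sets VSCR[NJ] (non-Java mode) in the
 * saved VSCR word; with NJ set, denormalized values are flushed to zero,
 * so the offending vector operation no longer needs an assist.
 */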
2070#endif /* CONFIG_ALTIVEC */
2071
2072#ifdef CONFIG_FSL_BOOKE
2073void CacheLockingException(struct pt_regs *regs, unsigned long address,
2074                           unsigned long error_code)
2075{
2076        /* We treat cache locking instructions from the user
2077         * as priv ops, in the future we could try to do
2078         * something smarter
2079         */
2080        if (error_code & (ESR_DLK|ESR_ILK))
2081                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
2082        return;
2083}
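/*
 * Editor's note: the instructions caught here are the e500 cache-locking
 * ops, e.g. (hypothetical snippet):
 *
 *	asm volatile("dcbtls 0, 0, %0" : : "r" (addr));	// lock line in D-cache
 *
 * Executed in user mode without MSR[UCLE], these set ESR[DLK]/ESR[ILK]
 * and land here, and the task gets SIGILL/ILL_PRVOPC.
 */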
2084#endif /* CONFIG_FSL_BOOKE */
2085
2086#ifdef CONFIG_SPE
2087void SPEFloatingPointException(struct pt_regs *regs)
2088{
2089        extern int do_spe_mathemu(struct pt_regs *regs);
2090        unsigned long spefscr;
2091        int fpexc_mode;
2092        int code = FPE_FLTUNK;
2093        int err;
2094
2095        /* We restore the interrupt state now */
2096        if (!arch_irq_disabled_regs(regs))
2097                local_irq_enable();
2098
2099        flush_spe_to_thread(current);
2100
2101        spefscr = current->thread.spefscr;
2102        fpexc_mode = current->thread.fpexc_mode;
2103
2104        if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF))
2105                code = FPE_FLTOVF;
2107        else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND))
2108                code = FPE_FLTUND;
2110        else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
2111                code = FPE_FLTDIV;
2112        else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV))
2113                code = FPE_FLTINV;
2115        else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
2116                code = FPE_FLTRES;
2117
2118        err = do_spe_mathemu(regs);
2119        if (err == 0) {
2120                regs->nip += 4;         /* skip emulated instruction */
2121                emulate_single_step(regs);
2122                return;
2123        }
2124
2125        if (err == -EFAULT) {
2126                /* got an error reading the instruction */
2127                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2128        } else if (err == -EINVAL) {
2129                /* didn't recognize the instruction */
2130                printk_ratelimited(KERN_ERR "unrecognized spe instruction "
2131                                   "in %s at %lx\n", current->comm, regs->nip);
2132        } else {
2133                _exception(SIGFPE, regs, code, regs->nip);
2134        }
2135
2136        return;
2137}
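/*
 * Editor's note: fpexc_mode above is user-selected via prctl(2); a hedged
 * sketch of a task requesting SIGFPE on overflow and divide-by-zero:
 *
 *	prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE | PR_FP_EXC_OVF | PR_FP_EXC_DIV);
 *
 * The requested bits only choose the si_code reported with SIGFPE;
 * emulation via do_spe_mathemu() is attempted either way.
 */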
2138
2139void SPEFloatingPointRoundException(struct pt_regs *regs)
2140{
2141        extern int speround_handler(struct pt_regs *regs);
2142        int err;
2143
2144        /* We restore the interrupt state now */
2145        if (!arch_irq_disabled_regs(regs))
2146                local_irq_enable();
2147
2148        preempt_disable();
2149        if (regs->msr & MSR_SPE)
2150                giveup_spe(current);
2151        preempt_enable();
2152
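        /*
         * Editor's note: the round exception is delivered after the
         * instruction has completed, so nip points past it; back up so
         * speround_handler() sees the offending instruction (nip is
         * re-advanced on success below).
         */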
2153        regs->nip -= 4;
2154        err = speround_handler(regs);
2155        if (err == 0) {
2156                regs->nip += 4;         /* skip emulated instruction */
2157                emulate_single_step(regs);
2158                return;
2159        }
2160
2161        if (err == -EFAULT) {
2162                /* got an error reading the instruction */
2163                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2164        } else if (err == -EINVAL) {
2165                /* didn't recognize the instruction */
2166                printk_ratelimited(KERN_ERR "unrecognized spe instruction "
2167                                   "in %s at %lx\n", current->comm, regs->nip);
2168        } else {
2169                _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
2170                return;
2171        }
2172}
2173#endif
2174
2175/*
2176 * We enter here if we get an unrecoverable exception, that is, one
2177 * that happened at a point where the RI (recoverable interrupt) bit
2178 * in the MSR is 0.  This indicates that SRR0/1 are live, and that
2179 * we therefore lost state by taking this exception.
2180 */
2181void unrecoverable_exception(struct pt_regs *regs)
2182{
2183        pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
2184                 regs->trap, regs->nip, regs->msr);
2185        die("Unrecoverable exception", regs, SIGABRT);
2186}
2187NOKPROBE_SYMBOL(unrecoverable_exception);
2188
2189#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
2190/*
2191 * Default handler for a Watchdog exception: masks further watchdog
2192 * interrupts (clears TCR[WIE]); a TCR[WRC] reset can still follow.
2193 */
2194void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
2195{
2196        /* Generic WatchdogHandler, implement your own */
2197        mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
2198        return;
2199}
2200
2201void WatchdogException(struct pt_regs *regs)
2202{
2203        printk(KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
2204        WatchdogHandler(regs);
2205}
2206#endif
2207
2208/*
2209 * We enter here if we discover during exception entry that we are
2210 * running in supervisor mode with a userspace value in the stack pointer.
2211 */
2212void kernel_bad_stack(struct pt_regs *regs)
2213{
2214        printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
2215               regs->gpr[1], regs->nip);
2216        die("Bad kernel stack pointer", regs, SIGABRT);
2217}
2218NOKPROBE_SYMBOL(kernel_bad_stack);
2219
2220void __init trap_init(void)
2221{
2222}
2223
2224
2225#ifdef CONFIG_PPC_EMULATED_STATS
2226
2227#define WARN_EMULATED_SETUP(type)       .type = { .name = #type }
2228
2229struct ppc_emulated ppc_emulated = {
2230#ifdef CONFIG_ALTIVEC
2231        WARN_EMULATED_SETUP(altivec),
2232#endif
2233        WARN_EMULATED_SETUP(dcba),
2234        WARN_EMULATED_SETUP(dcbz),
2235        WARN_EMULATED_SETUP(fp_pair),
2236        WARN_EMULATED_SETUP(isel),
2237        WARN_EMULATED_SETUP(mcrxr),
2238        WARN_EMULATED_SETUP(mfpvr),
2239        WARN_EMULATED_SETUP(multiple),
2240        WARN_EMULATED_SETUP(popcntb),
2241        WARN_EMULATED_SETUP(spe),
2242        WARN_EMULATED_SETUP(string),
2243        WARN_EMULATED_SETUP(sync),
2244        WARN_EMULATED_SETUP(unaligned),
2245#ifdef CONFIG_MATH_EMULATION
2246        WARN_EMULATED_SETUP(math),
2247#endif
2248#ifdef CONFIG_VSX
2249        WARN_EMULATED_SETUP(vsx),
2250#endif
2251#ifdef CONFIG_PPC64
2252        WARN_EMULATED_SETUP(mfdscr),
2253        WARN_EMULATED_SETUP(mtdscr),
2254        WARN_EMULATED_SETUP(lq_stq),
2255        WARN_EMULATED_SETUP(lxvw4x),
2256        WARN_EMULATED_SETUP(lxvh8x),
2257        WARN_EMULATED_SETUP(lxvd2x),
2258        WARN_EMULATED_SETUP(lxvb16x),
2259#endif
2260};
2261
2262u32 ppc_warn_emulated;
2263
2264void ppc_warn_emulated_print(const char *type)
2265{
2266        pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
2267                            type);
2268}
2269
2270static int __init ppc_warn_emulated_init(void)
2271{
2272        struct dentry *dir, *d;
2273        unsigned int i;
2274        struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
2275
2276        if (!powerpc_debugfs_root)
2277                return -ENODEV;
2278
2279        dir = debugfs_create_dir("emulated_instructions",
2280                                 powerpc_debugfs_root);
2281        if (!dir)
2282                return -ENOMEM;
2283
2284        d = debugfs_create_u32("do_warn", 0644, dir,
2285                               &ppc_warn_emulated);
2286        if (!d)
2287                goto fail;
2288
2289        for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
2290                d = debugfs_create_u32(entries[i].name, 0644, dir,
2291                                       (u32 *)&entries[i].val.counter);
2292                if (!d)
2293                        goto fail;
2294        }
2295
2296        return 0;
2297
2298fail:
2299        debugfs_remove_recursive(dir);
2300        return -ENOMEM;
2301}
2302
2303device_initcall(ppc_warn_emulated_init);
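/*
 * Editor's note: a hedged usage sketch.  With debugfs mounted, the
 * counters appear under the powerpc debugfs root (typically
 * /sys/kernel/debug/powerpc):
 *
 *	# echo 1 > /sys/kernel/debug/powerpc/emulated_instructions/do_warn
 *	# cat /sys/kernel/debug/powerpc/emulated_instructions/mfpvr
 *
 * Setting do_warn to a non-zero value makes each emulation also log via
 * ppc_warn_emulated_print() (ratelimited).
 */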
2304
2305#endif /* CONFIG_PPC_EMULATED_STATS */
2306