linux/arch/powerpc/kernel/traps.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
 *  Copyright 2007-2010 Freescale Semiconductor, Inc.
 *
 *  Modified by Cort Dougan (cort@cs.nmt.edu)
 *  and Paul Mackerras (paulus@samba.org)
 */

/*
 * This file handles the architecture-dependent parts of hardware exceptions
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/extable.h>
#include <linux/module.h>       /* print_modules */
#include <linux/prctl.h>
#include <linux/delay.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/smp.h>
#include <linux/console.h>
#include <linux/kmsg_dump.h>

#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/pmc.h>
#include <asm/reg.h>
#ifdef CONFIG_PMAC_BACKLIGHT
#include <asm/backlight.h>
#endif
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
#include <asm/tm.h>
#endif
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/rio.h>
#include <asm/fadump.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/debug.h>
#include <asm/asm-prototypes.h>
#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
#include <asm/kprobes.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>

#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;

EXPORT_SYMBOL(__debugger);
EXPORT_SYMBOL(__debugger_ipi);
EXPORT_SYMBOL(__debugger_bpt);
EXPORT_SYMBOL(__debugger_sstep);
EXPORT_SYMBOL(__debugger_iabr_match);
EXPORT_SYMBOL(__debugger_break_match);
EXPORT_SYMBOL(__debugger_fault_handler);
#endif

/* Transactional Memory trap debug */
#ifdef TM_DEBUG_SW
#define TM_DEBUG(x...) printk(KERN_INFO x)
#else
#define TM_DEBUG(x...) do { } while(0)
#endif

static const char *signame(int signr)
{
        switch (signr) {
        case SIGBUS:    return "bus error";
        case SIGFPE:    return "floating point exception";
        case SIGILL:    return "illegal instruction";
        case SIGSEGV:   return "segfault";
        case SIGTRAP:   return "unhandled trap";
        }

        return "unknown signal";
}

/*
 * Trap & Exception support
 */

#ifdef CONFIG_PMAC_BACKLIGHT
static void pmac_backlight_unblank(void)
{
        mutex_lock(&pmac_backlight_mutex);
        if (pmac_backlight) {
                struct backlight_properties *props;

                props = &pmac_backlight->props;
                props->brightness = props->max_brightness;
                props->power = FB_BLANK_UNBLANK;
                backlight_update_status(pmac_backlight);
        }
        mutex_unlock(&pmac_backlight_mutex);
}
#else
static inline void pmac_backlight_unblank(void) { }
#endif

/*
 * If oops/die is expected to crash the machine, return true here.
 *
 * This should not be expected to be 100% accurate; there may be
 * notifiers registered or other unexpected conditions that may bring
 * down the kernel. Or if the current process in the kernel is holding
 * locks or has other critical state, the kernel may become effectively
 * unusable anyway.
 */
bool die_will_crash(void)
{
        if (should_fadump_crash())
                return true;
        if (kexec_should_crash(current))
                return true;
        if (in_interrupt() || panic_on_oops ||
                        !current->pid || is_global_init(current))
                return true;

        return false;
}

static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;

extern void panic_flush_kmsg_start(void)
{
        /*
         * This is mostly taken from kernel/panic.c, but tries to do
         * relatively minimal work. Don't use delay functions (TB may
         * be broken), don't crash dump (need to set a firmware log),
         * don't run notifiers. We do want to get some information to
         * the Linux console.
         */
        console_verbose();
        bust_spinlocks(1);
}

extern void panic_flush_kmsg_end(void)
{
        printk_safe_flush_on_panic();
        kmsg_dump(KMSG_DUMP_PANIC);
        bust_spinlocks(0);
        debug_locks_off();
        console_flush_on_panic(CONSOLE_FLUSH_PENDING);
}

static unsigned long oops_begin(struct pt_regs *regs)
{
        int cpu;
        unsigned long flags;

        oops_enter();

        /* racy, but better than risking deadlock. */
        raw_local_irq_save(flags);
        cpu = smp_processor_id();
        if (!arch_spin_trylock(&die_lock)) {
                if (cpu == die_owner)
                        /* nested oops. should stop eventually */;
                else
                        arch_spin_lock(&die_lock);
        }
        die_nest_count++;
        die_owner = cpu;
        console_verbose();
        bust_spinlocks(1);
        if (machine_is(powermac))
                pmac_backlight_unblank();
        return flags;
}
NOKPROBE_SYMBOL(oops_begin);

static void oops_end(unsigned long flags, struct pt_regs *regs,
                               int signr)
{
        bust_spinlocks(0);
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
        die_nest_count--;
        oops_exit();
        printk("\n");
        if (!die_nest_count) {
                /* Nest count reaches zero, release the lock. */
                die_owner = -1;
                arch_spin_unlock(&die_lock);
        }
        raw_local_irq_restore(flags);

        /*
         * system_reset_exception handles debugger, crash dump, panic, for 0x100
         */
        if (TRAP(regs) == 0x100)
                return;

        crash_fadump(regs, "die oops");

        if (kexec_should_crash(current))
                crash_kexec(regs);

        if (!signr)
                return;

        /*
         * While our oops output is serialised by a spinlock, output
         * from panic() called below can race and corrupt it. If we
         * know we are going to panic, delay for 1 second so we have a
         * chance to get clean backtraces from all CPUs that are oopsing.
         */
        if (in_interrupt() || panic_on_oops || !current->pid ||
            is_global_init(current)) {
                mdelay(MSEC_PER_SEC);
        }

        if (panic_on_oops)
                panic("Fatal exception");
        do_exit(signr);
}
NOKPROBE_SYMBOL(oops_end);

static int __die(const char *str, struct pt_regs *regs, long err)
{
        printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);

        printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
               IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
               PAGE_SIZE / 1024,
               early_radix_enabled() ? " MMU=Radix" : "",
               early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
               IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
               IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
               debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
               IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
               ppc_md.name ? ppc_md.name : "");

        if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
                return 1;

        print_modules();
        show_regs(regs);

        return 0;
}
NOKPROBE_SYMBOL(__die);

void die(const char *str, struct pt_regs *regs, long err)
{
        unsigned long flags;

        /*
         * system_reset_exception handles debugger, crash dump, panic, for 0x100
         */
        if (TRAP(regs) != 0x100) {
                if (debugger(regs))
                        return;
        }

        flags = oops_begin(regs);
        if (__die(str, regs, err))
                err = 0;
        oops_end(flags, regs, err);
}
NOKPROBE_SYMBOL(die);

void user_single_step_report(struct pt_regs *regs)
{
        force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}

static void show_signal_msg(int signr, struct pt_regs *regs, int code,
                            unsigned long addr)
{
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        if (!show_unhandled_signals)
                return;

        if (!unhandled_signal(current, signr))
                return;

        if (!__ratelimit(&rs))
                return;

        pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
                current->comm, current->pid, signame(signr), signr,
                addr, regs->nip, regs->link, code);

        print_vma_addr(KERN_CONT " in ", regs->nip);

        pr_cont("\n");

        show_user_instructions(regs);
}

static bool exception_common(int signr, struct pt_regs *regs, int code,
                              unsigned long addr)
{
        if (!user_mode(regs)) {
                die("Exception in kernel mode", regs, signr);
                return false;
        }

        show_signal_msg(signr, regs, code, addr);

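        /* Re-enable interrupts if the interrupted context had them enabled. */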
        if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
                local_irq_enable();

        current->thread.trap_nr = code;

        /*
         * Save all the pkey registers AMR/IAMR/UAMOR. E.g., core dumps need
         * to capture the content if the task gets killed.
         */
        thread_pkey_regs_save(&current->thread);

        return true;
}

void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
{
        if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
                return;

        force_sig_pkuerr((void __user *) addr, key);
}

void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
{
        if (!exception_common(signr, regs, code, addr))
                return;

        force_sig_fault(signr, code, (void __user *)addr);
}

/*
 * The interrupt architecture has a quirk in that the HV interrupts excluding
 * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
 * that an interrupt handler must do is save off a GPR into a scratch register,
 * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
 * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
 * that it is non-reentrant, which leads to random data corruption.
 *
 * The solution is for NMI interrupts in HV mode to check if they originated
 * from these critical HV interrupt regions. If so, then mark them not
 * recoverable.
 *
 * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
 * HSPRG1 clobber; however, this would cause guest SPRG to be clobbered. Linux
 * guests should always have MSR[RI]=0 when their scratch SPRG is in use, so
 * that would work. However, any other guest OS that may have the SPRG live
 * and MSR[RI]=1 could encounter silent corruption.
 *
 * Builds that do not support KVM could take this second option to increase
 * the recoverability of NMIs.
 */
void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_POWERNV
        unsigned long kbase = (unsigned long)_stext;
        unsigned long nip = regs->nip;

        if (!(regs->msr & MSR_RI))
                return;
        if (!(regs->msr & MSR_HV))
                return;
        if (regs->msr & MSR_PR)
                return;

        /*
         * Now test if the interrupt has hit a range that may be using
         * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
         * problem ranges all run un-relocated. Test real and virt modes
         * at the same time by dropping the high bit of the nip (virt mode
         * entry points still have the +0x4000 offset).
         */
        nip &= ~0xc000000000000000ULL;
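        /*
         * 0x500 is the external interrupt, which uses HSRRs when the
         * kernel runs in HV mode; 0x980 is the hypervisor decrementer;
         * 0xe00-0xec0 covers the HV storage, emulation assist, HMI,
         * doorbell and virtualization vectors; 0xf80 is the HV facility
         * unavailable vector.
         */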
        if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
                goto nonrecoverable;
        if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
                goto nonrecoverable;
        if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
                goto nonrecoverable;
        if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
                goto nonrecoverable;

        /* Trampoline code runs un-relocated so subtract kbase. */
        if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
                        nip < (unsigned long)(end_real_trampolines - kbase))
                goto nonrecoverable;
        if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
                        nip < (unsigned long)(end_virt_trampolines - kbase))
                goto nonrecoverable;
        return;

nonrecoverable:
        regs->msr &= ~MSR_RI;
#endif
}

void system_reset_exception(struct pt_regs *regs)
{
        unsigned long hsrr0, hsrr1;
        bool nested = in_nmi();
        bool saved_hsrrs = false;

        /*
         * Avoid crashes in case of nested NMI exceptions. Recoverability
         * is determined by RI and in_nmi
         */
        if (!nested)
                nmi_enter();

        /*
         * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
         * The system reset interrupt itself may clobber HSRRs (e.g., to call
         * OPAL), so save them here and restore them before returning.
         *
         * Machine checks don't need to save HSRRs, as the real mode handler
         * is careful to avoid them, and the regular handler is not delivered
         * as an NMI.
         */
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                hsrr0 = mfspr(SPRN_HSRR0);
                hsrr1 = mfspr(SPRN_HSRR1);
                saved_hsrrs = true;
        }

        hv_nmi_check_nonrecoverable(regs);

        __this_cpu_inc(irq_stat.sreset_irqs);

        /* See if any machine-dependent code wants to handle it */
        if (ppc_md.system_reset_exception) {
                if (ppc_md.system_reset_exception(regs))
                        goto out;
        }

        if (debugger(regs))
                goto out;

        kmsg_dump(KMSG_DUMP_OOPS);
        /*
         * A system reset is a request to dump, so we always send
         * it through the crashdump code (if fadump or kdump are
         * registered).
         */
        crash_fadump(regs, "System Reset");

        crash_kexec(regs);

        /*
         * We aren't the primary crash CPU. We need to send it
         * to a holding pattern to avoid it ending up in the panic
         * code.
         */
        crash_kexec_secondary(regs);

        /*
         * No debugger or crash dump registered, print logs then
         * panic.
         */
        die("System Reset", regs, SIGABRT);

        mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
        nmi_panic(regs, "System Reset");

out:
#ifdef CONFIG_PPC_BOOK3S_64
        BUG_ON(get_paca()->in_nmi == 0);
        if (get_paca()->in_nmi > 1)
                nmi_panic(regs, "Unrecoverable nested System Reset");
#endif
        /* Must die if the interrupt is not recoverable */
        if (!(regs->msr & MSR_RI))
                nmi_panic(regs, "Unrecoverable System Reset");

        if (saved_hsrrs) {
                mtspr(SPRN_HSRR0, hsrr0);
                mtspr(SPRN_HSRR1, hsrr1);
        }

        if (!nested)
                nmi_exit();

        /* What should we do here? We could issue a shutdown or hard reset. */
}

/*
 * I/O accesses can cause machine checks on powermacs.
 * Check if the NIP corresponds to the address of a sync
 * instruction for which there is an entry in the exception
 * table.
 * Note that the 601 only takes a machine check on TEA
 * (transfer error ack) signal assertion, and does not
 * set any of the top 16 bits of SRR1.
 *  -- paulus.
 */
static inline int check_io_access(struct pt_regs *regs)
{
#ifdef CONFIG_PPC32
        unsigned long msr = regs->msr;
        const struct exception_table_entry *entry;
        unsigned int *nip = (unsigned int *)regs->nip;

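        /*
         * The top 16 bits of SRR1 hold the machine check reason: 0x80000 is
         * the machine check signal and 0x40000 is transfer error ack (TEA).
         * The 601 sets none of these bits (see the comment above).
         */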
        if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
            && (entry = search_exception_tables(regs->nip)) != NULL) {
                /*
                 * Check that it's a sync instruction, or somewhere
                 * in the twi; isync; nop sequence that inb/inw/inl uses.
                 * As the address is in the exception table
                 * we should be able to read the instr there.
                 * For the debug message, we look at the preceding
                 * load or store.
                 */
                if (*nip == PPC_INST_NOP)
                        nip -= 2;
                else if (*nip == PPC_INST_ISYNC)
                        --nip;
                if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
                        unsigned int rb;

                        --nip;
                        rb = (*nip >> 11) & 0x1f;
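                        /* Bit 0x100 of the X-form opcode distinguishes stores from loads. */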
                        printk(KERN_DEBUG "%s bad port %lx at %p\n",
                               (*nip & 0x100)? "OUT to": "IN from",
                               regs->gpr[rb] - _IO_BASE, nip);
                        regs->msr |= MSR_RI;
                        regs->nip = extable_fixup(entry);
                        return 1;
                }
        }
#endif /* CONFIG_PPC32 */
        return 0;
}

#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* On 4xx, the reason for the machine check or program exception
   is in the ESR. */
#define get_reason(regs)        ((regs)->dsisr)
#define REASON_FP               ESR_FP
#define REASON_ILLEGAL          (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED       ESR_PPR
#define REASON_TRAP             ESR_PTR

/* single-step stuff */
#define single_stepping(regs)   (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
#define clear_br_trace(regs)    do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
   exception is in the MSR. */
#define get_reason(regs)        ((regs)->msr)
#define REASON_TM               SRR1_PROGTM
#define REASON_FP               SRR1_PROGFPE
#define REASON_ILLEGAL          SRR1_PROGILL
#define REASON_PRIVILEGED       SRR1_PROGPRIV
#define REASON_TRAP             SRR1_PROGTRAP

#define single_stepping(regs)   ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
#define clear_br_trace(regs)    ((regs)->msr &= ~MSR_BE)
#endif

#if defined(CONFIG_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
        unsigned long mcsr = mfspr(SPRN_MCSR);
        unsigned long pvr = mfspr(SPRN_PVR);
        unsigned long reason = mcsr;
        int recoverable = 1;

        if (reason & MCSR_LD) {
                recoverable = fsl_rio_mcheck_exception(regs);
                if (recoverable == 1)
                        goto silent_out;
        }

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");

        if (reason & MCSR_ICPERR) {
                pr_cont("Instruction Cache Parity Error\n");

                /*
                 * This is recoverable by invalidating the i-cache.
                 */
                mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
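                /* ICFI self-clears once the invalidation completes. */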
                while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
                        ;

                /*
                 * This will generally be accompanied by an instruction
                 * fetch error report -- only treat MCSR_IF as fatal
                 * if it wasn't due to an L1 parity error.
                 */
                reason &= ~MCSR_IF;
        }

        if (reason & MCSR_DCPERR_MC) {
                pr_cont("Data Cache Parity Error\n");

                /*
                 * In write shadow mode we auto-recover from the error, but it
                 * may still get logged and cause a machine check.  We should
                 * only treat the non-write shadow case as non-recoverable.
                 */
                /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
                 * is not implemented but L1 data cache always runs in write
                 * shadow mode. Hence on data cache parity errors HW will
                 * automatically invalidate the L1 Data Cache.
                 */
                if (PVR_VER(pvr) != PVR_VER_E6500) {
                        if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
                                recoverable = 0;
                }
        }

        if (reason & MCSR_L2MMU_MHIT) {
                pr_cont("Hit on multiple TLB entries\n");
                recoverable = 0;
        }

        if (reason & MCSR_NMI)
                pr_cont("Non-maskable interrupt\n");

        if (reason & MCSR_IF) {
                pr_cont("Instruction Fetch Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_LD) {
                pr_cont("Load Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_ST) {
                pr_cont("Store Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_LDG) {
                pr_cont("Guarded Load Error Report\n");
                recoverable = 0;
        }

        if (reason & MCSR_TLBSYNC)
                pr_cont("Simultaneous tlbsync operations\n");

        if (reason & MCSR_BSL2_ERR) {
                pr_cont("Level 2 Cache Error\n");
                recoverable = 0;
        }

        if (reason & MCSR_MAV) {
                u64 addr;

                addr = mfspr(SPRN_MCAR);
                addr |= (u64)mfspr(SPRN_MCARU) << 32;

                pr_cont("Machine Check %s Address: %#llx\n",
                       reason & MCSR_MEA ? "Effective" : "Physical", addr);
        }

silent_out:
        mtspr(SPRN_MCSR, mcsr);
        return mfspr(SPRN_MCSR) == 0 && recoverable;
}

int machine_check_e500(struct pt_regs *regs)
{
        unsigned long reason = mfspr(SPRN_MCSR);

        if (reason & MCSR_BUS_RBERR) {
                if (fsl_rio_mcheck_exception(regs))
                        return 1;
                if (fsl_pci_mcheck_exception(regs))
                        return 1;
        }

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");
        if (reason & MCSR_ICPERR)
                pr_cont("Instruction Cache Parity Error\n");
        if (reason & MCSR_DCP_PERR)
                pr_cont("Data Cache Push Parity Error\n");
        if (reason & MCSR_DCPERR)
                pr_cont("Data Cache Parity Error\n");
        if (reason & MCSR_BUS_IAERR)
                pr_cont("Bus - Instruction Address Error\n");
        if (reason & MCSR_BUS_RAERR)
                pr_cont("Bus - Read Address Error\n");
        if (reason & MCSR_BUS_WAERR)
                pr_cont("Bus - Write Address Error\n");
        if (reason & MCSR_BUS_IBERR)
                pr_cont("Bus - Instruction Data Error\n");
        if (reason & MCSR_BUS_RBERR)
                pr_cont("Bus - Read Data Bus Error\n");
        if (reason & MCSR_BUS_WBERR)
                pr_cont("Bus - Write Data Bus Error\n");
        if (reason & MCSR_BUS_IPERR)
                pr_cont("Bus - Instruction Parity Error\n");
        if (reason & MCSR_BUS_RPERR)
                pr_cont("Bus - Read Parity Error\n");

        return 0;
}

int machine_check_generic(struct pt_regs *regs)
{
        return 0;
}
#elif defined(CONFIG_E200)
int machine_check_e200(struct pt_regs *regs)
{
        unsigned long reason = mfspr(SPRN_MCSR);

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from MCSR=%lx): ", reason);

        if (reason & MCSR_MCP)
                pr_cont("Machine Check Signal\n");
        if (reason & MCSR_CP_PERR)
                pr_cont("Cache Push Parity Error\n");
        if (reason & MCSR_CPERR)
                pr_cont("Cache Parity Error\n");
        if (reason & MCSR_EXCP_ERR)
                pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
        if (reason & MCSR_BUS_IRERR)
                pr_cont("Bus - Read Bus Error on instruction fetch\n");
        if (reason & MCSR_BUS_DRERR)
                pr_cont("Bus - Read Bus Error on data load\n");
        if (reason & MCSR_BUS_WRERR)
                pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");

        return 0;
}
#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
        unsigned long reason = regs->msr;

        printk("Machine check in kernel mode.\n");
        printk("Caused by (from SRR1=%lx): ", reason);
        switch (reason & 0x601F0000) {
        case 0x80000:
                pr_cont("Machine check signal\n");
                break;
        case 0:         /* for 601 */
        case 0x40000:
        case 0x140000:  /* 7450 MSS error and TEA */
                pr_cont("Transfer error ack signal\n");
                break;
        case 0x20000:
                pr_cont("Data parity error signal\n");
                break;
        case 0x10000:
                pr_cont("Address parity error signal\n");
                break;
        case 0x20000000:
                pr_cont("L1 Data Cache error\n");
                break;
        case 0x40000000:
                pr_cont("L1 Instruction Cache error\n");
                break;
        case 0x00100000:
                pr_cont("L2 data cache parity error\n");
                break;
        default:
                pr_cont("Unknown values in msr\n");
        }
        return 0;
}
#endif /* everything else */

void machine_check_exception(struct pt_regs *regs)
{
        int recover = 0;
        bool nested = in_nmi();
        if (!nested)
                nmi_enter();

        __this_cpu_inc(irq_stat.mce_exceptions);

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /* See if any machine-dependent code wants to handle it. In theory,
         * we would want to call the CPU first, and call the ppc_md. one if
         * the CPU one returns a positive number. However, there is existing
         * code that assumes the board gets a first chance, so let's keep it
         * that way for now and fix things later. --BenH.
         */
        if (ppc_md.machine_check_exception)
                recover = ppc_md.machine_check_exception(regs);
        else if (cur_cpu_spec->machine_check)
                recover = cur_cpu_spec->machine_check(regs);

        if (recover > 0)
                goto bail;

        if (debugger_fault_handler(regs))
                goto bail;

        if (check_io_access(regs))
                goto bail;

        if (!nested)
                nmi_exit();

        die("Machine check", regs, SIGBUS);

        /* Must die if the interrupt is not recoverable */
        if (!(regs->msr & MSR_RI))
                nmi_panic(regs, "Unrecoverable Machine check");

        return;

bail:
        if (!nested)
                nmi_exit();
}

void SMIException(struct pt_regs *regs)
{
        die("System Management Interrupt", regs, SIGABRT);
}

#ifdef CONFIG_VSX
static void p9_hmi_special_emu(struct pt_regs *regs)
{
        unsigned int ra, rb, t, i, sel, instr, rc;
        const void __user *addr;
        u8 vbuf[16], *vdst;
        unsigned long ea, msr, msr_mask;
        bool swap;

        if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
                return;

        /*
         * lxvb16x      opcode: 0x7c0006d8
         * lxvd2x       opcode: 0x7c000698
         * lxvh8x       opcode: 0x7c000658
         * lxvw4x       opcode: 0x7c000618
         */
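        /*
         * These four opcodes differ only in bits 6:7 (the "sel" field
         * decoded below) and the TX bit that selects VSRs 32-63, so mask
         * those bits out when matching.
         */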
        if ((instr & 0xfc00073e) != 0x7c000618) {
                pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
                         " instr=%08x\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr);
                return;
        }

        /* Grab vector registers into the task struct */
        msr = regs->msr; /* Grab msr before we flush the bits */
        flush_vsx_to_thread(current);
        enable_kernel_altivec();

        /*
         * Is userspace running with a different endianness? (This is rare
         * but not impossible.)
         */
        swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);

        /* Decode the instruction */
        ra = (instr >> 16) & 0x1f;
        rb = (instr >> 11) & 0x1f;
        t = (instr >> 21) & 0x1f;
        if (instr & 1)
                vdst = (u8 *)&current->thread.vr_state.vr[t];
        else
                vdst = (u8 *)&current->thread.fp_state.fpr[t][0];

        /* Grab the vector address */
        ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
        if (is_32bit_task())
                ea &= 0xfffffffful;
        addr = (__force const void __user *)ea;

        /* Check it */
        if (!access_ok(addr, 16)) {
                pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
                         " instr=%08x addr=%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, (unsigned long)addr);
                return;
        }

        /* Read the vector */
        rc = 0;
        if ((unsigned long)addr & 0xfUL)
                /* unaligned case */
                rc = __copy_from_user_inatomic(vbuf, addr, 16);
        else
                __get_user_atomic_128_aligned(vbuf, addr, rc);
        if (rc) {
                pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
                         " instr=%08x addr=%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, (unsigned long)addr);
                return;
        }

        pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
                 " instr=%08x addr=%016lx\n",
                 smp_processor_id(), current->comm, current->pid, regs->nip,
                 instr, (unsigned long) addr);

        /* Grab instruction "selector" */
        sel = (instr >> 6) & 3;

        /*
         * Check to make sure the facility is actually enabled. This
         * could happen if we get a false positive hit.
         *
         * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
         * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
         */
        msr_mask = MSR_VSX;
        if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
                msr_mask = MSR_VEC;
        if (!(msr & msr_mask)) {
                pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
                         " instr=%08x msr:%016lx\n",
                         smp_processor_id(), current->comm, current->pid,
                         regs->nip, instr, msr);
                return;
        }

        /* Do logging here before we modify sel based on endian */
        switch (sel) {
        case 0: /* lxvw4x */
                PPC_WARN_EMULATED(lxvw4x, regs);
                break;
        case 1: /* lxvh8x */
                PPC_WARN_EMULATED(lxvh8x, regs);
                break;
        case 2: /* lxvd2x */
                PPC_WARN_EMULATED(lxvd2x, regs);
                break;
        case 3: /* lxvb16x */
                PPC_WARN_EMULATED(lxvb16x, regs);
                break;
        }

#ifdef __LITTLE_ENDIAN__
        /*
         * An LE kernel stores the vector in the task struct as an LE
         * byte array (effectively swapping both the components and
         * the content of the components). Those instructions expect
         * the components to remain in ascending address order, so we
         * swap them back.
         *
         * If we are running a BE user space, the expectation is that
         * of a simple memcpy, so forcing the emulation to look like
         * a lxvb16x should do the trick.
         */
        if (swap)
                sel = 3;

        switch (sel) {
        case 0: /* lxvw4x */
                for (i = 0; i < 4; i++)
                        ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
                break;
        case 1: /* lxvh8x */
                for (i = 0; i < 8; i++)
                        ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
                break;
        case 2: /* lxvd2x */
                for (i = 0; i < 2; i++)
                        ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
                break;
        case 3: /* lxvb16x */
                for (i = 0; i < 16; i++)
                        vdst[i] = vbuf[15-i];
                break;
        }
#else /* __LITTLE_ENDIAN__ */
        /* On a big endian kernel, a BE userspace only needs a memcpy */
        if (!swap)
                sel = 3;

        /* Otherwise, we need to swap the content of the components */
        switch (sel) {
        case 0: /* lxvw4x */
                for (i = 0; i < 4; i++)
                        ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
                break;
        case 1: /* lxvh8x */
                for (i = 0; i < 8; i++)
                        ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
                break;
        case 2: /* lxvd2x */
                for (i = 0; i < 2; i++)
                        ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
                break;
        case 3: /* lxvb16x */
                memcpy(vdst, vbuf, 16);
                break;
        }
#endif /* !__LITTLE_ENDIAN__ */

        /* Go to next instruction */
        regs->nip += 4;
}
#endif /* CONFIG_VSX */

void handle_hmi_exception(struct pt_regs *regs)
{
        struct pt_regs *old_regs;

        old_regs = set_irq_regs(regs);
        irq_enter();

#ifdef CONFIG_VSX
        /* Real mode flagged P9 special emu is needed */
        if (local_paca->hmi_p9_special_emu) {
                local_paca->hmi_p9_special_emu = 0;

                /*
                 * We don't want to take page faults while doing the
                 * emulation, we just replay the instruction if necessary.
                 */
                pagefault_disable();
                p9_hmi_special_emu(regs);
                pagefault_enable();
        }
#endif /* CONFIG_VSX */

        if (ppc_md.handle_hmi_exception)
                ppc_md.handle_hmi_exception(regs);

        irq_exit();
        set_irq_regs(old_regs);
}

void unknown_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
               regs->nip, regs->msr, regs->trap);

        _exception(SIGTRAP, regs, TRAP_UNK, 0);

        exception_exit(prev_state);
}

void instruction_breakpoint_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
                                        5, SIGTRAP) == NOTIFY_STOP)
                goto bail;
        if (debugger_iabr_match(regs))
                goto bail;
        _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);

bail:
        exception_exit(prev_state);
}

void RunModeException(struct pt_regs *regs)
{
        _exception(SIGTRAP, regs, TRAP_UNK, 0);
}

void single_step_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();

        clear_single_step(regs);
        clear_br_trace(regs);

        if (kprobe_post_handler(regs))
                return;

        if (notify_die(DIE_SSTEP, "single_step", regs, 5,
                                        5, SIGTRAP) == NOTIFY_STOP)
                goto bail;
        if (debugger_sstep(regs))
                goto bail;

        _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);

bail:
        exception_exit(prev_state);
}
NOKPROBE_SYMBOL(single_step_exception);

/*
 * After we have successfully emulated an instruction, we have to
 * check if the instruction was being single-stepped, and if so,
 * pretend we got a single-step exception.  This was pointed out
 * by Kumar Gala.  -- paulus
 */
static void emulate_single_step(struct pt_regs *regs)
{
        if (single_stepping(regs))
                single_step_exception(regs);
}

static inline int __parse_fpscr(unsigned long fpscr)
{
        int ret = FPE_FLTUNK;

        /* Invalid operation */
        if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
                ret = FPE_FLTINV;

        /* Overflow */
        else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
                ret = FPE_FLTOVF;

        /* Underflow */
        else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
                ret = FPE_FLTUND;

        /* Divide by zero */
        else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
                ret = FPE_FLTDIV;

        /* Inexact result */
        else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
                ret = FPE_FLTRES;

        return ret;
}

static void parse_fpe(struct pt_regs *regs)
{
        int code = 0;

        flush_fp_to_thread(current);

        code = __parse_fpscr(current->thread.fp_state.fpscr);

        _exception(SIGFPE, regs, code, regs->nip);
}

/*
 * Illegal instruction emulation support.  Originally written to
 * provide the PVR to user applications using the mfspr rd, PVR.
 * Return non-zero if we can't emulate, or -EFAULT if the associated
 * memory access caused an access fault.  Return zero on success.
 *
 * There are a couple of ways to do this, either "decode" the instruction
 * or directly match lots of bits.  In this case, matching lots of
 * bits is faster and easier.
 *
 */
static int emulate_string_inst(struct pt_regs *regs, u32 instword)
{
        u8 rT = (instword >> 21) & 0x1f;
        u8 rA = (instword >> 16) & 0x1f;
        u8 NB_RB = (instword >> 11) & 0x1f;
        u32 num_bytes;
        unsigned long EA;
        int pos = 0;

        /* Early out if we are an invalid form of lswx */
        if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
                if ((rT == rA) || (rT == NB_RB))
                        return -EINVAL;

        EA = (rA == 0) ? 0 : regs->gpr[rA];

        switch (instword & PPC_INST_STRING_MASK) {
                case PPC_INST_LSWX:
                case PPC_INST_STSWX:
                        EA += NB_RB;
                        num_bytes = regs->xer & 0x7f;
                        break;
                case PPC_INST_LSWI:
                case PPC_INST_STSWI:
                        num_bytes = (NB_RB == 0) ? 32 : NB_RB;
                        break;
                default:
                        return -EINVAL;
        }

        while (num_bytes != 0)
        {
                u8 val;
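                /* String bytes fill each GPR starting at its most-significant byte. */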
                u32 shift = 8 * (3 - (pos & 0x3));

                /* if process is 32-bit, clear upper 32 bits of EA */
                if ((regs->msr & MSR_64BIT) == 0)
                        EA &= 0xFFFFFFFF;

                switch ((instword & PPC_INST_STRING_MASK)) {
                        case PPC_INST_LSWX:
                        case PPC_INST_LSWI:
                                if (get_user(val, (u8 __user *)EA))
                                        return -EFAULT;
                                /* first time updating this reg,
                                 * zero it out */
                                if (pos == 0)
                                        regs->gpr[rT] = 0;
                                regs->gpr[rT] |= val << shift;
                                break;
                        case PPC_INST_STSWI:
                        case PPC_INST_STSWX:
                                val = regs->gpr[rT] >> shift;
                                if (put_user(val, (u8 __user *)EA))
                                        return -EFAULT;
                                break;
                }
                /* move EA to next address */
                EA += 1;
                num_bytes--;

                /* manage our position within the register */
                if (++pos == 4) {
                        pos = 0;
                        if (++rT == 32)
                                rT = 0;
                }
        }

        return 0;
}

static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
{
        u32 ra, rs;
        unsigned long tmp;

        ra = (instword >> 16) & 0x1f;
        rs = (instword >> 21) & 0x1f;

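        /*
         * Parallel (SWAR) bit count: fold adjacent bit pairs, then nibbles,
         * so each byte of tmp ends up holding the population count of the
         * corresponding byte of rS, which is what popcntb produces.
         */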
        tmp = regs->gpr[rs];
        tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
        tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
        tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        regs->gpr[ra] = tmp;

        return 0;
}

static int emulate_isel(struct pt_regs *regs, u32 instword)
{
        u8 rT = (instword >> 21) & 0x1f;
        u8 rA = (instword >> 16) & 0x1f;
        u8 rB = (instword >> 11) & 0x1f;
        u8 BC = (instword >> 6) & 0x1f;
        u8 bit;
        unsigned long tmp;

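        /* isel: rT = CR bit BC set ? rA (or 0 if rA == 0) : rB */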
        tmp = (rA == 0) ? 0 : regs->gpr[rA];
        bit = (regs->ccr >> (31 - BC)) & 0x1;

        regs->gpr[rT] = bit ? tmp : regs->gpr[rB];

        return 0;
}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline bool tm_abort_check(struct pt_regs *regs, int cause)
{
        /* If we're emulating a load/store in an active transaction, we cannot
         * emulate it as the kernel operates in transaction suspended context.
         * We need to abort the transaction.  This creates a persistent TM
         * abort so tell the user what caused it with a new code.
         */
        if (MSR_TM_TRANSACTIONAL(regs->msr)) {
                tm_enable();
                tm_abort(cause);
                return true;
        }
        return false;
}
#else
static inline bool tm_abort_check(struct pt_regs *regs, int reason)
{
        return false;
}
#endif

static int emulate_instruction(struct pt_regs *regs)
{
        u32 instword;
        u32 rd;

        if (!user_mode(regs))
                return -EINVAL;
        CHECK_FULL_REGS(regs);

        if (get_user(instword, (u32 __user *)(regs->nip)))
                return -EFAULT;

        /* Emulate the mfspr rD, PVR. */
        if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
                PPC_WARN_EMULATED(mfpvr, regs);
                rd = (instword >> 21) & 0x1f;
                regs->gpr[rd] = mfspr(SPRN_PVR);
                return 0;
        }

        /* Emulating the dcba insn is just a no-op.  */
        if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
                PPC_WARN_EMULATED(dcba, regs);
                return 0;
        }

        /* Emulate the mcrxr insn.  */
        if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
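                /* mcrxr: copy XER[SO,OV,CA] into CR field BF (shift = 4*BF), then clear them. */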
                int shift = (instword >> 21) & 0x1c;
                unsigned long msk = 0xf0000000UL >> shift;

                PPC_WARN_EMULATED(mcrxr, regs);
                regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
                regs->xer &= ~0xf0000000UL;
                return 0;
        }

        /* Emulate load/store string insn. */
        if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
                if (tm_abort_check(regs,
                                   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
                        return -EINVAL;
                PPC_WARN_EMULATED(string, regs);
                return emulate_string_inst(regs, instword);
        }

        /* Emulate the popcntb (Population Count Bytes) instruction. */
        if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
                PPC_WARN_EMULATED(popcntb, regs);
                return emulate_popcntb_inst(regs, instword);
        }

        /* Emulate isel (Integer Select) instruction */
        if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
                PPC_WARN_EMULATED(isel, regs);
                return emulate_isel(regs, instword);
        }

        /* Emulate sync instruction variants */
        if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
                PPC_WARN_EMULATED(sync, regs);
                asm volatile("sync");
                return 0;
        }

#ifdef CONFIG_PPC64
        /* Emulate the mfspr rD, DSCR. */
        if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
                PPC_INST_MFSPR_DSCR_USER) ||
             ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
                PPC_INST_MFSPR_DSCR)) &&
                        cpu_has_feature(CPU_FTR_DSCR)) {
                PPC_WARN_EMULATED(mfdscr, regs);
                rd = (instword >> 21) & 0x1f;
                regs->gpr[rd] = mfspr(SPRN_DSCR);
                return 0;
        }
        /* Emulate the mtspr DSCR, rD. */
        if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
                PPC_INST_MTSPR_DSCR_USER) ||
             ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
                PPC_INST_MTSPR_DSCR)) &&
                        cpu_has_feature(CPU_FTR_DSCR)) {
                PPC_WARN_EMULATED(mtdscr, regs);
                rd = (instword >> 21) & 0x1f;
                current->thread.dscr = regs->gpr[rd];
                current->thread.dscr_inherit = 1;
                mtspr(SPRN_DSCR, current->thread.dscr);
                return 0;
        }
#endif

        return -EINVAL;
}

int is_valid_bugaddr(unsigned long addr)
{
        return is_kernel_addr(addr);
}

#ifdef CONFIG_MATH_EMULATION
static int emulate_math(struct pt_regs *regs)
{
        int ret;
        extern int do_mathemu(struct pt_regs *regs);

        ret = do_mathemu(regs);
        if (ret >= 0)
                PPC_WARN_EMULATED(math, regs);

        switch (ret) {
        case 0:
                emulate_single_step(regs);
                return 0;
        case 1: {
                        int code = 0;
                        code = __parse_fpscr(current->thread.fp_state.fpscr);
                        _exception(SIGFPE, regs, code, regs->nip);
                        return 0;
                }
        case -EFAULT:
                _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
                return 0;
        }

        return -1;
}
#else
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif

void program_check_exception(struct pt_regs *regs)
{
        enum ctx_state prev_state = exception_enter();
        unsigned int reason = get_reason(regs);

        /* We can now get here via a FP Unavailable exception if the core
         * has no FPU; in that case the reason flags will be 0 */

        if (reason & REASON_FP) {
                /* IEEE FP exception */
                parse_fpe(regs);
                goto bail;
        }
        if (reason & REASON_TRAP) {
                unsigned long bugaddr;
                /* Debugger is first in line to stop recursive faults in
                 * rcu_lock, notify_die, or atomic_notifier_call_chain */
                if (debugger_bpt(regs))
                        goto bail;

                if (kprobe_handler(regs))
                        goto bail;

                /* trap exception */
                if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
                                == NOTIFY_STOP)
                        goto bail;

                bugaddr = regs->nip;
                /*
                 * Fixup bugaddr for BUG_ON() in real mode
                 */
                if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
                        bugaddr += PAGE_OFFSET;

                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
                        regs->nip += 4;
                        goto bail;
                }
                _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
                goto bail;
        }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        if (reason & REASON_TM) {
                /* This is a TM "Bad Thing Exception" program check.
                 * This occurs when:
                 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
                 *    transition in TM states.
                 * -  A trechkpt is attempted when transactional.
                 * -  A treclaim is attempted when non transactional.
                 * -  A tend is illegally attempted.
                 * -  writing a TM SPR when transactional.
                 *
                 * If usermode caused this, it's done something illegal and
                 * gets a SIGILL slap on the wrist.  We call it an illegal
                 * operand to distinguish from the instruction just being bad
                 * (e.g. executing a 'tend' on a CPU without TM!); it's an
                 * illegal /placement/ of a valid instruction.
                 */
                if (user_mode(regs)) {
                        _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
                        goto bail;
                } else {
                        printk(KERN_EMERG "Unexpected TM Bad Thing exception "
                               "at %lx (msr 0x%lx) tm_scratch=%llx\n",
                               regs->nip, regs->msr, get_paca()->tm_scratch);
                        die("Unrecoverable exception", regs, SIGABRT);
                }
        }
#endif

        /*
         * If we took the program check in the kernel, skip down to sending a
         * SIGILL. The subsequent cases all relate to emulating instructions
         * which we should only do for userspace. We also do not want to enable
         * interrupts for kernel faults because that might lead to further
         * faults, and lose the context of the original exception.
         */
1532        if (!user_mode(regs))
1533                goto sigill;
1534
1535        /* We restore the interrupt state now */
1536        if (!arch_irq_disabled_regs(regs))
1537                local_irq_enable();
1538
1539        /* (reason & REASON_ILLEGAL) would be the obvious thing here,
1540         * but there seems to be a hardware bug on the 405GP (RevD)
1541         * that means ESR is sometimes set incorrectly - either to
1542         * ESR_DST (!?) or 0.  In the process of chasing this with the
1543         * hardware people - not sure if it can happen on any illegal
1544         * instruction or only on FP instructions, whether there is a
1545         * pattern to occurrences etc. -dgibson 31/Mar/2003
1546         */
1547        if (!emulate_math(regs))
1548                goto bail;
1549
1550        /* Try to emulate it if we should. */
1551        if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
1552                switch (emulate_instruction(regs)) {
1553                case 0:
1554                        regs->nip += 4;
1555                        emulate_single_step(regs);
1556                        goto bail;
1557                case -EFAULT:
1558                        _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1559                        goto bail;
1560                }
1561        }
1562
1563sigill:
1564        if (reason & REASON_PRIVILEGED)
1565                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
1566        else
1567                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1568
1569bail:
1570        exception_exit(prev_state);
1571}
1572NOKPROBE_SYMBOL(program_check_exception);
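/*
 * Illustrative sketch (hypothetical user code): one REASON_PRIVILEGED case
 * that emulate_instruction() resolves instead of signalling is a user-mode
 * read of the PVR (counted as "mfpvr" in ppc_emulated below):
 *
 *      unsigned long pvr;
 *      asm volatile("mfspr %0,287" : "=r"(pvr));       // mfpvr, SPR 287
 *
 * The program check is taken, the read is emulated, and execution resumes
 * at nip + 4 rather than the task receiving SIGILL/ILL_PRVOPC.
 */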
1573
1574/*
1575 * This occurs when running in hypervisor mode on POWER6 or later
1576 * and an illegal instruction is encountered.
1577 */
1578void emulation_assist_interrupt(struct pt_regs *regs)
1579{
1580        regs->msr |= REASON_ILLEGAL;
1581        program_check_exception(regs);
1582}
1583NOKPROBE_SYMBOL(emulation_assist_interrupt);
1584
1585void alignment_exception(struct pt_regs *regs)
1586{
1587        enum ctx_state prev_state = exception_enter();
1588        int sig, code, fixed = 0;
1589
1590        /* We restore the interrupt state now */
1591        if (!arch_irq_disabled_regs(regs))
1592                local_irq_enable();
1593
1594        if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
1595                goto bail;
1596
1597        /* we don't implement logging of alignment exceptions */
1598        if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
1599                fixed = fix_alignment(regs);
1600
1601        if (fixed == 1) {
1602                regs->nip += 4; /* skip over emulated instruction */
1603                emulate_single_step(regs);
1604                goto bail;
1605        }
1606
1607        /* Operand address was bad */
1608        if (fixed == -EFAULT) {
1609                sig = SIGSEGV;
1610                code = SEGV_ACCERR;
1611        } else {
1612                sig = SIGBUS;
1613                code = BUS_ADRALN;
1614        }
1615        if (user_mode(regs))
1616                _exception(sig, regs, code, regs->dar);
1617        else
1618                bad_page_fault(regs, regs->dar, sig);
1619
1620bail:
1621        exception_exit(prev_state);
1622}
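/*
 * Illustrative sketch (hypothetical user code): the align_ctl test above
 * honours prctl(2), so a task that prefers a signal over silent fixup can
 * request one:
 *
 *      #include <sys/prctl.h>
 *      prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS, 0, 0, 0);
 *
 * after which unaligned accesses that trap here raise SIGBUS/BUS_ADRALN
 * instead of being emulated by fix_alignment().
 */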
1623
1624void StackOverflow(struct pt_regs *regs)
1625{
1626        pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
1627                current->comm, task_pid_nr(current), regs->gpr[1]);
1628        debugger(regs);
1629        show_regs(regs);
1630        panic("kernel stack overflow");
1631}
1632
1633void kernel_fp_unavailable_exception(struct pt_regs *regs)
1634{
1635        enum ctx_state prev_state = exception_enter();
1636
1637        printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
1638                          "%lx at %lx\n", regs->trap, regs->nip);
1639        die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
1640
1641        exception_exit(prev_state);
1642}
1643
1644void altivec_unavailable_exception(struct pt_regs *regs)
1645{
1646        enum ctx_state prev_state = exception_enter();
1647
1648        if (user_mode(regs)) {
1649                /* A user program has executed an altivec instruction,
1650                   but this kernel doesn't support altivec. */
1651                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1652                goto bail;
1653        }
1654
1655        printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
1656                        "%lx at %lx\n", regs->trap, regs->nip);
1657        die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
1658
1659bail:
1660        exception_exit(prev_state);
1661}
1662
1663void vsx_unavailable_exception(struct pt_regs *regs)
1664{
1665        if (user_mode(regs)) {
1666                /* A user program has executed a VSX instruction,
1667                   but this kernel doesn't support VSX. */
1668                _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1669                return;
1670        }
1671
1672        printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
1673                        "%lx at %lx\n", regs->trap, regs->nip);
1674        die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1675}
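/*
 * Illustrative sketch (hypothetical user code; the two helpers named are
 * placeholders): portable binaries avoid the SIGILL paths above by testing
 * the AT_HWCAP bits the kernel exports before touching the facility:
 *
 *      #include <sys/auxv.h>
 *      if (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX)
 *              run_vsx_path();
 *      else
 *              run_scalar_fallback();
 */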
1676
1677#ifdef CONFIG_PPC64
1678static void tm_unavailable(struct pt_regs *regs)
1679{
1680#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1681        if (user_mode(regs)) {
1682                current->thread.load_tm++;
1683                regs->msr |= MSR_TM;
1684                tm_enable();
1685                tm_restore_sprs(&current->thread);
1686                return;
1687        }
1688#endif
1689        pr_emerg("Unrecoverable TM Unavailable Exception "
1690                        "%lx at %lx\n", regs->trap, regs->nip);
1691        die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
1692}
1693
1694void facility_unavailable_exception(struct pt_regs *regs)
1695{
1696        static char *facility_strings[] = {
1697                [FSCR_FP_LG] = "FPU",
1698                [FSCR_VECVSX_LG] = "VMX/VSX",
1699                [FSCR_DSCR_LG] = "DSCR",
1700                [FSCR_PM_LG] = "PMU SPRs",
1701                [FSCR_BHRB_LG] = "BHRB",
1702                [FSCR_TM_LG] = "TM",
1703                [FSCR_EBB_LG] = "EBB",
1704                [FSCR_TAR_LG] = "TAR",
1705                [FSCR_MSGP_LG] = "MSGP",
1706                [FSCR_SCV_LG] = "SCV",
1707        };
1708        char *facility = "unknown";
1709        u64 value;
1710        u32 instword, rd;
1711        u8 status;
1712        bool hv;
1713
1714        hv = (TRAP(regs) == 0xf80);
1715        if (hv)
1716                value = mfspr(SPRN_HFSCR);
1717        else
1718                value = mfspr(SPRN_FSCR);
1719
1720        status = value >> 56;
1721        if ((hv || status >= 2) &&
1722            (status < ARRAY_SIZE(facility_strings)) &&
1723            facility_strings[status])
1724                facility = facility_strings[status];
1725
1726        /* We should not have taken this interrupt in kernel */
1727        if (!user_mode(regs)) {
1728                pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
1729                         facility, status, regs->nip);
1730                die("Unexpected facility unavailable exception", regs, SIGABRT);
1731        }
1732
1733        /* We restore the interrupt state now */
1734        if (!arch_irq_disabled_regs(regs))
1735                local_irq_enable();
1736
1737        if (status == FSCR_DSCR_LG) {
1738                /*
1739                 * User is accessing the DSCR register using the problem
1740                 * state only SPR number (0x03) either through a mfspr or
1741                 * a mtspr instruction. If it is a write attempt through
1742                 * a mtspr, then we set the inherit bit. This also allows
1743                 * the user to write or read the register directly in the
1744                 * future by setting via the FSCR DSCR bit. But in case it
1745                 * is a read DSCR attempt through a mfspr instruction, we
1746                 * just emulate the instruction instead. This code path will
1747                 * always emulate all the mfspr instructions until the user
1748                 * has attempted at least one mtspr instruction. This way it
1749                 * preserves the same behaviour as when the user accesses
1750                 * the DSCR through the privilege-state-only SPR number (0x11),
1751                 * which is emulated through the illegal instruction exception.
1752                 * We always leave HFSCR DSCR set.
1753                 */
1754                if (get_user(instword, (u32 __user *)(regs->nip))) {
1755                        pr_err("Failed to fetch the user instruction\n");
1756                        return;
1757                }
1758
1759                /* Write into DSCR (mtspr 0x03, RS) */
1760                if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
1761                                == PPC_INST_MTSPR_DSCR_USER) {
1762                        rd = (instword >> 21) & 0x1f;
1763                        current->thread.dscr = regs->gpr[rd];
1764                        current->thread.dscr_inherit = 1;
1765                        current->thread.fscr |= FSCR_DSCR;
1766                        mtspr(SPRN_FSCR, current->thread.fscr);
1767                }
1768
1769                /* Read from DSCR (mfspr RT, 0x03) */
1770                if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
1771                                == PPC_INST_MFSPR_DSCR_USER) {
1772                        if (emulate_instruction(regs)) {
1773                                pr_err("DSCR based mfspr emulation failed\n");
1774                                return;
1775                        }
1776                        regs->nip += 4;
1777                        emulate_single_step(regs);
1778                }
1779                return;
1780        }
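        /*
         * Illustrative sketch (hypothetical user code; the value written is
         * arbitrary): the first mtspr of the problem-state DSCR (SPR 3)
         * lands here, sets dscr_inherit and FSCR[DSCR], and later accesses
         * go straight to hardware:
         *
         *      unsigned long dscr = 0x10;                // arbitrary value
         *      asm volatile("mtspr 3,%0" :: "r"(dscr));  // traps once, sets FSCR[DSCR]
         *      asm volatile("mfspr %0,3" : "=r"(dscr));  // no longer traps
         */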
1781
1782        if (status == FSCR_TM_LG) {
1783                /*
1784                 * If we're here then the hardware is TM aware because it
1785                 * generated an exception with FSCR_TM set.
1786                 *
1787                 * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
1788                 * told us not to do TM, or the kernel is not built with TM
1789                 * support.
1790                 *
1791                 * If both of those things are true, then userspace can spam the
1792                 * console by triggering the printk() below just by continually
1793                 * doing tbegin (or any TM instruction). So in that case just
1794                 * send the process a SIGILL immediately.
1795                 */
1796                if (!cpu_has_feature(CPU_FTR_TM))
1797                        goto out;
1798
1799                tm_unavailable(regs);
1800                return;
1801        }
1802
1803        pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
1804                hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
1805
1806out:
1807        _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1808}
1809#endif
1810
1811#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1812
1813void fp_unavailable_tm(struct pt_regs *regs)
1814{
1815        /* Note:  This does not handle any kind of FP laziness. */
1816
1817        TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1818                 regs->nip, regs->msr);
1819
1820        /* We can only have got here if the task started using FP after
1821         * beginning the transaction.  So, the transactional regs are just a
1822         * copy of the checkpointed ones.  But, we still need to recheckpoint
1823         * as we're enabling FP for the process; it will return, abort the
1824         * transaction, and probably retry but now with FP enabled.  So the
1825         * checkpointed FP registers need to be loaded.
1826         */
1827        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1828
1829        /*
1830         * Reclaim initially saved the bogus (lazy) FPRs to ckfp_state, and
1831         * they were then overwritten with thr->fp_state by tm_reclaim_thread().
1832         *
1833         * At this point, ck{fp,vr}_state contains the exact values we want to
1834         * recheckpoint.
1835         */
1836
1837        /* Enable FP for the task: */
1838        current->thread.load_fp = 1;
1839
1840        /*
1841         * Recheckpoint the checkpointed ckpt and ck{fp,vr}_state registers.
1842         */
1843        tm_recheckpoint(&current->thread);
1844}
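/*
 * Illustrative trigger (hypothetical user code; assumes HTM and that the
 * kernel is holding the task's FP state lazily): the path above is reached
 * when a task's first FP use happens inside a transaction:
 *
 *      asm volatile("tbegin.");
 *      asm volatile("fmr 1,1");        // first FP use -> fp_unavailable_tm()
 */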
1845
1846void altivec_unavailable_tm(struct pt_regs *regs)
1847{
1848        /* See the comments in fp_unavailable_tm().  This function operates
1849         * the same way.
1850         */
1851
1852        TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx, "
1853                 "MSR=%lx\n",
1854                 regs->nip, regs->msr);
1855        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1856        current->thread.load_vec = 1;
1857        tm_recheckpoint(&current->thread);
1858        current->thread.used_vr = 1;
1859}
1860
1861void vsx_unavailable_tm(struct pt_regs *regs)
1862{
1863        /* See the comments in fp_unavailable_tm().  This works similarly,
1864         * though we're loading both FP and VEC registers in here.
1865         *
1866         * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
1867         * regs.  Either way, set MSR_VSX.
1868         */
1869
1870        TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx, "
1871                 "MSR=%lx\n",
1872                 regs->nip, regs->msr);
1873
1874        current->thread.used_vsr = 1;
1875
1876        /* This reclaims FP and/or VR regs if they're already enabled */
1877        tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1878
1879        current->thread.load_vec = 1;
1880        current->thread.load_fp = 1;
1881
1882        tm_recheckpoint(&current->thread);
1883}
1884#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1885
1886void performance_monitor_exception(struct pt_regs *regs)
1887{
1888        __this_cpu_inc(irq_stat.pmu_irqs);
1889
1890        perf_irq(regs);
1891}
1892
1893#ifdef CONFIG_PPC_ADV_DEBUG_REGS
1894static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
1895{
1896        int changed = 0;
1897        /*
1898         * Determine the cause of the debug event, clear the
1899         * event flags and send a trap to the handler. Torez
1900         */
1901        if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1902                dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
1903#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
1904                current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
1905#endif
1906                do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
1907                             5);
1908                changed |= 0x01;
1909        }  else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
1910                dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
1911                do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
1912                             6);
1913                changed |= 0x01;
1914        }  else if (debug_status & DBSR_IAC1) {
1915                current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
1916                dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
1917                do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
1918                             1);
1919                changed |= 0x01;
1920        }  else if (debug_status & DBSR_IAC2) {
1921                current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
1922                do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
1923                             2);
1924                changed |= 0x01;
1925        }  else if (debug_status & DBSR_IAC3) {
1926                current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
1927                dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
1928                do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
1929                             3);
1930                changed |= 0x01;
1931        }  else if (debug_status & DBSR_IAC4) {
1932                current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
1933                do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
1934                             4);
1935                changed |= 0x01;
1936        }
1937        /*
1938         * At the point this routine was called, the MSR(DE) was turned off.
1939         * Check all other debug flags and see if that bit needs to be turned
1940         * back on or not.
1941         */
1942        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
1943                               current->thread.debug.dbcr1))
1944                regs->msr |= MSR_DE;
1945        else
1946                /* Make sure the IDM flag is off */
1947                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
1948
1949        if (changed & 0x01)
1950                mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
1951}
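/*
 * Illustrative sketch (hypothetical user code; pid and watched_var are
 * placeholders, error handling omitted): the DAC events cleared above are
 * typically armed from userspace through the powerpc ptrace debug
 * interface, e.g. a write watchpoint:
 *
 *      struct ppc_hw_breakpoint bp = {
 *              .version        = 1,
 *              .trigger_type   = PPC_BREAKPOINT_TRIGGER_WRITE,
 *              .addr_mode      = PPC_BREAKPOINT_MODE_EXACT,
 *              .condition_mode = PPC_BREAKPOINT_CONDITION_NONE,
 *              .addr           = (__u64)(unsigned long)&watched_var,
 *      };
 *      ptrace(PPC_PTRACE_SETHWDEBUG, pid, NULL, &bp);
 */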
1952
1953void DebugException(struct pt_regs *regs, unsigned long debug_status)
1954{
1955        current->thread.debug.dbsr = debug_status;
1956
1957        /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
1958         * on server, it stops on the target of the branch. In order to simulate
1959         * the server behaviour, we thus restart right away with a single step
1960         * instead of stopping here when hitting a BT.
1961         */
1962        if (debug_status & DBSR_BT) {
1963                regs->msr &= ~MSR_DE;
1964
1965                /* Disable BT */
1966                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
1967                /* Clear the BT event */
1968                mtspr(SPRN_DBSR, DBSR_BT);
1969
1970                /* Do the single step trick only when coming from userspace */
1971                if (user_mode(regs)) {
1972                        current->thread.debug.dbcr0 &= ~DBCR0_BT;
1973                        current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
1974                        regs->msr |= MSR_DE;
1975                        return;
1976                }
1977
1978                if (kprobe_post_handler(regs))
1979                        return;
1980
1981                if (notify_die(DIE_SSTEP, "block_step", regs, 5,
1982                               5, SIGTRAP) == NOTIFY_STOP) {
1983                        return;
1984                }
1985                if (debugger_sstep(regs))
1986                        return;
1987        } else if (debug_status & DBSR_IC) {    /* Instruction complete */
1988                regs->msr &= ~MSR_DE;
1989
1990                /* Disable instruction completion */
1991                mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
1992                /* Clear the instruction completion event */
1993                mtspr(SPRN_DBSR, DBSR_IC);
1994
1995                if (kprobe_post_handler(regs))
1996                        return;
1997
1998                if (notify_die(DIE_SSTEP, "single_step", regs, 5,
1999                               5, SIGTRAP) == NOTIFY_STOP) {
2000                        return;
2001                }
2002
2003                if (debugger_sstep(regs))
2004                        return;
2005
2006                if (user_mode(regs)) {
2007                        current->thread.debug.dbcr0 &= ~DBCR0_IC;
2008                        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
2009                                               current->thread.debug.dbcr1))
2010                                regs->msr |= MSR_DE;
2011                        else
2012                                /* Make sure the IDM bit is off */
2013                                current->thread.debug.dbcr0 &= ~DBCR0_IDM;
2014                }
2015
2016                _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
2017        } else
2018                handle_debug(regs, debug_status);
2019}
2020NOKPROBE_SYMBOL(DebugException);
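/*
 * Illustrative sketch (hypothetical debugger code): the DBSR_IC branch
 * above is what completes a single step requested over ptrace on these
 * cores; a debugger drives it with:
 *
 *      ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);
 *      waitpid(pid, &status, 0);       // tracee stops with SIGTRAP/TRAP_TRACE
 */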
2021#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
2022
2023#if !defined(CONFIG_TAU_INT)
2024void TAUException(struct pt_regs *regs)
2025{
2026        printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx    %s\n",
2027               regs->nip, regs->msr, regs->trap, print_tainted());
2028}
2029#endif /* CONFIG_TAU_INT */
2030
2031#ifdef CONFIG_ALTIVEC
2032void altivec_assist_exception(struct pt_regs *regs)
2033{
2034        int err;
2035
2036        if (!user_mode(regs)) {
2037                printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
2038                       " at %lx\n", regs->nip);
2039                die("Kernel VMX/Altivec assist exception", regs, SIGILL);
2040        }
2041
2042        flush_altivec_to_thread(current);
2043
2044        PPC_WARN_EMULATED(altivec, regs);
2045        err = emulate_altivec(regs);
2046        if (err == 0) {
2047                regs->nip += 4;         /* skip emulated instruction */
2048                emulate_single_step(regs);
2049                return;
2050        }
2051
2052        if (err == -EFAULT) {
2053                /* got an error reading the instruction */
2054                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2055        } else {
2056                /* didn't recognize the instruction */
2057                /* XXX quick hack for now: set the non-Java bit in the VSCR */
2058                printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
2059                                   "in %s at %lx\n", current->comm, regs->nip);
2060                current->thread.vr_state.vscr.u[3] |= 0x10000;
2061        }
2062}
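/*
 * Illustrative note (hypothetical user code; element placement assumes the
 * big-endian VSCR layout): the bit ORed in above is the non-Java (NJ) bit,
 * which a program tolerant of flush-to-zero denormals can set itself to
 * avoid further assists:
 *
 *      #include <altivec.h>
 *      vector unsigned int nj = { 0, 0, 0, 0x10000 };
 *      vec_mtvscr(nj);
 */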
2063#endif /* CONFIG_ALTIVEC */
2064
2065#ifdef CONFIG_FSL_BOOKE
2066void CacheLockingException(struct pt_regs *regs, unsigned long address,
2067                           unsigned long error_code)
2068{
2069        /* We treat cache locking instructions from the user
2070         * as priv ops; in the future we could try to do
2071         * something smarter
2072         */
2073        if (error_code & (ESR_DLK|ESR_ILK))
2074                _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
2075        return;
2076}
2077#endif /* CONFIG_FSL_BOOKE */
2078
2079#ifdef CONFIG_SPE
2080void SPEFloatingPointException(struct pt_regs *regs)
2081{
2082        extern int do_spe_mathemu(struct pt_regs *regs);
2083        unsigned long spefscr;
2084        int fpexc_mode;
2085        int code = FPE_FLTUNK;
2086        int err;
2087
2088        /* We restore the interrupt state now */
2089        if (!arch_irq_disabled_regs(regs))
2090                local_irq_enable();
2091
2092        flush_spe_to_thread(current);
2093
2094        spefscr = current->thread.spefscr;
2095        fpexc_mode = current->thread.fpexc_mode;
2096
2097        if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF))
2098                code = FPE_FLTOVF;
2099        else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND))
2100                code = FPE_FLTUND;
2101        else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
2102                code = FPE_FLTDIV;
2103        else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV))
2104                code = FPE_FLTINV;
2105        else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
2106                code = FPE_FLTRES;
2110
2111        err = do_spe_mathemu(regs);
2112        if (err == 0) {
2113                regs->nip += 4;         /* skip emulated instruction */
2114                emulate_single_step(regs);
2115                return;
2116        }
2117
2118        if (err == -EFAULT) {
2119                /* got an error reading the instruction */
2120                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2121        } else if (err == -EINVAL) {
2122                /* didn't recognize the instruction */
2123                printk(KERN_ERR "Unrecognized SPE instruction "
2124                       "in %s at %lx\n", current->comm, regs->nip);
2125        } else {
2126                _exception(SIGFPE, regs, code, regs->nip);
2127        }
2128
2129        return;
2130}
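/*
 * Illustrative sketch (hypothetical user code; the exact mode bits, e.g.
 * PR_FP_EXC_SW_ENABLE, depend on the platform's FP exception model): the
 * fpexc_mode bits tested above come from prctl(2), e.g. asking for SIGFPE
 * on overflow and divide-by-zero:
 *
 *      #include <sys/prctl.h>
 *      prctl(PR_SET_FPEXC, PR_FP_EXC_OVF | PR_FP_EXC_DIV);
 */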
2131
2132void SPEFloatingPointRoundException(struct pt_regs *regs)
2133{
2134        extern int speround_handler(struct pt_regs *regs);
2135        int err;
2136
2137        /* We restore the interrupt state now */
2138        if (!arch_irq_disabled_regs(regs))
2139                local_irq_enable();
2140
2141        preempt_disable();
2142        if (regs->msr & MSR_SPE)
2143                giveup_spe(current);
2144        preempt_enable();
2145
2146        regs->nip -= 4;
2147        err = speround_handler(regs);
2148        if (err == 0) {
2149                regs->nip += 4;         /* skip emulated instruction */
2150                emulate_single_step(regs);
2151                return;
2152        }
2153
2154        if (err == -EFAULT) {
2155                /* got an error reading the instruction */
2156                _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2157        } else if (err == -EINVAL) {
2158                /* didn't recognize the instruction */
2159                printk(KERN_ERR "Unrecognized SPE instruction "
2160                       "in %s at %lx\n", current->comm, regs->nip);
2161        } else {
2162                _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
2163                return;
2164        }
2165}
2166#endif
2167
2168/*
2169 * We enter here if we get an unrecoverable exception, that is, one
2170 * that happened at a point where the RI (recoverable interrupt) bit
2171 * in the MSR is 0.  This indicates that SRR0/1 are live, and that
2172 * we therefore lost state by taking this exception.
2173 */
2174void unrecoverable_exception(struct pt_regs *regs)
2175{
2176        pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
2177                 regs->trap, regs->nip, regs->msr);
2178        die("Unrecoverable exception", regs, SIGABRT);
2179}
2180NOKPROBE_SYMBOL(unrecoverable_exception);
2181
2182#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
2183/*
2184 * Default handler for a Watchdog exception:
2185 * masks the watchdog interrupt (TCR[WIE]) and returns
2186 */
2187void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
2188{
2189        /* Generic WatchdogHandler, implement your own */
2190        mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
2191        return;
2192}
2193
2194void WatchdogException(struct pt_regs *regs)
2195{
2196        printk(KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
2197        WatchdogHandler(regs);
2198}
2199#endif
2200
2201/*
2202 * We enter here if we discover during exception entry that we are
2203 * running in supervisor mode with a userspace value in the stack pointer.
2204 */
2205void kernel_bad_stack(struct pt_regs *regs)
2206{
2207        printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
2208               regs->gpr[1], regs->nip);
2209        die("Bad kernel stack pointer", regs, SIGABRT);
2210}
2211NOKPROBE_SYMBOL(kernel_bad_stack);
2212
2213void __init trap_init(void)
2214{
2215}
2216
2217
2218#ifdef CONFIG_PPC_EMULATED_STATS
2219
2220#define WARN_EMULATED_SETUP(type)       .type = { .name = #type }
2221
2222struct ppc_emulated ppc_emulated = {
2223#ifdef CONFIG_ALTIVEC
2224        WARN_EMULATED_SETUP(altivec),
2225#endif
2226        WARN_EMULATED_SETUP(dcba),
2227        WARN_EMULATED_SETUP(dcbz),
2228        WARN_EMULATED_SETUP(fp_pair),
2229        WARN_EMULATED_SETUP(isel),
2230        WARN_EMULATED_SETUP(mcrxr),
2231        WARN_EMULATED_SETUP(mfpvr),
2232        WARN_EMULATED_SETUP(multiple),
2233        WARN_EMULATED_SETUP(popcntb),
2234        WARN_EMULATED_SETUP(spe),
2235        WARN_EMULATED_SETUP(string),
2236        WARN_EMULATED_SETUP(sync),
2237        WARN_EMULATED_SETUP(unaligned),
2238#ifdef CONFIG_MATH_EMULATION
2239        WARN_EMULATED_SETUP(math),
2240#endif
2241#ifdef CONFIG_VSX
2242        WARN_EMULATED_SETUP(vsx),
2243#endif
2244#ifdef CONFIG_PPC64
2245        WARN_EMULATED_SETUP(mfdscr),
2246        WARN_EMULATED_SETUP(mtdscr),
2247        WARN_EMULATED_SETUP(lq_stq),
2248        WARN_EMULATED_SETUP(lxvw4x),
2249        WARN_EMULATED_SETUP(lxvh8x),
2250        WARN_EMULATED_SETUP(lxvd2x),
2251        WARN_EMULATED_SETUP(lxvb16x),
2252#endif
2253};
2254
2255u32 ppc_warn_emulated;
2256
2257void ppc_warn_emulated_print(const char *type)
2258{
2259        pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
2260                            type);
2261}
2262
2263static int __init ppc_warn_emulated_init(void)
2264{
2265        struct dentry *dir, *d;
2266        unsigned int i;
2267        struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
2268
2269        if (!powerpc_debugfs_root)
2270                return -ENODEV;
2271
2272        dir = debugfs_create_dir("emulated_instructions",
2273                                 powerpc_debugfs_root);
2274        if (!dir)
2275                return -ENOMEM;
2276
2277        d = debugfs_create_u32("do_warn", 0644, dir,
2278                               &ppc_warn_emulated);
2279        if (!d)
2280                goto fail;
2281
2282        for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
2283                d = debugfs_create_u32(entries[i].name, 0644, dir,
2284                                       (u32 *)&entries[i].val.counter);
2285                if (!d)
2286                        goto fail;
2287        }
2288
2289        return 0;
2290
2291fail:
2292        debugfs_remove_recursive(dir);
2293        return -ENOMEM;
2294}
2295
2296device_initcall(ppc_warn_emulated_init);
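/*
 * Illustrative usage (paths assume debugfs mounted at the usual place):
 * the counters registered above appear as, e.g.:
 *
 *      /sys/kernel/debug/powerpc/emulated_instructions/do_warn
 *      /sys/kernel/debug/powerpc/emulated_instructions/mfpvr
 *
 * Writing a non-zero value to do_warn enables the rate-limited message
 * printed by ppc_warn_emulated_print().
 */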
2297
2298#endif /* CONFIG_PPC_EMULATED_STATS */
2299