linux/arch/mips/dec/ecc-berr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Bus error event handling code for systems equipped with ECC
   4 *      handling logic, i.e. DECstation/DECsystem 5000/200 (KN02),
   5 *      5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03),
   6 *      5900/260 (KN05) systems.
   7 *
   8 *      Copyright (c) 2003, 2005  Maciej W. Rozycki
   9 */
  10
  11#include <linux/init.h>
  12#include <linux/interrupt.h>
  13#include <linux/kernel.h>
  14#include <linux/sched.h>
  15#include <linux/types.h>
  16
  17#include <asm/addrspace.h>
  18#include <asm/bootinfo.h>
  19#include <asm/cpu.h>
  20#include <asm/cpu-type.h>
  21#include <asm/irq_regs.h>
  22#include <asm/processor.h>
  23#include <asm/ptrace.h>
  24#include <asm/traps.h>
  25
  26#include <asm/dec/ecc.h>
  27#include <asm/dec/kn02.h>
  28#include <asm/dec/kn03.h>
  29#include <asm/dec/kn05.h>
  30
  31static volatile u32 *kn0x_erraddr;
  32static volatile u32 *kn0x_chksyn;
  33
  34static inline void dec_ecc_be_ack(void)
  35{
  36        *kn0x_erraddr = 0;                      /* any write clears the IRQ */
  37        iob();
  38}
  39
  40static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker)
  41{
  42        static const char excstr[] = "exception";
  43        static const char intstr[] = "interrupt";
  44        static const char cpustr[] = "CPU";
  45        static const char dmastr[] = "DMA";
  46        static const char readstr[] = "read";
  47        static const char mreadstr[] = "memory read";
  48        static const char writestr[] = "write";
  49        static const char mwritstr[] = "partial memory write";
  50        static const char timestr[] = "timeout";
  51        static const char overstr[] = "overrun";
  52        static const char eccstr[] = "ECC error";
  53
  54        const char *kind, *agent, *cycle, *event;
  55        const char *status = "", *xbit = "", *fmt = "";
  56        unsigned long address;
  57        u16 syn = 0, sngl;
  58
  59        int i = 0;
  60
  61        u32 erraddr = *kn0x_erraddr;
  62        u32 chksyn = *kn0x_chksyn;
  63        int action = MIPS_BE_FATAL;
  64
  65        /* For non-ECC ack ASAP, so that any subsequent errors get caught. */
  66        if ((erraddr & (KN0X_EAR_VALID | KN0X_EAR_ECCERR)) == KN0X_EAR_VALID)
  67                dec_ecc_be_ack();
  68
  69        kind = invoker ? intstr : excstr;
  70
  71        if (!(erraddr & KN0X_EAR_VALID)) {
  72                /* No idea what happened. */
  73                printk(KERN_ALERT "Unidentified bus error %s\n", kind);
  74                return action;
  75        }
  76
  77        agent = (erraddr & KN0X_EAR_CPU) ? cpustr : dmastr;
  78
  79        if (erraddr & KN0X_EAR_ECCERR) {
  80                /* An ECC error on a CPU or DMA transaction. */
  81                cycle = (erraddr & KN0X_EAR_WRITE) ? mwritstr : mreadstr;
  82                event = eccstr;
  83        } else {
  84                /* A CPU timeout or a DMA overrun. */
  85                cycle = (erraddr & KN0X_EAR_WRITE) ? writestr : readstr;
  86                event = (erraddr & KN0X_EAR_CPU) ? timestr : overstr;
  87        }
  88
  89        address = erraddr & KN0X_EAR_ADDRESS;
  90        /* For ECC errors on reads adjust for MT pipelining. */
  91        if ((erraddr & (KN0X_EAR_WRITE | KN0X_EAR_ECCERR)) == KN0X_EAR_ECCERR)
  92                address = (address & ~0xfffLL) | ((address - 5) & 0xfffLL);
  93        address <<= 2;
  94
  95        /* Only CPU errors are fixable. */
  96        if (erraddr & KN0X_EAR_CPU && is_fixup)
  97                action = MIPS_BE_FIXUP;
  98
  99        if (erraddr & KN0X_EAR_ECCERR) {
 100                static const u8 data_sbit[32] = {
 101                        0x4f, 0x4a, 0x52, 0x54, 0x57, 0x58, 0x5b, 0x5d,
 102                        0x23, 0x25, 0x26, 0x29, 0x2a, 0x2c, 0x31, 0x34,
 103                        0x0e, 0x0b, 0x13, 0x15, 0x16, 0x19, 0x1a, 0x1c,
 104                        0x62, 0x64, 0x67, 0x68, 0x6b, 0x6d, 0x70, 0x75,
 105                };
 106                static const u8 data_mbit[25] = {
 107                        0x07, 0x0d, 0x1f,
 108                        0x2f, 0x32, 0x37, 0x38, 0x3b, 0x3d, 0x3e,
 109                        0x43, 0x45, 0x46, 0x49, 0x4c, 0x51, 0x5e,
 110                        0x61, 0x6e, 0x73, 0x76, 0x79, 0x7a, 0x7c, 0x7f,
 111                };
 112                static const char sbestr[] = "corrected single";
 113                static const char dbestr[] = "uncorrectable double";
 114                static const char mbestr[] = "uncorrectable multiple";
 115
 116                if (!(address & 0x4))
 117                        syn = chksyn;                   /* Low bank. */
 118                else
 119                        syn = chksyn >> 16;             /* High bank. */
 120
 121                if (!(syn & KN0X_ESR_VLDLO)) {
 122                        /* Ack now, no rewrite will happen. */
 123                        dec_ecc_be_ack();
 124
 125                        fmt = KERN_ALERT "%s" "invalid\n";
 126                } else {
 127                        sngl = syn & KN0X_ESR_SNGLO;
 128                        syn &= KN0X_ESR_SYNLO;
 129
 130                        /*
 131                         * Multibit errors may be tagged incorrectly;
 132                         * check the syndrome explicitly.
 133                         */
 134                        for (i = 0; i < 25; i++)
 135                                if (syn == data_mbit[i])
 136                                        break;
 137
 138                        if (i < 25) {
 139                                status = mbestr;
 140                        } else if (!sngl) {
 141                                status = dbestr;
 142                        } else {
 143                                volatile u32 *ptr =
 144                                        (void *)CKSEG1ADDR(address);
 145
 146                                *ptr = *ptr;            /* Rewrite. */
 147                                iob();
 148
 149                                status = sbestr;
 150                                action = MIPS_BE_DISCARD;
 151                        }
 152
 153                        /* Ack now, now we've rewritten (or not). */
 154                        dec_ecc_be_ack();
 155
 156                        if (syn && syn == (syn & -syn)) {
 157                                if (syn == 0x01) {
 158                                        fmt = KERN_ALERT "%s"
 159                                              "%#04x -- %s bit error "
 160                                              "at check bit C%s\n";
 161                                        xbit = "X";
 162                                } else {
 163                                        fmt = KERN_ALERT "%s"
 164                                              "%#04x -- %s bit error "
 165                                              "at check bit C%s%u\n";
 166                                }
 167                                i = syn >> 2;
 168                        } else {
 169                                for (i = 0; i < 32; i++)
 170                                        if (syn == data_sbit[i])
 171                                                break;
 172                                if (i < 32)
 173                                        fmt = KERN_ALERT "%s"
 174                                              "%#04x -- %s bit error "
 175                                              "at data bit D%s%u\n";
 176                                else
 177                                        fmt = KERN_ALERT "%s"
 178                                              "%#04x -- %s bit error\n";
 179                        }
 180                }
 181        }
 182
 183        if (action != MIPS_BE_FIXUP)
 184                printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n",
 185                        kind, agent, cycle, event, address);
 186
 187        if (action != MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR)
 188                printk(fmt, "  ECC syndrome ", syn, status, xbit, i);
 189
 190        return action;
 191}
 192
 193int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup)
 194{
 195        return dec_ecc_be_backend(regs, is_fixup, 0);
 196}
 197
 198irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id)
 199{
 200        struct pt_regs *regs = get_irq_regs();
 201
 202        int action = dec_ecc_be_backend(regs, 0, 1);
 203
 204        if (action == MIPS_BE_DISCARD)
 205                return IRQ_HANDLED;
 206
 207        /*
 208         * FIXME: Find the affected processes and kill them, otherwise
 209         * we must die.
 210         *
 211         * The interrupt is asynchronously delivered thus EPC and RA
 212         * may be irrelevant, but are printed for a reference.
 213         */
 214        printk(KERN_ALERT "Fatal bus interrupt, epc == %08lx, ra == %08lx\n",
 215               regs->cp0_epc, regs->regs[31]);
 216        die("Unrecoverable bus error", regs);
 217}
 218
 219
 220/*
 221 * Initialization differs a bit between KN02 and KN03/KN05, so we
 222 * need two variants.  Once set up, all systems can be handled the
 223 * same way.
 224 */
 225static inline void dec_kn02_be_init(void)
 226{
 227        volatile u32 *csr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CSR);
 228
 229        kn0x_erraddr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_ERRADDR);
 230        kn0x_chksyn = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CHKSYN);
 231
 232        /* Preset write-only bits of the Control Register cache. */
 233        cached_kn02_csr = *csr | KN02_CSR_LEDS;
 234
 235        /* Set normal ECC detection and generation. */
 236        cached_kn02_csr &= ~(KN02_CSR_DIAGCHK | KN02_CSR_DIAGGEN);
 237        /* Enable ECC correction. */
 238        cached_kn02_csr |= KN02_CSR_CORRECT;
 239        *csr = cached_kn02_csr;
 240        iob();
 241}
 242
 243static inline void dec_kn03_be_init(void)
 244{
 245        volatile u32 *mcr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_MCR);
 246        volatile u32 *mbcs = (void *)CKSEG1ADDR(KN4K_SLOT_BASE + KN4K_MB_CSR);
 247
 248        kn0x_erraddr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_ERRADDR);
 249        kn0x_chksyn = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_CHKSYN);
 250
 251        /*
 252         * Set normal ECC detection and generation, enable ECC correction.
 253         * For KN05 we also need to make sure EE (?) is enabled in the MB.
 254         * Otherwise DBE/IBE exceptions would be masked but bus error
 255         * interrupts would still arrive, resulting in an inevitable crash
 256         * if get_dbe() triggers one.
 257         */
 258        *mcr = (*mcr & ~(KN03_MCR_DIAGCHK | KN03_MCR_DIAGGEN)) |
 259               KN03_MCR_CORRECT;
 260        if (current_cpu_type() == CPU_R4400SC)
 261                *mbcs |= KN4K_MB_CSR_EE;
 262        fast_iob();
 263}
 264
 265void __init dec_ecc_be_init(void)
 266{
 267        if (mips_machtype == MACH_DS5000_200)
 268                dec_kn02_be_init();
 269        else
 270                dec_kn03_be_init();
 271
 272        /* Clear any leftover errors from the firmware. */
 273        dec_ecc_be_ack();
 274}
 275