linux/arch/s390/kernel/nmi.c
<<
>>
Prefs
   1/*
   2 *   Machine check handler
   3 *
   4 *    Copyright IBM Corp. 2000, 2009
   5 *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
   6 *               Martin Schwidefsky <schwidefsky@de.ibm.com>,
   7 *               Cornelia Huck <cornelia.huck@de.ibm.com>,
   8 *               Heiko Carstens <heiko.carstens@de.ibm.com>,
   9 */
  10
  11#include <linux/kernel_stat.h>
  12#include <linux/init.h>
  13#include <linux/errno.h>
  14#include <linux/hardirq.h>
  15#include <linux/time.h>
  16#include <linux/module.h>
  17#include <asm/lowcore.h>
  18#include <asm/smp.h>
  19#include <asm/stp.h>
  20#include <asm/cputime.h>
  21#include <asm/nmi.h>
  22#include <asm/crw.h>
  23#include <asm/switch_to.h>
  24#include <asm/ctl_reg.h>
  25
  26struct mcck_struct {
            /*
             * Machine check work recorded by s390_do_machine_check() and
             * carried out later by s390_handle_mcck(), which takes a copy
             * of the record and then zeroes it.
             */
  27        unsigned int kill_task : 1;         /* terminate the current task */
  28        unsigned int channel_report : 1;    /* call crw_handle_channel_report() */
  29        unsigned int warning : 1;           /* warning machine check was seen */
  30        unsigned int stp_queue : 1;         /* call stp_queue_work() */
  31        unsigned long mcck_code;            /* interruption code stored along with kill_task */
  32};
  33
    /* One record per CPU; always accessed through this_cpu_ptr(&cpu_mcck). */
  34static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
  35
  36static void s390_handle_damage(void)
  37{
            /*
             * Terminal path for machine checks that cannot be recovered from.
             * Calls smp_send_stop() (presumably stopping the other CPUs) and
             * then disabled_wait(), handing it this function's return
             * address, i.e. the place in the caller that detected the damage.
             * The endless loop keeps this function from ever returning to its
             * caller, even if disabled_wait() were to return.
             */
  38        smp_send_stop();
  39        disabled_wait((unsigned long) __builtin_return_address(0));
  40        while (1);
  41}
  42
  43/*
  44 * Main machine check handler function. Will be called with interrupts enabled
  45 * or disabled and machine checks enabled or disabled.
     *
     * Takes a snapshot of this CPU's pending machine check work (cpu_mcck),
     * clears the record together with the CIF_MCCK_PENDING flag, and then
     * carries out the requested actions: channel report handling, a one-time
     * warning notification, STP work queueing and, last, termination of the
     * current task.
  46 */
  47void s390_handle_mcck(void)
  48{
  49        unsigned long flags;
  50        struct mcck_struct mcck;
  51
  52        /*
  53         * Disable machine checks and get the current state of accumulated
  54         * machine checks. Afterwards delete the old state and enable machine
  55         * checks again.
             * The actions below work on the local copy only, so they run after
             * machine checks and the previous interrupt state are restored.
  56         */
  57        local_irq_save(flags);
  58        local_mcck_disable();
  59        mcck = *this_cpu_ptr(&cpu_mcck);
  60        memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
  61        clear_cpu_flag(CIF_MCCK_PENDING);
  62        local_mcck_enable();
  63        local_irq_restore(flags);
  64
  65        if (mcck.channel_report)
  66                crw_handle_channel_report();
  67        /*
  68         * A warning may remain for a prolonged period on the bare iron.
  69         * (actually until the machine is powered off, or the problem is gone)
  70         * So we just stop listening for the WARNING MCH and avoid continuously
  71         * being interrupted.  One caveat is however, that we must do this per
  72         * processor and cannot use the smp version of ctl_clear_bit().
  73         * On VM we only get one interrupt per virtually presented machine check.
  74         * Though one suffices, we may get one interrupt per (virtual) cpu.
  75         */
  76        if (mcck.warning) {     /* WARNING pending ? */
                    /*
                     * Function-local static, hence shared by all callers: the
                     * xchg() below lets only the first caller send SIGPWR.
                     */
  77                static int mchchk_wng_posted = 0;
  78
  79                /* Use single cpu clear, as we cannot handle smp here. */
  80                __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
  81                if (xchg(&mchchk_wng_posted, 1) == 0)
  82                        kill_cad_pid(SIGPWR, 1);
  83        }
  84        if (mcck.stp_queue)
  85                stp_queue_work();
            /*
             * Handled last: this path ends in do_exit(), so all other pending
             * work has to be dispatched before it. Interrupts are enabled
             * explicitly before the messages are printed and the task exits.
             */
  86        if (mcck.kill_task) {
  87                local_irq_enable();
  88                printk(KERN_EMERG "mcck: Terminating task because of machine "
  89                       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
  90                printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
  91                       current->comm, current->pid);
  92                do_exit(SIGSEGV);
  93        }
  94}
  95EXPORT_SYMBOL_GPL(s390_handle_mcck);
  96
  97/*
     * Reload the CPU registers from the save areas in the lowcore after a
     * machine check. The validity bits in @mci decide whether the interrupted
     * context may continue (return 0) or has to be terminated (return 1).
     *
     * Does not return at all when the control registers (mci.cr) or the old
     * PSW (mci.wp) are flagged as invalid: s390_handle_damage() is called.
     *
  98 * returns 0 if all registers could be validated
  99 * returns 1 otherwise
 100 */
 101static int notrace s390_validate_registers(union mci mci)
 102{
 103        int kill_task;
 104        u64 zero;
 105        void *fpt_save_area, *fpt_creg_save_area;
 106
 107        kill_task = 0;
 108        zero = 0;
 109
 110        if (!mci.gr) {
 111                /*
 112                 * General purpose registers couldn't be restored and have
 113                 * unknown contents. Process needs to be terminated.
 114                 */
 115                kill_task = 1;
 116        }
 117        if (!mci.fp) {
 118                /*
 119                 * Floating point registers can't be restored and
 120                 * therefore the process needs to be terminated.
 121                 */
 122                kill_task = 1;
 123        }
 124        fpt_save_area = &S390_lowcore.floating_pt_save_area;
 125        fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
 126        if (!mci.fc) {
 127                /*
 128                 * Floating point control register can't be restored.
 129                 * Task will be terminated.
                     * The register is still loaded, with zero, so that it
                     * holds a defined value.
 130                 */
 131                asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
 132                kill_task = 1;
 133        } else
                    /*
                     * NOTE(review): unlike the branch above, this asm has no
                     * memory operand for the save area it reads - confirm
                     * that this is intended.
                     */
 134                asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 135
 136        if (!MACHINE_HAS_VX) {
 137                /* Validate floating point registers */
                    /*
                     * All 16 registers are loaded from consecutive 8-byte
                     * slots of the save area, regardless of mci.fp.
                     */
 138                asm volatile(
 139                        "       ld      0,0(%0)\n"
 140                        "       ld      1,8(%0)\n"
 141                        "       ld      2,16(%0)\n"
 142                        "       ld      3,24(%0)\n"
 143                        "       ld      4,32(%0)\n"
 144                        "       ld      5,40(%0)\n"
 145                        "       ld      6,48(%0)\n"
 146                        "       ld      7,56(%0)\n"
 147                        "       ld      8,64(%0)\n"
 148                        "       ld      9,72(%0)\n"
 149                        "       ld      10,80(%0)\n"
 150                        "       ld      11,88(%0)\n"
 151                        "       ld      12,96(%0)\n"
 152                        "       ld      13,104(%0)\n"
 153                        "       ld      14,112(%0)\n"
 154                        "       ld      15,120(%0)\n"
 155                        : : "a" (fpt_save_area));
 156        } else {
 157                /* Validate vector registers */
 158                union ctlreg0 cr0;
 159
 160                if (!mci.vr) {
 161                        /*
 162                         * Vector registers can't be restored and therefore
 163                         * the process needs to be terminated.
 164                         */
 165                        kill_task = 1;
 166                }
                    /*
                     * Load control register 0 from its saved value with the
                     * afp and vx bits forced on, load the vector registers
                     * from the save area, then load the unmodified saved
                     * value again. The vlm instructions are emitted as raw
                     * opcodes (.word); general register 1 is used as the base
                     * register and therefore listed as clobbered.
                     * NOTE(review): cregs_save_area[0] is used here before
                     * mci.cr is checked further down - confirm this order.
                     */
 167                cr0.val = S390_lowcore.cregs_save_area[0];
 168                cr0.afp = cr0.vx = 1;
 169                __ctl_load(cr0.val, 0, 0);
 170                asm volatile(
 171                        "       la      1,%0\n"
 172                        "       .word   0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
 173                        "       .word   0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
 174                        : : "Q" (*(struct vx_array *)
 175                                 &S390_lowcore.vector_save_area) : "1");
 176                __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 177        }
 178        /* Validate access registers */
            /* They are loaded from the save area whether or not mci.ar is set. */
 179        asm volatile(
 180                "       lam     0,15,0(%0)"
 181                : : "a" (&S390_lowcore.access_regs_save_area));
 182        if (!mci.ar) {
 183                /*
 184                 * Access registers have unknown contents.
 185                 * Terminating task.
 186                 */
 187                kill_task = 1;
 188        }
 189        /* Validate control registers */
 190        if (!mci.cr) {
 191                /*
 192                 * Control registers have unknown contents.
 193                 * Can't recover and therefore stopping machine.
 194                 */
 195                s390_handle_damage();
 196        } else {
 197                asm volatile(
 198                        "       lctlg   0,15,0(%0)"
 199                        : : "a" (&S390_lowcore.cregs_save_area));
 200        }
 201        /*
 202         * We don't even try to validate the TOD register, since we simply
 203         * can't write something sensible into that register.
 204         */
 205        /*
 206         * See if we can validate the TOD programmable register with its
 207         * old contents (should be zero) otherwise set it to zero.
             * Both variants pass the value in general register 0, which is
             * why it is listed as clobbered.
 208         */
 209        if (!mci.pr)
 210                asm volatile(
 211                        "       sr      0,0\n"
 212                        "       sckpf"
 213                        : : : "0", "cc");
 214        else
 215                asm volatile(
 216                        "       l       0,0(%0)\n"
 217                        "       sckpf"
 218                        : : "a" (&S390_lowcore.tod_progreg_save_area)
 219                        : "0", "cc");
 220        /* Validate clock comparator register */
 221        set_clock_comparator(S390_lowcore.clock_comparator);
 222        /* Check if old PSW is valid */
 223        if (!mci.wp)
 224                /*
 225                 * Can't tell if we come from user or kernel mode
 226                 * -> stopping machine.
 227                 */
 228                s390_handle_damage();
 229
            /*
             * NOTE(review): ms, pm and ia presumably are the validity bits
             * for the remaining parts of the old PSW - confirm against the
             * definition of union mci. Any of them missing marks the task
             * for termination.
             */
 230        if (!mci.ms || !mci.pm || !mci.ia)
 231                kill_task = 1;
 232
 233        return kill_task;
 234}
 235
 236#define MAX_IPD_COUNT   29      /* limit for closely spaced recoverable checks */
 237#define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */
    /*
     * s390_do_machine_check() counts "processing backup" machine checks that
     * follow the previous one within MAX_IPD_TIME microseconds and stops the
     * machine once the count reaches MAX_IPD_COUNT; a longer gap restarts the
     * count at 1.
     */
 238
    /* Bit numbers tested in S390_lowcore.external_damage_code. */
 239#define ED_STP_ISLAND   6       /* External damage STP island check */
 240#define ED_STP_SYNC     7       /* External damage STP sync check */
 241
 242/*
 243 * machine check handler.
     *
     * Runs between nmi_enter() and nmi_exit(). Reads the machine check
     * interruption code from the lowcore and sorts the reported conditions
     * into two groups:
     *  - unrecoverable ones, which end in s390_handle_damage() and therefore
     *    never return;
     *  - ones that need follow-up work, which are recorded in this CPU's
     *    cpu_mcck record and flagged with CIF_MCCK_PENDING so that
     *    s390_handle_mcck() processes them later.
     *
     * @regs: registers of the interrupted context; only used to find out
     *        whether it was running in user mode.
 244 */
 245void notrace s390_do_machine_check(struct pt_regs *regs)
 246{
            /* Statics are shared by all CPUs; ipd_lock protects the IPD state. */
 247        static int ipd_count;
 248        static DEFINE_SPINLOCK(ipd_lock);
 249        static unsigned long long last_ipd;
 250        struct mcck_struct *mcck;
 251        unsigned long long tmp;
 252        union mci mci;
 253        int umode;
 254
 255        nmi_enter();
 256        inc_irq_stat(NMI_NMI);
 257        mci.val = S390_lowcore.mcck_interruption_code;
 258        mcck = this_cpu_ptr(&cpu_mcck);
            /* Sampled up front, before any register validation takes place. */
 259        umode = user_mode(regs);
 260
 261        if (mci.sd) {
 262                /* System damage -> stopping machine */
 263                s390_handle_damage();
 264        }
 265        if (mci.pd) {
 266                if (mci.b) {
 267                        /* Processing backup -> verify if we can survive this */
                            /*
                             * z_mcic: interruption code bits that must all be
                             * zero, o_mcic: bits that must all be one.
                             * Anything else is treated as damage.
                             */
 268                        u64 z_mcic, o_mcic, t_mcic;
 269                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
 270                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
 271                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
 272                                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
 273                                  1ULL<<16);
 274                        t_mcic = mci.val;
 275
 276                        if (((t_mcic & z_mcic) != 0) ||
 277                            ((t_mcic & o_mcic) != o_mcic)) {
 278                                s390_handle_damage();
 279                        }
 280
 281                        /*
 282                         * Nullifying exigent condition, therefore we might
 283                         * retry this instruction.
                             *
                             * Rate limit: the TOD clock delta since the last
                             * such event is shifted right by 12 and compared
                             * with MAX_IPD_TIME (microseconds). Events inside
                             * that window are counted, a longer gap restarts
                             * the count. Reaching MAX_IPD_COUNT stops the
                             * machine; s390_handle_damage() never returns, so
                             * the lock is simply left held on that path.
 284                         */
 285                        spin_lock(&ipd_lock);
 286                        tmp = get_tod_clock();
 287                        if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
 288                                ipd_count++;
 289                        else
 290                                ipd_count = 1;
 291                        last_ipd = tmp;
 292                        if (ipd_count == MAX_IPD_COUNT)
 293                                s390_handle_damage();
 294                        spin_unlock(&ipd_lock);
 295                } else {
 296                        /* Processing damage -> stopping machine */
 297                        s390_handle_damage();
 298                }
 299        }
 300        if (s390_validate_registers(mci)) {
 301                if (umode) {
 302                        /*
 303                         * Couldn't restore all register contents while in
 304                         * user mode -> mark task for termination.
                             * The interruption code is kept so that
                             * s390_handle_mcck() can print it.
 305                         */
 306                        mcck->kill_task = 1;
 307                        mcck->mcck_code = mci.val;
 308                        set_cpu_flag(CIF_MCCK_PENDING);
 309                } else {
 310                        /*
 311                         * Couldn't restore all register contents while in
 312                         * kernel mode -> stopping machine.
 313                         */
 314                        s390_handle_damage();
 315                }
 316        }
 317        if (mci.cd) {
 318                /* Timing facility damage */
 319                s390_handle_damage();
 320        }
 321        if (mci.ed && mci.ec) {
 322                /* External damage */
                    /*
                     * Only the two STP bits of the external damage code are
                     * looked at; follow-up work is requested when either
                     * check function returns non-zero.
                     */
 323                if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
 324                        mcck->stp_queue |= stp_sync_check();
 325                if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
 326                        mcck->stp_queue |= stp_island_check();
 327                if (mcck->stp_queue)
 328                        set_cpu_flag(CIF_MCCK_PENDING);
 329        }
 330        if (mci.se)
 331                /* Storage error uncorrected */
 332                s390_handle_damage();
 333        if (mci.ke)
 334                /* Storage key-error uncorrected */
 335                s390_handle_damage();
 336        if (mci.ds && mci.fa)
 337                /* Storage degradation */
 338                s390_handle_damage();
 339        if (mci.cp) {
 340                /* Channel report word pending */
 341                mcck->channel_report = 1;
 342                set_cpu_flag(CIF_MCCK_PENDING);
 343        }
 344        if (mci.w) {
 345                /* Warning pending */
 346                mcck->warning = 1;
 347                set_cpu_flag(CIF_MCCK_PENDING);
 348        }
 349        nmi_exit();
 350}
 351
 352static int __init machine_check_init(void)
 353{
            /*
             * Turn on three machine check subclasses by setting their enable
             * bits (25, 27 and 24) through ctl_set_bit(14, ...). Bit 24 is the
             * one s390_handle_mcck() clears again on the local CPU, with
             * __ctl_clear_bit(14, 24), once a warning has been received.
             * Registered as an early_initcall; always returns 0.
             */
 354        ctl_set_bit(14, 25);    /* enable external damage MCH */
 355        ctl_set_bit(14, 27);    /* enable system recovery MCH */
 356        ctl_set_bit(14, 24);    /* enable warning MCH */
 357        return 0;
 358}
 359early_initcall(machine_check_init);
 360