linux/arch/s390/kernel/nmi.c
// SPDX-License-Identifier: GPL-2.0
/*
 *   Machine check handler
 *
 *    Copyright IBM Corp. 2000, 2009
 *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
 *               Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *               Cornelia Huck <cornelia.huck@de.ibm.com>,
 *               Heiko Carstens <heiko.carstens@de.ibm.com>,
 */

#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
#include <linux/kmemleak.h>
#include <linux/time.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <linux/export.h>
#include <asm/lowcore.h>
#include <asm/smp.h>
#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
#include <linux/kvm_host.h>

struct mcck_struct {
        unsigned int kill_task : 1;
        unsigned int channel_report : 1;
        unsigned int warning : 1;
        unsigned int stp_queue : 1;
        unsigned long mcck_code;
};

static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
static struct kmem_cache *mcesa_cache;
static unsigned long mcesa_origin_lc;

static inline int nmi_needs_mcesa(void)
{
        return MACHINE_HAS_VX || MACHINE_HAS_GS;
}

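/*
 * The extended save area holds the vector registers in its first 1K;
 * machines with the guarded storage facility additionally store the
 * guarded storage registers behind them, which bumps the size (and
 * required alignment) of the area to 2K.
 */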
static inline unsigned long nmi_get_mcesa_size(void)
{
        if (MACHINE_HAS_GS)
                return MCESA_MAX_SIZE;
        return MCESA_MIN_SIZE;
}

/*
 * The initial machine check extended save area for the boot CPU.
 * It will be replaced by nmi_init() with an allocated structure.
 * The structure is required for machine checks happening early in
 * the boot process.
 */
static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);

void __init nmi_alloc_boot_cpu(struct lowcore *lc)
{
        if (!nmi_needs_mcesa())
                return;
        lc->mcesad = (unsigned long) &boot_mcesa;
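        /*
         * With the guarded storage facility installed the low bits of
         * the save area designation encode the area's size as a power
         * of two (here ilog2(MCESA_MAX_SIZE) == 11), which the machine
         * consults when storing into the area.
         */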
        if (MACHINE_HAS_GS)
                lc->mcesad |= ilog2(MCESA_MAX_SIZE);
}

static int __init nmi_init(void)
{
        unsigned long origin, cr0, size;

        if (!nmi_needs_mcesa())
                return 0;
        size = nmi_get_mcesa_size();
        if (size > MCESA_MIN_SIZE)
                mcesa_origin_lc = ilog2(size);
        /* create slab cache for the machine-check-extended-save-areas */
        mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
        if (!mcesa_cache)
                panic("Couldn't create nmi save area cache");
        origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
        if (!origin)
                panic("Couldn't allocate nmi save area");
        /* The pointer is stored with mcesa_bits ORed in */
        kmemleak_not_leak((void *) origin);
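        /*
         * S390_lowcore lives at absolute address 0, so the
         * low-address-protection bit of control register 0 has to be
         * cleared before the mcesad field can be written; the old CR0
         * contents are restored right afterwards.
         */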
        __ctl_store(cr0, 0, 0);
        __ctl_clear_bit(0, 28); /* disable lowcore protection */
        /* Replace boot_mcesa on the boot CPU */
        S390_lowcore.mcesad = origin | mcesa_origin_lc;
        __ctl_load(cr0, 0, 0);
        return 0;
}
early_initcall(nmi_init);

int nmi_alloc_per_cpu(struct lowcore *lc)
{
        unsigned long origin;

        if (!nmi_needs_mcesa())
                return 0;
        origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
        if (!origin)
                return -ENOMEM;
        /* The pointer is stored with mcesa_bits ORed in */
        kmemleak_not_leak((void *) origin);
        lc->mcesad = origin | mcesa_origin_lc;
        return 0;
}

void nmi_free_per_cpu(struct lowcore *lc)
{
        if (!nmi_needs_mcesa())
                return;
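        /* Mask off the encoded size bits to recover the slab pointer. */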
        kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
}

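/*
 * Stop all other CPUs and put this one into a disabled wait: after an
 * unrecoverable machine check the system state can no longer be trusted.
 */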
static notrace void s390_handle_damage(void)
{
        smp_emergency_stop();
        disabled_wait();
        while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);

/*
 * Main machine check handler function. Will be called with interrupts enabled
 * or disabled and machine checks enabled or disabled.
 */
void s390_handle_mcck(void)
{
        unsigned long flags;
        struct mcck_struct mcck;

        /*
         * Disable machine checks and get the current state of accumulated
         * machine checks. Afterwards delete the old state and enable machine
         * checks again.
         */
        local_irq_save(flags);
        local_mcck_disable();
        mcck = *this_cpu_ptr(&cpu_mcck);
        memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
        clear_cpu_flag(CIF_MCCK_PENDING);
        local_mcck_enable();
        local_irq_restore(flags);

        if (mcck.channel_report)
                crw_handle_channel_report();
        /*
         * A warning may remain pending for a prolonged period on bare iron
         * (actually until the machine is powered off or the problem goes
         * away), so we just stop listening for the WARNING MCH to avoid
         * being interrupted continuously. One caveat, however: we must do
         * this per processor and cannot use the smp version of
         * ctl_clear_bit(). On VM we only get one interrupt per virtually
         * presented machine check. Though one suffices, we may get one
         * interrupt per (virtual) cpu.
         */
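        /*
         * SIGPWR is delivered to the cad_pid task (normally init), which
         * is expected to react to the power/warning condition, typically
         * by initiating an orderly shutdown.
         */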
        if (mcck.warning) {     /* WARNING pending ? */
                static int mchchk_wng_posted = 0;

                /* Use single cpu clear, as we cannot handle smp here. */
                __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
                if (xchg(&mchchk_wng_posted, 1) == 0)
                        kill_cad_pid(SIGPWR, 1);
        }
        if (mcck.stp_queue)
                stp_queue_work();
        if (mcck.kill_task) {
                local_irq_enable();
                printk(KERN_EMERG "mcck: Terminating task because of machine "
                       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
                printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
                       current->comm, current->pid);
                do_exit(SIGSEGV);
        }
}
EXPORT_SYMBOL_GPL(s390_handle_mcck);

/*
 * Returns 0 if all required registers could be validated or restored,
 * 1 if the current (user mode) task has to be terminated.
 */
static int notrace s390_check_registers(union mci mci, int umode)
{
        union ctlreg2 cr2;
        int kill_task;

        kill_task = 0;

        if (!mci.gr) {
                /*
                 * General purpose registers couldn't be restored and have
                 * unknown contents. Stop system or terminate process.
                 */
                if (!umode)
                        s390_handle_damage();
                kill_task = 1;
        }
        /* Check control registers */
        if (!mci.cr) {
                /*
                 * Control registers have unknown contents.
                 * Can't recover and therefore stopping machine.
                 */
                s390_handle_damage();
        }
        if (!mci.fp) {
                /*
                 * Floating point registers can't be restored. If the
                 * kernel currently uses floating point registers the
                 * system is stopped. If the process has its floating
                 * point registers loaded it is terminated.
                 */
                if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
                        s390_handle_damage();
                if (!test_cpu_flag(CIF_FPU))
                        kill_task = 1;
        }
        if (!mci.fc) {
                /*
                 * Floating point control register can't be restored.
                 * If the kernel currently uses the floating point
                 * registers and needs the FPC register the system is
                 * stopped. If the process has its floating point
                 * registers loaded it is terminated.
                 */
                if (S390_lowcore.fpu_flags & KERNEL_FPC)
                        s390_handle_damage();
                if (!test_cpu_flag(CIF_FPU))
                        kill_task = 1;
        }

        if (MACHINE_HAS_VX) {
                if (!mci.vr) {
                        /*
                         * Vector registers can't be restored. If the kernel
                         * currently uses vector registers the system is
                         * stopped. If the process has its vector registers
                         * loaded it is terminated.
                         */
                        if (S390_lowcore.fpu_flags & KERNEL_VXR)
                                s390_handle_damage();
                        if (!test_cpu_flag(CIF_FPU))
                                kill_task = 1;
                }
        }
        /* Check if access registers are valid */
        if (!mci.ar) {
                /*
                 * Access registers have unknown contents.
                 * Terminating task.
                 */
                kill_task = 1;
        }
        /* Check guarded storage registers */
        cr2.val = S390_lowcore.cregs_save_area[2];
        if (cr2.gse) {
                if (!mci.gs) {
                        /*
                         * Guarded storage registers can't be restored and
                         * the current process uses guarded storage.
                         * It has to be terminated.
                         */
                        kill_task = 1;
                }
        }
        /* Check if old PSW is valid */
        if (!mci.wp) {
                /*
                 * Can't tell if we come from user or kernel mode
                 * -> stopping machine.
                 */
                s390_handle_damage();
        }
        /* Check for invalid kernel instruction address */
        if (!mci.ia && !umode) {
                /*
                 * The instruction address got lost while running
                 * in the kernel -> stopping machine.
                 */
                s390_handle_damage();
        }

        if (!mci.ms || !mci.pm || !mci.ia)
                kill_task = 1;

        return kill_task;
}
NOKPROBE_SYMBOL(s390_check_registers);

/*
 * Backup the guest's machine check info to its description block
 */
static void notrace s390_backup_mcck_info(struct pt_regs *regs)
{
        struct mcck_volatile_info *mcck_backup;
        struct sie_page *sie_page;

        /* r14 contains the sie block, which was set in sie64a */
        struct kvm_s390_sie_block *sie_block =
                        (struct kvm_s390_sie_block *) regs->gprs[14];

        if (sie_block == NULL)
                /* Something's seriously wrong, stop system. */
                s390_handle_damage();

        sie_page = container_of(sie_block, struct sie_page, sie_block);
        mcck_backup = &sie_page->mcck_info;
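        /*
         * Strip the channel-report-pending and external-damage bits:
         * those subclasses are handled by the host and are never
         * presented to the guest (see MCCK_CODE_NO_GUEST below).
         */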
        mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
                                ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
        mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
        mcck_backup->failing_storage_address
                        = S390_lowcore.failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);

#define MAX_IPD_COUNT   29
#define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */

#define ED_STP_ISLAND   6       /* External damage STP island check */
#define ED_STP_SYNC     7       /* External damage STP sync check */

#define MCCK_CODE_NO_GUEST      (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)

/*
 * machine check handler.
 */
void notrace s390_do_machine_check(struct pt_regs *regs)
{
        static int ipd_count;
        static DEFINE_SPINLOCK(ipd_lock);
        static unsigned long long last_ipd;
        struct mcck_struct *mcck;
        unsigned long long tmp;
        union mci mci;
        unsigned long mcck_dam_code;

        nmi_enter();
        inc_irq_stat(NMI_NMI);
        mci.val = S390_lowcore.mcck_interruption_code;
        mcck = this_cpu_ptr(&cpu_mcck);

        if (mci.sd) {
                /* System damage -> stopping machine */
                s390_handle_damage();
        }

        /*
         * Reinject instruction-processing-damage machine checks, including
         * the delayed access exception, into the guest instead of damaging
         * the host if they happen while the guest is running.
         */
        if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.b) {
                        /* Processing backup -> verify if we can survive this */
                        u64 z_mcic, o_mcic, t_mcic;
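                        /*
                         * z_mcic: MCIC bits that must be zero -- system
                         * damage, timing-facility damage and delayed
                         * access exception.
                         * o_mcic: MCIC bits that must be one -- the
                         * validity bits for PSW MWP, mask/key, program
                         * mask and instruction address, for the floating
                         * point, general, control and access registers,
                         * storage logical validity, the TOD programmable
                         * register, the FP control register, the CPU
                         * timer and the clock comparator.
                         */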
                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
                                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
                                  1ULL<<16);
                        t_mcic = mci.val;

                        if (((t_mcic & z_mcic) != 0) ||
                            ((t_mcic & o_mcic) != o_mcic)) {
                                s390_handle_damage();
                        }

                        /*
                         * Nullifying exigent condition, therefore we might
                         * retry this instruction.
                         */
                        spin_lock(&ipd_lock);
                        tmp = get_tod_clock();
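                        /*
                         * TOD clock bit 51 ticks once per microsecond, so
                         * shifting the delta right by 12 yields microseconds
                         * for the comparison against MAX_IPD_TIME.
                         */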
                        if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
                                ipd_count++;
                        else
                                ipd_count = 1;
                        last_ipd = tmp;
                        if (ipd_count == MAX_IPD_COUNT)
                                s390_handle_damage();
                        spin_unlock(&ipd_lock);
                } else {
                        /* Processing damage -> stopping machine */
                        s390_handle_damage();
                }
        }
        if (s390_check_registers(mci, user_mode(regs))) {
                /*
                 * Couldn't restore all register contents for the
                 * user space process -> mark task for termination.
                 */
                mcck->kill_task = 1;
                mcck->mcck_code = mci.val;
                set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * Backup the machine check's info if it happens when the guest
         * is running.
         */
        if (test_cpu_flag(CIF_MCCK_GUEST))
                s390_backup_mcck_info(regs);

        if (mci.cd) {
                /* Timing facility damage */
                s390_handle_damage();
        }
        if (mci.ed && mci.ec) {
                /* External damage */
                if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
                        mcck->stp_queue |= stp_sync_check();
                if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
                        mcck->stp_queue |= stp_island_check();
                if (mcck->stp_queue)
                        set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * Reinject storage related machine checks into the guest if they
         * happen when the guest is running.
         */
        if (!test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.se)
                        /* Storage error uncorrected */
                        s390_handle_damage();
                if (mci.ke)
                        /* Storage key-error uncorrected */
                        s390_handle_damage();
                if (mci.ds && mci.fa)
                        /* Storage degradation */
                        s390_handle_damage();
        }
        if (mci.cp) {
                /* Channel report word pending */
                mcck->channel_report = 1;
                set_cpu_flag(CIF_MCCK_PENDING);
        }
        if (mci.w) {
                /* Warning pending */
                mcck->warning = 1;
                set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * If there are only Channel Report Pending and External Damage
         * machine checks, they will not be reinjected into the guest
         * because they refer to host conditions only.
         */
        mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
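        /*
         * Writing -EINTR into the SIE reason slot of sie64a's stack frame
         * makes sie64a return to the host with that reason code, so the
         * machine check can be handled and reinjected there.
         */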
        if (test_cpu_flag(CIF_MCCK_GUEST) &&
            (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
                /* Set exit reason code for host's later handling */
                *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
        }
        clear_cpu_flag(CIF_MCCK_GUEST);
        nmi_exit();
}
NOKPROBE_SYMBOL(s390_do_machine_check);

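/*
 * Control register 14 holds the machine-check subclass masks; enable the
 * subclasses this handler deals with.
 */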
static int __init machine_check_init(void)
{
        ctl_set_bit(14, 25);    /* enable external damage MCH */
        ctl_set_bit(14, 27);    /* enable system recovery MCH */
        ctl_set_bit(14, 24);    /* enable warning MCH */
        return 0;
}
early_initcall(machine_check_init);