linux/arch/x86/kernel/cpu/mce/intel.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL whenever
 * we log an error in a poll. If no error was logged, we decrement it by one.
 * We signal the end of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD          1
#define CMCI_POLL_INTERVAL      (30 * HZ)
#define CMCI_STORM_INTERVAL     (HZ)
#define CMCI_STORM_THRESHOLD    15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
        CMCI_STORM_NONE,
        CMCI_STORM_ACTIVE,
        CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

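/*
 * Check whether CMCI is supported on this CPU: the vendor must be Intel,
 * the local APIC must provide the CMCI LVT entry, and MCG_CAP must
 * advertise MCG_CMCI_P. Also report the number of MCA banks in *banks.
 */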
static int cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return 0;

        /*
         * The vendor check is not strictly needed, but the rest of the
         * initialization is vendor-keyed and this makes sure none of the
         * backdoors are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
        if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
        return !!(cap & MCG_CMCI_P);
}

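/*
 * Check whether local machine check (LMCE) delivery is available: both
 * MCG_SER_P and MCG_LMCE_P must be set in MCG_CAP, and the BIOS must have
 * enabled LMCE in a locked IA32_FEATURE_CONTROL.
 */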
static bool lmce_supported(void)
{
        u64 tmp;

        if (mca_cfg.lmce_disabled)
                return false;

        rdmsrl(MSR_IA32_MCG_CAP, tmp);

        /*
         * LMCE depends on recovery support in the processor. Hence both
         * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
         */
        if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
                   (MCG_SER_P | MCG_LMCE_P))
                return false;

        /*
         * The BIOS should indicate support for LMCE by setting bit 20 in
         * IA32_FEATURE_CONTROL; without that, touching MCG_EXT_CTL will
         * generate a #GP fault.
         */
        rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
        if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
                   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
                return true;

        return false;
}

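/*
 * Poll the banks owned by this CPU while a CMCI storm is in progress.
 * Returns false if no storm is active, true if the poll was handled here.
 */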
bool mce_intel_cmci_poll(void)
{
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
                return false;

        /*
         * Reset the counter if we've logged an error in the last poll
         * during the storm.
         */
        if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
                this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
        else
                this_cpu_dec(cmci_backoff_cnt);

        return true;
}

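/*
 * A CPU is going offline: drop its contribution to the global storm count
 * if it was in a storm and reset its per-CPU storm state.
 */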
void mce_intel_hcpu_update(unsigned long cpu)
{
        if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
                atomic_dec(&cmci_storm_on_cpus);

        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

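/*
 * Enable or disable CMCI delivery (MCI_CTL2_CMCI_EN) on all banks owned
 * by this CPU.
 */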
static void cmci_toggle_interrupt_mode(bool on)
{
        unsigned long flags, *owned;
        int bank;
        u64 val;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        owned = this_cpu_ptr(mce_banks_owned);
        for_each_set_bit(bank, owned, MAX_NR_BANKS) {
                rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

                if (on)
                        val |= MCI_CTL2_CMCI_EN;
                else
                        val &= ~MCI_CTL2_CMCI_EN;

                wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

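/*
 * Decide the next poll interval for the machine check timer based on the
 * per-CPU storm state, and switch back to interrupt mode once every CPU
 * has left the storm.
 */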
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
        if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
            (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
                mce_notify_irq();
                return CMCI_STORM_INTERVAL;
        }

        switch (__this_cpu_read(cmci_storm_state)) {
        case CMCI_STORM_ACTIVE:

                /*
                 * We switch back to interrupt mode once the poll timer has
                 * silenced itself. That means no events recorded and the timer
                 * interval is back to our poll interval.
                 */
                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
                if (!atomic_sub_return(1, &cmci_storm_on_cpus))
                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");

                /* FALLTHROUGH */

        case CMCI_STORM_SUBSIDED:
                /*
                 * We wait for all CPUs to go back to SUBSIDED state. When that
                 * happens we switch back to interrupt mode.
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
                        cmci_toggle_interrupt_mode(true);
                        cmci_recheck();
                }
                return CMCI_POLL_INTERVAL;
        default:

                /* We have shiny weather. Let the poll do whatever it thinks. */
                return interval;
        }
}

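/*
 * Count CMCIs seen within CMCI_STORM_INTERVAL. Once more than
 * CMCI_STORM_THRESHOLD interrupts arrive in that window, declare a storm:
 * disable CMCI on the banks owned by this CPU and fall back to polling.
 * Returns true while a storm is in progress.
 */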
static bool cmci_storm_detect(void)
{
        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
        unsigned long ts = __this_cpu_read(cmci_time_stamp);
        unsigned long now = jiffies;
        int r;

        if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
                return true;

        if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
                cnt++;
        } else {
                cnt = 1;
                __this_cpu_write(cmci_time_stamp, now);
        }
        __this_cpu_write(cmci_storm_cnt, cnt);

        if (cnt <= CMCI_STORM_THRESHOLD)
                return false;

        cmci_toggle_interrupt_mode(false);
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
        mce_timer_kick(CMCI_STORM_INTERVAL);
        this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

        if (r == 1)
                pr_notice("CMCI storm detected: switching to poll mode\n");
        return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        if (cmci_storm_detect())
                return;

        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
        unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
        unsigned long flags;
        int i;
        int bios_wrong_thresh = 0;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (test_bit(i, owned))
                        continue;

                /* Skip banks in firmware first mode */
                if (test_bit(i, mce_banks_ce_disabled))
                        continue;

                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Already owned by someone else? */
                if (val & MCI_CTL2_CMCI_EN) {
                        clear_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        continue;
                }

                if (!mca_cfg.bios_cmci_threshold) {
                        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                        val |= CMCI_THRESHOLD;
                } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                        /*
                         * If the bios_cmci_threshold boot option was specified
                         * but the threshold is zero, try to initialize it to 1.
                         */
                        bios_zero_thresh = 1;
                        val |= CMCI_THRESHOLD;
                }

                val |= MCI_CTL2_CMCI_EN;
                wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & MCI_CTL2_CMCI_EN) {
                        set_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        /*
                         * We are able to set thresholds for some banks that
                         * had a threshold of 0. This means the BIOS has not
                         * set the thresholds properly or does not work with
                         * this boot option. Note down now and report later.
                         */
                        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
                                        (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                                bios_wrong_thresh = 1;
                } else {
                        WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
                }
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check all the
 * CMCI owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;

        local_irq_save(flags);
        machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
        u64 val;

        if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
                return;
        rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_EN;
        wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++)
                __cmci_disable_bank(i);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same banks */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

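/* Disable CMCI on a single bank on this CPU. */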
void cmci_disable_bank(int bank)
{
        int banks;
        unsigned long flags;

        if (!cmci_supported(&banks))
                return;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        __cmci_disable_bank(bank);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

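/*
 * Set up CMCI on this CPU: install the threshold interrupt handler,
 * claim available banks and unmask the CMCI LVT entry in the local APIC.
 */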
static void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs with the APIC still disabled, but that's
         * OK because only the vector is set up here. We still do another
         * check of the banks later for CPU #0 to make sure we don't miss
         * any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}

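/* Enable local machine check (LMCE) delivery if the platform supports it. */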
static void intel_init_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

        if (!(val & MCG_EXT_CTL_LMCE_EN))
                wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

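/* Disable local machine check (LMCE) delivery on this CPU. */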
static void intel_clear_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
        val &= ~MCG_EXT_CTL_LMCE_EN;
        wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

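/*
 * Enable the Protected Processor Inventory Number (PPIN) on CPU models
 * known to implement MSR_PPIN_CTL, and set X86_FEATURE_INTEL_PPIN when
 * PPIN ends up enabled.
 */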
static void intel_ppin_init(struct cpuinfo_x86 *c)
{
        unsigned long long val;

        /*
         * Even though testing for the presence of the MSR would be enough,
         * we don't want to risk the situation where other models reuse this
         * MSR for other purposes.
         */
        switch (c->x86_model) {
        case INTEL_FAM6_IVYBRIDGE_X:
        case INTEL_FAM6_HASWELL_X:
        case INTEL_FAM6_BROADWELL_XEON_D:
        case INTEL_FAM6_BROADWELL_X:
        case INTEL_FAM6_SKYLAKE_X:
        case INTEL_FAM6_XEON_PHI_KNL:
        case INTEL_FAM6_XEON_PHI_KNM:

                if (rdmsrl_safe(MSR_PPIN_CTL, &val))
                        return;

                if ((val & 3UL) == 1UL) {
                        /* PPIN available but disabled: */
                        return;
                }

                /* If PPIN is disabled, but not locked, try to enable: */
                if (!(val & 3UL)) {
                        wrmsrl_safe(MSR_PPIN_CTL,  val | 2UL);
                        rdmsrl_safe(MSR_PPIN_CTL, &val);
                }

                if ((val & 3UL) == 2UL)
                        set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
        }
}

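/* Initialize all Intel specific MCE features on this CPU. */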
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_thermal(c);
        intel_init_cmci();
        intel_init_lmce();
        intel_ppin_init(c);
}

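/* Clear Intel specific MCE feature state (currently only LMCE) on this CPU. */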
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
        intel_clear_lmce();
}