linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD          1
#define CMCI_POLL_INTERVAL      (30 * HZ)
#define CMCI_STORM_INTERVAL     (HZ)
#define CMCI_STORM_THRESHOLD    15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
        CMCI_STORM_NONE,
        CMCI_STORM_ACTIVE,
        CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

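/*
 * Check whether CMCI is supported and enabled on this CPU. Returns nonzero
 * if MCG_CAP advertises MCG_CMCI_P and stores the number of MCA banks
 * (capped at MAX_NR_BANKS) in *banks.
 */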
static int cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return 0;

        /*
         * Vendor check is not strictly needed, but the initialization
         * is vendor keyed and this makes sure none of the backdoors
         * are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
        if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
        return !!(cap & MCG_CMCI_P);
}

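/*
 * Check whether Local MCE (LMCE) is usable: the CPU must advertise both
 * MCG_SER_P and MCG_LMCE_P, and the BIOS must have opted in through
 * IA32_FEATURE_CONTROL.
 */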
static bool lmce_supported(void)
{
        u64 tmp;

        if (mca_cfg.lmce_disabled)
                return false;

        rdmsrl(MSR_IA32_MCG_CAP, tmp);

        /*
         * LMCE depends on recovery support in the processor. Hence both
         * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
         */
        if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
                   (MCG_SER_P | MCG_LMCE_P))
                return false;

        /*
         * BIOS should indicate support for LMCE by setting bit 20 in
         * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
         * generate a #GP fault.
         */
        rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
        if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
                   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
                return true;

        return false;
}

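/*
 * Poll the CMCI banks owned by this CPU. Returns false if no storm is in
 * progress; otherwise polls the owned banks and maintains the storm
 * backoff counter.
 */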
bool mce_intel_cmci_poll(void)
{
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
                return false;

        /*
         * Reset the counter if we've logged an error in the last poll
         * during the storm.
         */
        if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
                this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
        else
                this_cpu_dec(cmci_backoff_cnt);

        return true;
}

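/*
 * Forget any CMCI storm state for @cpu when it goes away: if it was
 * counted as being in an active storm, drop it from the global count.
 */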
void mce_intel_hcpu_update(unsigned long cpu)
{
        if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
                atomic_dec(&cmci_storm_on_cpus);

        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

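/*
 * Switch all banks owned by this CPU between interrupt mode (on) and
 * poll mode (off) by toggling MCI_CTL2_CMCI_EN.
 */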
static void cmci_toggle_interrupt_mode(bool on)
{
        unsigned long flags, *owned;
        int bank;
        u64 val;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        owned = this_cpu_ptr(mce_banks_owned);
        for_each_set_bit(bank, owned, MAX_NR_BANKS) {
                rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

                if (on)
                        val |= MCI_CTL2_CMCI_EN;
                else
                        val &= ~MCI_CTL2_CMCI_EN;

                wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

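/*
 * Adjust the MCE polling interval according to the per-CPU storm state:
 * poll at CMCI_STORM_INTERVAL while a storm is active, at
 * CMCI_POLL_INTERVAL until all CPUs have left storm mode, and otherwise
 * leave the caller's interval alone.
 */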
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
        if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
            (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
                mce_notify_irq();
                return CMCI_STORM_INTERVAL;
        }

        switch (__this_cpu_read(cmci_storm_state)) {
        case CMCI_STORM_ACTIVE:

                /*
                 * We switch back to interrupt mode once the poll timer has
                 * silenced itself. That means no events recorded and the timer
                 * interval is back to our poll interval.
                 */
                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
                if (!atomic_sub_return(1, &cmci_storm_on_cpus))
                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");

                /* FALLTHROUGH */

        case CMCI_STORM_SUBSIDED:
                /*
                 * We wait for all CPUs to go back to SUBSIDED state. When that
                 * happens we switch back to interrupt mode.
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
                        cmci_toggle_interrupt_mode(true);
                        cmci_recheck();
                }
                return CMCI_POLL_INTERVAL;
        default:

                /* We have shiny weather. Let the poll do whatever it thinks. */
                return interval;
        }
}

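/*
 * Count CMCIs arriving within CMCI_STORM_INTERVAL. Once more than
 * CMCI_STORM_THRESHOLD interrupts land in that window, declare a storm:
 * switch the owned banks to poll mode and kick the poll timer. Returns
 * true if a storm was already active or has just been detected.
 */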
static bool cmci_storm_detect(void)
{
        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
        unsigned long ts = __this_cpu_read(cmci_time_stamp);
        unsigned long now = jiffies;
        int r;

        if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
                return true;

        if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
                cnt++;
        } else {
                cnt = 1;
                __this_cpu_write(cmci_time_stamp, now);
        }
        __this_cpu_write(cmci_storm_cnt, cnt);

        if (cnt <= CMCI_STORM_THRESHOLD)
                return false;

        cmci_toggle_interrupt_mode(false);
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
        mce_timer_kick(CMCI_STORM_INTERVAL);
        this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

        if (r == 1)
                pr_notice("CMCI storm detected: switching to poll mode\n");
        return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        if (cmci_storm_detect())
                return;

        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
        unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
        unsigned long flags;
        int i;
        int bios_wrong_thresh = 0;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (test_bit(i, owned))
                        continue;

                /* Skip banks in firmware first mode */
                if (test_bit(i, mce_banks_ce_disabled))
                        continue;

                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Already owned by someone else? */
                if (val & MCI_CTL2_CMCI_EN) {
                        clear_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        continue;
                }

                if (!mca_cfg.bios_cmci_threshold) {
                        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                        val |= CMCI_THRESHOLD;
                } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                        /*
                         * If bios_cmci_threshold boot option was specified
                         * but the threshold is zero, we'll try to initialize
                         * it to 1.
                         */
                        bios_zero_thresh = 1;
                        val |= CMCI_THRESHOLD;
                }

                val |= MCI_CTL2_CMCI_EN;
                wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & MCI_CTL2_CMCI_EN) {
                        set_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        /*
                         * We are able to set thresholds for some banks that
                         * had a threshold of 0. This means the BIOS has not
                         * set the thresholds properly or does not work with
                         * this boot option. Note down now and report later.
                         */
                        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
                                        (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                                bios_wrong_thresh = 1;
                } else {
                        WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
                }
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;

        local_irq_save(flags);
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
        u64 val;

        if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
                return;
        rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_EN;
        wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++)
                __cmci_disable_bank(i);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same number */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

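/* Disable CMCI on a single bank if this CPU owns it. */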
void cmci_disable_bank(int bank)
{
        int banks;
        unsigned long flags;

        if (!cmci_supported(&banks))
                return;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        __cmci_disable_bank(bank);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

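/*
 * Set up CMCI on this CPU: install the threshold interrupt handler, take
 * ownership of banks via cmci_discover() and program the CMCI LVT entry
 * in the local APIC.
 */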
static void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs with the APIC still disabled, but that's
         * OK because only the vector is set up. We still do another
         * check for the banks later for CPU #0 just to make sure
         * we don't miss any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}

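/* Enable Local MCE delivery via MCG_EXT_CTL if both CPU and BIOS support it. */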
static void intel_init_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

        if (!(val & MCG_EXT_CTL_LMCE_EN))
                wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

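/* Turn Local MCE delivery back off in MCG_EXT_CTL. */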
static void intel_clear_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
        val &= ~MCG_EXT_CTL_LMCE_EN;
        wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

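/*
 * Set up all Intel specific MCE features for this CPU: thermal
 * monitoring, CMCI and LMCE.
 */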
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_thermal(c);
        intel_init_cmci();
        intel_init_lmce();
}

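/* Undo Intel specific MCE feature setup; currently only LMCE needs clearing. */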
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
        intel_clear_lmce();
}