linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD          1
#define CMCI_POLL_INTERVAL      (30 * HZ)
#define CMCI_STORM_INTERVAL     (1 * HZ)
#define CMCI_STORM_THRESHOLD    15

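/*
 * Per CPU storm bookkeeping: cmci_time_stamp marks the start of the
 * current CMCI_STORM_INTERVAL window, cmci_storm_cnt counts the CMCIs
 * seen in that window, and cmci_storm_state holds this CPU's position
 * in the storm state machine below. More than CMCI_STORM_THRESHOLD
 * events inside one window are treated as a storm and the CPU falls
 * back to polling every CMCI_POLL_INTERVAL (see cmci_storm_detect()).
 */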
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

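/*
 * Storm states: NONE while CMCI is enabled and events are handled in
 * the interrupt handler, ACTIVE once cmci_storm_detect() has disabled
 * CMCI on this CPU and switched it to polling, SUBSIDED once a full
 * poll interval has passed quietly and we are waiting for the other
 * storming CPUs to calm down (see mce_intel_adjust_timer()).
 */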
enum {
        CMCI_STORM_NONE,
        CMCI_STORM_ACTIVE,
        CMCI_STORM_SUBSIDED,
};

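/* Number of CPUs currently in the ACTIVE storm state. */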
static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return 0;

        /*
         * The vendor check is not strictly needed, but the early
         * initialization is vendor keyed and this makes sure none
         * of the backdoors are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
        if (!cpu_has_apic || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
        return !!(cap & MCG_CMCI_P);
}

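/*
 * Called from the generic MCE poll timer path. Only polls the banks
 * this CPU owns while it is in storm (poll) mode; otherwise CMCI
 * delivery is enabled and there is nothing to do here.
 */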
void mce_intel_cmci_poll(void)
{
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
                return;
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
}

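/*
 * A CPU is going offline: drop it from the global storm count if it
 * was storming and reset its storm state so the bookkeeping stays
 * consistent when it comes back.
 */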
void mce_intel_hcpu_update(unsigned long cpu)
{
        if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
                atomic_dec(&cmci_storm_on_cpus);

        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

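/*
 * Called from the MCE timer code to pick the next timer interval.
 * Handles the transitions out of a storm: ACTIVE -> SUBSIDED once the
 * poll timer has been quiet, and SUBSIDED -> NONE (re-enabling CMCI)
 * once no CPU is storming anymore.
 */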
unsigned long mce_intel_adjust_timer(unsigned long interval)
{
        int r;

        if (interval < CMCI_POLL_INTERVAL)
                return interval;

        switch (__this_cpu_read(cmci_storm_state)) {
        case CMCI_STORM_ACTIVE:
                /*
                 * We switch back to interrupt mode once the poll timer has
                 * silenced itself. That means no events recorded and the
                 * timer interval is back to our poll interval.
                 */
                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
                r = atomic_sub_return(1, &cmci_storm_on_cpus);
                if (r == 0)
                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");
                /* FALLTHROUGH */

        case CMCI_STORM_SUBSIDED:
                /*
                 * We wait for all cpus to go back to SUBSIDED
                 * state. When that happens we switch back to
                 * interrupt mode.
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
                        cmci_reenable();
                        cmci_recheck();
                }
                return CMCI_POLL_INTERVAL;
        default:
                /*
                 * We have shiny weather. Let the poll do whatever it
                 * thinks.
                 */
                return interval;
        }
}

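/*
 * Called from the CMCI interrupt handler. Returns true if this CPU is
 * already storming or has just crossed CMCI_STORM_THRESHOLD events in
 * a CMCI_STORM_INTERVAL window; in the latter case CMCI is disabled on
 * the owned banks and the poll timer is kicked to take over.
 */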
static bool cmci_storm_detect(void)
{
        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
        unsigned long ts = __this_cpu_read(cmci_time_stamp);
        unsigned long now = jiffies;
        int r;

        if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
                return true;

        if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
                cnt++;
        } else {
                cnt = 1;
                __this_cpu_write(cmci_time_stamp, now);
        }
        __this_cpu_write(cmci_storm_cnt, cnt);

        if (cnt <= CMCI_STORM_THRESHOLD)
                return false;

        cmci_clear();
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
        mce_timer_kick(CMCI_POLL_INTERVAL);

        if (r == 1)
                pr_notice("CMCI storm detected: switching to poll mode\n");
        return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        if (cmci_storm_detect())
                return;
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
        mce_notify_irq();
}

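/*
 * Bank ownership protocol: each bank's IA32_MCi_CTL2 MSR holds the CMCI
 * enable bit (MCI_CTL2_CMCI_EN) and the corrected-error count threshold
 * (MCI_CTL2_CMCI_THRESHOLD_MASK). A bank whose enable bit is already set
 * is a shared bank owned by another CPU; if we set the bit and it does
 * not stick, the bank does not support CMCI and stays in mce_poll_banks.
 */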
/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
        unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
        unsigned long flags;
        int i;
        int bios_wrong_thresh = 0;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (test_bit(i, owned))
                        continue;

                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Already owned by someone else? */
                if (val & MCI_CTL2_CMCI_EN) {
                        clear_bit(i, owned);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                        continue;
                }

                if (!mca_cfg.bios_cmci_threshold) {
                        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                        val |= CMCI_THRESHOLD;
                } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                        /*
                         * If the bios_cmci_threshold boot option was
                         * specified but the threshold is zero, we'll try
                         * to initialize it to 1.
                         */
                        bios_zero_thresh = 1;
                        val |= CMCI_THRESHOLD;
                }

                val |= MCI_CTL2_CMCI_EN;
                wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & MCI_CTL2_CMCI_EN) {
                        set_bit(i, owned);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                        /*
                         * We are able to set thresholds for some banks that
                         * had a threshold of 0. This means the BIOS has not
                         * set the thresholds properly or does not work with
                         * this boot option. Note down now and report later.
                         */
                        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
                                        (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                                bios_wrong_thresh = 1;
                } else {
                        WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
                }
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;
        local_irq_save(flags);
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
        local_irq_restore(flags);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;
        u64 val;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
                        continue;
                /* Disable CMCI */
                rdmsrl(MSR_IA32_MCx_CTL2(i), val);
                val &= ~MCI_CTL2_CMCI_EN;
                wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                __clear_bit(i, __get_cpu_var(mce_banks_owned));
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same number of banks */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

static void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs with the APIC still disabled, but that's
         * ok because only the vector is set up. We still do another
         * check of the banks later for CPU #0 just to make sure we
         * don't miss any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}

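/*
 * Entry point called from the vendor specific part of the generic MCE
 * setup for an Intel CPU: initialize thermal monitoring and CMCI.
 */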
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_thermal(c);
        intel_init_cmci();
}