linux/arch/x86/kernel/cpu/mcheck/therm_throt.c
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

/* How long to wait between reporting thermal events (300 * HZ = 300 seconds) */
#define CHECK_INTERVAL          (300 * HZ)

#define THERMAL_THROTTLING_EVENT        0
#define POWER_LIMIT_EVENT               1

/*
 * Current thermal event state:
 */
struct _thermal_state {
        bool                    new_event;
        int                     event;
        u64                     next_check;
        unsigned long           count;
        unsigned long           last_count;
};

struct thermal_state {
        struct _thermal_state core_throttle;
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
        struct _thermal_state core_thresh0;
        struct _thermal_state core_thresh1;
        struct _thermal_state pkg_thresh0;
        struct _thermal_state pkg_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle package threshold interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/* Callback to query whether the package notify callback implements its
 * own rate control; returns true if it does */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
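
/*
 * Illustrative sketch (hypothetical, for documentation only): a platform
 * driver such as x86_pkg_temp_thermal would typically hook the package
 * callbacks roughly as below; the handler name and body are made up:
 *
 *	static int example_pkg_notify(__u64 msr_val)
 *	{
 *		pr_info("package thermal threshold event: 0x%llx\n", msr_val);
 *		return 0;
 *	}
 *
 *	platform_thermal_package_notify = example_pkg_notify;
 */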

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en  = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)                         \
        static DEVICE_ATTR(_name, 0444,                                 \
                           therm_throt_device_show_##_name,             \
                                   NULL)                                \

#define define_therm_throt_device_show_func(event, name)                \
                                                                        \
static ssize_t therm_throt_device_show_##event##_##name(                \
                        struct device *dev,                             \
                        struct device_attribute *attr,                  \
                        char *buf)                                      \
{                                                                       \
        unsigned int cpu = dev->id;                                     \
        ssize_t ret;                                                    \
                                                                        \
        preempt_disable();      /* CPU hotplug */                       \
        if (cpu_online(cpu)) {                                          \
                ret = sprintf(buf, "%lu\n",                             \
                              per_cpu(thermal_state, cpu).event.name);  \
        } else                                                          \
                ret = 0;                                                \
        preempt_enable();                                               \
                                                                        \
        return ret;                                                     \
}
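
/*
 * For reference (illustrative, not generated here): instantiating
 * define_therm_throt_device_show_func(core_throttle, count) together with
 * define_therm_throt_device_one_ro(core_throttle_count) produces a
 * therm_throt_device_show_core_throttle_count() show routine wrapped in a
 * read-only (0444) DEVICE_ATTR. The counters end up under
 * /sys/devices/system/cpu/cpuN/thermal_throttle/.
 */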

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

static struct attribute *thermal_throttle_attrs[] = {
        &dev_attr_core_throttle_count.attr,
        NULL
};

static struct attribute_group thermal_attr_group = {
        .attrs  = thermal_throttle_attrs,
        .name   = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL      0
#define PACKAGE_LEVEL   1

/**
 * therm_throt_process - Process a thermal throttling event from the interrupt
 * @new_event: Whether the condition is currently asserted (boolean), since
 *             the thermal interrupt normally fires both when the thermal
 *             event begins and once it has ended.
 * @event:     THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT.
 * @level:     CORE_LEVEL or PACKAGE_LEVEL.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from the previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        bool old_event;
        u64 now;
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

        now = get_jiffies_64();
        if (level == CORE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->core_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->core_power_limit;
                else
                        return 0;
        } else if (level == PACKAGE_LEVEL) {
                if (event == THERMAL_THROTTLING_EVENT)
                        state = &pstate->package_throttle;
                else if (event == POWER_LIMIT_EVENT)
                        state = &pstate->package_power_limit;
                else
                        return 0;
        } else
                return 0;

        old_event = state->new_event;
        state->new_event = new_event;

        if (new_event)
                state->count++;

        if (time_before64(now, state->next_check) &&
                        state->count != state->last_count)
                return 0;

        state->next_check = now + CHECK_INTERVAL;
        state->last_count = state->count;

        /* if we just entered the thermal event */
        if (new_event) {
                if (event == THERMAL_THROTTLING_EVENT)
                        printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package",
                                state->count);
                return 1;
        }
        if (old_event) {
                if (event == THERMAL_THROTTLING_EVENT)
                        printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package");
                return 1;
        }

        return 0;
}
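
/*
 * Worked example of the rate limiting above (illustrative): with
 * CHECK_INTERVAL = 300 * HZ, a continuously throttling core produces at
 * most one KERN_CRIT message per 300-second window, while state->count
 * still counts every event for the sysfs counters.
 */
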
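/*
 * Rate-limit core/package threshold notifications: return 1 and start a
 * new CHECK_INTERVAL window once the previous window for this level/event
 * pair has expired, 0 while still inside the window.
 */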
static int thresh_event_valid(int level, int event)
{
        struct _thermal_state *state;
        unsigned int this_cpu = smp_processor_id();
        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
        u64 now = get_jiffies_64();

        if (level == PACKAGE_LEVEL)
                state = (event == 0) ? &pstate->pkg_thresh0 :
                                                &pstate->pkg_thresh1;
        else
                state = (event == 0) ? &pstate->core_thresh0 :
                                                &pstate->core_thresh1;

        if (time_before64(now, state->next_check))
                return 0;

        state->next_check = now + CHECK_INTERVAL;

        return 1;
}
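
/*
 * Power limit notification (PLN) interrupts are disabled by default;
 * booting with "int_pln_enable" on the kernel command line enables them
 * (see the MSR_IA32_THERM_INTERRUPT setup in intel_init_thermal()).
 */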
static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
        int_pln_enable = true;

        return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
        int err;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
        if (err)
                return err;

        if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_core_power_limit_count.attr,
                                              thermal_attr_group.name);
        if (cpu_has(c, X86_FEATURE_PTS)) {
                err = sysfs_add_file_to_group(&dev->kobj,
                                              &dev_attr_package_throttle_count.attr,
                                              thermal_attr_group.name);
                if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                        err = sysfs_add_file_to_group(&dev->kobj,
                                        &dev_attr_package_power_limit_count.attr,
                                        thermal_attr_group.name);
        }

        return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
        sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
                              unsigned long action,
                              void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct device *dev;
        int err = 0;

        dev = get_cpu_device(cpu);

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                mutex_lock(&therm_cpu_lock);
                err = thermal_throttle_add_dev(dev, cpu);
                mutex_unlock(&therm_cpu_lock);
                WARN_ON(err);
                break;
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                mutex_lock(&therm_cpu_lock);
                thermal_throttle_remove_dev(dev);
                mutex_unlock(&therm_cpu_lock);
                break;
        }
        return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier =
{
        .notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
        unsigned int cpu = 0;
        int err;

        if (!atomic_read(&therm_throt_en))
                return 0;

        register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
        mutex_lock(&therm_cpu_lock);
#endif
        /* connect live CPUs to sysfs */
        for_each_online_cpu(cpu) {
                err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
                WARN_ON(err);
        }
#ifdef CONFIG_HOTPLUG_CPU
        mutex_unlock(&therm_cpu_lock);
#endif

        return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

static void notify_package_thresholds(__u64 msr_val)
{
        bool notify_thres_0 = false;
        bool notify_thres_1 = false;

        if (!platform_thermal_package_notify)
                return;

        /* lower threshold check */
        if (msr_val & THERM_LOG_THRESHOLD0)
                notify_thres_0 = true;
        /* higher threshold check */
        if (msr_val & THERM_LOG_THRESHOLD1)
                notify_thres_1 = true;

        if (!notify_thres_0 && !notify_thres_1)
                return;

        if (platform_thermal_package_rate_control &&
                platform_thermal_package_rate_control()) {
                /* Rate control is implemented in callback */
                platform_thermal_package_notify(msr_val);
                return;
        }

        /* lower threshold reached */
        if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
                platform_thermal_package_notify(msr_val);
        /* higher threshold reached */
        if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
                platform_thermal_package_notify(msr_val);
}

static void notify_thresholds(__u64 msr_val)
{
        /* check whether the interrupt handler is defined;
         * otherwise simply return
         */
        if (!platform_thermal_notify)
                return;

        /* lower threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD0) &&
                        thresh_event_valid(CORE_LEVEL, 0))
                platform_thermal_notify(msr_val);
        /* higher threshold reached */
        if ((msr_val & THERM_LOG_THRESHOLD1) &&
                        thresh_event_valid(CORE_LEVEL, 1))
                platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
        __u64 msr_val;

        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

        /* Check for violation of core thermal thresholds */
        notify_thresholds(msr_val);

        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
                mce_log_therm_throt_event(msr_val);

        if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
                                        CORE_LEVEL);

        if (this_cpu_has(X86_FEATURE_PTS)) {
                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
                /* check violations of package thermal thresholds */
                notify_package_thresholds(msr_val);
                therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
                                        THERMAL_THROTTLING_EVENT,
                                        PACKAGE_LEVEL);
                if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
                        therm_throt_process(msr_val &
                                        PACKAGE_THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
                                        PACKAGE_LEVEL);
        }
}

static void unexpected_thermal_interrupt(void)
{
        printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
                        smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

static inline void __smp_thermal_interrupt(void)
{
        inc_irq_stat(irq_thermal_count);
        smp_thermal_vector();
}

asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
        entering_irq();
        __smp_thermal_interrupt();
        exiting_ack_irq();
}

asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
{
        entering_irq();
        trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
        __smp_thermal_interrupt();
        trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
        exiting_ack_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
        if (!cpu_has_apic)
                return 0;
        if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
                return 0;
        return 1;
}

void __init mcheck_intel_therm_init(void)
{
        /*
         * This function is only called on the boot CPU. Save the initial
         * thermal LVT value on the BSP and use it later to restore the
         * thermal LVT entry that the BIOS programmed on the APs.
         */
        if (intel_thermal_supported(&boot_cpu_data))
                lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
        unsigned int cpu = smp_processor_id();
        int tm2 = 0;
        u32 l, h;

        if (!intel_thermal_supported(c))
                return;

        /*
         * First check if it's enabled already, in which case there might
         * be some SMM goo which handles it, so we can't even put a handler
         * since it might be delivered via SMI already:
         */
        rdmsr(MSR_IA32_MISC_ENABLE, l, h);

        h = lvtthmr_init;
        /*
         * The initial value of thermal LVT entries on all APs always reads
         * 0x10000 because APs are woken up by the BSP issuing an
         * INIT-SIPI-SIPI sequence to them and LVT registers are reset to 0s
         * except for the mask bits, which are set to 1s when APs receive
         * the INIT IPI. If BIOS takes over the thermal interrupt and sets
         * its delivery mode to SMI (not fixed), we restore the value that
         * BIOS programmed on the AP, based on the BSP's value saved above,
         * since BIOS always sets the same value for all threads/cores.
         */
        if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
                apic_write(APIC_LVTTHMR, lvtthmr_init);

        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                printk(KERN_DEBUG
                       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
                return;
        }

        /* Check whether a vector already exists */
        if (h & APIC_VECTOR_MASK) {
                printk(KERN_DEBUG
                       "CPU%d: Thermal LVT vector (%#x) already installed\n",
                       cpu, (h & APIC_VECTOR_MASK));
                return;
        }

        /* early Pentium M models use a different method for enabling TM2 */
        if (cpu_has(c, X86_FEATURE_TM2)) {
                if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
                        rdmsr(MSR_THERM2_CTL, l, h);
                        if (l & MSR_THERM2_CTL_TM_SELECT)
                                tm2 = 1;
                } else if (l & MSR_IA32_MISC_ENABLE_TM2)
                        tm2 = 1;
        }

        /* We'll mask the thermal vector in the lapic until we're ready: */
        h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
        apic_write(APIC_LVTTHMR, h);

        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
        if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                        (l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
        else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                        l | (THERM_INT_LOW_ENABLE
                        | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
        else
                wrmsr(MSR_IA32_THERM_INTERRUPT,
                      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

        if (cpu_has(c, X86_FEATURE_PTS)) {
                rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
                if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                                (l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE))
                                & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
                else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                                l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE
                                | PACKAGE_THERM_INT_PLN_ENABLE), h);
                else
                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                              l | (PACKAGE_THERM_INT_LOW_ENABLE
                                | PACKAGE_THERM_INT_HIGH_ENABLE), h);
        }

        smp_thermal_vector = intel_thermal_interrupt;

        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
        wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

        /* Unmask the thermal vector: */
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

        printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
                       tm2 ? "TM2" : "TM1");

        /* enable thermal throttle processing */
        atomic_set(&therm_throt_en, 1);
}