linux/drivers/thermal/intel/therm_throt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Thermal throttle event support code (such as syslog messaging and rate
   4 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
   5 *
   6 * This allows consistent reporting of CPU thermal throttle events.
   7 *
   8 * Maintains a counter in /sys that keeps track of the number of thermal
   9 * events, such that the user knows how bad the thermal problem might be
  10 * (since the logging to syslog is rate limited).
  11 *
  12 * Author: Dmitriy Zavin (dmitriyz@google.com)
  13 *
  14 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  15 *          Inspired by Ross Biro's and Al Borchers' counter code.
  16 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/thermal.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/irq.h>
#include <asm/msr.h>

#include "thermal_interrupt.h"
  36
  37/* How long to wait between reporting thermal events */
  38#define CHECK_INTERVAL          (300 * HZ)
  39
  40#define THERMAL_THROTTLING_EVENT        0
  41#define POWER_LIMIT_EVENT               1
  42
  43/**
  44 * struct _thermal_state - Represent the current thermal event state
  45 * @next_check:                 Stores the next timestamp, when it is allowed
  46 *                              to log the next warning message.
  47 * @last_interrupt_time:        Stores the timestamp for the last threshold
  48 *                              high event.
  49 * @therm_work:                 Delayed workqueue structure
  50 * @count:                      Stores the current running count for thermal
  51 *                              or power threshold interrupts.
  52 * @last_count:                 Stores the previous running count for thermal
  53 *                              or power threshold interrupts.
  54 * @max_time_ms:                This shows the maximum amount of time CPU was
  55 *                              in throttled state for a single thermal
  56 *                              threshold high to low state.
  57 * @total_time_ms:              This is a cumulative time during which CPU was
  58 *                              in the throttled state.
  59 * @rate_control_active:        Set when a throttling message is logged.
  60 *                              This is used for the purpose of rate-control.
  61 * @new_event:                  Stores the last high/low status of the
  62 *                              THERM_STATUS_PROCHOT or
  63 *                              THERM_STATUS_POWER_LIMIT.
  64 * @level:                      Stores whether this _thermal_state instance is
  65 *                              for a CORE level or for PACKAGE level.
  66 * @sample_index:               Index for storing the next sample in the buffer
  67 *                              temp_samples[].
  68 * @sample_count:               Total number of samples collected in the buffer
  69 *                              temp_samples[].
  70 * @average:                    The last moving average of temperature samples
  71 * @baseline_temp:              Temperature at which thermal threshold high
  72 *                              interrupt was generated.
  73 * @temp_samples:               Storage for temperature samples to calculate
  74 *                              moving average.
  75 *
  76 * This structure is used to represent data related to thermal state for a CPU.
  77 * There is a separate storage for core and package level for each CPU.
  78 */
  79struct _thermal_state {
  80        u64                     next_check;
  81        u64                     last_interrupt_time;
  82        struct delayed_work     therm_work;
  83        unsigned long           count;
  84        unsigned long           last_count;
  85        unsigned long           max_time_ms;
  86        unsigned long           total_time_ms;
  87        bool                    rate_control_active;
  88        bool                    new_event;
  89        u8                      level;
  90        u8                      sample_index;
  91        u8                      sample_count;
  92        u8                      average;
  93        u8                      baseline_temp;
  94        u8                      temp_samples[3];
  95};
  96
  97struct thermal_state {
  98        struct _thermal_state core_throttle;
  99        struct _thermal_state core_power_limit;
 100        struct _thermal_state package_throttle;
 101        struct _thermal_state package_power_limit;
 102        struct _thermal_state core_thresh0;
 103        struct _thermal_state core_thresh1;
 104        struct _thermal_state pkg_thresh0;
 105        struct _thermal_state pkg_thresh1;
 106};
 107
 108/* Callback to handle core threshold interrupts */
 109int (*platform_thermal_notify)(__u64 msr_val);
 110EXPORT_SYMBOL(platform_thermal_notify);
 111
 112/* Callback to handle core package threshold_interrupts */
 113int (*platform_thermal_package_notify)(__u64 msr_val);
 114EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
 115
 116/* Callback support of rate control, return true, if
 117 * callback has rate control */
 118bool (*platform_thermal_package_rate_control)(void);
 119EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
 120
 121
 122static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 123
 124static atomic_t therm_throt_en  = ATOMIC_INIT(0);
 125
 126static u32 lvtthmr_init __read_mostly;
 127
 128#ifdef CONFIG_SYSFS
 129#define define_therm_throt_device_one_ro(_name)                         \
 130        static DEVICE_ATTR(_name, 0444,                                 \
 131                           therm_throt_device_show_##_name,             \
 132                                   NULL)                                \
 133
 134#define define_therm_throt_device_show_func(event, name)                \
 135                                                                        \
 136static ssize_t therm_throt_device_show_##event##_##name(                \
 137                        struct device *dev,                             \
 138                        struct device_attribute *attr,                  \
 139                        char *buf)                                      \
 140{                                                                       \
 141        unsigned int cpu = dev->id;                                     \
 142        ssize_t ret;                                                    \
 143                                                                        \
 144        preempt_disable();      /* CPU hotplug */                       \
 145        if (cpu_online(cpu)) {                                          \
 146                ret = sprintf(buf, "%lu\n",                             \
 147                              per_cpu(thermal_state, cpu).event.name);  \
 148        } else                                                          \
 149                ret = 0;                                                \
 150        preempt_enable();                                               \
 151                                                                        \
 152        return ret;                                                     \
 153}
 154
 155define_therm_throt_device_show_func(core_throttle, count);
 156define_therm_throt_device_one_ro(core_throttle_count);
 157
 158define_therm_throt_device_show_func(core_power_limit, count);
 159define_therm_throt_device_one_ro(core_power_limit_count);
 160
 161define_therm_throt_device_show_func(package_throttle, count);
 162define_therm_throt_device_one_ro(package_throttle_count);
 163
 164define_therm_throt_device_show_func(package_power_limit, count);
 165define_therm_throt_device_one_ro(package_power_limit_count);
 166
 167define_therm_throt_device_show_func(core_throttle, max_time_ms);
 168define_therm_throt_device_one_ro(core_throttle_max_time_ms);
 169
 170define_therm_throt_device_show_func(package_throttle, max_time_ms);
 171define_therm_throt_device_one_ro(package_throttle_max_time_ms);
 172
 173define_therm_throt_device_show_func(core_throttle, total_time_ms);
 174define_therm_throt_device_one_ro(core_throttle_total_time_ms);
 175
 176define_therm_throt_device_show_func(package_throttle, total_time_ms);
 177define_therm_throt_device_one_ro(package_throttle_total_time_ms);
 178
 179static struct attribute *thermal_throttle_attrs[] = {
 180        &dev_attr_core_throttle_count.attr,
 181        &dev_attr_core_throttle_max_time_ms.attr,
 182        &dev_attr_core_throttle_total_time_ms.attr,
 183        NULL
 184};
 185
 186static const struct attribute_group thermal_attr_group = {
 187        .attrs  = thermal_throttle_attrs,
 188        .name   = "thermal_throttle"
 189};
 190#endif /* CONFIG_SYSFS */
 191
 192#define CORE_LEVEL      0
 193#define PACKAGE_LEVEL   1
 194
 195#define THERM_THROT_POLL_INTERVAL       HZ
 196#define THERM_STATUS_PROCHOT_LOG        BIT(1)
 197
 198#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
 199#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
 200
 201static void clear_therm_status_log(int level)
 202{
 203        int msr;
 204        u64 mask, msr_val;
 205
 206        if (level == CORE_LEVEL) {
 207                msr  = MSR_IA32_THERM_STATUS;
 208                mask = THERM_STATUS_CLEAR_CORE_MASK;
 209        } else {
 210                msr  = MSR_IA32_PACKAGE_THERM_STATUS;
 211                mask = THERM_STATUS_CLEAR_PKG_MASK;
 212        }
 213
 214        rdmsrl(msr, msr_val);
 215        msr_val &= mask;
 216        wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
 217}
 218
 219static void get_therm_status(int level, bool *proc_hot, u8 *temp)
 220{
 221        int msr;
 222        u64 msr_val;
 223
 224        if (level == CORE_LEVEL)
 225                msr = MSR_IA32_THERM_STATUS;
 226        else
 227                msr = MSR_IA32_PACKAGE_THERM_STATUS;
 228
 229        rdmsrl(msr, msr_val);
 230        if (msr_val & THERM_STATUS_PROCHOT_LOG)
 231                *proc_hot = true;
 232        else
 233                *proc_hot = false;
 234
 235        *temp = (msr_val >> 16) & 0x7F;
 236}
 237
 238static void __maybe_unused throttle_active_work(struct work_struct *work)
 239{
 240        struct _thermal_state *state = container_of(to_delayed_work(work),
 241                                                struct _thermal_state, therm_work);
 242        unsigned int i, avg, this_cpu = smp_processor_id();
 243        u64 now = get_jiffies_64();
 244        bool hot;
 245        u8 temp;
 246
 247        get_therm_status(state->level, &hot, &temp);
 248        /* temperature value is offset from the max so lesser means hotter */
 249        if (!hot && temp > state->baseline_temp) {
 250                if (state->rate_control_active)
 251                        pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
 252                                this_cpu,
 253                                state->level == CORE_LEVEL ? "Core" : "Package",
 254                                state->count);
 255
 256                state->rate_control_active = false;
 257                return;
 258        }
 259
 260        if (time_before64(now, state->next_check) &&
 261                          state->rate_control_active)
 262                goto re_arm;
 263
 264        state->next_check = now + CHECK_INTERVAL;
 265
 266        if (state->count != state->last_count) {
 267                /* There was one new thermal interrupt */
 268                state->last_count = state->count;
 269                state->average = 0;
 270                state->sample_count = 0;
 271                state->sample_index = 0;
 272        }
 273
 274        state->temp_samples[state->sample_index] = temp;
 275        state->sample_count++;
 276        state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
 277        if (state->sample_count < ARRAY_SIZE(state->temp_samples))
 278                goto re_arm;
 279
 280        avg = 0;
 281        for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
 282                avg += state->temp_samples[i];
 283
 284        avg /= ARRAY_SIZE(state->temp_samples);
 285
 286        if (state->average > avg) {
 287                pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
 288                        this_cpu,
 289                        state->level == CORE_LEVEL ? "Core" : "Package",
 290                        state->count);
 291                state->rate_control_active = true;
 292        }
 293
 294        state->average = avg;
 295
 296re_arm:
 297        clear_therm_status_log(state->level);
 298        schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
 299}
 300
 301/***
 302 * therm_throt_process - Process thermal throttling event from interrupt
 303 * @curr: Whether the condition is current or not (boolean), since the
 304 *        thermal interrupt normally gets called both when the thermal
 305 *        event begins and once the event has ended.
 306 *
 307 * This function is called by the thermal interrupt after the
 308 * IRQ has been acknowledged.
 309 *
 310 * It will take care of rate limiting and printing messages to the syslog.
 311 */
 312static void therm_throt_process(bool new_event, int event, int level)
 313{
 314        struct _thermal_state *state;
 315        unsigned int this_cpu = smp_processor_id();
 316        bool old_event;
 317        u64 now;
 318        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
 319
 320        now = get_jiffies_64();
 321        if (level == CORE_LEVEL) {
 322                if (event == THERMAL_THROTTLING_EVENT)
 323                        state = &pstate->core_throttle;
 324                else if (event == POWER_LIMIT_EVENT)
 325                        state = &pstate->core_power_limit;
 326                else
 327                        return;
 328        } else if (level == PACKAGE_LEVEL) {
 329                if (event == THERMAL_THROTTLING_EVENT)
 330                        state = &pstate->package_throttle;
 331                else if (event == POWER_LIMIT_EVENT)
 332                        state = &pstate->package_power_limit;
 333                else
 334                        return;
 335        } else
 336                return;
 337
 338        old_event = state->new_event;
 339        state->new_event = new_event;
 340
 341        if (new_event)
 342                state->count++;
 343
 344        if (event != THERMAL_THROTTLING_EVENT)
 345                return;
 346
 347        if (new_event && !state->last_interrupt_time) {
 348                bool hot;
 349                u8 temp;
 350
 351                get_therm_status(state->level, &hot, &temp);
 352                /*
 353                 * Ignore short temperature spike as the system is not close
 354                 * to PROCHOT. 10C offset is large enough to ignore. It is
 355                 * already dropped from the high threshold temperature.
 356                 */
 357                if (temp > 10)
 358                        return;
 359
 360                state->baseline_temp = temp;
 361                state->last_interrupt_time = now;
 362                schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
 363        } else if (old_event && state->last_interrupt_time) {
 364                unsigned long throttle_time;
 365
 366                throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
 367                if (throttle_time > state->max_time_ms)
 368                        state->max_time_ms = throttle_time;
 369                state->total_time_ms += throttle_time;
 370                state->last_interrupt_time = 0;
 371        }
 372}
 373
 374static int thresh_event_valid(int level, int event)
 375{
 376        struct _thermal_state *state;
 377        unsigned int this_cpu = smp_processor_id();
 378        struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
 379        u64 now = get_jiffies_64();
 380
 381        if (level == PACKAGE_LEVEL)
 382                state = (event == 0) ? &pstate->pkg_thresh0 :
 383                                                &pstate->pkg_thresh1;
 384        else
 385                state = (event == 0) ? &pstate->core_thresh0 :
 386                                                &pstate->core_thresh1;
 387
 388        if (time_before64(now, state->next_check))
 389                return 0;
 390
 391        state->next_check = now + CHECK_INTERVAL;
 392
 393        return 1;
 394}
 395
 396static bool int_pln_enable;
 397static int __init int_pln_enable_setup(char *s)
 398{
 399        int_pln_enable = true;
 400
 401        return 1;
 402}
 403__setup("int_pln_enable", int_pln_enable_setup);
 404
 405#ifdef CONFIG_SYSFS
 406/* Add/Remove thermal_throttle interface for CPU device: */
 407static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
 408{
 409        int err;
 410        struct cpuinfo_x86 *c = &cpu_data(cpu);
 411
 412        err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
 413        if (err)
 414                return err;
 415
 416        if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
 417                err = sysfs_add_file_to_group(&dev->kobj,
 418                                              &dev_attr_core_power_limit_count.attr,
 419                                              thermal_attr_group.name);
 420                if (err)
 421                        goto del_group;
 422        }
 423
 424        if (cpu_has(c, X86_FEATURE_PTS)) {
 425                err = sysfs_add_file_to_group(&dev->kobj,
 426                                              &dev_attr_package_throttle_count.attr,
 427                                              thermal_attr_group.name);
 428                if (err)
 429                        goto del_group;
 430
 431                err = sysfs_add_file_to_group(&dev->kobj,
 432                                              &dev_attr_package_throttle_max_time_ms.attr,
 433                                              thermal_attr_group.name);
 434                if (err)
 435                        goto del_group;
 436
 437                err = sysfs_add_file_to_group(&dev->kobj,
 438                                              &dev_attr_package_throttle_total_time_ms.attr,
 439                                              thermal_attr_group.name);
 440                if (err)
 441                        goto del_group;
 442
 443                if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
 444                        err = sysfs_add_file_to_group(&dev->kobj,
 445                                        &dev_attr_package_power_limit_count.attr,
 446                                        thermal_attr_group.name);
 447                        if (err)
 448                                goto del_group;
 449                }
 450        }
 451
 452        return 0;
 453
 454del_group:
 455        sysfs_remove_group(&dev->kobj, &thermal_attr_group);
 456
 457        return err;
 458}
 459
 460static void thermal_throttle_remove_dev(struct device *dev)
 461{
 462        sysfs_remove_group(&dev->kobj, &thermal_attr_group);
 463}
 464
 465/* Get notified when a cpu comes on/off. Be hotplug friendly. */
 466static int thermal_throttle_online(unsigned int cpu)
 467{
 468        struct thermal_state *state = &per_cpu(thermal_state, cpu);
 469        struct device *dev = get_cpu_device(cpu);
 470        u32 l;
 471
 472        state->package_throttle.level = PACKAGE_LEVEL;
 473        state->core_throttle.level = CORE_LEVEL;
 474
 475        INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
 476        INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
 477
 478        /* Unmask the thermal vector after the above workqueues are initialized. */
 479        l = apic_read(APIC_LVTTHMR);
 480        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 481
 482        return thermal_throttle_add_dev(dev, cpu);
 483}
 484
 485static int thermal_throttle_offline(unsigned int cpu)
 486{
 487        struct thermal_state *state = &per_cpu(thermal_state, cpu);
 488        struct device *dev = get_cpu_device(cpu);
 489        u32 l;
 490
 491        /* Mask the thermal vector before draining evtl. pending work */
 492        l = apic_read(APIC_LVTTHMR);
 493        apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
 494
 495        cancel_delayed_work_sync(&state->package_throttle.therm_work);
 496        cancel_delayed_work_sync(&state->core_throttle.therm_work);
 497
 498        state->package_throttle.rate_control_active = false;
 499        state->core_throttle.rate_control_active = false;
 500
 501        thermal_throttle_remove_dev(dev);
 502        return 0;
 503}
 504
 505static __init int thermal_throttle_init_device(void)
 506{
 507        int ret;
 508
 509        if (!atomic_read(&therm_throt_en))
 510                return 0;
 511
 512        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
 513                                thermal_throttle_online,
 514                                thermal_throttle_offline);
 515        return ret < 0 ? ret : 0;
 516}
 517device_initcall(thermal_throttle_init_device);
 518
 519#endif /* CONFIG_SYSFS */
 520
 521static void notify_package_thresholds(__u64 msr_val)
 522{
 523        bool notify_thres_0 = false;
 524        bool notify_thres_1 = false;
 525
 526        if (!platform_thermal_package_notify)
 527                return;
 528
 529        /* lower threshold check */
 530        if (msr_val & THERM_LOG_THRESHOLD0)
 531                notify_thres_0 = true;
 532        /* higher threshold check */
 533        if (msr_val & THERM_LOG_THRESHOLD1)
 534                notify_thres_1 = true;
 535
 536        if (!notify_thres_0 && !notify_thres_1)
 537                return;
 538
 539        if (platform_thermal_package_rate_control &&
 540                platform_thermal_package_rate_control()) {
 541                /* Rate control is implemented in callback */
 542                platform_thermal_package_notify(msr_val);
 543                return;
 544        }
 545
 546        /* lower threshold reached */
 547        if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
 548                platform_thermal_package_notify(msr_val);
 549        /* higher threshold reached */
 550        if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
 551                platform_thermal_package_notify(msr_val);
 552}
 553
 554static void notify_thresholds(__u64 msr_val)
 555{
 556        /* check whether the interrupt handler is defined;
 557         * otherwise simply return
 558         */
 559        if (!platform_thermal_notify)
 560                return;
 561
 562        /* lower threshold reached */
 563        if ((msr_val & THERM_LOG_THRESHOLD0) &&
 564                        thresh_event_valid(CORE_LEVEL, 0))
 565                platform_thermal_notify(msr_val);
 566        /* higher threshold reached */
 567        if ((msr_val & THERM_LOG_THRESHOLD1) &&
 568                        thresh_event_valid(CORE_LEVEL, 1))
 569                platform_thermal_notify(msr_val);
 570}
 571
 572void __weak notify_hwp_interrupt(void)
 573{
 574        wrmsrl_safe(MSR_HWP_STATUS, 0);
 575}
 576
 577/* Thermal transition interrupt handler */
 578void intel_thermal_interrupt(void)
 579{
 580        __u64 msr_val;
 581
 582        if (static_cpu_has(X86_FEATURE_HWP))
 583                notify_hwp_interrupt();
 584
 585        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 586
 587        /* Check for violation of core thermal thresholds*/
 588        notify_thresholds(msr_val);
 589
 590        therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
 591                            THERMAL_THROTTLING_EVENT,
 592                            CORE_LEVEL);
 593
 594        if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 595                therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 596                                        POWER_LIMIT_EVENT,
 597                                        CORE_LEVEL);
 598
 599        if (this_cpu_has(X86_FEATURE_PTS)) {
 600                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 601                /* check violations of package thermal thresholds */
 602                notify_package_thresholds(msr_val);
 603                therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 604                                        THERMAL_THROTTLING_EVENT,
 605                                        PACKAGE_LEVEL);
 606                if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 607                        therm_throt_process(msr_val &
 608                                        PACKAGE_THERM_STATUS_POWER_LIMIT,
 609                                        POWER_LIMIT_EVENT,
 610                                        PACKAGE_LEVEL);
 611        }
 612}
 613
 614/* Thermal monitoring depends on APIC, ACPI and clock modulation */
 615static int intel_thermal_supported(struct cpuinfo_x86 *c)
 616{
 617        if (!boot_cpu_has(X86_FEATURE_APIC))
 618                return 0;
 619        if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
 620                return 0;
 621        return 1;
 622}
 623
 624bool x86_thermal_enabled(void)
 625{
 626        return atomic_read(&therm_throt_en);
 627}
 628
 629void __init therm_lvt_init(void)
 630{
 631        /*
 632         * This function is only called on boot CPU. Save the init thermal
 633         * LVT value on BSP and use that value to restore APs' thermal LVT
 634         * entry BIOS programmed later
 635         */
 636        if (intel_thermal_supported(&boot_cpu_data))
 637                lvtthmr_init = apic_read(APIC_LVTTHMR);
 638}
 639
 640void intel_init_thermal(struct cpuinfo_x86 *c)
 641{
 642        unsigned int cpu = smp_processor_id();
 643        int tm2 = 0;
 644        u32 l, h;
 645
 646        if (!intel_thermal_supported(c))
 647                return;
 648
 649        /*
 650         * First check if its enabled already, in which case there might
 651         * be some SMM goo which handles it, so we can't even put a handler
 652         * since it might be delivered via SMI already:
 653         */
 654        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 655
 656        h = lvtthmr_init;
 657        /*
 658         * The initial value of thermal LVT entries on all APs always reads
 659         * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 660         * sequence to them and LVT registers are reset to 0s except for
 661         * the mask bits which are set to 1s when APs receive INIT IPI.
 662         * If BIOS takes over the thermal interrupt and sets its interrupt
 663         * delivery mode to SMI (not fixed), it restores the value that the
 664         * BIOS has programmed on AP based on BSP's info we saved since BIOS
 665         * is always setting the same value for all threads/cores.
 666         */
 667        if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
 668                apic_write(APIC_LVTTHMR, lvtthmr_init);
 669
 670
 671        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 672                if (system_state == SYSTEM_BOOTING)
 673                        pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
 674                return;
 675        }
 676
 677        /* early Pentium M models use different method for enabling TM2 */
 678        if (cpu_has(c, X86_FEATURE_TM2)) {
 679                if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
 680                        rdmsr(MSR_THERM2_CTL, l, h);
 681                        if (l & MSR_THERM2_CTL_TM_SELECT)
 682                                tm2 = 1;
 683                } else if (l & MSR_IA32_MISC_ENABLE_TM2)
 684                        tm2 = 1;
 685        }
 686
 687        /* We'll mask the thermal vector in the lapic till we're ready: */
 688        h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
 689        apic_write(APIC_LVTTHMR, h);
 690
 691        rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
 692        if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
 693                wrmsr(MSR_IA32_THERM_INTERRUPT,
 694                        (l | (THERM_INT_LOW_ENABLE
 695                        | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
 696        else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 697                wrmsr(MSR_IA32_THERM_INTERRUPT,
 698                        l | (THERM_INT_LOW_ENABLE
 699                        | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
 700        else
 701                wrmsr(MSR_IA32_THERM_INTERRUPT,
 702                      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
 703
 704        if (cpu_has(c, X86_FEATURE_PTS)) {
 705                rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 706                if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
 707                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 708                                (l | (PACKAGE_THERM_INT_LOW_ENABLE
 709                                | PACKAGE_THERM_INT_HIGH_ENABLE))
 710                                & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
 711                else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 712                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 713                                l | (PACKAGE_THERM_INT_LOW_ENABLE
 714                                | PACKAGE_THERM_INT_HIGH_ENABLE
 715                                | PACKAGE_THERM_INT_PLN_ENABLE), h);
 716                else
 717                        wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 718                              l | (PACKAGE_THERM_INT_LOW_ENABLE
 719                                | PACKAGE_THERM_INT_HIGH_ENABLE), h);
 720        }
 721
 722        rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 723        wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 724
 725        pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
 726                      tm2 ? "TM2" : "TM1");
 727
 728        /* enable thermal throttle processing */
 729        atomic_set(&therm_throt_en, 1);
 730}
 731