linux/drivers/platform/x86/intel_ips.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009-2010 Intel Corporation
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc.,
  15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * The full GNU General Public License is included in this distribution in
  18 * the file called "COPYING".
  19 *
  20 * Authors:
  21 *      Jesse Barnes <jbarnes@virtuousgeek.org>
  22 */
  23
  24/*
  25 * Some Intel Ibex Peak based platforms support so-called "intelligent
  26 * power sharing", which allows the CPU and GPU to cooperate to maximize
  27 * performance within a given TDP (thermal design point).  This driver
  28 * performs the coordination between the CPU and GPU, monitors thermal and
  29 * power statistics in the platform, and initializes power monitoring
  30 * hardware.  It also provides a few tunables to control behavior.  Its
  31 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
  32 * by tracking power and thermal budget; secondarily it can boost turbo
  33 * performance by allocating more power or thermal budget to the CPU or GPU
  34 * based on available headroom and activity.
  35 *
  36 * The basic algorithm is driven by a 5s moving average of temperature.  If
  37 * thermal headroom is available, the CPU and/or GPU power clamps may be
  38 * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
  39 * we scale back the clamp.  Aside from trigger events (when we're critically
  40 * close or over our TDP) we don't adjust the clamps more than once every
  41 * five seconds.
  42 *
  43 * The thermal device (device 31, function 6) has a set of registers that
  44 * are updated by the ME firmware.  The ME should also take the clamp values
  45 * written to those registers and write them to the CPU, but we currently
  46 * bypass that functionality and write the CPU MSR directly.
  47 *
  48 * UNSUPPORTED:
  49 *   - dual MCP configs
  50 *
  51 * TODO:
  52 *   - handle CPU hotplug
  53 *   - provide turbo enable/disable api
  54 *
  55 * Related documents:
  56 *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
  57 *   - CDI 401376 - Ibex Peak EDS
  58 *   - ref 26037, 26641 - IPS BIOS spec
  59 *   - ref 26489 - Nehalem BIOS writer's guide
  60 *   - ref 26921 - Ibex Peak BIOS Specification
  61 */
  62
  63#include <linux/debugfs.h>
  64#include <linux/delay.h>
  65#include <linux/interrupt.h>
  66#include <linux/kernel.h>
  67#include <linux/kthread.h>
  68#include <linux/module.h>
  69#include <linux/pci.h>
  70#include <linux/sched.h>
  71#include <linux/seq_file.h>
  72#include <linux/string.h>
  73#include <linux/tick.h>
  74#include <linux/timer.h>
  75#include <linux/dmi.h>
  76#include <drm/i915_drm.h>
  77#include <asm/msr.h>
  78#include <asm/processor.h>
  79#include "intel_ips.h"
  80
  81#include <asm-generic/io-64-nonatomic-lo-hi.h>
  82
  83#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
  84
  85/*
  86 * Package level MSRs for monitor/control
  87 */
  88#define PLATFORM_INFO   0xce
  89#define   PLATFORM_TDP          (1<<29)
  90#define   PLATFORM_RATIO        (1<<28)
  91
  92#define IA32_MISC_ENABLE        0x1a0
  93#define   IA32_MISC_TURBO_EN    (1ULL<<38)
  94
  95#define TURBO_POWER_CURRENT_LIMIT       0x1ac
  96#define   TURBO_TDC_OVR_EN      (1UL<<31)
  97#define   TURBO_TDC_MASK        (0x000000007fff0000UL)
  98#define   TURBO_TDC_SHIFT       (16)
  99#define   TURBO_TDP_OVR_EN      (1UL<<15)
 100#define   TURBO_TDP_MASK        (0x0000000000003fffUL)
 101
 102/*
 103 * Core/thread MSRs for monitoring
 104 */
 105#define IA32_PERF_CTL           0x199
 106#define   IA32_PERF_TURBO_DIS   (1ULL<<32)
 107
 108/*
 109 * Thermal PCI device regs
 110 */
 111#define THM_CFG_TBAR    0x10
 112#define THM_CFG_TBAR_HI 0x14
 113
 114#define THM_TSIU        0x00
 115#define THM_TSE         0x01
 116#define   TSE_EN        0xb8
 117#define THM_TSS         0x02
 118#define THM_TSTR        0x03
 119#define THM_TSTTP       0x04
 120#define THM_TSCO        0x08
 121#define THM_TSES        0x0c
 122#define THM_TSGPEN      0x0d
 123#define   TSGPEN_HOT_LOHI       (1<<1)
 124#define   TSGPEN_CRIT_LOHI      (1<<2)
 125#define THM_TSPC        0x0e
 126#define THM_PPEC        0x10
 127#define THM_CTA         0x12
 128#define THM_PTA         0x14
 129#define   PTA_SLOPE_MASK        (0xff00)
 130#define   PTA_SLOPE_SHIFT       8
 131#define   PTA_OFFSET_MASK       (0x00ff)
 132#define THM_MGTA        0x16
 133#define   MGTA_SLOPE_MASK       (0xff00)
 134#define   MGTA_SLOPE_SHIFT      8
 135#define   MGTA_OFFSET_MASK      (0x00ff)
 136#define THM_TRC         0x1a
 137#define   TRC_CORE2_EN  (1<<15)
 138#define   TRC_THM_EN    (1<<12)
 139#define   TRC_C6_WAR    (1<<8)
 140#define   TRC_CORE1_EN  (1<<7)
 141#define   TRC_CORE_PWR  (1<<6)
 142#define   TRC_PCH_EN    (1<<5)
 143#define   TRC_MCH_EN    (1<<4)
 144#define   TRC_DIMM4     (1<<3)
 145#define   TRC_DIMM3     (1<<2)
 146#define   TRC_DIMM2     (1<<1)
 147#define   TRC_DIMM1     (1<<0)
 148#define THM_TES         0x20
 149#define THM_TEN         0x21
 150#define   TEN_UPDATE_EN 1
 151#define THM_PSC         0x24
 152#define   PSC_NTG       (1<<0) /* No GFX turbo support */
 153#define   PSC_NTPC      (1<<1) /* No CPU turbo support */
 154#define   PSC_PP_DEF    (0<<2) /* Perf policy up to driver */
 155#define   PSP_PP_PC     (1<<2) /* BIOS prefers CPU perf */
 156#define   PSP_PP_BAL    (2<<2) /* BIOS wants balanced perf */
 157#define   PSP_PP_GFX    (3<<2) /* BIOS prefers GFX perf */
 158#define   PSP_PBRT      (1<<4) /* BIOS run time support */
 159#define THM_CTV1        0x30
 160#define   CTV_TEMP_ERROR (1<<15)
 161#define   CTV_TEMP_MASK 0x3f
 162#define   CTV_
 163#define THM_CTV2        0x32
 164#define THM_CEC         0x34 /* undocumented power accumulator in joules */
 165#define THM_AE          0x3f
 166#define THM_HTS         0x50 /* 32 bits */
 167#define   HTS_PCPL_MASK (0x7fe00000)
 168#define   HTS_PCPL_SHIFT 21
 169#define   HTS_GPL_MASK  (0x001ff000)
 170#define   HTS_GPL_SHIFT 12
 171#define   HTS_PP_MASK   (0x00000c00)
 172#define   HTS_PP_SHIFT  10
 173#define   HTS_PP_DEF    0
 174#define   HTS_PP_PROC   1
 175#define   HTS_PP_BAL    2
 176#define   HTS_PP_GFX    3
 177#define   HTS_PCTD_DIS  (1<<9)
 178#define   HTS_GTD_DIS   (1<<8)
 179#define   HTS_PTL_MASK  (0x000000fe)
 180#define   HTS_PTL_SHIFT 1
 181#define   HTS_NVV       (1<<0)
 182#define THM_HTSHI       0x54 /* 16 bits */
 183#define   HTS2_PPL_MASK         (0x03ff)
 184#define   HTS2_PRST_MASK        (0x3c00)
 185#define   HTS2_PRST_SHIFT       10
 186#define   HTS2_PRST_UNLOADED    0
 187#define   HTS2_PRST_RUNNING     1
 188#define   HTS2_PRST_TDISOP      2 /* turbo disabled due to power */
 189#define   HTS2_PRST_TDISHT      3 /* turbo disabled due to high temp */
 190#define   HTS2_PRST_TDISUSR     4 /* user disabled turbo */
 191#define   HTS2_PRST_TDISPLAT    5 /* platform disabled turbo */
 192#define   HTS2_PRST_TDISPM      6 /* power management disabled turbo */
 193#define   HTS2_PRST_TDISERR     7 /* some kind of error disabled turbo */
 194#define THM_PTL         0x56
 195#define THM_MGTV        0x58
 196#define   TV_MASK       0x000000000000ff00
 197#define   TV_SHIFT      8
 198#define THM_PTV         0x60
 199#define   PTV_MASK      0x00ff
 200#define THM_MMGPC       0x64
 201#define THM_MPPC        0x66
 202#define THM_MPCPC       0x68
 203#define THM_TSPIEN      0x82
 204#define   TSPIEN_AUX_LOHI       (1<<0)
 205#define   TSPIEN_HOT_LOHI       (1<<1)
 206#define   TSPIEN_CRIT_LOHI      (1<<2)
 207#define   TSPIEN_AUX2_LOHI      (1<<3)
 208#define THM_TSLOCK      0x83
 209#define THM_ATR         0x84
 210#define THM_TOF         0x87
 211#define THM_STS         0x98
 212#define   STS_PCPL_MASK         (0x7fe00000)
 213#define   STS_PCPL_SHIFT        21
 214#define   STS_GPL_MASK          (0x001ff000)
 215#define   STS_GPL_SHIFT         12
 216#define   STS_PP_MASK           (0x00000c00)
 217#define   STS_PP_SHIFT          10
 218#define   STS_PP_DEF            0
 219#define   STS_PP_PROC           1
 220#define   STS_PP_BAL            2
 221#define   STS_PP_GFX            3
 222#define   STS_PCTD_DIS          (1<<9)
 223#define   STS_GTD_DIS           (1<<8)
 224#define   STS_PTL_MASK          (0x000000fe)
 225#define   STS_PTL_SHIFT         1
 226#define   STS_NVV               (1<<0)
 227#define THM_SEC         0x9c
 228#define   SEC_ACK       (1<<0)
 229#define THM_TC3         0xa4
 230#define THM_TC1         0xa8
 231#define   STS_PPL_MASK          (0x0003ff00)
 232#define   STS_PPL_SHIFT         16
 233#define THM_TC2         0xac
 234#define THM_DTV         0xb0
 235#define THM_ITV         0xd8
 236#define   ITV_ME_SEQNO_MASK 0x00ff0000 /* ME should update every ~200ms */
 237#define   ITV_ME_SEQNO_SHIFT (16)
 238#define   ITV_MCH_TEMP_MASK 0x0000ff00
 239#define   ITV_MCH_TEMP_SHIFT (8)
 240#define   ITV_PCH_TEMP_MASK 0x000000ff
 241
 242#define thm_readb(off) readb(ips->regmap + (off))
 243#define thm_readw(off) readw(ips->regmap + (off))
 244#define thm_readl(off) readl(ips->regmap + (off))
 245#define thm_readq(off) readq(ips->regmap + (off))
 246
 247#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
 248#define thm_writew(off, val) writew((val), ips->regmap + (off))
 249#define thm_writel(off, val) writel((val), ips->regmap + (off))
 250
 251static const int IPS_ADJUST_PERIOD = 5000; /* ms */
 252static bool late_i915_load = false;
 253
 254/* For initial average collection */
 255static const int IPS_SAMPLE_PERIOD = 200; /* ms */
 256static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
 257#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
 258
 259/* Per-SKU limits */
 260struct ips_mcp_limits {
 261        int cpu_family;
 262        int cpu_model; /* includes extended model... */
 263        int mcp_power_limit; /* mW units */
 264        int core_power_limit;
 265        int mch_power_limit;
 266        int core_temp_limit; /* degrees C */
 267        int mch_temp_limit;
 268};
 269
 270/* Max temps are -10 degrees C to avoid PROCHOT# */
 271
 272struct ips_mcp_limits ips_sv_limits = {
 273        .mcp_power_limit = 35000,
 274        .core_power_limit = 29000,
 275        .mch_power_limit = 20000,
 276        .core_temp_limit = 95,
 277        .mch_temp_limit = 90
 278};
 279
 280struct ips_mcp_limits ips_lv_limits = {
 281        .mcp_power_limit = 25000,
 282        .core_power_limit = 21000,
 283        .mch_power_limit = 13000,
 284        .core_temp_limit = 95,
 285        .mch_temp_limit = 90
 286};
 287
 288struct ips_mcp_limits ips_ulv_limits = {
 289        .mcp_power_limit = 18000,
 290        .core_power_limit = 14000,
 291        .mch_power_limit = 11000,
 292        .core_temp_limit = 95,
 293        .mch_temp_limit = 90
 294};
 295
 296struct ips_driver {
 297        struct pci_dev *dev;
 298        void *regmap;
 299        struct task_struct *monitor;
 300        struct task_struct *adjust;
 301        struct dentry *debug_root;
 302
 303        /* Average CPU core temps (all averages in .01 degrees C for precision) */
 304        u16 ctv1_avg_temp;
 305        u16 ctv2_avg_temp;
 306        /* GMCH average */
 307        u16 mch_avg_temp;
 308        /* Average for the CPU (both cores?) */
 309        u16 mcp_avg_temp;
 310        /* Average power consumption (in mW) */
 311        u32 cpu_avg_power;
 312        u32 mch_avg_power;
 313
 314        /* Offset values */
 315        u16 cta_val;
 316        u16 pta_val;
 317        u16 mgta_val;
 318
 319        /* Maximums & prefs, protected by turbo status lock */
 320        spinlock_t turbo_status_lock;
 321        u16 mcp_temp_limit;
 322        u16 mcp_power_limit;
 323        u16 core_power_limit;
 324        u16 mch_power_limit;
 325        bool cpu_turbo_enabled;
 326        bool __cpu_turbo_on;
 327        bool gpu_turbo_enabled;
 328        bool __gpu_turbo_on;
 329        bool gpu_preferred;
 330        bool poll_turbo_status;
 331        bool second_cpu;
 332        bool turbo_toggle_allowed;
 333        struct ips_mcp_limits *limits;
 334
 335        /* Optional MCH interfaces for if i915 is in use */
 336        unsigned long (*read_mch_val)(void);
 337        bool (*gpu_raise)(void);
 338        bool (*gpu_lower)(void);
 339        bool (*gpu_busy)(void);
 340        bool (*gpu_turbo_disable)(void);
 341
 342        /* For restoration at unload */
 343        u64 orig_turbo_limit;
 344        u64 orig_turbo_ratios;
 345};
 346
 347static bool
 348ips_gpu_turbo_enabled(struct ips_driver *ips);
 349
 350/**
 351 * ips_cpu_busy - is CPU busy?
 352 * @ips: IPS driver struct
 353 *
 354 * Check CPU for load to see whether we should increase its thermal budget.
 355 *
 356 * RETURNS:
 357 * True if the CPU could use more power, false otherwise.
 358 */
 359static bool ips_cpu_busy(struct ips_driver *ips)
 360{
 361        if ((avenrun[0] >> FSHIFT) > 1)
 362                return true;
 363
 364        return false;
 365}
 366
 367/**
 368 * ips_cpu_raise - raise CPU power clamp
 369 * @ips: IPS driver struct
 370 *
 371 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
 372 * this platform.
 373 *
 374 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
 375 * long as we haven't hit the TDP limit for the SKU).
 376 */
 377static void ips_cpu_raise(struct ips_driver *ips)
 378{
 379        u64 turbo_override;
 380        u16 cur_tdp_limit, new_tdp_limit;
 381
 382        if (!ips->cpu_turbo_enabled)
 383                return;
 384
 385        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 386
 387        cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
 388        new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
 389
 390        /* Clamp to SKU TDP limit */
 391        if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
 392                new_tdp_limit = cur_tdp_limit;
 393
 394        thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
 395
 396        turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 397        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 398
 399        turbo_override &= ~TURBO_TDP_MASK;
 400        turbo_override |= new_tdp_limit;
 401
 402        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 403}
 404
 405/**
 406 * ips_cpu_lower - lower CPU power clamp
 407 * @ips: IPS driver struct
 408 *
 409 * Lower CPU power clamp b %IPS_CPU_STEP if possible.
 410 *
 411 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
 412 * as low as the platform limits will allow (though we could go lower there
 413 * wouldn't be much point).
 414 */
 415static void ips_cpu_lower(struct ips_driver *ips)
 416{
 417        u64 turbo_override;
 418        u16 cur_limit, new_limit;
 419
 420        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 421
 422        cur_limit = turbo_override & TURBO_TDP_MASK;
 423        new_limit = cur_limit - 8; /* 1W decrease */
 424
 425        /* Clamp to SKU TDP limit */
 426        if (new_limit  < (ips->orig_turbo_limit & TURBO_TDP_MASK))
 427                new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
 428
 429        thm_writew(THM_MPCPC, (new_limit * 10) / 8);
 430
 431        turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 432        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 433
 434        turbo_override &= ~TURBO_TDP_MASK;
 435        turbo_override |= new_limit;
 436
 437        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 438}
 439
 440/**
 441 * do_enable_cpu_turbo - internal turbo enable function
 442 * @data: unused
 443 *
 444 * Internal function for actually updating MSRs.  When we enable/disable
 445 * turbo, we need to do it on each CPU; this function is the one called
 446 * by on_each_cpu() when needed.
 447 */
 448static void do_enable_cpu_turbo(void *data)
 449{
 450        u64 perf_ctl;
 451
 452        rdmsrl(IA32_PERF_CTL, perf_ctl);
 453        if (perf_ctl & IA32_PERF_TURBO_DIS) {
 454                perf_ctl &= ~IA32_PERF_TURBO_DIS;
 455                wrmsrl(IA32_PERF_CTL, perf_ctl);
 456        }
 457}
 458
 459/**
 460 * ips_enable_cpu_turbo - enable turbo mode on all CPUs
 461 * @ips: IPS driver struct
 462 *
 463 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
 464 * all logical threads.
 465 */
 466static void ips_enable_cpu_turbo(struct ips_driver *ips)
 467{
 468        /* Already on, no need to mess with MSRs */
 469        if (ips->__cpu_turbo_on)
 470                return;
 471
 472        if (ips->turbo_toggle_allowed)
 473                on_each_cpu(do_enable_cpu_turbo, ips, 1);
 474
 475        ips->__cpu_turbo_on = true;
 476}
 477
 478/**
 479 * do_disable_cpu_turbo - internal turbo disable function
 480 * @data: unused
 481 *
 482 * Internal function for actually updating MSRs.  When we enable/disable
 483 * turbo, we need to do it on each CPU; this function is the one called
 484 * by on_each_cpu() when needed.
 485 */
 486static void do_disable_cpu_turbo(void *data)
 487{
 488        u64 perf_ctl;
 489
 490        rdmsrl(IA32_PERF_CTL, perf_ctl);
 491        if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
 492                perf_ctl |= IA32_PERF_TURBO_DIS;
 493                wrmsrl(IA32_PERF_CTL, perf_ctl);
 494        }
 495}
 496
 497/**
 498 * ips_disable_cpu_turbo - disable turbo mode on all CPUs
 499 * @ips: IPS driver struct
 500 *
 501 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
 502 * all logical threads.
 503 */
 504static void ips_disable_cpu_turbo(struct ips_driver *ips)
 505{
 506        /* Already off, leave it */
 507        if (!ips->__cpu_turbo_on)
 508                return;
 509
 510        if (ips->turbo_toggle_allowed)
 511                on_each_cpu(do_disable_cpu_turbo, ips, 1);
 512
 513        ips->__cpu_turbo_on = false;
 514}
 515
 516/**
 517 * ips_gpu_busy - is GPU busy?
 518 * @ips: IPS driver struct
 519 *
 520 * Check GPU for load to see whether we should increase its thermal budget.
 521 * We need to call into the i915 driver in this case.
 522 *
 523 * RETURNS:
 524 * True if the GPU could use more power, false otherwise.
 525 */
 526static bool ips_gpu_busy(struct ips_driver *ips)
 527{
 528        if (!ips_gpu_turbo_enabled(ips))
 529                return false;
 530
 531        return ips->gpu_busy();
 532}
 533
 534/**
 535 * ips_gpu_raise - raise GPU power clamp
 536 * @ips: IPS driver struct
 537 *
 538 * Raise the GPU frequency/power if possible.  We need to call into the
 539 * i915 driver in this case.
 540 */
 541static void ips_gpu_raise(struct ips_driver *ips)
 542{
 543        if (!ips_gpu_turbo_enabled(ips))
 544                return;
 545
 546        if (!ips->gpu_raise())
 547                ips->gpu_turbo_enabled = false;
 548
 549        return;
 550}
 551
 552/**
 553 * ips_gpu_lower - lower GPU power clamp
 554 * @ips: IPS driver struct
 555 *
 556 * Lower GPU frequency/power if possible.  Need to call i915.
 557 */
 558static void ips_gpu_lower(struct ips_driver *ips)
 559{
 560        if (!ips_gpu_turbo_enabled(ips))
 561                return;
 562
 563        if (!ips->gpu_lower())
 564                ips->gpu_turbo_enabled = false;
 565
 566        return;
 567}
 568
 569/**
 570 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
 571 * @ips: IPS driver struct
 572 *
 573 * Call into the graphics driver indicating that it can safely use
 574 * turbo mode.
 575 */
 576static void ips_enable_gpu_turbo(struct ips_driver *ips)
 577{
 578        if (ips->__gpu_turbo_on)
 579                return;
 580        ips->__gpu_turbo_on = true;
 581}
 582
 583/**
 584 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
 585 * @ips: IPS driver struct
 586 *
 587 * Request that the graphics driver disable turbo mode.
 588 */
 589static void ips_disable_gpu_turbo(struct ips_driver *ips)
 590{
 591        /* Avoid calling i915 if turbo is already disabled */
 592        if (!ips->__gpu_turbo_on)
 593                return;
 594
 595        if (!ips->gpu_turbo_disable())
 596                dev_err(&ips->dev->dev, "failed to disable graphis turbo\n");
 597        else
 598                ips->__gpu_turbo_on = false;
 599}
 600
 601/**
 602 * mcp_exceeded - check whether we're outside our thermal & power limits
 603 * @ips: IPS driver struct
 604 *
 605 * Check whether the MCP is over its thermal or power budget.
 606 */
 607static bool mcp_exceeded(struct ips_driver *ips)
 608{
 609        unsigned long flags;
 610        bool ret = false;
 611        u32 temp_limit;
 612        u32 avg_power;
 613
 614        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 615
 616        temp_limit = ips->mcp_temp_limit * 100;
 617        if (ips->mcp_avg_temp > temp_limit)
 618                ret = true;
 619
 620        avg_power = ips->cpu_avg_power + ips->mch_avg_power;
 621        if (avg_power > ips->mcp_power_limit)
 622                ret = true;
 623
 624        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 625
 626        return ret;
 627}
 628
 629/**
 630 * cpu_exceeded - check whether a CPU core is outside its limits
 631 * @ips: IPS driver struct
 632 * @cpu: CPU number to check
 633 *
 634 * Check a given CPU's average temp or power is over its limit.
 635 */
 636static bool cpu_exceeded(struct ips_driver *ips, int cpu)
 637{
 638        unsigned long flags;
 639        int avg;
 640        bool ret = false;
 641
 642        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 643        avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
 644        if (avg > (ips->limits->core_temp_limit * 100))
 645                ret = true;
 646        if (ips->cpu_avg_power > ips->core_power_limit * 100)
 647                ret = true;
 648        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 649
 650        if (ret)
 651                dev_info(&ips->dev->dev,
 652                         "CPU power or thermal limit exceeded\n");
 653
 654        return ret;
 655}
 656
 657/**
 658 * mch_exceeded - check whether the GPU is over budget
 659 * @ips: IPS driver struct
 660 *
 661 * Check the MCH temp & power against their maximums.
 662 */
 663static bool mch_exceeded(struct ips_driver *ips)
 664{
 665        unsigned long flags;
 666        bool ret = false;
 667
 668        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 669        if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
 670                ret = true;
 671        if (ips->mch_avg_power > ips->mch_power_limit)
 672                ret = true;
 673        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 674
 675        return ret;
 676}
 677
 678/**
 679 * verify_limits - verify BIOS provided limits
 680 * @ips: IPS structure
 681 *
 682 * BIOS can optionally provide non-default limits for power and temp.  Check
 683 * them here and use the defaults if the BIOS values are not provided or
 684 * are otherwise unusable.
 685 */
 686static void verify_limits(struct ips_driver *ips)
 687{
 688        if (ips->mcp_power_limit < ips->limits->mcp_power_limit ||
 689            ips->mcp_power_limit > 35000)
 690                ips->mcp_power_limit = ips->limits->mcp_power_limit;
 691
 692        if (ips->mcp_temp_limit < ips->limits->core_temp_limit ||
 693            ips->mcp_temp_limit < ips->limits->mch_temp_limit ||
 694            ips->mcp_temp_limit > 150)
 695                ips->mcp_temp_limit = min(ips->limits->core_temp_limit,
 696                                          ips->limits->mch_temp_limit);
 697}
 698
 699/**
 700 * update_turbo_limits - get various limits & settings from regs
 701 * @ips: IPS driver struct
 702 *
 703 * Update the IPS power & temp limits, along with turbo enable flags,
 704 * based on latest register contents.
 705 *
 706 * Used at init time and for runtime BIOS support, which requires polling
 707 * the regs for updates (as a result of AC->DC transition for example).
 708 *
 709 * LOCKING:
 710 * Caller must hold turbo_status_lock (outside of init)
 711 */
 712static void update_turbo_limits(struct ips_driver *ips)
 713{
 714        u32 hts = thm_readl(THM_HTS);
 715
 716        ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
 717        /* 
 718         * Disable turbo for now, until we can figure out why the power figures
 719         * are wrong
 720         */
 721        ips->cpu_turbo_enabled = false;
 722
 723        if (ips->gpu_busy)
 724                ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
 725
 726        ips->core_power_limit = thm_readw(THM_MPCPC);
 727        ips->mch_power_limit = thm_readw(THM_MMGPC);
 728        ips->mcp_temp_limit = thm_readw(THM_PTL);
 729        ips->mcp_power_limit = thm_readw(THM_MPPC);
 730
 731        verify_limits(ips);
 732        /* Ignore BIOS CPU vs GPU pref */
 733}
 734
 735/**
 736 * ips_adjust - adjust power clamp based on thermal state
 737 * @data: ips driver structure
 738 *
 739 * Wake up every 5s or so and check whether we should adjust the power clamp.
 740 * Check CPU and GPU load to determine which needs adjustment.  There are
 741 * several things to consider here:
 742 *   - do we need to adjust up or down?
 743 *   - is CPU busy?
 744 *   - is GPU busy?
 745 *   - is CPU in turbo?
 746 *   - is GPU in turbo?
 747 *   - is CPU or GPU preferred? (CPU is default)
 748 *
 749 * So, given the above, we do the following:
 750 *   - up (TDP available)
 751 *     - CPU not busy, GPU not busy - nothing
 752 *     - CPU busy, GPU not busy - adjust CPU up
 753 *     - CPU not busy, GPU busy - adjust GPU up
 754 *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
 755 *       non-preferred unit if necessary
 756 *   - down (at TDP limit)
 757 *     - adjust both CPU and GPU down if possible
 758 *
 759                cpu+ gpu+       cpu+gpu-        cpu-gpu+        cpu-gpu-
 760cpu < gpu <     cpu+gpu+        cpu+            gpu+            nothing
 761cpu < gpu >=    cpu+gpu-(mcp<)  cpu+gpu-(mcp<)  gpu-            gpu-
 762cpu >= gpu <    cpu-gpu+(mcp<)  cpu-            cpu-gpu+(mcp<)  cpu-
 763cpu >= gpu >=   cpu-gpu-        cpu-gpu-        cpu-gpu-        cpu-gpu-
 764 *
 765 */
 766static int ips_adjust(void *data)
 767{
 768        struct ips_driver *ips = data;
 769        unsigned long flags;
 770
 771        dev_dbg(&ips->dev->dev, "starting ips-adjust thread\n");
 772
 773        /*
 774         * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
 775         * often isn't recommended due to ME interaction.
 776         */
 777        do {
 778                bool cpu_busy = ips_cpu_busy(ips);
 779                bool gpu_busy = ips_gpu_busy(ips);
 780
 781                spin_lock_irqsave(&ips->turbo_status_lock, flags);
 782                if (ips->poll_turbo_status)
 783                        update_turbo_limits(ips);
 784                spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 785
 786                /* Update turbo status if necessary */
 787                if (ips->cpu_turbo_enabled)
 788                        ips_enable_cpu_turbo(ips);
 789                else
 790                        ips_disable_cpu_turbo(ips);
 791
 792                if (ips->gpu_turbo_enabled)
 793                        ips_enable_gpu_turbo(ips);
 794                else
 795                        ips_disable_gpu_turbo(ips);
 796
 797                /* We're outside our comfort zone, crank them down */
 798                if (mcp_exceeded(ips)) {
 799                        ips_cpu_lower(ips);
 800                        ips_gpu_lower(ips);
 801                        goto sleep;
 802                }
 803
 804                if (!cpu_exceeded(ips, 0) && cpu_busy)
 805                        ips_cpu_raise(ips);
 806                else
 807                        ips_cpu_lower(ips);
 808
 809                if (!mch_exceeded(ips) && gpu_busy)
 810                        ips_gpu_raise(ips);
 811                else
 812                        ips_gpu_lower(ips);
 813
 814sleep:
 815                schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
 816        } while (!kthread_should_stop());
 817
 818        dev_dbg(&ips->dev->dev, "ips-adjust thread stopped\n");
 819
 820        return 0;
 821}
 822
 823/*
 824 * Helpers for reading out temp/power values and calculating their
 825 * averages for the decision making and monitoring functions.
 826 */
 827
 828static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
 829{
 830        u64 total = 0;
 831        int i;
 832        u16 avg;
 833
 834        for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 835                total += (u64)(array[i] * 100);
 836
 837        do_div(total, IPS_SAMPLE_COUNT);
 838
 839        avg = (u16)total;
 840
 841        return avg;
 842}
 843
 844static u16 read_mgtv(struct ips_driver *ips)
 845{
 846        u16 ret;
 847        u64 slope, offset;
 848        u64 val;
 849
 850        val = thm_readq(THM_MGTV);
 851        val = (val & TV_MASK) >> TV_SHIFT;
 852
 853        slope = offset = thm_readw(THM_MGTA);
 854        slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
 855        offset = offset & MGTA_OFFSET_MASK;
 856
 857        ret = ((val * slope + 0x40) >> 7) + offset;
 858
 859        return 0; /* MCH temp reporting buggy */
 860}
 861
 862static u16 read_ptv(struct ips_driver *ips)
 863{
 864        u16 val, slope, offset;
 865
 866        slope = (ips->pta_val & PTA_SLOPE_MASK) >> PTA_SLOPE_SHIFT;
 867        offset = ips->pta_val & PTA_OFFSET_MASK;
 868
 869        val = thm_readw(THM_PTV) & PTV_MASK;
 870
 871        return val;
 872}
 873
 874static u16 read_ctv(struct ips_driver *ips, int cpu)
 875{
 876        int reg = cpu ? THM_CTV2 : THM_CTV1;
 877        u16 val;
 878
 879        val = thm_readw(reg);
 880        if (!(val & CTV_TEMP_ERROR))
 881                val = (val) >> 6; /* discard fractional component */
 882        else
 883                val = 0;
 884
 885        return val;
 886}
 887
 888static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
 889{
 890        u32 val;
 891        u32 ret;
 892
 893        /*
 894         * CEC is in joules/65535.  Take difference over time to
 895         * get watts.
 896         */
 897        val = thm_readl(THM_CEC);
 898
 899        /* period is in ms and we want mW */
 900        ret = (((val - *last) * 1000) / period);
 901        ret = (ret * 1000) / 65535;
 902        *last = val;
 903
 904        return 0;
 905}
 906
 907static const u16 temp_decay_factor = 2;
 908static u16 update_average_temp(u16 avg, u16 val)
 909{
 910        u16 ret;
 911
 912        /* Multiply by 100 for extra precision */
 913        ret = (val * 100 / temp_decay_factor) +
 914                (((temp_decay_factor - 1) * avg) / temp_decay_factor);
 915        return ret;
 916}
 917
 918static const u16 power_decay_factor = 2;
 919static u16 update_average_power(u32 avg, u32 val)
 920{
 921        u32 ret;
 922
 923        ret = (val / power_decay_factor) +
 924                (((power_decay_factor - 1) * avg) / power_decay_factor);
 925
 926        return ret;
 927}
 928
 929static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
 930{
 931        u64 total = 0;
 932        u32 avg;
 933        int i;
 934
 935        for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 936                total += array[i];
 937
 938        do_div(total, IPS_SAMPLE_COUNT);
 939        avg = (u32)total;
 940
 941        return avg;
 942}
 943
 /*
  * monitor_timeout - timer callback that wakes a sleeping task
  * @arg: the task_struct pointer to wake, passed as an unsigned long
  */
 static void monitor_timeout(unsigned long arg)
 {
         struct task_struct *sleeper = (struct task_struct *)arg;

         wake_up_process(sleeper);
 }
 948
 949/**
 950 * ips_monitor - temp/power monitoring thread
 951 * @data: ips driver structure
 952 *
 953 * This is the main function for the IPS driver.  It monitors power and
 954 * tempurature in the MCP and adjusts CPU and GPU power clams accordingly.
 955 *
 956 * We keep a 5s moving average of power consumption and tempurature.  Using
 957 * that data, along with CPU vs GPU preference, we adjust the power clamps
 958 * up or down.
 959 */
 960static int ips_monitor(void *data)
 961{
 962        struct ips_driver *ips = data;
 963        struct timer_list timer;
 964        unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
 965        int i;
 966        u32 *cpu_samples, *mchp_samples, old_cpu_power;
 967        u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
 968        u8 cur_seqno, last_seqno;
 969
 970        mcp_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 971        ctv1_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 972        ctv2_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 973        mch_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 974        cpu_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 975        mchp_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 976        if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
 977                        !cpu_samples || !mchp_samples) {
 978                dev_err(&ips->dev->dev,
 979                        "failed to allocate sample array, ips disabled\n");
 980                kfree(mcp_samples);
 981                kfree(ctv1_samples);
 982                kfree(ctv2_samples);
 983                kfree(mch_samples);
 984                kfree(cpu_samples);
 985                kfree(mchp_samples);
 986                return -ENOMEM;
 987        }
 988
 989        last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
 990                ITV_ME_SEQNO_SHIFT;
 991        seqno_timestamp = get_jiffies_64();
 992
 993        old_cpu_power = thm_readl(THM_CEC);
 994        schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
 995
 996        /* Collect an initial average */
 997        for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
 998                u32 mchp, cpu_power;
 999                u16 val;
1000
1001                mcp_samples[i] = read_ptv(ips);
1002
1003                val = read_ctv(ips, 0);
1004                ctv1_samples[i] = val;
1005
1006                val = read_ctv(ips, 1);
1007                ctv2_samples[i] = val;
1008
1009                val = read_mgtv(ips);
1010                mch_samples[i] = val;
1011
1012                cpu_power = get_cpu_power(ips, &old_cpu_power,
1013                                          IPS_SAMPLE_PERIOD);
1014                cpu_samples[i] = cpu_power;
1015
1016                if (ips->read_mch_val) {
1017                        mchp = ips->read_mch_val();
1018                        mchp_samples[i] = mchp;
1019                }
1020
1021                schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1022                if (kthread_should_stop())
1023                        break;
1024        }
1025
1026        ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
1027        ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
1028        ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
1029        ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
1030        ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
1031        ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
1032        kfree(mcp_samples);
1033        kfree(ctv1_samples);
1034        kfree(ctv2_samples);
1035        kfree(mch_samples);
1036        kfree(cpu_samples);
1037        kfree(mchp_samples);
1038
1039        /* Start the adjustment thread now that we have data */
1040        wake_up_process(ips->adjust);
1041
1042        /*
1043         * Ok, now we have an initial avg.  From here on out, we track the
1044         * running avg using a decaying average calculation.  This allows
1045         * us to reduce the sample frequency if the CPU and GPU are idle.
1046         */
1047        old_cpu_power = thm_readl(THM_CEC);
1048        schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1049        last_sample_period = IPS_SAMPLE_PERIOD;
1050
1051        setup_deferrable_timer_on_stack(&timer, monitor_timeout,
1052                                        (unsigned long)current);
1053        do {
1054                u32 cpu_val, mch_val;
1055                u16 val;
1056
1057                /* MCP itself */
1058                val = read_ptv(ips);
1059                ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1060
1061                /* Processor 0 */
1062                val = read_ctv(ips, 0);
1063                ips->ctv1_avg_temp =
1064                        update_average_temp(ips->ctv1_avg_temp, val);
1065                /* Power */
1066                cpu_val = get_cpu_power(ips, &old_cpu_power,
1067                                        last_sample_period);
1068                ips->cpu_avg_power =
1069                        update_average_power(ips->cpu_avg_power, cpu_val);
1070
1071                if (ips->second_cpu) {
1072                        /* Processor 1 */
1073                        val = read_ctv(ips, 1);
1074                        ips->ctv2_avg_temp =
1075                                update_average_temp(ips->ctv2_avg_temp, val);
1076                }
1077
1078                /* MCH */
1079                val = read_mgtv(ips);
1080                ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1081                /* Power */
1082                if (ips->read_mch_val) {
1083                        mch_val = ips->read_mch_val();
1084                        ips->mch_avg_power =
1085                                update_average_power(ips->mch_avg_power,
1086                                                     mch_val);
1087                }
1088
1089                /*
1090                 * Make sure ME is updating thermal regs.
1091                 * Note:
1092                 * If it's been more than a second since the last update,
1093                 * the ME is probably hung.
1094                 */
1095                cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1096                        ITV_ME_SEQNO_SHIFT;
1097                if (cur_seqno == last_seqno &&
1098                    time_after(jiffies, seqno_timestamp + HZ)) {
1099                        dev_warn(&ips->dev->dev, "ME failed to update for more than 1s, likely hung\n");
1100                } else {
1101                        seqno_timestamp = get_jiffies_64();
1102                        last_seqno = cur_seqno;
1103                }
1104
1105                last_msecs = jiffies_to_msecs(jiffies);
1106                expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1107
1108                __set_current_state(TASK_INTERRUPTIBLE);
1109                mod_timer(&timer, expire);
1110                schedule();
1111
1112                /* Calculate actual sample period for power averaging */
1113                last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1114                if (!last_sample_period)
1115                        last_sample_period = 1;
1116        } while (!kthread_should_stop());
1117
1118        del_timer_sync(&timer);
1119        destroy_timer_on_stack(&timer);
1120
1121        dev_dbg(&ips->dev->dev, "ips-monitor thread stopped\n");
1122
1123        return 0;
1124}
1125
#if 0
/*
 * Debug-only register dump helpers (currently compiled out).  Each macro
 * reads one thermal register and prints it with dev_dbg(); they rely on a
 * variable named "ips" being in scope at the point of use.
 */
#define THM_DUMPW(reg) \
        do { \
                u16 val = thm_readw(reg); \
                dev_dbg(&ips->dev->dev, #reg ": 0x%04x\n", val); \
        } while (0)
#define THM_DUMPL(reg) \
        do { \
                u32 val = thm_readl(reg); \
                dev_dbg(&ips->dev->dev, #reg ": 0x%08x\n", val); \
        } while (0)
/* A 64 bit value needs the ll length modifier */
#define THM_DUMPQ(reg) \
        do { \
                u64 val = thm_readq(reg); \
                dev_dbg(&ips->dev->dev, #reg ": 0x%016llx\n", \
                        (unsigned long long)val); \
        } while (0)

/* Print the processor temp limit and a handful of thermal registers. */
static void dump_thermal_info(struct ips_driver *ips)
{
        u16 ptl;

        ptl = thm_readw(THM_PTL);
        dev_dbg(&ips->dev->dev, "Processor temp limit: %d\n", ptl);

        THM_DUMPW(THM_CTA);
        THM_DUMPW(THM_TRC);
        THM_DUMPW(THM_CTV1);
        THM_DUMPL(THM_STS);
        THM_DUMPW(THM_PTV);
        THM_DUMPQ(THM_MGTV);
}
#endif
1158
1159/**
1160 * ips_irq_handler - handle temperature triggers and other IPS events
1161 * @irq: irq number
1162 * @arg: unused
1163 *
1164 * Handle temperature limit trigger events, generally by lowering the clamps.
1165 * If we're at a critical limit, we clamp back to the lowest possible value
1166 * to prevent emergency shutdown.
1167 */
1168static irqreturn_t ips_irq_handler(int irq, void *arg)
1169{
1170        struct ips_driver *ips = arg;
1171        u8 tses = thm_readb(THM_TSES);
1172        u8 tes = thm_readb(THM_TES);
1173
1174        if (!tses && !tes)
1175                return IRQ_NONE;
1176
1177        dev_info(&ips->dev->dev, "TSES: 0x%02x\n", tses);
1178        dev_info(&ips->dev->dev, "TES: 0x%02x\n", tes);
1179
1180        /* STS update from EC? */
1181        if (tes & 1) {
1182                u32 sts, tc1;
1183
1184                sts = thm_readl(THM_STS);
1185                tc1 = thm_readl(THM_TC1);
1186
1187                if (sts & STS_NVV) {
1188                        spin_lock(&ips->turbo_status_lock);
1189                        ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1190                                STS_PCPL_SHIFT;
1191                        ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1192                                STS_GPL_SHIFT;
1193                        /* ignore EC CPU vs GPU pref */
1194                        ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1195                        /* 
1196                         * Disable turbo for now, until we can figure
1197                         * out why the power figures are wrong
1198                         */
1199                        ips->cpu_turbo_enabled = false;
1200                        if (ips->gpu_busy)
1201                                ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1202                        ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1203                                STS_PTL_SHIFT;
1204                        ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1205                                STS_PPL_SHIFT;
1206                        verify_limits(ips);
1207                        spin_unlock(&ips->turbo_status_lock);
1208
1209                        thm_writeb(THM_SEC, SEC_ACK);
1210                }
1211                thm_writeb(THM_TES, tes);
1212        }
1213
1214        /* Thermal trip */
1215        if (tses) {
1216                dev_warn(&ips->dev->dev,
1217                         "thermal trip occurred, tses: 0x%04x\n", tses);
1218                thm_writeb(THM_TSES, tses);
1219        }
1220
1221        return IRQ_HANDLED;
1222}
1223
1224#ifndef CONFIG_DEBUG_FS
/* debugfs is not configured: setting up the debug files is a no-op */
static void ips_debugfs_init(struct ips_driver *ips)
{
}
/* debugfs is not configured: there is nothing to tear down */
static void ips_debugfs_cleanup(struct ips_driver *ips)
{
}
1227#else
1228
1229/* Expose current state and limits in debugfs if possible */
1230
/*
 * One debugfs file: the driver instance it reports on, the file name and
 * the seq_file show routine that produces its contents.
 */
struct ips_debugfs_node {
        struct ips_driver *ips;         /* filled in when the file is created */
        const char *name;               /* file name; points at a string literal */
        int (*show)(struct seq_file *m, void *data);
};
1236
1237static int show_cpu_temp(struct seq_file *m, void *data)
1238{
1239        struct ips_driver *ips = m->private;
1240
1241        seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1242                   ips->ctv1_avg_temp % 100);
1243
1244        return 0;
1245}
1246
1247static int show_cpu_power(struct seq_file *m, void *data)
1248{
1249        struct ips_driver *ips = m->private;
1250
1251        seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1252
1253        return 0;
1254}
1255
1256static int show_cpu_clamp(struct seq_file *m, void *data)
1257{
1258        u64 turbo_override;
1259        int tdp, tdc;
1260
1261        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1262
1263        tdp = (int)(turbo_override & TURBO_TDP_MASK);
1264        tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1265
1266        /* Convert to .1W/A units */
1267        tdp = tdp * 10 / 8;
1268        tdc = tdc * 10 / 8;
1269
1270        /* Watts Amperes */
1271        seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1272                   tdc / 10, tdc % 10);
1273
1274        return 0;
1275}
1276
1277static int show_mch_temp(struct seq_file *m, void *data)
1278{
1279        struct ips_driver *ips = m->private;
1280
1281        seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1282                   ips->mch_avg_temp % 100);
1283
1284        return 0;
1285}
1286
1287static int show_mch_power(struct seq_file *m, void *data)
1288{
1289        struct ips_driver *ips = m->private;
1290
1291        seq_printf(m, "%dmW\n", ips->mch_avg_power);
1292
1293        return 0;
1294}
1295
1296static struct ips_debugfs_node ips_debug_files[] = {
1297        { NULL, "cpu_temp", show_cpu_temp },
1298        { NULL, "cpu_power", show_cpu_power },
1299        { NULL, "cpu_clamp", show_cpu_clamp },
1300        { NULL, "mch_temp", show_mch_temp },
1301        { NULL, "mch_power", show_mch_power },
1302};
1303
1304static int ips_debugfs_open(struct inode *inode, struct file *file)
1305{
1306        struct ips_debugfs_node *node = inode->i_private;
1307
1308        return single_open(file, node->show, node->ips);
1309}
1310
1311static const struct file_operations ips_debugfs_ops = {
1312        .owner = THIS_MODULE,
1313        .open = ips_debugfs_open,
1314        .read = seq_read,
1315        .llseek = seq_lseek,
1316        .release = single_release,
1317};
1318
1319static void ips_debugfs_cleanup(struct ips_driver *ips)
1320{
1321        if (ips->debug_root)
1322                debugfs_remove_recursive(ips->debug_root);
1323        return;
1324}
1325
1326static void ips_debugfs_init(struct ips_driver *ips)
1327{
1328        int i;
1329
1330        ips->debug_root = debugfs_create_dir("ips", NULL);
1331        if (!ips->debug_root) {
1332                dev_err(&ips->dev->dev,
1333                        "failed to create debugfs entries: %ld\n",
1334                        PTR_ERR(ips->debug_root));
1335                return;
1336        }
1337
1338        for (i = 0; i < ARRAY_SIZE(ips_debug_files); i++) {
1339                struct dentry *ent;
1340                struct ips_debugfs_node *node = &ips_debug_files[i];
1341
1342                node->ips = ips;
1343                ent = debugfs_create_file(node->name, S_IFREG | S_IRUGO,
1344                                          ips->debug_root, node,
1345                                          &ips_debugfs_ops);
1346                if (!ent) {
1347                        dev_err(&ips->dev->dev,
1348                                "failed to create debug file: %ld\n",
1349                                PTR_ERR(ent));
1350                        goto err_cleanup;
1351                }
1352        }
1353
1354        return;
1355
1356err_cleanup:
1357        ips_debugfs_cleanup(ips);
1358        return;
1359}
1360#endif /* CONFIG_DEBUG_FS */
1361
1362/**
1363 * ips_detect_cpu - detect whether CPU supports IPS
1364 *
1365 * Walk our list and see if we're on a supported CPU.  If we find one,
1366 * return the limits for it.
1367 */
1368static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1369{
1370        u64 turbo_power, misc_en;
1371        struct ips_mcp_limits *limits = NULL;
1372        u16 tdp;
1373
1374        if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1375                dev_info(&ips->dev->dev, "Non-IPS CPU detected.\n");
1376                goto out;
1377        }
1378
1379        rdmsrl(IA32_MISC_ENABLE, misc_en);
1380        /*
1381         * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1382         * turbo manually or we'll get an illegal MSR access, even though
1383         * turbo will still be available.
1384         */
1385        if (misc_en & IA32_MISC_TURBO_EN)
1386                ips->turbo_toggle_allowed = true;
1387        else
1388                ips->turbo_toggle_allowed = false;
1389
1390        if (strstr(boot_cpu_data.x86_model_id, "CPU       M"))
1391                limits = &ips_sv_limits;
1392        else if (strstr(boot_cpu_data.x86_model_id, "CPU       L"))
1393                limits = &ips_lv_limits;
1394        else if (strstr(boot_cpu_data.x86_model_id, "CPU       U"))
1395                limits = &ips_ulv_limits;
1396        else {
1397                dev_info(&ips->dev->dev, "No CPUID match found.\n");
1398                goto out;
1399        }
1400
1401        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1402        tdp = turbo_power & TURBO_TDP_MASK;
1403
1404        /* Sanity check TDP against CPU */
1405        if (limits->core_power_limit != (tdp / 8) * 1000) {
1406                dev_info(&ips->dev->dev, "CPU TDP doesn't match expected value (found %d, expected %d)\n",
1407                         tdp / 8, limits->core_power_limit / 1000);
1408                limits->core_power_limit = (tdp / 8) * 1000;
1409        }
1410
1411out:
1412        return limits;
1413}
1414
1415/**
1416 * ips_get_i915_syms - try to get GPU control methods from i915 driver
1417 * @ips: IPS driver
1418 *
1419 * The i915 driver exports several interfaces to allow the IPS driver to
1420 * monitor and control graphics turbo mode.  If we can find them, we can
1421 * enable graphics turbo, otherwise we must disable it to avoid exceeding
1422 * thermal and power limits in the MCP.
1423 */
1424static bool ips_get_i915_syms(struct ips_driver *ips)
1425{
1426        ips->read_mch_val = symbol_get(i915_read_mch_val);
1427        if (!ips->read_mch_val)
1428                goto out_err;
1429        ips->gpu_raise = symbol_get(i915_gpu_raise);
1430        if (!ips->gpu_raise)
1431                goto out_put_mch;
1432        ips->gpu_lower = symbol_get(i915_gpu_lower);
1433        if (!ips->gpu_lower)
1434                goto out_put_raise;
1435        ips->gpu_busy = symbol_get(i915_gpu_busy);
1436        if (!ips->gpu_busy)
1437                goto out_put_lower;
1438        ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1439        if (!ips->gpu_turbo_disable)
1440                goto out_put_busy;
1441
1442        return true;
1443
1444out_put_busy:
1445        symbol_put(i915_gpu_busy);
1446out_put_lower:
1447        symbol_put(i915_gpu_lower);
1448out_put_raise:
1449        symbol_put(i915_gpu_raise);
1450out_put_mch:
1451        symbol_put(i915_read_mch_val);
1452out_err:
1453        return false;
1454}
1455
1456static bool
1457ips_gpu_turbo_enabled(struct ips_driver *ips)
1458{
1459        if (!ips->gpu_busy && late_i915_load) {
1460                if (ips_get_i915_syms(ips)) {
1461                        dev_info(&ips->dev->dev,
1462                                 "i915 driver attached, reenabling gpu turbo\n");
1463                        ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1464                }
1465        }
1466
1467        return ips->gpu_turbo_enabled;
1468}
1469
1470void
1471ips_link_to_i915_driver(void)
1472{
1473        /* We can't cleanly get at the various ips_driver structs from
1474         * this caller (the i915 driver), so just set a flag saying
1475         * that it's time to try getting the symbols again.
1476         */
1477        late_i915_load = true;
1478}
1479EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1480
1481static const struct pci_device_id ips_id_table[] = {
1482        { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
1483                     PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
1484        { 0, }
1485};
1486
1487MODULE_DEVICE_TABLE(pci, ips_id_table);
1488
1489static int ips_blacklist_callback(const struct dmi_system_id *id)
1490{
1491        pr_info("Blacklisted intel_ips for %s\n", id->ident);
1492        return 1;
1493}
1494
1495static const struct dmi_system_id ips_blacklist[] = {
1496        {
1497                .callback = ips_blacklist_callback,
1498                .ident = "HP ProBook",
1499                .matches = {
1500                        DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1501                        DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
1502                },
1503        },
1504        { }     /* terminating entry */
1505};
1506
1507static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1508{
1509        u64 platform_info;
1510        struct ips_driver *ips;
1511        u32 hts;
1512        int ret = 0;
1513        u16 htshi, trc, trc_required_mask;
1514        u8 tse;
1515
1516        if (dmi_check_system(ips_blacklist))
1517                return -ENODEV;
1518
1519        ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
1520        if (!ips)
1521                return -ENOMEM;
1522
1523        pci_set_drvdata(dev, ips);
1524        ips->dev = dev;
1525
1526        ips->limits = ips_detect_cpu(ips);
1527        if (!ips->limits) {
1528                dev_info(&dev->dev, "IPS not supported on this CPU\n");
1529                ret = -ENXIO;
1530                goto error_free;
1531        }
1532
1533        spin_lock_init(&ips->turbo_status_lock);
1534
1535        ret = pci_enable_device(dev);
1536        if (ret) {
1537                dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1538                goto error_free;
1539        }
1540
1541        if (!pci_resource_start(dev, 0)) {
1542                dev_err(&dev->dev, "TBAR not assigned, aborting\n");
1543                ret = -ENXIO;
1544                goto error_free;
1545        }
1546
1547        ret = pci_request_regions(dev, "ips thermal sensor");
1548        if (ret) {
1549                dev_err(&dev->dev, "thermal resource busy, aborting\n");
1550                goto error_free;
1551        }
1552
1553
1554        ips->regmap = ioremap(pci_resource_start(dev, 0),
1555                              pci_resource_len(dev, 0));
1556        if (!ips->regmap) {
1557                dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1558                ret = -EBUSY;
1559                goto error_release;
1560        }
1561
1562        tse = thm_readb(THM_TSE);
1563        if (tse != TSE_EN) {
1564                dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1565                ret = -ENXIO;
1566                goto error_unmap;
1567        }
1568
1569        trc = thm_readw(THM_TRC);
1570        trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1571        if ((trc & trc_required_mask) != trc_required_mask) {
1572                dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1573                ret = -ENXIO;
1574                goto error_unmap;
1575        }
1576
1577        if (trc & TRC_CORE2_EN)
1578                ips->second_cpu = true;
1579
1580        update_turbo_limits(ips);
1581        dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1582                ips->mcp_power_limit / 10);
1583        dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1584                ips->core_power_limit / 10);
1585        /* BIOS may update limits at runtime */
1586        if (thm_readl(THM_PSC) & PSP_PBRT)
1587                ips->poll_turbo_status = true;
1588
1589        if (!ips_get_i915_syms(ips)) {
1590                dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n");
1591                ips->gpu_turbo_enabled = false;
1592        } else {
1593                dev_dbg(&dev->dev, "graphics turbo enabled\n");
1594                ips->gpu_turbo_enabled = true;
1595        }
1596
1597        /*
1598         * Check PLATFORM_INFO MSR to make sure this chip is
1599         * turbo capable.
1600         */
1601        rdmsrl(PLATFORM_INFO, platform_info);
1602        if (!(platform_info & PLATFORM_TDP)) {
1603                dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1604                ret = -ENODEV;
1605                goto error_unmap;
1606        }
1607
1608        /*
1609         * IRQ handler for ME interaction
1610         * Note: don't use MSI here as the PCH has bugs.
1611         */
1612        pci_disable_msi(dev);
1613        ret = request_irq(dev->irq, ips_irq_handler, IRQF_SHARED, "ips",
1614                          ips);
1615        if (ret) {
1616                dev_err(&dev->dev, "request irq failed, aborting\n");
1617                goto error_unmap;
1618        }
1619
1620        /* Enable aux, hot & critical interrupts */
1621        thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1622                   TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1623        thm_writeb(THM_TEN, TEN_UPDATE_EN);
1624
1625        /* Collect adjustment values */
1626        ips->cta_val = thm_readw(THM_CTA);
1627        ips->pta_val = thm_readw(THM_PTA);
1628        ips->mgta_val = thm_readw(THM_MGTA);
1629
1630        /* Save turbo limits & ratios */
1631        rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1632
1633        ips_disable_cpu_turbo(ips);
1634        ips->cpu_turbo_enabled = false;
1635
1636        /* Create thermal adjust thread */
1637        ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1638        if (IS_ERR(ips->adjust)) {
1639                dev_err(&dev->dev,
1640                        "failed to create thermal adjust thread, aborting\n");
1641                ret = -ENOMEM;
1642                goto error_free_irq;
1643
1644        }
1645
1646        /*
1647         * Set up the work queue and monitor thread. The monitor thread
1648         * will wake up ips_adjust thread.
1649         */
1650        ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1651        if (IS_ERR(ips->monitor)) {
1652                dev_err(&dev->dev,
1653                        "failed to create thermal monitor thread, aborting\n");
1654                ret = -ENOMEM;
1655                goto error_thread_cleanup;
1656        }
1657
1658        hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1659                (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1660        htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1661
1662        thm_writew(THM_HTSHI, htshi);
1663        thm_writel(THM_HTS, hts);
1664
1665        ips_debugfs_init(ips);
1666
1667        dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1668                 ips->mcp_temp_limit);
1669        return ret;
1670
1671error_thread_cleanup:
1672        kthread_stop(ips->adjust);
1673error_free_irq:
1674        free_irq(ips->dev->irq, ips);
1675error_unmap:
1676        iounmap(ips->regmap);
1677error_release:
1678        pci_release_regions(dev);
1679error_free:
1680        kfree(ips);
1681        return ret;
1682}
1683
1684static void ips_remove(struct pci_dev *dev)
1685{
1686        struct ips_driver *ips = pci_get_drvdata(dev);
1687        u64 turbo_override;
1688
1689        if (!ips)
1690                return;
1691
1692        ips_debugfs_cleanup(ips);
1693
1694        /* Release i915 driver */
1695        if (ips->read_mch_val)
1696                symbol_put(i915_read_mch_val);
1697        if (ips->gpu_raise)
1698                symbol_put(i915_gpu_raise);
1699        if (ips->gpu_lower)
1700                symbol_put(i915_gpu_lower);
1701        if (ips->gpu_busy)
1702                symbol_put(i915_gpu_busy);
1703        if (ips->gpu_turbo_disable)
1704                symbol_put(i915_gpu_turbo_disable);
1705
1706        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1707        turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
1708        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1709        wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1710
1711        free_irq(ips->dev->irq, ips);
1712        if (ips->adjust)
1713                kthread_stop(ips->adjust);
1714        if (ips->monitor)
1715                kthread_stop(ips->monitor);
1716        iounmap(ips->regmap);
1717        pci_release_regions(dev);
1718        kfree(ips);
1719        dev_dbg(&dev->dev, "IPS driver removed\n");
1720}
1721
/* PCI shutdown hook: no shutdown-time work is performed for this device */
static void ips_shutdown(struct pci_dev *dev)
{
        /* intentionally empty */
}
1725
1726static struct pci_driver ips_pci_driver = {
1727        .name = "intel ips",
1728        .id_table = ips_id_table,
1729        .probe = ips_probe,
1730        .remove = ips_remove,
1731        .shutdown = ips_shutdown,
1732};
1733
1734static int __init ips_init(void)
1735{
1736        return pci_register_driver(&ips_pci_driver);
1737}
1738module_init(ips_init);
1739
1740static void ips_exit(void)
1741{
1742        pci_unregister_driver(&ips_pci_driver);
1743        return;
1744}
1745module_exit(ips_exit);
1746
1747MODULE_LICENSE("GPL");
1748MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1749MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
1750