linux/drivers/platform/x86/intel_ips.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009-2010 Intel Corporation
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc.,
  15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * The full GNU General Public License is included in this distribution in
  18 * the file called "COPYING".
  19 *
  20 * Authors:
  21 *      Jesse Barnes <jbarnes@virtuousgeek.org>
  22 */
  23
  24/*
  25 * Some Intel Ibex Peak based platforms support so-called "intelligent
  26 * power sharing", which allows the CPU and GPU to cooperate to maximize
  27 * performance within a given TDP (thermal design point).  This driver
  28 * performs the coordination between the CPU and GPU, monitors thermal and
  29 * power statistics in the platform, and initializes power monitoring
  30 * hardware.  It also provides a few tunables to control behavior.  Its
  31 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
  32 * by tracking power and thermal budget; secondarily it can boost turbo
  33 * performance by allocating more power or thermal budget to the CPU or GPU
  34 * based on available headroom and activity.
  35 *
  36 * The basic algorithm is driven by a 5s moving average of temperature.  If
  37 * thermal headroom is available, the CPU and/or GPU power clamps may be
  38 * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
  39 * we scale back the clamp.  Aside from trigger events (when we're critically
  40 * close or over our TDP) we don't adjust the clamps more than once every
  41 * five seconds.
  42 *
  43 * The thermal device (device 31, function 6) has a set of registers that
  44 * are updated by the ME firmware.  The ME should also take the clamp values
  45 * written to those registers and write them to the CPU, but we currently
  46 * bypass that functionality and write the CPU MSR directly.
  47 *
  48 * UNSUPPORTED:
  49 *   - dual MCP configs
  50 *
  51 * TODO:
  52 *   - handle CPU hotplug
  53 *   - provide turbo enable/disable api
  54 *
  55 * Related documents:
  56 *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
  57 *   - CDI 401376 - Ibex Peak EDS
  58 *   - ref 26037, 26641 - IPS BIOS spec
  59 *   - ref 26489 - Nehalem BIOS writer's guide
  60 *   - ref 26921 - Ibex Peak BIOS Specification
  61 */
  62
  63#include <linux/debugfs.h>
  64#include <linux/delay.h>
  65#include <linux/interrupt.h>
  66#include <linux/kernel.h>
  67#include <linux/kthread.h>
  68#include <linux/module.h>
  69#include <linux/pci.h>
  70#include <linux/sched.h>
  71#include <linux/sched/loadavg.h>
  72#include <linux/seq_file.h>
  73#include <linux/string.h>
  74#include <linux/tick.h>
  75#include <linux/timer.h>
  76#include <linux/dmi.h>
  77#include <drm/i915_drm.h>
  78#include <asm/msr.h>
  79#include <asm/processor.h>
  80#include "intel_ips.h"
  81
  82#include <linux/io-64-nonatomic-lo-hi.h>
  83
  84#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
  85
  86/*
  87 * Package level MSRs for monitor/control
  88 */
  89#define PLATFORM_INFO   0xce
  90#define   PLATFORM_TDP          (1<<29)
  91#define   PLATFORM_RATIO        (1<<28)
  92
  93#define IA32_MISC_ENABLE        0x1a0
  94#define   IA32_MISC_TURBO_EN    (1ULL<<38)
  95
  96#define TURBO_POWER_CURRENT_LIMIT       0x1ac
  97#define   TURBO_TDC_OVR_EN      (1UL<<31)
  98#define   TURBO_TDC_MASK        (0x000000007fff0000UL)
  99#define   TURBO_TDC_SHIFT       (16)
 100#define   TURBO_TDP_OVR_EN      (1UL<<15)
 101#define   TURBO_TDP_MASK        (0x0000000000003fffUL)
 102
 103/*
 104 * Core/thread MSRs for monitoring
 105 */
 106#define IA32_PERF_CTL           0x199
 107#define   IA32_PERF_TURBO_DIS   (1ULL<<32)
 108
 109/*
 110 * Thermal PCI device regs
 111 */
 112#define THM_CFG_TBAR    0x10
 113#define THM_CFG_TBAR_HI 0x14
 114
 115#define THM_TSIU        0x00
 116#define THM_TSE         0x01
 117#define   TSE_EN        0xb8
 118#define THM_TSS         0x02
 119#define THM_TSTR        0x03
 120#define THM_TSTTP       0x04
 121#define THM_TSCO        0x08
 122#define THM_TSES        0x0c
 123#define THM_TSGPEN      0x0d
 124#define   TSGPEN_HOT_LOHI       (1<<1)
 125#define   TSGPEN_CRIT_LOHI      (1<<2)
 126#define THM_TSPC        0x0e
 127#define THM_PPEC        0x10
 128#define THM_CTA         0x12
 129#define THM_PTA         0x14
 130#define   PTA_SLOPE_MASK        (0xff00)
 131#define   PTA_SLOPE_SHIFT       8
 132#define   PTA_OFFSET_MASK       (0x00ff)
 133#define THM_MGTA        0x16
 134#define   MGTA_SLOPE_MASK       (0xff00)
 135#define   MGTA_SLOPE_SHIFT      8
 136#define   MGTA_OFFSET_MASK      (0x00ff)
 137#define THM_TRC         0x1a
 138#define   TRC_CORE2_EN  (1<<15)
 139#define   TRC_THM_EN    (1<<12)
 140#define   TRC_C6_WAR    (1<<8)
 141#define   TRC_CORE1_EN  (1<<7)
 142#define   TRC_CORE_PWR  (1<<6)
 143#define   TRC_PCH_EN    (1<<5)
 144#define   TRC_MCH_EN    (1<<4)
 145#define   TRC_DIMM4     (1<<3)
 146#define   TRC_DIMM3     (1<<2)
 147#define   TRC_DIMM2     (1<<1)
 148#define   TRC_DIMM1     (1<<0)
 149#define THM_TES         0x20
 150#define THM_TEN         0x21
 151#define   TEN_UPDATE_EN 1
 152#define THM_PSC         0x24
 153#define   PSC_NTG       (1<<0) /* No GFX turbo support */
 154#define   PSC_NTPC      (1<<1) /* No CPU turbo support */
 155#define   PSC_PP_DEF    (0<<2) /* Perf policy up to driver */
 156#define   PSP_PP_PC     (1<<2) /* BIOS prefers CPU perf */
 157#define   PSP_PP_BAL    (2<<2) /* BIOS wants balanced perf */
 158#define   PSP_PP_GFX    (3<<2) /* BIOS prefers GFX perf */
 159#define   PSP_PBRT      (1<<4) /* BIOS run time support */
 160#define THM_CTV1        0x30
 161#define   CTV_TEMP_ERROR (1<<15)
 162#define   CTV_TEMP_MASK 0x3f
 163#define   CTV_
 164#define THM_CTV2        0x32
 165#define THM_CEC         0x34 /* undocumented power accumulator in joules */
 166#define THM_AE          0x3f
 167#define THM_HTS         0x50 /* 32 bits */
 168#define   HTS_PCPL_MASK (0x7fe00000)
 169#define   HTS_PCPL_SHIFT 21
 170#define   HTS_GPL_MASK  (0x001ff000)
 171#define   HTS_GPL_SHIFT 12
 172#define   HTS_PP_MASK   (0x00000c00)
 173#define   HTS_PP_SHIFT  10
 174#define   HTS_PP_DEF    0
 175#define   HTS_PP_PROC   1
 176#define   HTS_PP_BAL    2
 177#define   HTS_PP_GFX    3
 178#define   HTS_PCTD_DIS  (1<<9)
 179#define   HTS_GTD_DIS   (1<<8)
 180#define   HTS_PTL_MASK  (0x000000fe)
 181#define   HTS_PTL_SHIFT 1
 182#define   HTS_NVV       (1<<0)
 183#define THM_HTSHI       0x54 /* 16 bits */
 184#define   HTS2_PPL_MASK         (0x03ff)
 185#define   HTS2_PRST_MASK        (0x3c00)
 186#define   HTS2_PRST_SHIFT       10
 187#define   HTS2_PRST_UNLOADED    0
 188#define   HTS2_PRST_RUNNING     1
 189#define   HTS2_PRST_TDISOP      2 /* turbo disabled due to power */
 190#define   HTS2_PRST_TDISHT      3 /* turbo disabled due to high temp */
 191#define   HTS2_PRST_TDISUSR     4 /* user disabled turbo */
 192#define   HTS2_PRST_TDISPLAT    5 /* platform disabled turbo */
 193#define   HTS2_PRST_TDISPM      6 /* power management disabled turbo */
 194#define   HTS2_PRST_TDISERR     7 /* some kind of error disabled turbo */
 195#define THM_PTL         0x56
 196#define THM_MGTV        0x58
 197#define   TV_MASK       0x000000000000ff00
 198#define   TV_SHIFT      8
 199#define THM_PTV         0x60
 200#define   PTV_MASK      0x00ff
 201#define THM_MMGPC       0x64
 202#define THM_MPPC        0x66
 203#define THM_MPCPC       0x68
 204#define THM_TSPIEN      0x82
 205#define   TSPIEN_AUX_LOHI       (1<<0)
 206#define   TSPIEN_HOT_LOHI       (1<<1)
 207#define   TSPIEN_CRIT_LOHI      (1<<2)
 208#define   TSPIEN_AUX2_LOHI      (1<<3)
 209#define THM_TSLOCK      0x83
 210#define THM_ATR         0x84
 211#define THM_TOF         0x87
 212#define THM_STS         0x98
 213#define   STS_PCPL_MASK         (0x7fe00000)
 214#define   STS_PCPL_SHIFT        21
 215#define   STS_GPL_MASK          (0x001ff000)
 216#define   STS_GPL_SHIFT         12
 217#define   STS_PP_MASK           (0x00000c00)
 218#define   STS_PP_SHIFT          10
 219#define   STS_PP_DEF            0
 220#define   STS_PP_PROC           1
 221#define   STS_PP_BAL            2
 222#define   STS_PP_GFX            3
 223#define   STS_PCTD_DIS          (1<<9)
 224#define   STS_GTD_DIS           (1<<8)
 225#define   STS_PTL_MASK          (0x000000fe)
 226#define   STS_PTL_SHIFT         1
 227#define   STS_NVV               (1<<0)
 228#define THM_SEC         0x9c
 229#define   SEC_ACK       (1<<0)
 230#define THM_TC3         0xa4
 231#define THM_TC1         0xa8
 232#define   STS_PPL_MASK          (0x0003ff00)
 233#define   STS_PPL_SHIFT         16
 234#define THM_TC2         0xac
 235#define THM_DTV         0xb0
 236#define THM_ITV         0xd8
 237#define   ITV_ME_SEQNO_MASK 0x00ff0000 /* ME should update every ~200ms */
 238#define   ITV_ME_SEQNO_SHIFT (16)
 239#define   ITV_MCH_TEMP_MASK 0x0000ff00
 240#define   ITV_MCH_TEMP_SHIFT (8)
 241#define   ITV_PCH_TEMP_MASK 0x000000ff
 242
 243#define thm_readb(off) readb(ips->regmap + (off))
 244#define thm_readw(off) readw(ips->regmap + (off))
 245#define thm_readl(off) readl(ips->regmap + (off))
 246#define thm_readq(off) readq(ips->regmap + (off))
 247
 248#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
 249#define thm_writew(off, val) writew((val), ips->regmap + (off))
 250#define thm_writel(off, val) writel((val), ips->regmap + (off))
 251
 252static const int IPS_ADJUST_PERIOD = 5000; /* ms */
 253static bool late_i915_load = false;
 254
 255/* For initial average collection */
 256static const int IPS_SAMPLE_PERIOD = 200; /* ms */
 257static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
 258#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
 259
 260/* Per-SKU limits */
 261struct ips_mcp_limits {
 262        int cpu_family;
 263        int cpu_model; /* includes extended model... */
 264        int mcp_power_limit; /* mW units */
 265        int core_power_limit;
 266        int mch_power_limit;
 267        int core_temp_limit; /* degrees C */
 268        int mch_temp_limit;
 269};
 270
 271/* Max temps are -10 degrees C to avoid PROCHOT# */
 272
 273static struct ips_mcp_limits ips_sv_limits = {
 274        .mcp_power_limit = 35000,
 275        .core_power_limit = 29000,
 276        .mch_power_limit = 20000,
 277        .core_temp_limit = 95,
 278        .mch_temp_limit = 90
 279};
 280
 281static struct ips_mcp_limits ips_lv_limits = {
 282        .mcp_power_limit = 25000,
 283        .core_power_limit = 21000,
 284        .mch_power_limit = 13000,
 285        .core_temp_limit = 95,
 286        .mch_temp_limit = 90
 287};
 288
 289static struct ips_mcp_limits ips_ulv_limits = {
 290        .mcp_power_limit = 18000,
 291        .core_power_limit = 14000,
 292        .mch_power_limit = 11000,
 293        .core_temp_limit = 95,
 294        .mch_temp_limit = 90
 295};
 296
 297struct ips_driver {
 298        struct pci_dev *dev;
 299        void *regmap;
 300        struct task_struct *monitor;
 301        struct task_struct *adjust;
 302        struct dentry *debug_root;
 303
 304        /* Average CPU core temps (all averages in .01 degrees C for precision) */
 305        u16 ctv1_avg_temp;
 306        u16 ctv2_avg_temp;
 307        /* GMCH average */
 308        u16 mch_avg_temp;
 309        /* Average for the CPU (both cores?) */
 310        u16 mcp_avg_temp;
 311        /* Average power consumption (in mW) */
 312        u32 cpu_avg_power;
 313        u32 mch_avg_power;
 314
 315        /* Offset values */
 316        u16 cta_val;
 317        u16 pta_val;
 318        u16 mgta_val;
 319
 320        /* Maximums & prefs, protected by turbo status lock */
 321        spinlock_t turbo_status_lock;
 322        u16 mcp_temp_limit;
 323        u16 mcp_power_limit;
 324        u16 core_power_limit;
 325        u16 mch_power_limit;
 326        bool cpu_turbo_enabled;
 327        bool __cpu_turbo_on;
 328        bool gpu_turbo_enabled;
 329        bool __gpu_turbo_on;
 330        bool gpu_preferred;
 331        bool poll_turbo_status;
 332        bool second_cpu;
 333        bool turbo_toggle_allowed;
 334        struct ips_mcp_limits *limits;
 335
 336        /* Optional MCH interfaces for if i915 is in use */
 337        unsigned long (*read_mch_val)(void);
 338        bool (*gpu_raise)(void);
 339        bool (*gpu_lower)(void);
 340        bool (*gpu_busy)(void);
 341        bool (*gpu_turbo_disable)(void);
 342
 343        /* For restoration at unload */
 344        u64 orig_turbo_limit;
 345        u64 orig_turbo_ratios;
 346};
 347
 348static bool
 349ips_gpu_turbo_enabled(struct ips_driver *ips);
 350
 351/**
 352 * ips_cpu_busy - is CPU busy?
 353 * @ips: IPS driver struct
 354 *
 355 * Check CPU for load to see whether we should increase its thermal budget.
 356 *
 357 * RETURNS:
 358 * True if the CPU could use more power, false otherwise.
 359 */
 360static bool ips_cpu_busy(struct ips_driver *ips)
 361{
 362        if ((avenrun[0] >> FSHIFT) > 1)
 363                return true;
 364
 365        return false;
 366}
 367
 368/**
 369 * ips_cpu_raise - raise CPU power clamp
 370 * @ips: IPS driver struct
 371 *
 372 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
 373 * this platform.
 374 *
 375 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
 376 * long as we haven't hit the TDP limit for the SKU).
 377 */
 378static void ips_cpu_raise(struct ips_driver *ips)
 379{
 380        u64 turbo_override;
 381        u16 cur_tdp_limit, new_tdp_limit;
 382
 383        if (!ips->cpu_turbo_enabled)
 384                return;
 385
 386        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 387
 388        cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
 389        new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
 390
 391        /* Clamp to SKU TDP limit */
 392        if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
 393                new_tdp_limit = cur_tdp_limit;
 394
 395        thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
 396
 397        turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 398        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 399
 400        turbo_override &= ~TURBO_TDP_MASK;
 401        turbo_override |= new_tdp_limit;
 402
 403        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 404}
 405
 406/**
 407 * ips_cpu_lower - lower CPU power clamp
 408 * @ips: IPS driver struct
 409 *
 410 * Lower CPU power clamp b %IPS_CPU_STEP if possible.
 411 *
 412 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
 413 * as low as the platform limits will allow (though we could go lower there
 414 * wouldn't be much point).
 415 */
 416static void ips_cpu_lower(struct ips_driver *ips)
 417{
 418        u64 turbo_override;
 419        u16 cur_limit, new_limit;
 420
 421        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 422
 423        cur_limit = turbo_override & TURBO_TDP_MASK;
 424        new_limit = cur_limit - 8; /* 1W decrease */
 425
 426        /* Clamp to SKU TDP limit */
 427        if (new_limit  < (ips->orig_turbo_limit & TURBO_TDP_MASK))
 428                new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
 429
 430        thm_writew(THM_MPCPC, (new_limit * 10) / 8);
 431
 432        turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 433        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 434
 435        turbo_override &= ~TURBO_TDP_MASK;
 436        turbo_override |= new_limit;
 437
 438        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 439}
 440
 441/**
 442 * do_enable_cpu_turbo - internal turbo enable function
 443 * @data: unused
 444 *
 445 * Internal function for actually updating MSRs.  When we enable/disable
 446 * turbo, we need to do it on each CPU; this function is the one called
 447 * by on_each_cpu() when needed.
 448 */
 449static void do_enable_cpu_turbo(void *data)
 450{
 451        u64 perf_ctl;
 452
 453        rdmsrl(IA32_PERF_CTL, perf_ctl);
 454        if (perf_ctl & IA32_PERF_TURBO_DIS) {
 455                perf_ctl &= ~IA32_PERF_TURBO_DIS;
 456                wrmsrl(IA32_PERF_CTL, perf_ctl);
 457        }
 458}
 459
 460/**
 461 * ips_enable_cpu_turbo - enable turbo mode on all CPUs
 462 * @ips: IPS driver struct
 463 *
 464 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
 465 * all logical threads.
 466 */
 467static void ips_enable_cpu_turbo(struct ips_driver *ips)
 468{
 469        /* Already on, no need to mess with MSRs */
 470        if (ips->__cpu_turbo_on)
 471                return;
 472
 473        if (ips->turbo_toggle_allowed)
 474                on_each_cpu(do_enable_cpu_turbo, ips, 1);
 475
 476        ips->__cpu_turbo_on = true;
 477}
 478
 479/**
 480 * do_disable_cpu_turbo - internal turbo disable function
 481 * @data: unused
 482 *
 483 * Internal function for actually updating MSRs.  When we enable/disable
 484 * turbo, we need to do it on each CPU; this function is the one called
 485 * by on_each_cpu() when needed.
 486 */
 487static void do_disable_cpu_turbo(void *data)
 488{
 489        u64 perf_ctl;
 490
 491        rdmsrl(IA32_PERF_CTL, perf_ctl);
 492        if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
 493                perf_ctl |= IA32_PERF_TURBO_DIS;
 494                wrmsrl(IA32_PERF_CTL, perf_ctl);
 495        }
 496}
 497
 498/**
 499 * ips_disable_cpu_turbo - disable turbo mode on all CPUs
 500 * @ips: IPS driver struct
 501 *
 502 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
 503 * all logical threads.
 504 */
 505static void ips_disable_cpu_turbo(struct ips_driver *ips)
 506{
 507        /* Already off, leave it */
 508        if (!ips->__cpu_turbo_on)
 509                return;
 510
 511        if (ips->turbo_toggle_allowed)
 512                on_each_cpu(do_disable_cpu_turbo, ips, 1);
 513
 514        ips->__cpu_turbo_on = false;
 515}
 516
 517/**
 518 * ips_gpu_busy - is GPU busy?
 519 * @ips: IPS driver struct
 520 *
 521 * Check GPU for load to see whether we should increase its thermal budget.
 522 * We need to call into the i915 driver in this case.
 523 *
 524 * RETURNS:
 525 * True if the GPU could use more power, false otherwise.
 526 */
 527static bool ips_gpu_busy(struct ips_driver *ips)
 528{
 529        if (!ips_gpu_turbo_enabled(ips))
 530                return false;
 531
 532        return ips->gpu_busy();
 533}
 534
 535/**
 536 * ips_gpu_raise - raise GPU power clamp
 537 * @ips: IPS driver struct
 538 *
 539 * Raise the GPU frequency/power if possible.  We need to call into the
 540 * i915 driver in this case.
 541 */
 542static void ips_gpu_raise(struct ips_driver *ips)
 543{
 544        if (!ips_gpu_turbo_enabled(ips))
 545                return;
 546
 547        if (!ips->gpu_raise())
 548                ips->gpu_turbo_enabled = false;
 549
 550        return;
 551}
 552
 553/**
 554 * ips_gpu_lower - lower GPU power clamp
 555 * @ips: IPS driver struct
 556 *
 557 * Lower GPU frequency/power if possible.  Need to call i915.
 558 */
 559static void ips_gpu_lower(struct ips_driver *ips)
 560{
 561        if (!ips_gpu_turbo_enabled(ips))
 562                return;
 563
 564        if (!ips->gpu_lower())
 565                ips->gpu_turbo_enabled = false;
 566
 567        return;
 568}
 569
 570/**
 571 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
 572 * @ips: IPS driver struct
 573 *
 574 * Call into the graphics driver indicating that it can safely use
 575 * turbo mode.
 576 */
 577static void ips_enable_gpu_turbo(struct ips_driver *ips)
 578{
 579        if (ips->__gpu_turbo_on)
 580                return;
 581        ips->__gpu_turbo_on = true;
 582}
 583
 584/**
 585 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
 586 * @ips: IPS driver struct
 587 *
 588 * Request that the graphics driver disable turbo mode.
 589 */
 590static void ips_disable_gpu_turbo(struct ips_driver *ips)
 591{
 592        /* Avoid calling i915 if turbo is already disabled */
 593        if (!ips->__gpu_turbo_on)
 594                return;
 595
 596        if (!ips->gpu_turbo_disable())
 597                dev_err(&ips->dev->dev, "failed to disable graphics turbo\n");
 598        else
 599                ips->__gpu_turbo_on = false;
 600}
 601
 602/**
 603 * mcp_exceeded - check whether we're outside our thermal & power limits
 604 * @ips: IPS driver struct
 605 *
 606 * Check whether the MCP is over its thermal or power budget.
 607 */
 608static bool mcp_exceeded(struct ips_driver *ips)
 609{
 610        unsigned long flags;
 611        bool ret = false;
 612        u32 temp_limit;
 613        u32 avg_power;
 614
 615        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 616
 617        temp_limit = ips->mcp_temp_limit * 100;
 618        if (ips->mcp_avg_temp > temp_limit)
 619                ret = true;
 620
 621        avg_power = ips->cpu_avg_power + ips->mch_avg_power;
 622        if (avg_power > ips->mcp_power_limit)
 623                ret = true;
 624
 625        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 626
 627        return ret;
 628}
 629
 630/**
 631 * cpu_exceeded - check whether a CPU core is outside its limits
 632 * @ips: IPS driver struct
 633 * @cpu: CPU number to check
 634 *
 635 * Check a given CPU's average temp or power is over its limit.
 636 */
 637static bool cpu_exceeded(struct ips_driver *ips, int cpu)
 638{
 639        unsigned long flags;
 640        int avg;
 641        bool ret = false;
 642
 643        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 644        avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
 645        if (avg > (ips->limits->core_temp_limit * 100))
 646                ret = true;
 647        if (ips->cpu_avg_power > ips->core_power_limit * 100)
 648                ret = true;
 649        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 650
 651        if (ret)
 652                dev_info(&ips->dev->dev,
 653                         "CPU power or thermal limit exceeded\n");
 654
 655        return ret;
 656}
 657
 658/**
 659 * mch_exceeded - check whether the GPU is over budget
 660 * @ips: IPS driver struct
 661 *
 662 * Check the MCH temp & power against their maximums.
 663 */
 664static bool mch_exceeded(struct ips_driver *ips)
 665{
 666        unsigned long flags;
 667        bool ret = false;
 668
 669        spin_lock_irqsave(&ips->turbo_status_lock, flags);
 670        if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
 671                ret = true;
 672        if (ips->mch_avg_power > ips->mch_power_limit)
 673                ret = true;
 674        spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 675
 676        return ret;
 677}
 678
 679/**
 680 * verify_limits - verify BIOS provided limits
 681 * @ips: IPS structure
 682 *
 683 * BIOS can optionally provide non-default limits for power and temp.  Check
 684 * them here and use the defaults if the BIOS values are not provided or
 685 * are otherwise unusable.
 686 */
 687static void verify_limits(struct ips_driver *ips)
 688{
 689        if (ips->mcp_power_limit < ips->limits->mcp_power_limit ||
 690            ips->mcp_power_limit > 35000)
 691                ips->mcp_power_limit = ips->limits->mcp_power_limit;
 692
 693        if (ips->mcp_temp_limit < ips->limits->core_temp_limit ||
 694            ips->mcp_temp_limit < ips->limits->mch_temp_limit ||
 695            ips->mcp_temp_limit > 150)
 696                ips->mcp_temp_limit = min(ips->limits->core_temp_limit,
 697                                          ips->limits->mch_temp_limit);
 698}
 699
 700/**
 701 * update_turbo_limits - get various limits & settings from regs
 702 * @ips: IPS driver struct
 703 *
 704 * Update the IPS power & temp limits, along with turbo enable flags,
 705 * based on latest register contents.
 706 *
 707 * Used at init time and for runtime BIOS support, which requires polling
 708 * the regs for updates (as a result of AC->DC transition for example).
 709 *
 710 * LOCKING:
 711 * Caller must hold turbo_status_lock (outside of init)
 712 */
 713static void update_turbo_limits(struct ips_driver *ips)
 714{
 715        u32 hts = thm_readl(THM_HTS);
 716
 717        ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
 718        /* 
 719         * Disable turbo for now, until we can figure out why the power figures
 720         * are wrong
 721         */
 722        ips->cpu_turbo_enabled = false;
 723
 724        if (ips->gpu_busy)
 725                ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
 726
 727        ips->core_power_limit = thm_readw(THM_MPCPC);
 728        ips->mch_power_limit = thm_readw(THM_MMGPC);
 729        ips->mcp_temp_limit = thm_readw(THM_PTL);
 730        ips->mcp_power_limit = thm_readw(THM_MPPC);
 731
 732        verify_limits(ips);
 733        /* Ignore BIOS CPU vs GPU pref */
 734}
 735
 736/**
 737 * ips_adjust - adjust power clamp based on thermal state
 738 * @data: ips driver structure
 739 *
 740 * Wake up every 5s or so and check whether we should adjust the power clamp.
 741 * Check CPU and GPU load to determine which needs adjustment.  There are
 742 * several things to consider here:
 743 *   - do we need to adjust up or down?
 744 *   - is CPU busy?
 745 *   - is GPU busy?
 746 *   - is CPU in turbo?
 747 *   - is GPU in turbo?
 748 *   - is CPU or GPU preferred? (CPU is default)
 749 *
 750 * So, given the above, we do the following:
 751 *   - up (TDP available)
 752 *     - CPU not busy, GPU not busy - nothing
 753 *     - CPU busy, GPU not busy - adjust CPU up
 754 *     - CPU not busy, GPU busy - adjust GPU up
 755 *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
 756 *       non-preferred unit if necessary
 757 *   - down (at TDP limit)
 758 *     - adjust both CPU and GPU down if possible
 759 *
 760                cpu+ gpu+       cpu+gpu-        cpu-gpu+        cpu-gpu-
 761cpu < gpu <     cpu+gpu+        cpu+            gpu+            nothing
 762cpu < gpu >=    cpu+gpu-(mcp<)  cpu+gpu-(mcp<)  gpu-            gpu-
 763cpu >= gpu <    cpu-gpu+(mcp<)  cpu-            cpu-gpu+(mcp<)  cpu-
 764cpu >= gpu >=   cpu-gpu-        cpu-gpu-        cpu-gpu-        cpu-gpu-
 765 *
 766 */
 767static int ips_adjust(void *data)
 768{
 769        struct ips_driver *ips = data;
 770        unsigned long flags;
 771
 772        dev_dbg(&ips->dev->dev, "starting ips-adjust thread\n");
 773
 774        /*
 775         * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
 776         * often isn't recommended due to ME interaction.
 777         */
 778        do {
 779                bool cpu_busy = ips_cpu_busy(ips);
 780                bool gpu_busy = ips_gpu_busy(ips);
 781
 782                spin_lock_irqsave(&ips->turbo_status_lock, flags);
 783                if (ips->poll_turbo_status)
 784                        update_turbo_limits(ips);
 785                spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 786
 787                /* Update turbo status if necessary */
 788                if (ips->cpu_turbo_enabled)
 789                        ips_enable_cpu_turbo(ips);
 790                else
 791                        ips_disable_cpu_turbo(ips);
 792
 793                if (ips->gpu_turbo_enabled)
 794                        ips_enable_gpu_turbo(ips);
 795                else
 796                        ips_disable_gpu_turbo(ips);
 797
 798                /* We're outside our comfort zone, crank them down */
 799                if (mcp_exceeded(ips)) {
 800                        ips_cpu_lower(ips);
 801                        ips_gpu_lower(ips);
 802                        goto sleep;
 803                }
 804
 805                if (!cpu_exceeded(ips, 0) && cpu_busy)
 806                        ips_cpu_raise(ips);
 807                else
 808                        ips_cpu_lower(ips);
 809
 810                if (!mch_exceeded(ips) && gpu_busy)
 811                        ips_gpu_raise(ips);
 812                else
 813                        ips_gpu_lower(ips);
 814
 815sleep:
 816                schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
 817        } while (!kthread_should_stop());
 818
 819        dev_dbg(&ips->dev->dev, "ips-adjust thread stopped\n");
 820
 821        return 0;
 822}
 823
 824/*
 825 * Helpers for reading out temp/power values and calculating their
 826 * averages for the decision making and monitoring functions.
 827 */
 828
 829static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
 830{
 831        u64 total = 0;
 832        int i;
 833        u16 avg;
 834
 835        for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 836                total += (u64)(array[i] * 100);
 837
 838        do_div(total, IPS_SAMPLE_COUNT);
 839
 840        avg = (u16)total;
 841
 842        return avg;
 843}
 844
 845static u16 read_mgtv(struct ips_driver *ips)
 846{
 847        u16 ret;
 848        u64 slope, offset;
 849        u64 val;
 850
 851        val = thm_readq(THM_MGTV);
 852        val = (val & TV_MASK) >> TV_SHIFT;
 853
 854        slope = offset = thm_readw(THM_MGTA);
 855        slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
 856        offset = offset & MGTA_OFFSET_MASK;
 857
 858        ret = ((val * slope + 0x40) >> 7) + offset;
 859
 860        return 0; /* MCH temp reporting buggy */
 861}
 862
 863static u16 read_ptv(struct ips_driver *ips)
 864{
 865        u16 val, slope, offset;
 866
 867        slope = (ips->pta_val & PTA_SLOPE_MASK) >> PTA_SLOPE_SHIFT;
 868        offset = ips->pta_val & PTA_OFFSET_MASK;
 869
 870        val = thm_readw(THM_PTV) & PTV_MASK;
 871
 872        return val;
 873}
 874
 875static u16 read_ctv(struct ips_driver *ips, int cpu)
 876{
 877        int reg = cpu ? THM_CTV2 : THM_CTV1;
 878        u16 val;
 879
 880        val = thm_readw(reg);
 881        if (!(val & CTV_TEMP_ERROR))
 882                val = (val) >> 6; /* discard fractional component */
 883        else
 884                val = 0;
 885
 886        return val;
 887}
 888
 889static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
 890{
 891        u32 val;
 892        u32 ret;
 893
 894        /*
 895         * CEC is in joules/65535.  Take difference over time to
 896         * get watts.
 897         */
 898        val = thm_readl(THM_CEC);
 899
 900        /* period is in ms and we want mW */
 901        ret = (((val - *last) * 1000) / period);
 902        ret = (ret * 1000) / 65535;
 903        *last = val;
 904
 905        return 0;
 906}
 907
 908static const u16 temp_decay_factor = 2;
 909static u16 update_average_temp(u16 avg, u16 val)
 910{
 911        u16 ret;
 912
 913        /* Multiply by 100 for extra precision */
 914        ret = (val * 100 / temp_decay_factor) +
 915                (((temp_decay_factor - 1) * avg) / temp_decay_factor);
 916        return ret;
 917}
 918
 919static const u16 power_decay_factor = 2;
 920static u16 update_average_power(u32 avg, u32 val)
 921{
 922        u32 ret;
 923
 924        ret = (val / power_decay_factor) +
 925                (((power_decay_factor - 1) * avg) / power_decay_factor);
 926
 927        return ret;
 928}
 929
 930static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
 931{
 932        u64 total = 0;
 933        u32 avg;
 934        int i;
 935
 936        for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 937                total += array[i];
 938
 939        do_div(total, IPS_SAMPLE_COUNT);
 940        avg = (u32)total;
 941
 942        return avg;
 943}
 944
 945static void monitor_timeout(unsigned long arg)
 946{
 947        wake_up_process((struct task_struct *)arg);
 948}
 949
 950/**
 951 * ips_monitor - temp/power monitoring thread
 952 * @data: ips driver structure
 953 *
 954 * This is the main function for the IPS driver.  It monitors power and
 955 * tempurature in the MCP and adjusts CPU and GPU power clams accordingly.
 956 *
 957 * We keep a 5s moving average of power consumption and tempurature.  Using
 958 * that data, along with CPU vs GPU preference, we adjust the power clamps
 959 * up or down.
 960 */
 961static int ips_monitor(void *data)
 962{
 963        struct ips_driver *ips = data;
 964        struct timer_list timer;
 965        unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
 966        int i;
 967        u32 *cpu_samples, *mchp_samples, old_cpu_power;
 968        u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
 969        u8 cur_seqno, last_seqno;
 970
 971        mcp_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 972        ctv1_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 973        ctv2_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 974        mch_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 975        cpu_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 976        mchp_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
 977        if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
 978                        !cpu_samples || !mchp_samples) {
 979                dev_err(&ips->dev->dev,
 980                        "failed to allocate sample array, ips disabled\n");
 981                kfree(mcp_samples);
 982                kfree(ctv1_samples);
 983                kfree(ctv2_samples);
 984                kfree(mch_samples);
 985                kfree(cpu_samples);
 986                kfree(mchp_samples);
 987                return -ENOMEM;
 988        }
 989
 990        last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
 991                ITV_ME_SEQNO_SHIFT;
 992        seqno_timestamp = get_jiffies_64();
 993
 994        old_cpu_power = thm_readl(THM_CEC);
 995        schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
 996
 997        /* Collect an initial average */
 998        for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
 999                u32 mchp, cpu_power;
1000                u16 val;
1001
1002                mcp_samples[i] = read_ptv(ips);
1003
1004                val = read_ctv(ips, 0);
1005                ctv1_samples[i] = val;
1006
1007                val = read_ctv(ips, 1);
1008                ctv2_samples[i] = val;
1009
1010                val = read_mgtv(ips);
1011                mch_samples[i] = val;
1012
1013                cpu_power = get_cpu_power(ips, &old_cpu_power,
1014                                          IPS_SAMPLE_PERIOD);
1015                cpu_samples[i] = cpu_power;
1016
1017                if (ips->read_mch_val) {
1018                        mchp = ips->read_mch_val();
1019                        mchp_samples[i] = mchp;
1020                }
1021
1022                schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1023                if (kthread_should_stop())
1024                        break;
1025        }
1026
1027        ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
1028        ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
1029        ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
1030        ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
1031        ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
1032        ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
1033        kfree(mcp_samples);
1034        kfree(ctv1_samples);
1035        kfree(ctv2_samples);
1036        kfree(mch_samples);
1037        kfree(cpu_samples);
1038        kfree(mchp_samples);
1039
1040        /* Start the adjustment thread now that we have data */
1041        wake_up_process(ips->adjust);
1042
1043        /*
1044         * Ok, now we have an initial avg.  From here on out, we track the
1045         * running avg using a decaying average calculation.  This allows
1046         * us to reduce the sample frequency if the CPU and GPU are idle.
1047         */
1048        old_cpu_power = thm_readl(THM_CEC);
1049        schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1050        last_sample_period = IPS_SAMPLE_PERIOD;
1051
1052        setup_deferrable_timer_on_stack(&timer, monitor_timeout,
1053                                        (unsigned long)current);
1054        do {
1055                u32 cpu_val, mch_val;
1056                u16 val;
1057
1058                /* MCP itself */
1059                val = read_ptv(ips);
1060                ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1061
1062                /* Processor 0 */
1063                val = read_ctv(ips, 0);
1064                ips->ctv1_avg_temp =
1065                        update_average_temp(ips->ctv1_avg_temp, val);
1066                /* Power */
1067                cpu_val = get_cpu_power(ips, &old_cpu_power,
1068                                        last_sample_period);
1069                ips->cpu_avg_power =
1070                        update_average_power(ips->cpu_avg_power, cpu_val);
1071
1072                if (ips->second_cpu) {
1073                        /* Processor 1 */
1074                        val = read_ctv(ips, 1);
1075                        ips->ctv2_avg_temp =
1076                                update_average_temp(ips->ctv2_avg_temp, val);
1077                }
1078
1079                /* MCH */
1080                val = read_mgtv(ips);
1081                ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1082                /* Power */
1083                if (ips->read_mch_val) {
1084                        mch_val = ips->read_mch_val();
1085                        ips->mch_avg_power =
1086                                update_average_power(ips->mch_avg_power,
1087                                                     mch_val);
1088                }
1089
1090                /*
1091                 * Make sure ME is updating thermal regs.
1092                 * Note:
1093                 * If it's been more than a second since the last update,
1094                 * the ME is probably hung.
1095                 */
1096                cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1097                        ITV_ME_SEQNO_SHIFT;
1098                if (cur_seqno == last_seqno &&
1099                    time_after(jiffies, seqno_timestamp + HZ)) {
1100                        dev_warn(&ips->dev->dev, "ME failed to update for more than 1s, likely hung\n");
1101                } else {
1102                        seqno_timestamp = get_jiffies_64();
1103                        last_seqno = cur_seqno;
1104                }
1105
1106                last_msecs = jiffies_to_msecs(jiffies);
1107                expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1108
1109                __set_current_state(TASK_INTERRUPTIBLE);
1110                mod_timer(&timer, expire);
1111                schedule();
1112
1113                /* Calculate actual sample period for power averaging */
1114                last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1115                if (!last_sample_period)
1116                        last_sample_period = 1;
1117        } while (!kthread_should_stop());
1118
1119        del_timer_sync(&timer);
1120        destroy_timer_on_stack(&timer);
1121
1122        dev_dbg(&ips->dev->dev, "ips-monitor thread stopped\n");
1123
1124        return 0;
1125}
1126
#if 0
/*
 * Debug helpers, currently compiled out.  Each THM_DUMP* macro reads one
 * thermal register and prints its name and value via dev_dbg(); they expect
 * a local variable named "ips" in the calling scope.
 */
#define THM_DUMPW(reg) \
	do { \
		u16 val = thm_readw(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%04x\n", val); \
	} while (0)
#define THM_DUMPL(reg) \
	do { \
		u32 val = thm_readl(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%08x\n", val); \
	} while (0)
#define THM_DUMPQ(reg) \
	do { \
		u64 val = thm_readq(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%016llx\n", \
			(unsigned long long)val); \
	} while (0)

/* Print the processor temperature limit and a set of thermal registers. */
static void dump_thermal_info(struct ips_driver *ips)
{
	u16 ptl;

	ptl = thm_readw(THM_PTL);
	dev_dbg(&ips->dev->dev, "Processor temp limit: %d\n", ptl);

	THM_DUMPW(THM_CTA);
	THM_DUMPW(THM_TRC);
	THM_DUMPW(THM_CTV1);
	THM_DUMPL(THM_STS);
	THM_DUMPW(THM_PTV);
	THM_DUMPQ(THM_MGTV);
}
#endif
1159
1160/**
1161 * ips_irq_handler - handle temperature triggers and other IPS events
1162 * @irq: irq number
1163 * @arg: unused
1164 *
1165 * Handle temperature limit trigger events, generally by lowering the clamps.
1166 * If we're at a critical limit, we clamp back to the lowest possible value
1167 * to prevent emergency shutdown.
1168 */
1169static irqreturn_t ips_irq_handler(int irq, void *arg)
1170{
1171        struct ips_driver *ips = arg;
1172        u8 tses = thm_readb(THM_TSES);
1173        u8 tes = thm_readb(THM_TES);
1174
1175        if (!tses && !tes)
1176                return IRQ_NONE;
1177
1178        dev_info(&ips->dev->dev, "TSES: 0x%02x\n", tses);
1179        dev_info(&ips->dev->dev, "TES: 0x%02x\n", tes);
1180
1181        /* STS update from EC? */
1182        if (tes & 1) {
1183                u32 sts, tc1;
1184
1185                sts = thm_readl(THM_STS);
1186                tc1 = thm_readl(THM_TC1);
1187
1188                if (sts & STS_NVV) {
1189                        spin_lock(&ips->turbo_status_lock);
1190                        ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1191                                STS_PCPL_SHIFT;
1192                        ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1193                                STS_GPL_SHIFT;
1194                        /* ignore EC CPU vs GPU pref */
1195                        ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1196                        /* 
1197                         * Disable turbo for now, until we can figure
1198                         * out why the power figures are wrong
1199                         */
1200                        ips->cpu_turbo_enabled = false;
1201                        if (ips->gpu_busy)
1202                                ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1203                        ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1204                                STS_PTL_SHIFT;
1205                        ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1206                                STS_PPL_SHIFT;
1207                        verify_limits(ips);
1208                        spin_unlock(&ips->turbo_status_lock);
1209
1210                        thm_writeb(THM_SEC, SEC_ACK);
1211                }
1212                thm_writeb(THM_TES, tes);
1213        }
1214
1215        /* Thermal trip */
1216        if (tses) {
1217                dev_warn(&ips->dev->dev,
1218                         "thermal trip occurred, tses: 0x%04x\n", tses);
1219                thm_writeb(THM_TSES, tses);
1220        }
1221
1222        return IRQ_HANDLED;
1223}
1224
1225#ifndef CONFIG_DEBUG_FS
static void ips_debugfs_init(struct ips_driver *ips)
{
	/* CONFIG_DEBUG_FS is not set: nothing to create */
}
static void ips_debugfs_cleanup(struct ips_driver *ips)
{
	/* CONFIG_DEBUG_FS is not set: nothing to remove */
}
1228#else
1229
1230/* Expose current state and limits in debugfs if possible */
1231
/* One debugfs file: its owning driver instance, file name and show routine */
struct ips_debugfs_node {
	struct ips_driver *ips;		/* filled in by ips_debugfs_init() */
	const char *name;		/* file name under the debugfs dir */
	int (*show)(struct seq_file *m, void *data);	/* seq_file show hook */
};
1237
1238static int show_cpu_temp(struct seq_file *m, void *data)
1239{
1240        struct ips_driver *ips = m->private;
1241
1242        seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1243                   ips->ctv1_avg_temp % 100);
1244
1245        return 0;
1246}
1247
1248static int show_cpu_power(struct seq_file *m, void *data)
1249{
1250        struct ips_driver *ips = m->private;
1251
1252        seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1253
1254        return 0;
1255}
1256
1257static int show_cpu_clamp(struct seq_file *m, void *data)
1258{
1259        u64 turbo_override;
1260        int tdp, tdc;
1261
1262        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1263
1264        tdp = (int)(turbo_override & TURBO_TDP_MASK);
1265        tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1266
1267        /* Convert to .1W/A units */
1268        tdp = tdp * 10 / 8;
1269        tdc = tdc * 10 / 8;
1270
1271        /* Watts Amperes */
1272        seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1273                   tdc / 10, tdc % 10);
1274
1275        return 0;
1276}
1277
1278static int show_mch_temp(struct seq_file *m, void *data)
1279{
1280        struct ips_driver *ips = m->private;
1281
1282        seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1283                   ips->mch_avg_temp % 100);
1284
1285        return 0;
1286}
1287
1288static int show_mch_power(struct seq_file *m, void *data)
1289{
1290        struct ips_driver *ips = m->private;
1291
1292        seq_printf(m, "%dmW\n", ips->mch_avg_power);
1293
1294        return 0;
1295}
1296
1297static struct ips_debugfs_node ips_debug_files[] = {
1298        { NULL, "cpu_temp", show_cpu_temp },
1299        { NULL, "cpu_power", show_cpu_power },
1300        { NULL, "cpu_clamp", show_cpu_clamp },
1301        { NULL, "mch_temp", show_mch_temp },
1302        { NULL, "mch_power", show_mch_power },
1303};
1304
1305static int ips_debugfs_open(struct inode *inode, struct file *file)
1306{
1307        struct ips_debugfs_node *node = inode->i_private;
1308
1309        return single_open(file, node->show, node->ips);
1310}
1311
1312static const struct file_operations ips_debugfs_ops = {
1313        .owner = THIS_MODULE,
1314        .open = ips_debugfs_open,
1315        .read = seq_read,
1316        .llseek = seq_lseek,
1317        .release = single_release,
1318};
1319
1320static void ips_debugfs_cleanup(struct ips_driver *ips)
1321{
1322        if (ips->debug_root)
1323                debugfs_remove_recursive(ips->debug_root);
1324        return;
1325}
1326
1327static void ips_debugfs_init(struct ips_driver *ips)
1328{
1329        int i;
1330
1331        ips->debug_root = debugfs_create_dir("ips", NULL);
1332        if (!ips->debug_root) {
1333                dev_err(&ips->dev->dev,
1334                        "failed to create debugfs entries: %ld\n",
1335                        PTR_ERR(ips->debug_root));
1336                return;
1337        }
1338
1339        for (i = 0; i < ARRAY_SIZE(ips_debug_files); i++) {
1340                struct dentry *ent;
1341                struct ips_debugfs_node *node = &ips_debug_files[i];
1342
1343                node->ips = ips;
1344                ent = debugfs_create_file(node->name, S_IFREG | S_IRUGO,
1345                                          ips->debug_root, node,
1346                                          &ips_debugfs_ops);
1347                if (!ent) {
1348                        dev_err(&ips->dev->dev,
1349                                "failed to create debug file: %ld\n",
1350                                PTR_ERR(ent));
1351                        goto err_cleanup;
1352                }
1353        }
1354
1355        return;
1356
1357err_cleanup:
1358        ips_debugfs_cleanup(ips);
1359        return;
1360}
1361#endif /* CONFIG_DEBUG_FS */
1362
1363/**
1364 * ips_detect_cpu - detect whether CPU supports IPS
1365 *
1366 * Walk our list and see if we're on a supported CPU.  If we find one,
1367 * return the limits for it.
1368 */
1369static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1370{
1371        u64 turbo_power, misc_en;
1372        struct ips_mcp_limits *limits = NULL;
1373        u16 tdp;
1374
1375        if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1376                dev_info(&ips->dev->dev, "Non-IPS CPU detected.\n");
1377                goto out;
1378        }
1379
1380        rdmsrl(IA32_MISC_ENABLE, misc_en);
1381        /*
1382         * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1383         * turbo manually or we'll get an illegal MSR access, even though
1384         * turbo will still be available.
1385         */
1386        if (misc_en & IA32_MISC_TURBO_EN)
1387                ips->turbo_toggle_allowed = true;
1388        else
1389                ips->turbo_toggle_allowed = false;
1390
1391        if (strstr(boot_cpu_data.x86_model_id, "CPU       M"))
1392                limits = &ips_sv_limits;
1393        else if (strstr(boot_cpu_data.x86_model_id, "CPU       L"))
1394                limits = &ips_lv_limits;
1395        else if (strstr(boot_cpu_data.x86_model_id, "CPU       U"))
1396                limits = &ips_ulv_limits;
1397        else {
1398                dev_info(&ips->dev->dev, "No CPUID match found.\n");
1399                goto out;
1400        }
1401
1402        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1403        tdp = turbo_power & TURBO_TDP_MASK;
1404
1405        /* Sanity check TDP against CPU */
1406        if (limits->core_power_limit != (tdp / 8) * 1000) {
1407                dev_info(&ips->dev->dev, "CPU TDP doesn't match expected value (found %d, expected %d)\n",
1408                         tdp / 8, limits->core_power_limit / 1000);
1409                limits->core_power_limit = (tdp / 8) * 1000;
1410        }
1411
1412out:
1413        return limits;
1414}
1415
1416/**
1417 * ips_get_i915_syms - try to get GPU control methods from i915 driver
1418 * @ips: IPS driver
1419 *
1420 * The i915 driver exports several interfaces to allow the IPS driver to
1421 * monitor and control graphics turbo mode.  If we can find them, we can
1422 * enable graphics turbo, otherwise we must disable it to avoid exceeding
1423 * thermal and power limits in the MCP.
1424 */
1425static bool ips_get_i915_syms(struct ips_driver *ips)
1426{
1427        ips->read_mch_val = symbol_get(i915_read_mch_val);
1428        if (!ips->read_mch_val)
1429                goto out_err;
1430        ips->gpu_raise = symbol_get(i915_gpu_raise);
1431        if (!ips->gpu_raise)
1432                goto out_put_mch;
1433        ips->gpu_lower = symbol_get(i915_gpu_lower);
1434        if (!ips->gpu_lower)
1435                goto out_put_raise;
1436        ips->gpu_busy = symbol_get(i915_gpu_busy);
1437        if (!ips->gpu_busy)
1438                goto out_put_lower;
1439        ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1440        if (!ips->gpu_turbo_disable)
1441                goto out_put_busy;
1442
1443        return true;
1444
1445out_put_busy:
1446        symbol_put(i915_gpu_busy);
1447out_put_lower:
1448        symbol_put(i915_gpu_lower);
1449out_put_raise:
1450        symbol_put(i915_gpu_raise);
1451out_put_mch:
1452        symbol_put(i915_read_mch_val);
1453out_err:
1454        return false;
1455}
1456
1457static bool
1458ips_gpu_turbo_enabled(struct ips_driver *ips)
1459{
1460        if (!ips->gpu_busy && late_i915_load) {
1461                if (ips_get_i915_syms(ips)) {
1462                        dev_info(&ips->dev->dev,
1463                                 "i915 driver attached, reenabling gpu turbo\n");
1464                        ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1465                }
1466        }
1467
1468        return ips->gpu_turbo_enabled;
1469}
1470
1471void
1472ips_link_to_i915_driver(void)
1473{
1474        /* We can't cleanly get at the various ips_driver structs from
1475         * this caller (the i915 driver), so just set a flag saying
1476         * that it's time to try getting the symbols again.
1477         */
1478        late_i915_load = true;
1479}
1480EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1481
1482static const struct pci_device_id ips_id_table[] = {
1483        { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
1484                     PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
1485        { 0, }
1486};
1487
1488MODULE_DEVICE_TABLE(pci, ips_id_table);
1489
1490static int ips_blacklist_callback(const struct dmi_system_id *id)
1491{
1492        pr_info("Blacklisted intel_ips for %s\n", id->ident);
1493        return 1;
1494}
1495
1496static const struct dmi_system_id ips_blacklist[] = {
1497        {
1498                .callback = ips_blacklist_callback,
1499                .ident = "HP ProBook",
1500                .matches = {
1501                        DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1502                        DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
1503                },
1504        },
1505        { }     /* terminating entry */
1506};
1507
1508static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1509{
1510        u64 platform_info;
1511        struct ips_driver *ips;
1512        u32 hts;
1513        int ret = 0;
1514        u16 htshi, trc, trc_required_mask;
1515        u8 tse;
1516
1517        if (dmi_check_system(ips_blacklist))
1518                return -ENODEV;
1519
1520        ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
1521        if (!ips)
1522                return -ENOMEM;
1523
1524        pci_set_drvdata(dev, ips);
1525        ips->dev = dev;
1526
1527        ips->limits = ips_detect_cpu(ips);
1528        if (!ips->limits) {
1529                dev_info(&dev->dev, "IPS not supported on this CPU\n");
1530                ret = -ENXIO;
1531                goto error_free;
1532        }
1533
1534        spin_lock_init(&ips->turbo_status_lock);
1535
1536        ret = pci_enable_device(dev);
1537        if (ret) {
1538                dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1539                goto error_free;
1540        }
1541
1542        if (!pci_resource_start(dev, 0)) {
1543                dev_err(&dev->dev, "TBAR not assigned, aborting\n");
1544                ret = -ENXIO;
1545                goto error_free;
1546        }
1547
1548        ret = pci_request_regions(dev, "ips thermal sensor");
1549        if (ret) {
1550                dev_err(&dev->dev, "thermal resource busy, aborting\n");
1551                goto error_free;
1552        }
1553
1554
1555        ips->regmap = ioremap(pci_resource_start(dev, 0),
1556                              pci_resource_len(dev, 0));
1557        if (!ips->regmap) {
1558                dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1559                ret = -EBUSY;
1560                goto error_release;
1561        }
1562
1563        tse = thm_readb(THM_TSE);
1564        if (tse != TSE_EN) {
1565                dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1566                ret = -ENXIO;
1567                goto error_unmap;
1568        }
1569
1570        trc = thm_readw(THM_TRC);
1571        trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1572        if ((trc & trc_required_mask) != trc_required_mask) {
1573                dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1574                ret = -ENXIO;
1575                goto error_unmap;
1576        }
1577
1578        if (trc & TRC_CORE2_EN)
1579                ips->second_cpu = true;
1580
1581        update_turbo_limits(ips);
1582        dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1583                ips->mcp_power_limit / 10);
1584        dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1585                ips->core_power_limit / 10);
1586        /* BIOS may update limits at runtime */
1587        if (thm_readl(THM_PSC) & PSP_PBRT)
1588                ips->poll_turbo_status = true;
1589
1590        if (!ips_get_i915_syms(ips)) {
1591                dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n");
1592                ips->gpu_turbo_enabled = false;
1593        } else {
1594                dev_dbg(&dev->dev, "graphics turbo enabled\n");
1595                ips->gpu_turbo_enabled = true;
1596        }
1597
1598        /*
1599         * Check PLATFORM_INFO MSR to make sure this chip is
1600         * turbo capable.
1601         */
1602        rdmsrl(PLATFORM_INFO, platform_info);
1603        if (!(platform_info & PLATFORM_TDP)) {
1604                dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1605                ret = -ENODEV;
1606                goto error_unmap;
1607        }
1608
1609        /*
1610         * IRQ handler for ME interaction
1611         * Note: don't use MSI here as the PCH has bugs.
1612         */
1613        pci_disable_msi(dev);
1614        ret = request_irq(dev->irq, ips_irq_handler, IRQF_SHARED, "ips",
1615                          ips);
1616        if (ret) {
1617                dev_err(&dev->dev, "request irq failed, aborting\n");
1618                goto error_unmap;
1619        }
1620
1621        /* Enable aux, hot & critical interrupts */
1622        thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1623                   TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1624        thm_writeb(THM_TEN, TEN_UPDATE_EN);
1625
1626        /* Collect adjustment values */
1627        ips->cta_val = thm_readw(THM_CTA);
1628        ips->pta_val = thm_readw(THM_PTA);
1629        ips->mgta_val = thm_readw(THM_MGTA);
1630
1631        /* Save turbo limits & ratios */
1632        rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1633
1634        ips_disable_cpu_turbo(ips);
1635        ips->cpu_turbo_enabled = false;
1636
1637        /* Create thermal adjust thread */
1638        ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1639        if (IS_ERR(ips->adjust)) {
1640                dev_err(&dev->dev,
1641                        "failed to create thermal adjust thread, aborting\n");
1642                ret = -ENOMEM;
1643                goto error_free_irq;
1644
1645        }
1646
1647        /*
1648         * Set up the work queue and monitor thread. The monitor thread
1649         * will wake up ips_adjust thread.
1650         */
1651        ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1652        if (IS_ERR(ips->monitor)) {
1653                dev_err(&dev->dev,
1654                        "failed to create thermal monitor thread, aborting\n");
1655                ret = -ENOMEM;
1656                goto error_thread_cleanup;
1657        }
1658
1659        hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1660                (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1661        htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1662
1663        thm_writew(THM_HTSHI, htshi);
1664        thm_writel(THM_HTS, hts);
1665
1666        ips_debugfs_init(ips);
1667
1668        dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1669                 ips->mcp_temp_limit);
1670        return ret;
1671
1672error_thread_cleanup:
1673        kthread_stop(ips->adjust);
1674error_free_irq:
1675        free_irq(ips->dev->irq, ips);
1676error_unmap:
1677        iounmap(ips->regmap);
1678error_release:
1679        pci_release_regions(dev);
1680error_free:
1681        kfree(ips);
1682        return ret;
1683}
1684
1685static void ips_remove(struct pci_dev *dev)
1686{
1687        struct ips_driver *ips = pci_get_drvdata(dev);
1688        u64 turbo_override;
1689
1690        if (!ips)
1691                return;
1692
1693        ips_debugfs_cleanup(ips);
1694
1695        /* Release i915 driver */
1696        if (ips->read_mch_val)
1697                symbol_put(i915_read_mch_val);
1698        if (ips->gpu_raise)
1699                symbol_put(i915_gpu_raise);
1700        if (ips->gpu_lower)
1701                symbol_put(i915_gpu_lower);
1702        if (ips->gpu_busy)
1703                symbol_put(i915_gpu_busy);
1704        if (ips->gpu_turbo_disable)
1705                symbol_put(i915_gpu_turbo_disable);
1706
1707        rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1708        turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
1709        wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1710        wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1711
1712        free_irq(ips->dev->irq, ips);
1713        if (ips->adjust)
1714                kthread_stop(ips->adjust);
1715        if (ips->monitor)
1716                kthread_stop(ips->monitor);
1717        iounmap(ips->regmap);
1718        pci_release_regions(dev);
1719        kfree(ips);
1720        dev_dbg(&dev->dev, "IPS driver removed\n");
1721}
1722
/* PCI shutdown hook: intentionally does nothing. */
static void ips_shutdown(struct pci_dev *dev)
{
	/* no shutdown-time work */
}
1726
1727static struct pci_driver ips_pci_driver = {
1728        .name = "intel ips",
1729        .id_table = ips_id_table,
1730        .probe = ips_probe,
1731        .remove = ips_remove,
1732        .shutdown = ips_shutdown,
1733};
1734
1735module_pci_driver(ips_pci_driver);
1736
1737MODULE_LICENSE("GPL");
1738MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1739MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
1740