linux/drivers/idle/intel_idle.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * intel_idle.c - native hardware idle loop for modern Intel processors
   4 *
   5 * Copyright (c) 2013 - 2020, Intel Corporation.
   6 * Len Brown <len.brown@intel.com>
   7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
   8 */
   9
  10/*
  11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
  12 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
  13 * make Linux more efficient on these processors, as intel_idle knows
  14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
  15 */
  16
  17/*
  18 * Design Assumptions
  19 *
  20 * All CPUs have same idle states as boot CPU
  21 *
  22 * Chipset BM_STS (bus master status) bit is a NOP
  23 *      for preventing entry into deep C-states
  24 *
  25 * CPU will flush caches as needed when entering a C-state via MWAIT
  26 *      (in contrast to entering ACPI C3, in which case the WBINVD
  27 *      instruction needs to be executed to flush the caches)
  28 */
  29
  30/*
  31 * Known limitations
  32 *
  33 * ACPI has a .suspend hack to turn off deep C-states during suspend
  34 * to avoid complications with the lapic timer workaround.
  35 * Have not seen issues with suspend, but may need same workaround here.
  36 *
  37 */
  38
  39/* un-comment DEBUG to enable pr_debug() statements */
  40/* #define DEBUG */
  41
  42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  43
  44#include <linux/acpi.h>
  45#include <linux/kernel.h>
  46#include <linux/cpuidle.h>
  47#include <linux/tick.h>
  48#include <trace/events/power.h>
  49#include <linux/sched.h>
  50#include <linux/notifier.h>
  51#include <linux/cpu.h>
  52#include <linux/moduleparam.h>
  53#include <asm/cpu_device_id.h>
  54#include <asm/intel-family.h>
  55#include <asm/mwait.h>
  56#include <asm/msr.h>
  57
  58#define INTEL_IDLE_VERSION "0.5.1"
  59
  60static struct cpuidle_driver intel_idle_driver = {
  61        .name = "intel_idle",
  62        .owner = THIS_MODULE,
  63};
  64/* intel_idle.max_cstate=0 disables driver */
  65static int max_cstate = CPUIDLE_STATE_MAX - 1;
  66static unsigned int disabled_states_mask;
  67
  68static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
  69
  70static unsigned long auto_demotion_disable_flags;
  71static bool disable_promotion_to_c1e;
  72
  73struct idle_cpu {
  74        struct cpuidle_state *state_table;
  75
  76        /*
  77         * Hardware C-state auto-demotion may not always be optimal.
  78         * Indicate which enable bits to clear here.
  79         */
  80        unsigned long auto_demotion_disable_flags;
  81        bool byt_auto_demotion_disable_flag;
  82        bool disable_promotion_to_c1e;
  83        bool use_acpi;
  84};
  85
  86static const struct idle_cpu *icpu __initdata;
  87static struct cpuidle_state *cpuidle_state_table __initdata;
  88
  89static unsigned int mwait_substates __initdata;
  90
  91/*
  92 * Enable this state by default even if the ACPI _CST does not list it.
  93 */
  94#define CPUIDLE_FLAG_ALWAYS_ENABLE      BIT(15)
  95
  96/*
  97 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  98 * the C-state (top nibble) and sub-state (bottom nibble)
  99 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 100 *
 101 * We store the hint at the top of our "flags" for each state.
 102 */
 103#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
 104#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
 105
 106/**
 107 * intel_idle - Ask the processor to enter the given idle state.
 108 * @dev: cpuidle device of the target CPU.
 109 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 110 * @index: Target idle state index.
 111 *
 112 * Use the MWAIT instruction to notify the processor that the CPU represented by
 113 * @dev is idle and it can try to enter the idle state corresponding to @index.
 114 *
 115 * If the local APIC timer is not known to be reliable in the target idle state,
 116 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 117 *
 118 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
 119 * flushing user TLBs.
 120 *
 121 * Must be called under local_irq_disable().
 122 */
 123static __cpuidle int intel_idle(struct cpuidle_device *dev,
 124                                struct cpuidle_driver *drv, int index)
 125{
 126        struct cpuidle_state *state = &drv->states[index];
 127        unsigned long eax = flg2MWAIT(state->flags);
 128        unsigned long ecx = 1; /* break on interrupt flag */
 129
 130        mwait_idle_with_hints(eax, ecx);
 131
 132        return index;
 133}
 134
 135/**
 136 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 137 * @dev: cpuidle device of the target CPU.
 138 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 139 * @index: Target idle state index.
 140 *
 141 * Use the MWAIT instruction to notify the processor that the CPU represented by
 142 * @dev is idle and it can try to enter the idle state corresponding to @index.
 143 *
 144 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 145 * scheduler tick and suspended scheduler clock on the target CPU.
 146 */
 147static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
 148                                       struct cpuidle_driver *drv, int index)
 149{
 150        unsigned long eax = flg2MWAIT(drv->states[index].flags);
 151        unsigned long ecx = 1; /* break on interrupt flag */
 152
 153        mwait_idle_with_hints(eax, ecx);
 154
 155        return 0;
 156}
 157
 158/*
 159 * States are indexed by the cstate number,
 160 * which is also the index into the MWAIT hint array.
 161 * Thus C0 is a dummy.
 162 */
 163static struct cpuidle_state nehalem_cstates[] __initdata = {
 164        {
 165                .name = "C1",
 166                .desc = "MWAIT 0x00",
 167                .flags = MWAIT2flg(0x00),
 168                .exit_latency = 3,
 169                .target_residency = 6,
 170                .enter = &intel_idle,
 171                .enter_s2idle = intel_idle_s2idle, },
 172        {
 173                .name = "C1E",
 174                .desc = "MWAIT 0x01",
 175                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 176                .exit_latency = 10,
 177                .target_residency = 20,
 178                .enter = &intel_idle,
 179                .enter_s2idle = intel_idle_s2idle, },
 180        {
 181                .name = "C3",
 182                .desc = "MWAIT 0x10",
 183                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 184                .exit_latency = 20,
 185                .target_residency = 80,
 186                .enter = &intel_idle,
 187                .enter_s2idle = intel_idle_s2idle, },
 188        {
 189                .name = "C6",
 190                .desc = "MWAIT 0x20",
 191                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 192                .exit_latency = 200,
 193                .target_residency = 800,
 194                .enter = &intel_idle,
 195                .enter_s2idle = intel_idle_s2idle, },
 196        {
 197                .enter = NULL }
 198};
 199
 200static struct cpuidle_state snb_cstates[] __initdata = {
 201        {
 202                .name = "C1",
 203                .desc = "MWAIT 0x00",
 204                .flags = MWAIT2flg(0x00),
 205                .exit_latency = 2,
 206                .target_residency = 2,
 207                .enter = &intel_idle,
 208                .enter_s2idle = intel_idle_s2idle, },
 209        {
 210                .name = "C1E",
 211                .desc = "MWAIT 0x01",
 212                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 213                .exit_latency = 10,
 214                .target_residency = 20,
 215                .enter = &intel_idle,
 216                .enter_s2idle = intel_idle_s2idle, },
 217        {
 218                .name = "C3",
 219                .desc = "MWAIT 0x10",
 220                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 221                .exit_latency = 80,
 222                .target_residency = 211,
 223                .enter = &intel_idle,
 224                .enter_s2idle = intel_idle_s2idle, },
 225        {
 226                .name = "C6",
 227                .desc = "MWAIT 0x20",
 228                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 229                .exit_latency = 104,
 230                .target_residency = 345,
 231                .enter = &intel_idle,
 232                .enter_s2idle = intel_idle_s2idle, },
 233        {
 234                .name = "C7",
 235                .desc = "MWAIT 0x30",
 236                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 237                .exit_latency = 109,
 238                .target_residency = 345,
 239                .enter = &intel_idle,
 240                .enter_s2idle = intel_idle_s2idle, },
 241        {
 242                .enter = NULL }
 243};
 244
 245static struct cpuidle_state byt_cstates[] __initdata = {
 246        {
 247                .name = "C1",
 248                .desc = "MWAIT 0x00",
 249                .flags = MWAIT2flg(0x00),
 250                .exit_latency = 1,
 251                .target_residency = 1,
 252                .enter = &intel_idle,
 253                .enter_s2idle = intel_idle_s2idle, },
 254        {
 255                .name = "C6N",
 256                .desc = "MWAIT 0x58",
 257                .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 258                .exit_latency = 300,
 259                .target_residency = 275,
 260                .enter = &intel_idle,
 261                .enter_s2idle = intel_idle_s2idle, },
 262        {
 263                .name = "C6S",
 264                .desc = "MWAIT 0x52",
 265                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 266                .exit_latency = 500,
 267                .target_residency = 560,
 268                .enter = &intel_idle,
 269                .enter_s2idle = intel_idle_s2idle, },
 270        {
 271                .name = "C7",
 272                .desc = "MWAIT 0x60",
 273                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 274                .exit_latency = 1200,
 275                .target_residency = 4000,
 276                .enter = &intel_idle,
 277                .enter_s2idle = intel_idle_s2idle, },
 278        {
 279                .name = "C7S",
 280                .desc = "MWAIT 0x64",
 281                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 282                .exit_latency = 10000,
 283                .target_residency = 20000,
 284                .enter = &intel_idle,
 285                .enter_s2idle = intel_idle_s2idle, },
 286        {
 287                .enter = NULL }
 288};
 289
 290static struct cpuidle_state cht_cstates[] __initdata = {
 291        {
 292                .name = "C1",
 293                .desc = "MWAIT 0x00",
 294                .flags = MWAIT2flg(0x00),
 295                .exit_latency = 1,
 296                .target_residency = 1,
 297                .enter = &intel_idle,
 298                .enter_s2idle = intel_idle_s2idle, },
 299        {
 300                .name = "C6N",
 301                .desc = "MWAIT 0x58",
 302                .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 303                .exit_latency = 80,
 304                .target_residency = 275,
 305                .enter = &intel_idle,
 306                .enter_s2idle = intel_idle_s2idle, },
 307        {
 308                .name = "C6S",
 309                .desc = "MWAIT 0x52",
 310                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 311                .exit_latency = 200,
 312                .target_residency = 560,
 313                .enter = &intel_idle,
 314                .enter_s2idle = intel_idle_s2idle, },
 315        {
 316                .name = "C7",
 317                .desc = "MWAIT 0x60",
 318                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 319                .exit_latency = 1200,
 320                .target_residency = 4000,
 321                .enter = &intel_idle,
 322                .enter_s2idle = intel_idle_s2idle, },
 323        {
 324                .name = "C7S",
 325                .desc = "MWAIT 0x64",
 326                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 327                .exit_latency = 10000,
 328                .target_residency = 20000,
 329                .enter = &intel_idle,
 330                .enter_s2idle = intel_idle_s2idle, },
 331        {
 332                .enter = NULL }
 333};
 334
 335static struct cpuidle_state ivb_cstates[] __initdata = {
 336        {
 337                .name = "C1",
 338                .desc = "MWAIT 0x00",
 339                .flags = MWAIT2flg(0x00),
 340                .exit_latency = 1,
 341                .target_residency = 1,
 342                .enter = &intel_idle,
 343                .enter_s2idle = intel_idle_s2idle, },
 344        {
 345                .name = "C1E",
 346                .desc = "MWAIT 0x01",
 347                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 348                .exit_latency = 10,
 349                .target_residency = 20,
 350                .enter = &intel_idle,
 351                .enter_s2idle = intel_idle_s2idle, },
 352        {
 353                .name = "C3",
 354                .desc = "MWAIT 0x10",
 355                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 356                .exit_latency = 59,
 357                .target_residency = 156,
 358                .enter = &intel_idle,
 359                .enter_s2idle = intel_idle_s2idle, },
 360        {
 361                .name = "C6",
 362                .desc = "MWAIT 0x20",
 363                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 364                .exit_latency = 80,
 365                .target_residency = 300,
 366                .enter = &intel_idle,
 367                .enter_s2idle = intel_idle_s2idle, },
 368        {
 369                .name = "C7",
 370                .desc = "MWAIT 0x30",
 371                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 372                .exit_latency = 87,
 373                .target_residency = 300,
 374                .enter = &intel_idle,
 375                .enter_s2idle = intel_idle_s2idle, },
 376        {
 377                .enter = NULL }
 378};
 379
 380static struct cpuidle_state ivt_cstates[] __initdata = {
 381        {
 382                .name = "C1",
 383                .desc = "MWAIT 0x00",
 384                .flags = MWAIT2flg(0x00),
 385                .exit_latency = 1,
 386                .target_residency = 1,
 387                .enter = &intel_idle,
 388                .enter_s2idle = intel_idle_s2idle, },
 389        {
 390                .name = "C1E",
 391                .desc = "MWAIT 0x01",
 392                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 393                .exit_latency = 10,
 394                .target_residency = 80,
 395                .enter = &intel_idle,
 396                .enter_s2idle = intel_idle_s2idle, },
 397        {
 398                .name = "C3",
 399                .desc = "MWAIT 0x10",
 400                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 401                .exit_latency = 59,
 402                .target_residency = 156,
 403                .enter = &intel_idle,
 404                .enter_s2idle = intel_idle_s2idle, },
 405        {
 406                .name = "C6",
 407                .desc = "MWAIT 0x20",
 408                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 409                .exit_latency = 82,
 410                .target_residency = 300,
 411                .enter = &intel_idle,
 412                .enter_s2idle = intel_idle_s2idle, },
 413        {
 414                .enter = NULL }
 415};
 416
 417static struct cpuidle_state ivt_cstates_4s[] __initdata = {
 418        {
 419                .name = "C1",
 420                .desc = "MWAIT 0x00",
 421                .flags = MWAIT2flg(0x00),
 422                .exit_latency = 1,
 423                .target_residency = 1,
 424                .enter = &intel_idle,
 425                .enter_s2idle = intel_idle_s2idle, },
 426        {
 427                .name = "C1E",
 428                .desc = "MWAIT 0x01",
 429                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 430                .exit_latency = 10,
 431                .target_residency = 250,
 432                .enter = &intel_idle,
 433                .enter_s2idle = intel_idle_s2idle, },
 434        {
 435                .name = "C3",
 436                .desc = "MWAIT 0x10",
 437                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 438                .exit_latency = 59,
 439                .target_residency = 300,
 440                .enter = &intel_idle,
 441                .enter_s2idle = intel_idle_s2idle, },
 442        {
 443                .name = "C6",
 444                .desc = "MWAIT 0x20",
 445                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 446                .exit_latency = 84,
 447                .target_residency = 400,
 448                .enter = &intel_idle,
 449                .enter_s2idle = intel_idle_s2idle, },
 450        {
 451                .enter = NULL }
 452};
 453
 454static struct cpuidle_state ivt_cstates_8s[] __initdata = {
 455        {
 456                .name = "C1",
 457                .desc = "MWAIT 0x00",
 458                .flags = MWAIT2flg(0x00),
 459                .exit_latency = 1,
 460                .target_residency = 1,
 461                .enter = &intel_idle,
 462                .enter_s2idle = intel_idle_s2idle, },
 463        {
 464                .name = "C1E",
 465                .desc = "MWAIT 0x01",
 466                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 467                .exit_latency = 10,
 468                .target_residency = 500,
 469                .enter = &intel_idle,
 470                .enter_s2idle = intel_idle_s2idle, },
 471        {
 472                .name = "C3",
 473                .desc = "MWAIT 0x10",
 474                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 475                .exit_latency = 59,
 476                .target_residency = 600,
 477                .enter = &intel_idle,
 478                .enter_s2idle = intel_idle_s2idle, },
 479        {
 480                .name = "C6",
 481                .desc = "MWAIT 0x20",
 482                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 483                .exit_latency = 88,
 484                .target_residency = 700,
 485                .enter = &intel_idle,
 486                .enter_s2idle = intel_idle_s2idle, },
 487        {
 488                .enter = NULL }
 489};
 490
 491static struct cpuidle_state hsw_cstates[] __initdata = {
 492        {
 493                .name = "C1",
 494                .desc = "MWAIT 0x00",
 495                .flags = MWAIT2flg(0x00),
 496                .exit_latency = 2,
 497                .target_residency = 2,
 498                .enter = &intel_idle,
 499                .enter_s2idle = intel_idle_s2idle, },
 500        {
 501                .name = "C1E",
 502                .desc = "MWAIT 0x01",
 503                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 504                .exit_latency = 10,
 505                .target_residency = 20,
 506                .enter = &intel_idle,
 507                .enter_s2idle = intel_idle_s2idle, },
 508        {
 509                .name = "C3",
 510                .desc = "MWAIT 0x10",
 511                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 512                .exit_latency = 33,
 513                .target_residency = 100,
 514                .enter = &intel_idle,
 515                .enter_s2idle = intel_idle_s2idle, },
 516        {
 517                .name = "C6",
 518                .desc = "MWAIT 0x20",
 519                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 520                .exit_latency = 133,
 521                .target_residency = 400,
 522                .enter = &intel_idle,
 523                .enter_s2idle = intel_idle_s2idle, },
 524        {
 525                .name = "C7s",
 526                .desc = "MWAIT 0x32",
 527                .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 528                .exit_latency = 166,
 529                .target_residency = 500,
 530                .enter = &intel_idle,
 531                .enter_s2idle = intel_idle_s2idle, },
 532        {
 533                .name = "C8",
 534                .desc = "MWAIT 0x40",
 535                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 536                .exit_latency = 300,
 537                .target_residency = 900,
 538                .enter = &intel_idle,
 539                .enter_s2idle = intel_idle_s2idle, },
 540        {
 541                .name = "C9",
 542                .desc = "MWAIT 0x50",
 543                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 544                .exit_latency = 600,
 545                .target_residency = 1800,
 546                .enter = &intel_idle,
 547                .enter_s2idle = intel_idle_s2idle, },
 548        {
 549                .name = "C10",
 550                .desc = "MWAIT 0x60",
 551                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 552                .exit_latency = 2600,
 553                .target_residency = 7700,
 554                .enter = &intel_idle,
 555                .enter_s2idle = intel_idle_s2idle, },
 556        {
 557                .enter = NULL }
 558};
 559static struct cpuidle_state bdw_cstates[] __initdata = {
 560        {
 561                .name = "C1",
 562                .desc = "MWAIT 0x00",
 563                .flags = MWAIT2flg(0x00),
 564                .exit_latency = 2,
 565                .target_residency = 2,
 566                .enter = &intel_idle,
 567                .enter_s2idle = intel_idle_s2idle, },
 568        {
 569                .name = "C1E",
 570                .desc = "MWAIT 0x01",
 571                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 572                .exit_latency = 10,
 573                .target_residency = 20,
 574                .enter = &intel_idle,
 575                .enter_s2idle = intel_idle_s2idle, },
 576        {
 577                .name = "C3",
 578                .desc = "MWAIT 0x10",
 579                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 580                .exit_latency = 40,
 581                .target_residency = 100,
 582                .enter = &intel_idle,
 583                .enter_s2idle = intel_idle_s2idle, },
 584        {
 585                .name = "C6",
 586                .desc = "MWAIT 0x20",
 587                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 588                .exit_latency = 133,
 589                .target_residency = 400,
 590                .enter = &intel_idle,
 591                .enter_s2idle = intel_idle_s2idle, },
 592        {
 593                .name = "C7s",
 594                .desc = "MWAIT 0x32",
 595                .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 596                .exit_latency = 166,
 597                .target_residency = 500,
 598                .enter = &intel_idle,
 599                .enter_s2idle = intel_idle_s2idle, },
 600        {
 601                .name = "C8",
 602                .desc = "MWAIT 0x40",
 603                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 604                .exit_latency = 300,
 605                .target_residency = 900,
 606                .enter = &intel_idle,
 607                .enter_s2idle = intel_idle_s2idle, },
 608        {
 609                .name = "C9",
 610                .desc = "MWAIT 0x50",
 611                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 612                .exit_latency = 600,
 613                .target_residency = 1800,
 614                .enter = &intel_idle,
 615                .enter_s2idle = intel_idle_s2idle, },
 616        {
 617                .name = "C10",
 618                .desc = "MWAIT 0x60",
 619                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 620                .exit_latency = 2600,
 621                .target_residency = 7700,
 622                .enter = &intel_idle,
 623                .enter_s2idle = intel_idle_s2idle, },
 624        {
 625                .enter = NULL }
 626};
 627
 628static struct cpuidle_state skl_cstates[] __initdata = {
 629        {
 630                .name = "C1",
 631                .desc = "MWAIT 0x00",
 632                .flags = MWAIT2flg(0x00),
 633                .exit_latency = 2,
 634                .target_residency = 2,
 635                .enter = &intel_idle,
 636                .enter_s2idle = intel_idle_s2idle, },
 637        {
 638                .name = "C1E",
 639                .desc = "MWAIT 0x01",
 640                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 641                .exit_latency = 10,
 642                .target_residency = 20,
 643                .enter = &intel_idle,
 644                .enter_s2idle = intel_idle_s2idle, },
 645        {
 646                .name = "C3",
 647                .desc = "MWAIT 0x10",
 648                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 649                .exit_latency = 70,
 650                .target_residency = 100,
 651                .enter = &intel_idle,
 652                .enter_s2idle = intel_idle_s2idle, },
 653        {
 654                .name = "C6",
 655                .desc = "MWAIT 0x20",
 656                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 657                .exit_latency = 85,
 658                .target_residency = 200,
 659                .enter = &intel_idle,
 660                .enter_s2idle = intel_idle_s2idle, },
 661        {
 662                .name = "C7s",
 663                .desc = "MWAIT 0x33",
 664                .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
 665                .exit_latency = 124,
 666                .target_residency = 800,
 667                .enter = &intel_idle,
 668                .enter_s2idle = intel_idle_s2idle, },
 669        {
 670                .name = "C8",
 671                .desc = "MWAIT 0x40",
 672                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 673                .exit_latency = 200,
 674                .target_residency = 800,
 675                .enter = &intel_idle,
 676                .enter_s2idle = intel_idle_s2idle, },
 677        {
 678                .name = "C9",
 679                .desc = "MWAIT 0x50",
 680                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 681                .exit_latency = 480,
 682                .target_residency = 5000,
 683                .enter = &intel_idle,
 684                .enter_s2idle = intel_idle_s2idle, },
 685        {
 686                .name = "C10",
 687                .desc = "MWAIT 0x60",
 688                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 689                .exit_latency = 890,
 690                .target_residency = 5000,
 691                .enter = &intel_idle,
 692                .enter_s2idle = intel_idle_s2idle, },
 693        {
 694                .enter = NULL }
 695};
 696
 697static struct cpuidle_state skx_cstates[] __initdata = {
 698        {
 699                .name = "C1",
 700                .desc = "MWAIT 0x00",
 701                .flags = MWAIT2flg(0x00),
 702                .exit_latency = 2,
 703                .target_residency = 2,
 704                .enter = &intel_idle,
 705                .enter_s2idle = intel_idle_s2idle, },
 706        {
 707                .name = "C1E",
 708                .desc = "MWAIT 0x01",
 709                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 710                .exit_latency = 10,
 711                .target_residency = 20,
 712                .enter = &intel_idle,
 713                .enter_s2idle = intel_idle_s2idle, },
 714        {
 715                .name = "C6",
 716                .desc = "MWAIT 0x20",
 717                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 718                .exit_latency = 133,
 719                .target_residency = 600,
 720                .enter = &intel_idle,
 721                .enter_s2idle = intel_idle_s2idle, },
 722        {
 723                .enter = NULL }
 724};
 725
 726static struct cpuidle_state icx_cstates[] __initdata = {
 727        {
 728                .name = "C1",
 729                .desc = "MWAIT 0x00",
 730                .flags = MWAIT2flg(0x00),
 731                .exit_latency = 1,
 732                .target_residency = 1,
 733                .enter = &intel_idle,
 734                .enter_s2idle = intel_idle_s2idle, },
 735        {
 736                .name = "C1E",
 737                .desc = "MWAIT 0x01",
 738                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 739                .exit_latency = 4,
 740                .target_residency = 4,
 741                .enter = &intel_idle,
 742                .enter_s2idle = intel_idle_s2idle, },
 743        {
 744                .name = "C6",
 745                .desc = "MWAIT 0x20",
 746                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 747                .exit_latency = 170,
 748                .target_residency = 600,
 749                .enter = &intel_idle,
 750                .enter_s2idle = intel_idle_s2idle, },
 751        {
 752                .enter = NULL }
 753};
 754
 755static struct cpuidle_state atom_cstates[] __initdata = {
 756        {
 757                .name = "C1E",
 758                .desc = "MWAIT 0x00",
 759                .flags = MWAIT2flg(0x00),
 760                .exit_latency = 10,
 761                .target_residency = 20,
 762                .enter = &intel_idle,
 763                .enter_s2idle = intel_idle_s2idle, },
 764        {
 765                .name = "C2",
 766                .desc = "MWAIT 0x10",
 767                .flags = MWAIT2flg(0x10),
 768                .exit_latency = 20,
 769                .target_residency = 80,
 770                .enter = &intel_idle,
 771                .enter_s2idle = intel_idle_s2idle, },
 772        {
 773                .name = "C4",
 774                .desc = "MWAIT 0x30",
 775                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 776                .exit_latency = 100,
 777                .target_residency = 400,
 778                .enter = &intel_idle,
 779                .enter_s2idle = intel_idle_s2idle, },
 780        {
 781                .name = "C6",
 782                .desc = "MWAIT 0x52",
 783                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 784                .exit_latency = 140,
 785                .target_residency = 560,
 786                .enter = &intel_idle,
 787                .enter_s2idle = intel_idle_s2idle, },
 788        {
 789                .enter = NULL }
 790};
 791static struct cpuidle_state tangier_cstates[] __initdata = {
 792        {
 793                .name = "C1",
 794                .desc = "MWAIT 0x00",
 795                .flags = MWAIT2flg(0x00),
 796                .exit_latency = 1,
 797                .target_residency = 4,
 798                .enter = &intel_idle,
 799                .enter_s2idle = intel_idle_s2idle, },
 800        {
 801                .name = "C4",
 802                .desc = "MWAIT 0x30",
 803                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 804                .exit_latency = 100,
 805                .target_residency = 400,
 806                .enter = &intel_idle,
 807                .enter_s2idle = intel_idle_s2idle, },
 808        {
 809                .name = "C6",
 810                .desc = "MWAIT 0x52",
 811                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 812                .exit_latency = 140,
 813                .target_residency = 560,
 814                .enter = &intel_idle,
 815                .enter_s2idle = intel_idle_s2idle, },
 816        {
 817                .name = "C7",
 818                .desc = "MWAIT 0x60",
 819                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 820                .exit_latency = 1200,
 821                .target_residency = 4000,
 822                .enter = &intel_idle,
 823                .enter_s2idle = intel_idle_s2idle, },
 824        {
 825                .name = "C9",
 826                .desc = "MWAIT 0x64",
 827                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 828                .exit_latency = 10000,
 829                .target_residency = 20000,
 830                .enter = &intel_idle,
 831                .enter_s2idle = intel_idle_s2idle, },
 832        {
 833                .enter = NULL }
 834};
 835static struct cpuidle_state avn_cstates[] __initdata = {
 836        {
 837                .name = "C1",
 838                .desc = "MWAIT 0x00",
 839                .flags = MWAIT2flg(0x00),
 840                .exit_latency = 2,
 841                .target_residency = 2,
 842                .enter = &intel_idle,
 843                .enter_s2idle = intel_idle_s2idle, },
 844        {
 845                .name = "C6",
 846                .desc = "MWAIT 0x51",
 847                .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
 848                .exit_latency = 15,
 849                .target_residency = 45,
 850                .enter = &intel_idle,
 851                .enter_s2idle = intel_idle_s2idle, },
 852        {
 853                .enter = NULL }
 854};
 855static struct cpuidle_state knl_cstates[] __initdata = {
 856        {
 857                .name = "C1",
 858                .desc = "MWAIT 0x00",
 859                .flags = MWAIT2flg(0x00),
 860                .exit_latency = 1,
 861                .target_residency = 2,
 862                .enter = &intel_idle,
 863                .enter_s2idle = intel_idle_s2idle },
 864        {
 865                .name = "C6",
 866                .desc = "MWAIT 0x10",
 867                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 868                .exit_latency = 120,
 869                .target_residency = 500,
 870                .enter = &intel_idle,
 871                .enter_s2idle = intel_idle_s2idle },
 872        {
 873                .enter = NULL }
 874};
 875
 876static struct cpuidle_state bxt_cstates[] __initdata = {
 877        {
 878                .name = "C1",
 879                .desc = "MWAIT 0x00",
 880                .flags = MWAIT2flg(0x00),
 881                .exit_latency = 2,
 882                .target_residency = 2,
 883                .enter = &intel_idle,
 884                .enter_s2idle = intel_idle_s2idle, },
 885        {
 886                .name = "C1E",
 887                .desc = "MWAIT 0x01",
 888                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 889                .exit_latency = 10,
 890                .target_residency = 20,
 891                .enter = &intel_idle,
 892                .enter_s2idle = intel_idle_s2idle, },
 893        {
 894                .name = "C6",
 895                .desc = "MWAIT 0x20",
 896                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 897                .exit_latency = 133,
 898                .target_residency = 133,
 899                .enter = &intel_idle,
 900                .enter_s2idle = intel_idle_s2idle, },
 901        {
 902                .name = "C7s",
 903                .desc = "MWAIT 0x31",
 904                .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
 905                .exit_latency = 155,
 906                .target_residency = 155,
 907                .enter = &intel_idle,
 908                .enter_s2idle = intel_idle_s2idle, },
 909        {
 910                .name = "C8",
 911                .desc = "MWAIT 0x40",
 912                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 913                .exit_latency = 1000,
 914                .target_residency = 1000,
 915                .enter = &intel_idle,
 916                .enter_s2idle = intel_idle_s2idle, },
 917        {
 918                .name = "C9",
 919                .desc = "MWAIT 0x50",
 920                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 921                .exit_latency = 2000,
 922                .target_residency = 2000,
 923                .enter = &intel_idle,
 924                .enter_s2idle = intel_idle_s2idle, },
 925        {
 926                .name = "C10",
 927                .desc = "MWAIT 0x60",
 928                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 929                .exit_latency = 10000,
 930                .target_residency = 10000,
 931                .enter = &intel_idle,
 932                .enter_s2idle = intel_idle_s2idle, },
 933        {
 934                .enter = NULL }
 935};
 936
 937static struct cpuidle_state dnv_cstates[] __initdata = {
 938        {
 939                .name = "C1",
 940                .desc = "MWAIT 0x00",
 941                .flags = MWAIT2flg(0x00),
 942                .exit_latency = 2,
 943                .target_residency = 2,
 944                .enter = &intel_idle,
 945                .enter_s2idle = intel_idle_s2idle, },
 946        {
 947                .name = "C1E",
 948                .desc = "MWAIT 0x01",
 949                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 950                .exit_latency = 10,
 951                .target_residency = 20,
 952                .enter = &intel_idle,
 953                .enter_s2idle = intel_idle_s2idle, },
 954        {
 955                .name = "C6",
 956                .desc = "MWAIT 0x20",
 957                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 958                .exit_latency = 50,
 959                .target_residency = 500,
 960                .enter = &intel_idle,
 961                .enter_s2idle = intel_idle_s2idle, },
 962        {
 963                .enter = NULL }
 964};
 965
 966/*
 967 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 968 * C6, and this is indicated in the CPUID mwait leaf.
 969 */
 970static struct cpuidle_state snr_cstates[] __initdata = {
 971        {
 972                .name = "C1",
 973                .desc = "MWAIT 0x00",
 974                .flags = MWAIT2flg(0x00),
 975                .exit_latency = 2,
 976                .target_residency = 2,
 977                .enter = &intel_idle,
 978                .enter_s2idle = intel_idle_s2idle, },
 979        {
 980                .name = "C1E",
 981                .desc = "MWAIT 0x01",
 982                .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 983                .exit_latency = 15,
 984                .target_residency = 25,
 985                .enter = &intel_idle,
 986                .enter_s2idle = intel_idle_s2idle, },
 987        {
 988                .name = "C6",
 989                .desc = "MWAIT 0x20",
 990                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 991                .exit_latency = 130,
 992                .target_residency = 500,
 993                .enter = &intel_idle,
 994                .enter_s2idle = intel_idle_s2idle, },
 995        {
 996                .enter = NULL }
 997};
 998
 999static const struct idle_cpu idle_cpu_nehalem __initconst = {
1000        .state_table = nehalem_cstates,
1001        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1002        .disable_promotion_to_c1e = true,
1003};
1004
1005static const struct idle_cpu idle_cpu_nhx __initconst = {
1006        .state_table = nehalem_cstates,
1007        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1008        .disable_promotion_to_c1e = true,
1009        .use_acpi = true,
1010};
1011
1012static const struct idle_cpu idle_cpu_atom __initconst = {
1013        .state_table = atom_cstates,
1014};
1015
1016static const struct idle_cpu idle_cpu_tangier __initconst = {
1017        .state_table = tangier_cstates,
1018};
1019
1020static const struct idle_cpu idle_cpu_lincroft __initconst = {
1021        .state_table = atom_cstates,
1022        .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1023};
1024
1025static const struct idle_cpu idle_cpu_snb __initconst = {
1026        .state_table = snb_cstates,
1027        .disable_promotion_to_c1e = true,
1028};
1029
1030static const struct idle_cpu idle_cpu_snx __initconst = {
1031        .state_table = snb_cstates,
1032        .disable_promotion_to_c1e = true,
1033        .use_acpi = true,
1034};
1035
1036static const struct idle_cpu idle_cpu_byt __initconst = {
1037        .state_table = byt_cstates,
1038        .disable_promotion_to_c1e = true,
1039        .byt_auto_demotion_disable_flag = true,
1040};
1041
1042static const struct idle_cpu idle_cpu_cht __initconst = {
1043        .state_table = cht_cstates,
1044        .disable_promotion_to_c1e = true,
1045        .byt_auto_demotion_disable_flag = true,
1046};
1047
1048static const struct idle_cpu idle_cpu_ivb __initconst = {
1049        .state_table = ivb_cstates,
1050        .disable_promotion_to_c1e = true,
1051};
1052
1053static const struct idle_cpu idle_cpu_ivt __initconst = {
1054        .state_table = ivt_cstates,
1055        .disable_promotion_to_c1e = true,
1056        .use_acpi = true,
1057};
1058
1059static const struct idle_cpu idle_cpu_hsw __initconst = {
1060        .state_table = hsw_cstates,
1061        .disable_promotion_to_c1e = true,
1062};
1063
1064static const struct idle_cpu idle_cpu_hsx __initconst = {
1065        .state_table = hsw_cstates,
1066        .disable_promotion_to_c1e = true,
1067        .use_acpi = true,
1068};
1069
1070static const struct idle_cpu idle_cpu_bdw __initconst = {
1071        .state_table = bdw_cstates,
1072        .disable_promotion_to_c1e = true,
1073};
1074
1075static const struct idle_cpu idle_cpu_bdx __initconst = {
1076        .state_table = bdw_cstates,
1077        .disable_promotion_to_c1e = true,
1078        .use_acpi = true,
1079};
1080
1081static const struct idle_cpu idle_cpu_skl __initconst = {
1082        .state_table = skl_cstates,
1083        .disable_promotion_to_c1e = true,
1084};
1085
1086static const struct idle_cpu idle_cpu_skx __initconst = {
1087        .state_table = skx_cstates,
1088        .disable_promotion_to_c1e = true,
1089        .use_acpi = true,
1090};
1091
1092static const struct idle_cpu idle_cpu_icx __initconst = {
1093        .state_table = icx_cstates,
1094        .disable_promotion_to_c1e = true,
1095        .use_acpi = true,
1096};
1097
1098static const struct idle_cpu idle_cpu_avn __initconst = {
1099        .state_table = avn_cstates,
1100        .disable_promotion_to_c1e = true,
1101        .use_acpi = true,
1102};
1103
1104static const struct idle_cpu idle_cpu_knl __initconst = {
1105        .state_table = knl_cstates,
1106        .use_acpi = true,
1107};
1108
1109static const struct idle_cpu idle_cpu_bxt __initconst = {
1110        .state_table = bxt_cstates,
1111        .disable_promotion_to_c1e = true,
1112};
1113
1114static const struct idle_cpu idle_cpu_dnv __initconst = {
1115        .state_table = dnv_cstates,
1116        .disable_promotion_to_c1e = true,
1117        .use_acpi = true,
1118};
1119
1120static const struct idle_cpu idle_cpu_snr __initconst = {
1121        .state_table = snr_cstates,
1122        .disable_promotion_to_c1e = true,
1123        .use_acpi = true,
1124};
1125
/*
 * Match table tying family 6 CPU models to their idle_cpu descriptor, which is
 * carried as the driver data of the matching entry.  Every model appears at
 * most once.  The empty entry terminates the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
        {}
};
1168
/*
 * Generic match: any Intel family 6 CPU advertising X86_FEATURE_MWAIT.  No
 * driver data is attached (NULL), so a CPU matched only through this table
 * has no idle_cpu descriptor.
 * NOTE(review): presumably the fallback for models missing from
 * intel_idle_ids[] - confirm against intel_idle_init().
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
        X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
        {}
};
1173
1174static bool __init intel_idle_max_cstate_reached(int cstate)
1175{
1176        if (cstate + 1 > max_cstate) {
1177                pr_info("max_cstate %d reached\n", max_cstate);
1178                return true;
1179        }
1180        return false;
1181}
1182
1183static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1184{
1185        unsigned long eax = flg2MWAIT(state->flags);
1186
1187        if (boot_cpu_has(X86_FEATURE_ARAT))
1188                return false;
1189
1190        /*
1191         * Switch over to one-shot tick broadcast if the target C-state
1192         * is deeper than C1.
1193         */
1194        return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1195}
1196
1197#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1198#include <acpi/processor.h>
1199
/*
 * Read-only (0444) module parameter "no_acpi": when set,
 * intel_idle_acpi_cst_extract() bails out without evaluating _CST.
 */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/*
 * Read-only (0444) module parameter, exposed under the name "use_acpi".  In
 * intel_idle_init_cstates_icpu() it is OR-ed with the per-model
 * icpu->use_acpi setting.
 */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/*
 * _CST data filled in by intel_idle_acpi_cst_extract().  A zero .count means
 * that no usable _CST information is available.
 */
static struct acpi_processor_power acpi_state_table __initdata;
1209
1210/**
1211 * intel_idle_cst_usable - Check if the _CST information can be used.
1212 *
1213 * Check if all of the C-states listed by _CST in the max_cstate range are
1214 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1215 */
1216static bool __init intel_idle_cst_usable(void)
1217{
1218        int cstate, limit;
1219
1220        limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1221                      acpi_state_table.count);
1222
1223        for (cstate = 1; cstate < limit; cstate++) {
1224                struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1225
1226                if (cx->entry_method != ACPI_CSTATE_FFH)
1227                        return false;
1228        }
1229
1230        return true;
1231}
1232
1233static bool __init intel_idle_acpi_cst_extract(void)
1234{
1235        unsigned int cpu;
1236
1237        if (no_acpi) {
1238                pr_debug("Not allowed to use ACPI _CST\n");
1239                return false;
1240        }
1241
1242        for_each_possible_cpu(cpu) {
1243                struct acpi_processor *pr = per_cpu(processors, cpu);
1244
1245                if (!pr)
1246                        continue;
1247
1248                if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1249                        continue;
1250
1251                acpi_state_table.count++;
1252
1253                if (!intel_idle_cst_usable())
1254                        continue;
1255
1256                if (!acpi_processor_claim_cst_control())
1257                        break;
1258
1259                return true;
1260        }
1261
1262        acpi_state_table.count = 0;
1263        pr_debug("ACPI _CST not found or not usable\n");
1264        return false;
1265}
1266
1267static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1268{
1269        int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1270
1271        /*
1272         * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1273         * the interesting states are ACPI_CSTATE_FFH.
1274         */
1275        for (cstate = 1; cstate < limit; cstate++) {
1276                struct acpi_processor_cx *cx;
1277                struct cpuidle_state *state;
1278
1279                if (intel_idle_max_cstate_reached(cstate - 1))
1280                        break;
1281
1282                cx = &acpi_state_table.states[cstate];
1283
1284                state = &drv->states[drv->state_count++];
1285
1286                snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1287                strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1288                state->exit_latency = cx->latency;
1289                /*
1290                 * For C1-type C-states use the same number for both the exit
1291                 * latency and target residency, because that is the case for
1292                 * C1 in the majority of the static C-states tables above.
1293                 * For the other types of C-states, however, set the target
1294                 * residency to 3 times the exit latency which should lead to
1295                 * a reasonable balance between energy-efficiency and
1296                 * performance in the majority of interesting cases.
1297                 */
1298                state->target_residency = cx->latency;
1299                if (cx->type > ACPI_STATE_C1)
1300                        state->target_residency *= 3;
1301
1302                state->flags = MWAIT2flg(cx->address);
1303                if (cx->type > ACPI_STATE_C2)
1304                        state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1305
1306                if (disabled_states_mask & BIT(cstate))
1307                        state->flags |= CPUIDLE_FLAG_OFF;
1308
1309                if (intel_idle_state_needs_timer_stop(state))
1310                        state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1311
1312                state->enter = intel_idle;
1313                state->enter_s2idle = intel_idle_s2idle;
1314        }
1315}
1316
1317static bool __init intel_idle_off_by_default(u32 mwait_hint)
1318{
1319        int cstate, limit;
1320
1321        /*
1322         * If there are no _CST C-states, do not disable any C-states by
1323         * default.
1324         */
1325        if (!acpi_state_table.count)
1326                return false;
1327
1328        limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1329        /*
1330         * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1331         * the interesting states are ACPI_CSTATE_FFH.
1332         */
1333        for (cstate = 1; cstate < limit; cstate++) {
1334                if (acpi_state_table.states[cstate].address == mwait_hint)
1335                        return false;
1336        }
1337        return true;
1338}
1339#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1340#define force_use_acpi  (false)
1341
1342static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1343static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1344static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1345#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1346
1347/**
1348 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1349 *
1350 * Tune IVT multi-socket targets.
1351 * Assumption: num_sockets == (max_package_num + 1).
1352 */
1353static void __init ivt_idle_state_table_update(void)
1354{
1355        /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1356        int cpu, package_num, num_sockets = 1;
1357
1358        for_each_online_cpu(cpu) {
1359                package_num = topology_physical_package_id(cpu);
1360                if (package_num + 1 > num_sockets) {
1361                        num_sockets = package_num + 1;
1362
1363                        if (num_sockets > 4) {
1364                                cpuidle_state_table = ivt_cstates_8s;
1365                                return;
1366                        }
1367                }
1368        }
1369
1370        if (num_sockets > 2)
1371                cpuidle_state_table = ivt_cstates_4s;
1372
1373        /* else, 1 and 2 socket systems use default ivt_cstates */
1374}
1375
1376/**
1377 * irtl_2_usec - IRTL to microseconds conversion.
1378 * @irtl: IRTL MSR value.
1379 *
1380 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1381 */
1382static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1383{
1384        static const unsigned int irtl_ns_units[] __initconst = {
1385                1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1386        };
1387        unsigned long long ns;
1388
1389        if (!irtl)
1390                return 0;
1391
1392        ns = irtl_ns_units[(irtl >> 10) & 0x7];
1393
1394        return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1395}
1396
1397/**
1398 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1399 *
1400 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1401 * definitive maximum latency and use the same value for target_residency.
1402 */
1403static void __init bxt_idle_state_table_update(void)
1404{
1405        unsigned long long msr;
1406        unsigned int usec;
1407
1408        rdmsrl(MSR_PKGC6_IRTL, msr);
1409        usec = irtl_2_usec(msr);
1410        if (usec) {
1411                bxt_cstates[2].exit_latency = usec;
1412                bxt_cstates[2].target_residency = usec;
1413        }
1414
1415        rdmsrl(MSR_PKGC7_IRTL, msr);
1416        usec = irtl_2_usec(msr);
1417        if (usec) {
1418                bxt_cstates[3].exit_latency = usec;
1419                bxt_cstates[3].target_residency = usec;
1420        }
1421
1422        rdmsrl(MSR_PKGC8_IRTL, msr);
1423        usec = irtl_2_usec(msr);
1424        if (usec) {
1425                bxt_cstates[4].exit_latency = usec;
1426                bxt_cstates[4].target_residency = usec;
1427        }
1428
1429        rdmsrl(MSR_PKGC9_IRTL, msr);
1430        usec = irtl_2_usec(msr);
1431        if (usec) {
1432                bxt_cstates[5].exit_latency = usec;
1433                bxt_cstates[5].target_residency = usec;
1434        }
1435
1436        rdmsrl(MSR_PKGC10_IRTL, msr);
1437        usec = irtl_2_usec(msr);
1438        if (usec) {
1439                bxt_cstates[6].exit_latency = usec;
1440                bxt_cstates[6].target_residency = usec;
1441        }
1442
1443}
1444
1445/**
1446 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1447 *
1448 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1449 */
1450static void __init sklh_idle_state_table_update(void)
1451{
1452        unsigned long long msr;
1453        unsigned int eax, ebx, ecx, edx;
1454
1455
1456        /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1457        if (max_cstate <= 7)
1458                return;
1459
1460        /* if PC10 not present in CPUID.MWAIT.EDX */
1461        if ((mwait_substates & (0xF << 28)) == 0)
1462                return;
1463
1464        rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1465
1466        /* PC10 is not enabled in PKG C-state limit */
1467        if ((msr & 0xF) != 8)
1468                return;
1469
1470        ecx = 0;
1471        cpuid(7, &eax, &ebx, &ecx, &edx);
1472
1473        /* if SGX is present */
1474        if (ebx & (1 << 2)) {
1475
1476                rdmsrl(MSR_IA32_FEAT_CTL, msr);
1477
1478                /* if SGX is enabled */
1479                if (msr & (1 << 18))
1480                        return;
1481        }
1482
1483        skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1484        skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1485}
1486
1487/**
1488 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1489 * idle states table.
1490 */
1491static void __init skx_idle_state_table_update(void)
1492{
1493        unsigned long long msr;
1494
1495        rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1496
1497        /*
1498         * 000b: C0/C1 (no package C-state support)
1499         * 001b: C2
1500         * 010b: C6 (non-retention)
1501         * 011b: C6 (retention)
1502         * 111b: No Package C state limits.
1503         */
1504        if ((msr & 0x7) < 2) {
1505                /*
1506                 * Uses the CC6 + PC0 latency and 3 times of
1507                 * latency for target_residency if the PC6
1508                 * is disabled in BIOS. This is consistent
1509                 * with how intel_idle driver uses _CST
1510                 * to set the target_residency.
1511                 */
1512                skx_cstates[2].exit_latency = 92;
1513                skx_cstates[2].target_residency = 276;
1514        }
1515}
1516
1517static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1518{
1519        unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1520        unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1521                                        MWAIT_SUBSTATE_MASK;
1522
1523        /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1524        if (num_substates == 0)
1525                return false;
1526
1527        if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1528                mark_tsc_unstable("TSC halts in idle states deeper than C2");
1529
1530        return true;
1531}
1532
1533static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1534{
1535        int cstate;
1536
1537        switch (boot_cpu_data.x86_model) {
1538        case INTEL_FAM6_IVYBRIDGE_X:
1539                ivt_idle_state_table_update();
1540                break;
1541        case INTEL_FAM6_ATOM_GOLDMONT:
1542        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1543                bxt_idle_state_table_update();
1544                break;
1545        case INTEL_FAM6_SKYLAKE:
1546                sklh_idle_state_table_update();
1547                break;
1548        case INTEL_FAM6_SKYLAKE_X:
1549                skx_idle_state_table_update();
1550                break;
1551        }
1552
1553        for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1554                unsigned int mwait_hint;
1555
1556                if (intel_idle_max_cstate_reached(cstate))
1557                        break;
1558
1559                if (!cpuidle_state_table[cstate].enter &&
1560                    !cpuidle_state_table[cstate].enter_s2idle)
1561                        break;
1562
1563                /* If marked as unusable, skip this state. */
1564                if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1565                        pr_debug("state %s is disabled\n",
1566                                 cpuidle_state_table[cstate].name);
1567                        continue;
1568                }
1569
1570                mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1571                if (!intel_idle_verify_cstate(mwait_hint))
1572                        continue;
1573
1574                /* Structure copy. */
1575                drv->states[drv->state_count] = cpuidle_state_table[cstate];
1576
1577                if ((disabled_states_mask & BIT(drv->state_count)) ||
1578                    ((icpu->use_acpi || force_use_acpi) &&
1579                     intel_idle_off_by_default(mwait_hint) &&
1580                     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1581                        drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1582
1583                if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1584                        drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1585
1586                drv->state_count++;
1587        }
1588
1589        if (icpu->byt_auto_demotion_disable_flag) {
1590                wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1591                wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1592        }
1593}
1594
1595/**
1596 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1597 * @drv: cpuidle driver structure to initialize.
1598 */
1599static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1600{
1601        cpuidle_poll_state_init(drv);
1602
1603        if (disabled_states_mask & BIT(0))
1604                drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1605
1606        drv->state_count = 1;
1607
1608        if (icpu)
1609                intel_idle_init_cstates_icpu(drv);
1610        else
1611                intel_idle_init_cstates_acpi(drv);
1612}
1613
1614static void auto_demotion_disable(void)
1615{
1616        unsigned long long msr_bits;
1617
1618        rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1619        msr_bits &= ~auto_demotion_disable_flags;
1620        wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1621}
1622
1623static void c1e_promotion_disable(void)
1624{
1625        unsigned long long msr_bits;
1626
1627        rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1628        msr_bits &= ~0x2;
1629        wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1630}
1631
1632/**
1633 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1634 * @cpu: CPU to initialize.
1635 *
1636 * Register a cpuidle device object for @cpu and update its MSRs in accordance
1637 * with the processor model flags.
1638 */
1639static int intel_idle_cpu_init(unsigned int cpu)
1640{
1641        struct cpuidle_device *dev;
1642
1643        dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1644        dev->cpu = cpu;
1645
1646        if (cpuidle_register_device(dev)) {
1647                pr_debug("cpuidle_register_device %d failed!\n", cpu);
1648                return -EIO;
1649        }
1650
1651        if (auto_demotion_disable_flags)
1652                auto_demotion_disable();
1653
1654        if (disable_promotion_to_c1e)
1655                c1e_promotion_disable();
1656
1657        return 0;
1658}
1659
1660static int intel_idle_cpu_online(unsigned int cpu)
1661{
1662        struct cpuidle_device *dev;
1663
1664        if (!boot_cpu_has(X86_FEATURE_ARAT))
1665                tick_broadcast_enable();
1666
1667        /*
1668         * Some systems can hotplug a cpu at runtime after
1669         * the kernel has booted, we have to initialize the
1670         * driver in this case
1671         */
1672        dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1673        if (!dev->registered)
1674                return intel_idle_cpu_init(cpu);
1675
1676        return 0;
1677}
1678
1679/**
1680 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1681 */
1682static void __init intel_idle_cpuidle_devices_uninit(void)
1683{
1684        int i;
1685
1686        for_each_online_cpu(i)
1687                cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1688}
1689
1690static int __init intel_idle_init(void)
1691{
1692        const struct x86_cpu_id *id;
1693        unsigned int eax, ebx, ecx;
1694        int retval;
1695
1696        /* Do not load intel_idle at all for now if idle= is passed */
1697        if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1698                return -ENODEV;
1699
1700        if (max_cstate == 0) {
1701                pr_debug("disabled\n");
1702                return -EPERM;
1703        }
1704
1705        id = x86_match_cpu(intel_idle_ids);
1706        if (id) {
1707                if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1708                        pr_debug("Please enable MWAIT in BIOS SETUP\n");
1709                        return -ENODEV;
1710                }
1711        } else {
1712                id = x86_match_cpu(intel_mwait_ids);
1713                if (!id)
1714                        return -ENODEV;
1715        }
1716
1717        if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1718                return -ENODEV;
1719
1720        cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1721
1722        if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1723            !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1724            !mwait_substates)
1725                        return -ENODEV;
1726
1727        pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1728
1729        icpu = (const struct idle_cpu *)id->driver_data;
1730        if (icpu) {
1731                cpuidle_state_table = icpu->state_table;
1732                auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1733                disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
1734                if (icpu->use_acpi || force_use_acpi)
1735                        intel_idle_acpi_cst_extract();
1736        } else if (!intel_idle_acpi_cst_extract()) {
1737                return -ENODEV;
1738        }
1739
1740        pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1741                 boot_cpu_data.x86_model);
1742
1743        intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1744        if (!intel_idle_cpuidle_devices)
1745                return -ENOMEM;
1746
1747        intel_idle_cpuidle_driver_init(&intel_idle_driver);
1748
1749        retval = cpuidle_register_driver(&intel_idle_driver);
1750        if (retval) {
1751                struct cpuidle_driver *drv = cpuidle_get_driver();
1752                printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1753                       drv ? drv->name : "none");
1754                goto init_driver_fail;
1755        }
1756
1757        retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1758                                   intel_idle_cpu_online, NULL);
1759        if (retval < 0)
1760                goto hp_setup_fail;
1761
1762        pr_debug("Local APIC timer is reliable in %s\n",
1763                 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
1764
1765        return 0;
1766
1767hp_setup_fail:
1768        intel_idle_cpuidle_devices_uninit();
1769        cpuidle_unregister_driver(&intel_idle_driver);
1770init_driver_fail:
1771        free_percpu(intel_idle_cpuidle_devices);
1772        return retval;
1773
1774}
1775device_initcall(intel_idle_init);
1776
1777/*
1778 * We are not really modular, but we used to support that.  Meaning we also
1779 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1780 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1781 * is the easiest way (currently) to continue doing that.
1782 */
1783module_param(max_cstate, int, 0444);
1784/*
1785 * The positions of the bits that are set in this number are the indices of the
1786 * idle states to be disabled by default (as reflected by the names of the
1787 * corresponding idle state directories in sysfs, "state0", "state1" ...
1788 * "state<i>" ..., where <i> is the index of the given state).
1789 */
1790module_param_named(states_off, disabled_states_mask, uint, 0444);
1791MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
1792