linux/drivers/idle/intel_idle.c
<<
>>
Prefs
   1/*
   2 * intel_idle.c - native hardware idle loop for modern Intel processors
   3 *
   4 * Copyright (c) 2013, Intel Corporation.
   5 * Len Brown <len.brown@intel.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify it
   8 * under the terms and conditions of the GNU General Public License,
   9 * version 2, as published by the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope it will be useful, but WITHOUT
  12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14 * more details.
  15 *
  16 * You should have received a copy of the GNU General Public License along with
  17 * this program; if not, write to the Free Software Foundation, Inc.,
  18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  19 */
  20
  21/*
  22 * intel_idle is a cpuidle driver that loads on specific Intel processors
  23 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
  24 * make Linux more efficient on these processors, as intel_idle knows
  25 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
  26 */
  27
  28/*
  29 * Design Assumptions
  30 *
  31 * All CPUs have same idle states as boot CPU
  32 *
  33 * Chipset BM_STS (bus master status) bit is a NOP
   34 *      for preventing entry into deep C-states
  35 */
  36
  37/*
  38 * Known limitations
  39 *
  40 * The driver currently initializes for_each_online_cpu() upon modprobe.
   41 * It is unaware of subsequent processors hot-added to the system.
  42 * This means that if you boot with maxcpus=n and later online
  43 * processors above n, those processors will use C1 only.
  44 *
   45 * ACPI has a .suspend hack to turn off deep C-states during suspend
  46 * to avoid complications with the lapic timer workaround.
  47 * Have not seen issues with suspend, but may need same workaround here.
  48 *
  49 */
  50
  51/* un-comment DEBUG to enable pr_debug() statements */
  52#define DEBUG
  53
  54#include <linux/kernel.h>
  55#include <linux/cpuidle.h>
  56#include <linux/tick.h>
  57#include <trace/events/power.h>
  58#include <linux/sched.h>
  59#include <linux/notifier.h>
  60#include <linux/cpu.h>
  61#include <linux/moduleparam.h>
  62#include <asm/cpu_device_id.h>
  63#include <asm/intel-family.h>
  64#include <asm/mwait.h>
  65#include <asm/msr.h>
  66
  67#define INTEL_IDLE_VERSION "0.4.1"
  68#define PREFIX "intel_idle: "
  69
  70static struct cpuidle_driver intel_idle_driver = {
  71        .name = "intel_idle",
  72        .owner = THIS_MODULE,
  73};
  74/* intel_idle.max_cstate=0 disables driver */
  75static int max_cstate = CPUIDLE_STATE_MAX - 1;
  76
  77static unsigned int mwait_substates;
  78
  79#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
  80/* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
  81static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
  82
  83struct idle_cpu {
  84        struct cpuidle_state *state_table;
  85
  86        /*
  87         * Hardware C-state auto-demotion may not always be optimal.
  88         * Indicate which enable bits to clear here.
  89         */
  90        unsigned long auto_demotion_disable_flags;
  91        bool byt_auto_demotion_disable_flag;
  92        bool disable_promotion_to_c1e;
  93};
  94
  95static const struct idle_cpu *icpu;
  96static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
  97static int intel_idle(struct cpuidle_device *dev,
  98                        struct cpuidle_driver *drv, int index);
  99static void intel_idle_freeze(struct cpuidle_device *dev,
 100                              struct cpuidle_driver *drv, int index);
 101static struct cpuidle_state *cpuidle_state_table;
 102
 103/*
 104 * Set this flag for states where the HW flushes the TLB for us
 105 * and so we don't need cross-calls to keep it consistent.
 106 * If this flag is set, SW flushes the TLB, so even if the
 107 * HW doesn't do the flushing, this flag is safe to use.
 108 */
 109#define CPUIDLE_FLAG_TLB_FLUSHED        0x10000
 110
 111/*
 112 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 113 * the C-state (top nibble) and sub-state (bottom nibble)
 114 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 115 *
 116 * We store the hint at the top of our "flags" for each state.
 117 */
 118#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
 119#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
 120
 121/*
 122 * States are indexed by the cstate number,
 123 * which is also the index into the MWAIT hint array.
 124 * Thus C0 is a dummy.
 125 */
 126static struct cpuidle_state nehalem_cstates[] = {
 127        {
 128                .name = "C1",
 129                .desc = "MWAIT 0x00",
 130                .flags = MWAIT2flg(0x00),
 131                .exit_latency = 3,
 132                .target_residency = 6,
 133                .enter = &intel_idle,
 134                .enter_freeze = intel_idle_freeze, },
 135        {
 136                .name = "C1E",
 137                .desc = "MWAIT 0x01",
 138                .flags = MWAIT2flg(0x01),
 139                .exit_latency = 10,
 140                .target_residency = 20,
 141                .enter = &intel_idle,
 142                .enter_freeze = intel_idle_freeze, },
 143        {
 144                .name = "C3",
 145                .desc = "MWAIT 0x10",
 146                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 147                .exit_latency = 20,
 148                .target_residency = 80,
 149                .enter = &intel_idle,
 150                .enter_freeze = intel_idle_freeze, },
 151        {
 152                .name = "C6",
 153                .desc = "MWAIT 0x20",
 154                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 155                .exit_latency = 200,
 156                .target_residency = 800,
 157                .enter = &intel_idle,
 158                .enter_freeze = intel_idle_freeze, },
 159        {
 160                .enter = NULL }
 161};
 162
 163static struct cpuidle_state snb_cstates[] = {
 164        {
 165                .name = "C1",
 166                .desc = "MWAIT 0x00",
 167                .flags = MWAIT2flg(0x00),
 168                .exit_latency = 2,
 169                .target_residency = 2,
 170                .enter = &intel_idle,
 171                .enter_freeze = intel_idle_freeze, },
 172        {
 173                .name = "C1E",
 174                .desc = "MWAIT 0x01",
 175                .flags = MWAIT2flg(0x01),
 176                .exit_latency = 10,
 177                .target_residency = 20,
 178                .enter = &intel_idle,
 179                .enter_freeze = intel_idle_freeze, },
 180        {
 181                .name = "C3",
 182                .desc = "MWAIT 0x10",
 183                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 184                .exit_latency = 80,
 185                .target_residency = 211,
 186                .enter = &intel_idle,
 187                .enter_freeze = intel_idle_freeze, },
 188        {
 189                .name = "C6",
 190                .desc = "MWAIT 0x20",
 191                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 192                .exit_latency = 104,
 193                .target_residency = 345,
 194                .enter = &intel_idle,
 195                .enter_freeze = intel_idle_freeze, },
 196        {
 197                .name = "C7",
 198                .desc = "MWAIT 0x30",
 199                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 200                .exit_latency = 109,
 201                .target_residency = 345,
 202                .enter = &intel_idle,
 203                .enter_freeze = intel_idle_freeze, },
 204        {
 205                .enter = NULL }
 206};
 207
 208static struct cpuidle_state byt_cstates[] = {
 209        {
 210                .name = "C1",
 211                .desc = "MWAIT 0x00",
 212                .flags = MWAIT2flg(0x00),
 213                .exit_latency = 1,
 214                .target_residency = 1,
 215                .enter = &intel_idle,
 216                .enter_freeze = intel_idle_freeze, },
 217        {
 218                .name = "C6N",
 219                .desc = "MWAIT 0x58",
 220                .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 221                .exit_latency = 300,
 222                .target_residency = 275,
 223                .enter = &intel_idle,
 224                .enter_freeze = intel_idle_freeze, },
 225        {
 226                .name = "C6S",
 227                .desc = "MWAIT 0x52",
 228                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 229                .exit_latency = 500,
 230                .target_residency = 560,
 231                .enter = &intel_idle,
 232                .enter_freeze = intel_idle_freeze, },
 233        {
 234                .name = "C7",
 235                .desc = "MWAIT 0x60",
 236                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 237                .exit_latency = 1200,
 238                .target_residency = 4000,
 239                .enter = &intel_idle,
 240                .enter_freeze = intel_idle_freeze, },
 241        {
 242                .name = "C7S",
 243                .desc = "MWAIT 0x64",
 244                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 245                .exit_latency = 10000,
 246                .target_residency = 20000,
 247                .enter = &intel_idle,
 248                .enter_freeze = intel_idle_freeze, },
 249        {
 250                .enter = NULL }
 251};
 252
 253static struct cpuidle_state cht_cstates[] = {
 254        {
 255                .name = "C1",
 256                .desc = "MWAIT 0x00",
 257                .flags = MWAIT2flg(0x00),
 258                .exit_latency = 1,
 259                .target_residency = 1,
 260                .enter = &intel_idle,
 261                .enter_freeze = intel_idle_freeze, },
 262        {
 263                .name = "C6N",
 264                .desc = "MWAIT 0x58",
 265                .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
 266                .exit_latency = 80,
 267                .target_residency = 275,
 268                .enter = &intel_idle,
 269                .enter_freeze = intel_idle_freeze, },
 270        {
 271                .name = "C6S",
 272                .desc = "MWAIT 0x52",
 273                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 274                .exit_latency = 200,
 275                .target_residency = 560,
 276                .enter = &intel_idle,
 277                .enter_freeze = intel_idle_freeze, },
 278        {
 279                .name = "C7",
 280                .desc = "MWAIT 0x60",
 281                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 282                .exit_latency = 1200,
 283                .target_residency = 4000,
 284                .enter = &intel_idle,
 285                .enter_freeze = intel_idle_freeze, },
 286        {
 287                .name = "C7S",
 288                .desc = "MWAIT 0x64",
 289                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 290                .exit_latency = 10000,
 291                .target_residency = 20000,
 292                .enter = &intel_idle,
 293                .enter_freeze = intel_idle_freeze, },
 294        {
 295                .enter = NULL }
 296};
 297
 298static struct cpuidle_state ivb_cstates[] = {
 299        {
 300                .name = "C1",
 301                .desc = "MWAIT 0x00",
 302                .flags = MWAIT2flg(0x00),
 303                .exit_latency = 1,
 304                .target_residency = 1,
 305                .enter = &intel_idle,
 306                .enter_freeze = intel_idle_freeze, },
 307        {
 308                .name = "C1E",
 309                .desc = "MWAIT 0x01",
 310                .flags = MWAIT2flg(0x01),
 311                .exit_latency = 10,
 312                .target_residency = 20,
 313                .enter = &intel_idle,
 314                .enter_freeze = intel_idle_freeze, },
 315        {
 316                .name = "C3",
 317                .desc = "MWAIT 0x10",
 318                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 319                .exit_latency = 59,
 320                .target_residency = 156,
 321                .enter = &intel_idle,
 322                .enter_freeze = intel_idle_freeze, },
 323        {
 324                .name = "C6",
 325                .desc = "MWAIT 0x20",
 326                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 327                .exit_latency = 80,
 328                .target_residency = 300,
 329                .enter = &intel_idle,
 330                .enter_freeze = intel_idle_freeze, },
 331        {
 332                .name = "C7",
 333                .desc = "MWAIT 0x30",
 334                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 335                .exit_latency = 87,
 336                .target_residency = 300,
 337                .enter = &intel_idle,
 338                .enter_freeze = intel_idle_freeze, },
 339        {
 340                .enter = NULL }
 341};
 342
 343static struct cpuidle_state ivt_cstates[] = {
 344        {
 345                .name = "C1",
 346                .desc = "MWAIT 0x00",
 347                .flags = MWAIT2flg(0x00),
 348                .exit_latency = 1,
 349                .target_residency = 1,
 350                .enter = &intel_idle,
 351                .enter_freeze = intel_idle_freeze, },
 352        {
 353                .name = "C1E",
 354                .desc = "MWAIT 0x01",
 355                .flags = MWAIT2flg(0x01),
 356                .exit_latency = 10,
 357                .target_residency = 80,
 358                .enter = &intel_idle,
 359                .enter_freeze = intel_idle_freeze, },
 360        {
 361                .name = "C3",
 362                .desc = "MWAIT 0x10",
 363                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 364                .exit_latency = 59,
 365                .target_residency = 156,
 366                .enter = &intel_idle,
 367                .enter_freeze = intel_idle_freeze, },
 368        {
 369                .name = "C6",
 370                .desc = "MWAIT 0x20",
 371                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 372                .exit_latency = 82,
 373                .target_residency = 300,
 374                .enter = &intel_idle,
 375                .enter_freeze = intel_idle_freeze, },
 376        {
 377                .enter = NULL }
 378};
 379
 380static struct cpuidle_state ivt_cstates_4s[] = {
 381        {
 382                .name = "C1",
 383                .desc = "MWAIT 0x00",
 384                .flags = MWAIT2flg(0x00),
 385                .exit_latency = 1,
 386                .target_residency = 1,
 387                .enter = &intel_idle,
 388                .enter_freeze = intel_idle_freeze, },
 389        {
 390                .name = "C1E",
 391                .desc = "MWAIT 0x01",
 392                .flags = MWAIT2flg(0x01),
 393                .exit_latency = 10,
 394                .target_residency = 250,
 395                .enter = &intel_idle,
 396                .enter_freeze = intel_idle_freeze, },
 397        {
 398                .name = "C3",
 399                .desc = "MWAIT 0x10",
 400                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 401                .exit_latency = 59,
 402                .target_residency = 300,
 403                .enter = &intel_idle,
 404                .enter_freeze = intel_idle_freeze, },
 405        {
 406                .name = "C6",
 407                .desc = "MWAIT 0x20",
 408                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 409                .exit_latency = 84,
 410                .target_residency = 400,
 411                .enter = &intel_idle,
 412                .enter_freeze = intel_idle_freeze, },
 413        {
 414                .enter = NULL }
 415};
 416
 417static struct cpuidle_state ivt_cstates_8s[] = {
 418        {
 419                .name = "C1",
 420                .desc = "MWAIT 0x00",
 421                .flags = MWAIT2flg(0x00),
 422                .exit_latency = 1,
 423                .target_residency = 1,
 424                .enter = &intel_idle,
 425                .enter_freeze = intel_idle_freeze, },
 426        {
 427                .name = "C1E",
 428                .desc = "MWAIT 0x01",
 429                .flags = MWAIT2flg(0x01),
 430                .exit_latency = 10,
 431                .target_residency = 500,
 432                .enter = &intel_idle,
 433                .enter_freeze = intel_idle_freeze, },
 434        {
 435                .name = "C3",
 436                .desc = "MWAIT 0x10",
 437                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 438                .exit_latency = 59,
 439                .target_residency = 600,
 440                .enter = &intel_idle,
 441                .enter_freeze = intel_idle_freeze, },
 442        {
 443                .name = "C6",
 444                .desc = "MWAIT 0x20",
 445                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 446                .exit_latency = 88,
 447                .target_residency = 700,
 448                .enter = &intel_idle,
 449                .enter_freeze = intel_idle_freeze, },
 450        {
 451                .enter = NULL }
 452};
 453
 454static struct cpuidle_state hsw_cstates[] = {
 455        {
 456                .name = "C1",
 457                .desc = "MWAIT 0x00",
 458                .flags = MWAIT2flg(0x00),
 459                .exit_latency = 2,
 460                .target_residency = 2,
 461                .enter = &intel_idle,
 462                .enter_freeze = intel_idle_freeze, },
 463        {
 464                .name = "C1E",
 465                .desc = "MWAIT 0x01",
 466                .flags = MWAIT2flg(0x01),
 467                .exit_latency = 10,
 468                .target_residency = 20,
 469                .enter = &intel_idle,
 470                .enter_freeze = intel_idle_freeze, },
 471        {
 472                .name = "C3",
 473                .desc = "MWAIT 0x10",
 474                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 475                .exit_latency = 33,
 476                .target_residency = 100,
 477                .enter = &intel_idle,
 478                .enter_freeze = intel_idle_freeze, },
 479        {
 480                .name = "C6",
 481                .desc = "MWAIT 0x20",
 482                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 483                .exit_latency = 133,
 484                .target_residency = 400,
 485                .enter = &intel_idle,
 486                .enter_freeze = intel_idle_freeze, },
 487        {
 488                .name = "C7s",
 489                .desc = "MWAIT 0x32",
 490                .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 491                .exit_latency = 166,
 492                .target_residency = 500,
 493                .enter = &intel_idle,
 494                .enter_freeze = intel_idle_freeze, },
 495        {
 496                .name = "C8",
 497                .desc = "MWAIT 0x40",
 498                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 499                .exit_latency = 300,
 500                .target_residency = 900,
 501                .enter = &intel_idle,
 502                .enter_freeze = intel_idle_freeze, },
 503        {
 504                .name = "C9",
 505                .desc = "MWAIT 0x50",
 506                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 507                .exit_latency = 600,
 508                .target_residency = 1800,
 509                .enter = &intel_idle,
 510                .enter_freeze = intel_idle_freeze, },
 511        {
 512                .name = "C10",
 513                .desc = "MWAIT 0x60",
 514                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 515                .exit_latency = 2600,
 516                .target_residency = 7700,
 517                .enter = &intel_idle,
 518                .enter_freeze = intel_idle_freeze, },
 519        {
 520                .enter = NULL }
 521};
 522static struct cpuidle_state bdw_cstates[] = {
 523        {
 524                .name = "C1",
 525                .desc = "MWAIT 0x00",
 526                .flags = MWAIT2flg(0x00),
 527                .exit_latency = 2,
 528                .target_residency = 2,
 529                .enter = &intel_idle,
 530                .enter_freeze = intel_idle_freeze, },
 531        {
 532                .name = "C1E",
 533                .desc = "MWAIT 0x01",
 534                .flags = MWAIT2flg(0x01),
 535                .exit_latency = 10,
 536                .target_residency = 20,
 537                .enter = &intel_idle,
 538                .enter_freeze = intel_idle_freeze, },
 539        {
 540                .name = "C3",
 541                .desc = "MWAIT 0x10",
 542                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 543                .exit_latency = 40,
 544                .target_residency = 100,
 545                .enter = &intel_idle,
 546                .enter_freeze = intel_idle_freeze, },
 547        {
 548                .name = "C6",
 549                .desc = "MWAIT 0x20",
 550                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 551                .exit_latency = 133,
 552                .target_residency = 400,
 553                .enter = &intel_idle,
 554                .enter_freeze = intel_idle_freeze, },
 555        {
 556                .name = "C7s",
 557                .desc = "MWAIT 0x32",
 558                .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 559                .exit_latency = 166,
 560                .target_residency = 500,
 561                .enter = &intel_idle,
 562                .enter_freeze = intel_idle_freeze, },
 563        {
 564                .name = "C8",
 565                .desc = "MWAIT 0x40",
 566                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 567                .exit_latency = 300,
 568                .target_residency = 900,
 569                .enter = &intel_idle,
 570                .enter_freeze = intel_idle_freeze, },
 571        {
 572                .name = "C9",
 573                .desc = "MWAIT 0x50",
 574                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 575                .exit_latency = 600,
 576                .target_residency = 1800,
 577                .enter = &intel_idle,
 578                .enter_freeze = intel_idle_freeze, },
 579        {
 580                .name = "C10",
 581                .desc = "MWAIT 0x60",
 582                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 583                .exit_latency = 2600,
 584                .target_residency = 7700,
 585                .enter = &intel_idle,
 586                .enter_freeze = intel_idle_freeze, },
 587        {
 588                .enter = NULL }
 589};
 590
 591static struct cpuidle_state skl_cstates[] = {
 592        {
 593                .name = "C1",
 594                .desc = "MWAIT 0x00",
 595                .flags = MWAIT2flg(0x00),
 596                .exit_latency = 2,
 597                .target_residency = 2,
 598                .enter = &intel_idle,
 599                .enter_freeze = intel_idle_freeze, },
 600        {
 601                .name = "C1E",
 602                .desc = "MWAIT 0x01",
 603                .flags = MWAIT2flg(0x01),
 604                .exit_latency = 10,
 605                .target_residency = 20,
 606                .enter = &intel_idle,
 607                .enter_freeze = intel_idle_freeze, },
 608        {
 609                .name = "C3",
 610                .desc = "MWAIT 0x10",
 611                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 612                .exit_latency = 70,
 613                .target_residency = 100,
 614                .enter = &intel_idle,
 615                .enter_freeze = intel_idle_freeze, },
 616        {
 617                .name = "C6",
 618                .desc = "MWAIT 0x20",
 619                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 620                .exit_latency = 85,
 621                .target_residency = 200,
 622                .enter = &intel_idle,
 623                .enter_freeze = intel_idle_freeze, },
 624        {
 625                .name = "C7s",
 626                .desc = "MWAIT 0x33",
 627                .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
 628                .exit_latency = 124,
 629                .target_residency = 800,
 630                .enter = &intel_idle,
 631                .enter_freeze = intel_idle_freeze, },
 632        {
 633                .name = "C8",
 634                .desc = "MWAIT 0x40",
 635                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 636                .exit_latency = 200,
 637                .target_residency = 800,
 638                .enter = &intel_idle,
 639                .enter_freeze = intel_idle_freeze, },
 640        {
 641                .name = "C9",
 642                .desc = "MWAIT 0x50",
 643                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 644                .exit_latency = 480,
 645                .target_residency = 5000,
 646                .enter = &intel_idle,
 647                .enter_freeze = intel_idle_freeze, },
 648        {
 649                .name = "C10",
 650                .desc = "MWAIT 0x60",
 651                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 652                .exit_latency = 890,
 653                .target_residency = 5000,
 654                .enter = &intel_idle,
 655                .enter_freeze = intel_idle_freeze, },
 656        {
 657                .enter = NULL }
 658};
 659
 660static struct cpuidle_state skx_cstates[] = {
 661        {
 662                .name = "C1",
 663                .desc = "MWAIT 0x00",
 664                .flags = MWAIT2flg(0x00),
 665                .exit_latency = 2,
 666                .target_residency = 2,
 667                .enter = &intel_idle,
 668                .enter_freeze = intel_idle_freeze, },
 669        {
 670                .name = "C1E",
 671                .desc = "MWAIT 0x01",
 672                .flags = MWAIT2flg(0x01),
 673                .exit_latency = 10,
 674                .target_residency = 20,
 675                .enter = &intel_idle,
 676                .enter_freeze = intel_idle_freeze, },
 677        {
 678                .name = "C6",
 679                .desc = "MWAIT 0x20",
 680                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 681                .exit_latency = 133,
 682                .target_residency = 600,
 683                .enter = &intel_idle,
 684                .enter_freeze = intel_idle_freeze, },
 685        {
 686                .enter = NULL }
 687};
 688
 689static struct cpuidle_state atom_cstates[] = {
 690        {
 691                .name = "C1E",
 692                .desc = "MWAIT 0x00",
 693                .flags = MWAIT2flg(0x00),
 694                .exit_latency = 10,
 695                .target_residency = 20,
 696                .enter = &intel_idle,
 697                .enter_freeze = intel_idle_freeze, },
 698        {
 699                .name = "C2",
 700                .desc = "MWAIT 0x10",
 701                .flags = MWAIT2flg(0x10),
 702                .exit_latency = 20,
 703                .target_residency = 80,
 704                .enter = &intel_idle,
 705                .enter_freeze = intel_idle_freeze, },
 706        {
 707                .name = "C4",
 708                .desc = "MWAIT 0x30",
 709                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 710                .exit_latency = 100,
 711                .target_residency = 400,
 712                .enter = &intel_idle,
 713                .enter_freeze = intel_idle_freeze, },
 714        {
 715                .name = "C6",
 716                .desc = "MWAIT 0x52",
 717                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 718                .exit_latency = 140,
 719                .target_residency = 560,
 720                .enter = &intel_idle,
 721                .enter_freeze = intel_idle_freeze, },
 722        {
 723                .enter = NULL }
 724};
 725static struct cpuidle_state tangier_cstates[] = {
 726        {
 727                .name = "C1",
 728                .desc = "MWAIT 0x00",
 729                .flags = MWAIT2flg(0x00),
 730                .exit_latency = 1,
 731                .target_residency = 4,
 732                .enter = &intel_idle,
 733                .enter_freeze = intel_idle_freeze, },
 734        {
 735                .name = "C4",
 736                .desc = "MWAIT 0x30",
 737                .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
 738                .exit_latency = 100,
 739                .target_residency = 400,
 740                .enter = &intel_idle,
 741                .enter_freeze = intel_idle_freeze, },
 742        {
 743                .name = "C6",
 744                .desc = "MWAIT 0x52",
 745                .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
 746                .exit_latency = 140,
 747                .target_residency = 560,
 748                .enter = &intel_idle,
 749                .enter_freeze = intel_idle_freeze, },
 750        {
 751                .name = "C7",
 752                .desc = "MWAIT 0x60",
 753                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 754                .exit_latency = 1200,
 755                .target_residency = 4000,
 756                .enter = &intel_idle,
 757                .enter_freeze = intel_idle_freeze, },
 758        {
 759                .name = "C9",
 760                .desc = "MWAIT 0x64",
 761                .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
 762                .exit_latency = 10000,
 763                .target_residency = 20000,
 764                .enter = &intel_idle,
 765                .enter_freeze = intel_idle_freeze, },
 766        {
 767                .enter = NULL }
 768};
 769static struct cpuidle_state avn_cstates[] = {
 770        {
 771                .name = "C1",
 772                .desc = "MWAIT 0x00",
 773                .flags = MWAIT2flg(0x00),
 774                .exit_latency = 2,
 775                .target_residency = 2,
 776                .enter = &intel_idle,
 777                .enter_freeze = intel_idle_freeze, },
 778        {
 779                .name = "C6",
 780                .desc = "MWAIT 0x51",
 781                .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
 782                .exit_latency = 15,
 783                .target_residency = 45,
 784                .enter = &intel_idle,
 785                .enter_freeze = intel_idle_freeze, },
 786        {
 787                .enter = NULL }
 788};
 789static struct cpuidle_state knl_cstates[] = {
 790        {
 791                .name = "C1",
 792                .desc = "MWAIT 0x00",
 793                .flags = MWAIT2flg(0x00),
 794                .exit_latency = 1,
 795                .target_residency = 2,
 796                .enter = &intel_idle,
 797                .enter_freeze = intel_idle_freeze },
 798        {
 799                .name = "C6",
 800                .desc = "MWAIT 0x10",
 801                .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 802                .exit_latency = 120,
 803                .target_residency = 500,
 804                .enter = &intel_idle,
 805                .enter_freeze = intel_idle_freeze },
 806        {
 807                .enter = NULL }
 808};
 809
 810static struct cpuidle_state bxt_cstates[] = {
 811        {
 812                .name = "C1",
 813                .desc = "MWAIT 0x00",
 814                .flags = MWAIT2flg(0x00),
 815                .exit_latency = 2,
 816                .target_residency = 2,
 817                .enter = &intel_idle,
 818                .enter_freeze = intel_idle_freeze, },
 819        {
 820                .name = "C1E",
 821                .desc = "MWAIT 0x01",
 822                .flags = MWAIT2flg(0x01),
 823                .exit_latency = 10,
 824                .target_residency = 20,
 825                .enter = &intel_idle,
 826                .enter_freeze = intel_idle_freeze, },
 827        {
 828                .name = "C6",
 829                .desc = "MWAIT 0x20",
 830                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 831                .exit_latency = 133,
 832                .target_residency = 133,
 833                .enter = &intel_idle,
 834                .enter_freeze = intel_idle_freeze, },
 835        {
 836                .name = "C7s",
 837                .desc = "MWAIT 0x31",
 838                .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
 839                .exit_latency = 155,
 840                .target_residency = 155,
 841                .enter = &intel_idle,
 842                .enter_freeze = intel_idle_freeze, },
 843        {
 844                .name = "C8",
 845                .desc = "MWAIT 0x40",
 846                .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 847                .exit_latency = 1000,
 848                .target_residency = 1000,
 849                .enter = &intel_idle,
 850                .enter_freeze = intel_idle_freeze, },
 851        {
 852                .name = "C9",
 853                .desc = "MWAIT 0x50",
 854                .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 855                .exit_latency = 2000,
 856                .target_residency = 2000,
 857                .enter = &intel_idle,
 858                .enter_freeze = intel_idle_freeze, },
 859        {
 860                .name = "C10",
 861                .desc = "MWAIT 0x60",
 862                .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 863                .exit_latency = 10000,
 864                .target_residency = 10000,
 865                .enter = &intel_idle,
 866                .enter_freeze = intel_idle_freeze, },
 867        {
 868                .enter = NULL }
 869};
 870
 871static struct cpuidle_state dnv_cstates[] = {
 872        {
 873                .name = "C1",
 874                .desc = "MWAIT 0x00",
 875                .flags = MWAIT2flg(0x00),
 876                .exit_latency = 2,
 877                .target_residency = 2,
 878                .enter = &intel_idle,
 879                .enter_freeze = intel_idle_freeze, },
 880        {
 881                .name = "C1E",
 882                .desc = "MWAIT 0x01",
 883                .flags = MWAIT2flg(0x01),
 884                .exit_latency = 10,
 885                .target_residency = 20,
 886                .enter = &intel_idle,
 887                .enter_freeze = intel_idle_freeze, },
 888        {
 889                .name = "C6",
 890                .desc = "MWAIT 0x20",
 891                .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 892                .exit_latency = 50,
 893                .target_residency = 500,
 894                .enter = &intel_idle,
 895                .enter_freeze = intel_idle_freeze, },
 896        {
 897                .enter = NULL }
 898};
 899
 900/**
 901 * intel_idle
 902 * @dev: cpuidle_device
 903 * @drv: cpuidle driver
 904 * @index: index of cpuidle state
 905 *
 906 * Must be called under local_irq_disable().
 907 */
 908static __cpuidle int intel_idle(struct cpuidle_device *dev,
 909                                struct cpuidle_driver *drv, int index)
 910{
 911        unsigned long ecx = 1; /* break on interrupt flag */
 912        struct cpuidle_state *state = &drv->states[index];
 913        unsigned long eax = flg2MWAIT(state->flags);
 914        unsigned int cstate;
 915        int cpu = smp_processor_id();
 916
 917        cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
 918
 919        /*
 920         * leave_mm() to avoid costly and often unnecessary wakeups
 921         * for flushing the user TLB's associated with the active mm.
 922         */
 923        if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
 924                leave_mm(cpu);
 925
 926        if (!(lapic_timer_reliable_states & (1 << (cstate))))
 927                tick_broadcast_enter();
 928
 929        mwait_idle_with_hints(eax, ecx);
 930
 931        if (!(lapic_timer_reliable_states & (1 << (cstate))))
 932                tick_broadcast_exit();
 933
 934        return index;
 935}
 936
 937/**
 938 * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle
 939 * @dev: cpuidle_device
 940 * @drv: cpuidle driver
 941 * @index: state index
 942 */
 943static void intel_idle_freeze(struct cpuidle_device *dev,
 944                             struct cpuidle_driver *drv, int index)
 945{
 946        unsigned long ecx = 1; /* break on interrupt flag */
 947        unsigned long eax = flg2MWAIT(drv->states[index].flags);
 948
 949        mwait_idle_with_hints(eax, ecx);
 950}
 951
 952static void __setup_broadcast_timer(bool on)
 953{
 954        if (on)
 955                tick_broadcast_enable();
 956        else
 957                tick_broadcast_disable();
 958}
 959
 960static void auto_demotion_disable(void)
 961{
 962        unsigned long long msr_bits;
 963
 964        rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 965        msr_bits &= ~(icpu->auto_demotion_disable_flags);
 966        wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 967}
 968static void c1e_promotion_disable(void)
 969{
 970        unsigned long long msr_bits;
 971
 972        rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
 973        msr_bits &= ~0x2;
 974        wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
 975}
 976
 977static const struct idle_cpu idle_cpu_nehalem = {
 978        .state_table = nehalem_cstates,
 979        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
 980        .disable_promotion_to_c1e = true,
 981};
 982
 983static const struct idle_cpu idle_cpu_atom = {
 984        .state_table = atom_cstates,
 985};
 986
 987static const struct idle_cpu idle_cpu_tangier = {
 988        .state_table = tangier_cstates,
 989};
 990
 991static const struct idle_cpu idle_cpu_lincroft = {
 992        .state_table = atom_cstates,
 993        .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
 994};
 995
 996static const struct idle_cpu idle_cpu_snb = {
 997        .state_table = snb_cstates,
 998        .disable_promotion_to_c1e = true,
 999};
1000
1001static const struct idle_cpu idle_cpu_byt = {
1002        .state_table = byt_cstates,
1003        .disable_promotion_to_c1e = true,
1004        .byt_auto_demotion_disable_flag = true,
1005};
1006
1007static const struct idle_cpu idle_cpu_cht = {
1008        .state_table = cht_cstates,
1009        .disable_promotion_to_c1e = true,
1010        .byt_auto_demotion_disable_flag = true,
1011};
1012
1013static const struct idle_cpu idle_cpu_ivb = {
1014        .state_table = ivb_cstates,
1015        .disable_promotion_to_c1e = true,
1016};
1017
1018static const struct idle_cpu idle_cpu_ivt = {
1019        .state_table = ivt_cstates,
1020        .disable_promotion_to_c1e = true,
1021};
1022
1023static const struct idle_cpu idle_cpu_hsw = {
1024        .state_table = hsw_cstates,
1025        .disable_promotion_to_c1e = true,
1026};
1027
1028static const struct idle_cpu idle_cpu_bdw = {
1029        .state_table = bdw_cstates,
1030        .disable_promotion_to_c1e = true,
1031};
1032
1033static const struct idle_cpu idle_cpu_skl = {
1034        .state_table = skl_cstates,
1035        .disable_promotion_to_c1e = true,
1036};
1037
1038static const struct idle_cpu idle_cpu_skx = {
1039        .state_table = skx_cstates,
1040        .disable_promotion_to_c1e = true,
1041};
1042
1043static const struct idle_cpu idle_cpu_avn = {
1044        .state_table = avn_cstates,
1045        .disable_promotion_to_c1e = true,
1046};
1047
1048static const struct idle_cpu idle_cpu_knl = {
1049        .state_table = knl_cstates,
1050};
1051
1052static const struct idle_cpu idle_cpu_bxt = {
1053        .state_table = bxt_cstates,
1054        .disable_promotion_to_c1e = true,
1055};
1056
1057static const struct idle_cpu idle_cpu_dnv = {
1058        .state_table = dnv_cstates,
1059        .disable_promotion_to_c1e = true,
1060};
1061
1062#define ICPU(model, cpu) \
1063        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
1064
1065static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1066        ICPU(INTEL_FAM6_NEHALEM_EP,             idle_cpu_nehalem),
1067        ICPU(INTEL_FAM6_NEHALEM,                idle_cpu_nehalem),
1068        ICPU(INTEL_FAM6_NEHALEM_G,              idle_cpu_nehalem),
1069        ICPU(INTEL_FAM6_WESTMERE,               idle_cpu_nehalem),
1070        ICPU(INTEL_FAM6_WESTMERE_EP,            idle_cpu_nehalem),
1071        ICPU(INTEL_FAM6_NEHALEM_EX,             idle_cpu_nehalem),
1072        ICPU(INTEL_FAM6_ATOM_PINEVIEW,          idle_cpu_atom),
1073        ICPU(INTEL_FAM6_ATOM_LINCROFT,          idle_cpu_lincroft),
1074        ICPU(INTEL_FAM6_WESTMERE_EX,            idle_cpu_nehalem),
1075        ICPU(INTEL_FAM6_SANDYBRIDGE,            idle_cpu_snb),
1076        ICPU(INTEL_FAM6_SANDYBRIDGE_X,          idle_cpu_snb),
1077        ICPU(INTEL_FAM6_ATOM_CEDARVIEW,         idle_cpu_atom),
1078        ICPU(INTEL_FAM6_ATOM_SILVERMONT1,       idle_cpu_byt),
1079        ICPU(INTEL_FAM6_ATOM_MERRIFIELD,        idle_cpu_tangier),
1080        ICPU(INTEL_FAM6_ATOM_AIRMONT,           idle_cpu_cht),
1081        ICPU(INTEL_FAM6_IVYBRIDGE,              idle_cpu_ivb),
1082        ICPU(INTEL_FAM6_IVYBRIDGE_X,            idle_cpu_ivt),
1083        ICPU(INTEL_FAM6_HASWELL_CORE,           idle_cpu_hsw),
1084        ICPU(INTEL_FAM6_HASWELL_X,              idle_cpu_hsw),
1085        ICPU(INTEL_FAM6_HASWELL_ULT,            idle_cpu_hsw),
1086        ICPU(INTEL_FAM6_HASWELL_GT3E,           idle_cpu_hsw),
1087        ICPU(INTEL_FAM6_ATOM_SILVERMONT2,       idle_cpu_avn),
1088        ICPU(INTEL_FAM6_BROADWELL_CORE,         idle_cpu_bdw),
1089        ICPU(INTEL_FAM6_BROADWELL_GT3E,         idle_cpu_bdw),
1090        ICPU(INTEL_FAM6_BROADWELL_X,            idle_cpu_bdw),
1091        ICPU(INTEL_FAM6_BROADWELL_XEON_D,       idle_cpu_bdw),
1092        ICPU(INTEL_FAM6_SKYLAKE_MOBILE,         idle_cpu_skl),
1093        ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        idle_cpu_skl),
1094        ICPU(INTEL_FAM6_KABYLAKE_MOBILE,        idle_cpu_skl),
1095        ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,       idle_cpu_skl),
1096        ICPU(INTEL_FAM6_SKYLAKE_X,              idle_cpu_skx),
1097        ICPU(INTEL_FAM6_XEON_PHI_KNL,           idle_cpu_knl),
1098        ICPU(INTEL_FAM6_XEON_PHI_KNM,           idle_cpu_knl),
1099        ICPU(INTEL_FAM6_ATOM_GOLDMONT,          idle_cpu_bxt),
1100        ICPU(INTEL_FAM6_ATOM_DENVERTON,         idle_cpu_dnv),
1101        {}
1102};
1103
1104/*
1105 * intel_idle_probe()
1106 */
1107static int __init intel_idle_probe(void)
1108{
1109        unsigned int eax, ebx, ecx;
1110        const struct x86_cpu_id *id;
1111
1112        if (max_cstate == 0) {
1113                pr_debug(PREFIX "disabled\n");
1114                return -EPERM;
1115        }
1116
1117        id = x86_match_cpu(intel_idle_ids);
1118        if (!id) {
1119                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1120                    boot_cpu_data.x86 == 6)
1121                        pr_debug(PREFIX "does not run on family %d model %d\n",
1122                                boot_cpu_data.x86, boot_cpu_data.x86_model);
1123                return -ENODEV;
1124        }
1125
1126        if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1127                return -ENODEV;
1128
1129        cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1130
1131        if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1132            !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1133            !mwait_substates)
1134                        return -ENODEV;
1135
1136        pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
1137
1138        icpu = (const struct idle_cpu *)id->driver_data;
1139        cpuidle_state_table = icpu->state_table;
1140
1141        pr_debug(PREFIX "v" INTEL_IDLE_VERSION
1142                " model 0x%X\n", boot_cpu_data.x86_model);
1143
1144        return 0;
1145}
1146
1147/*
1148 * intel_idle_cpuidle_devices_uninit()
1149 * Unregisters the cpuidle devices.
1150 */
1151static void intel_idle_cpuidle_devices_uninit(void)
1152{
1153        int i;
1154        struct cpuidle_device *dev;
1155
1156        for_each_online_cpu(i) {
1157                dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1158                cpuidle_unregister_device(dev);
1159        }
1160}
1161
1162/*
1163 * ivt_idle_state_table_update(void)
1164 *
1165 * Tune IVT multi-socket targets
1166 * Assumption: num_sockets == (max_package_num + 1)
1167 */
1168static void ivt_idle_state_table_update(void)
1169{
1170        /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1171        int cpu, package_num, num_sockets = 1;
1172
1173        for_each_online_cpu(cpu) {
1174                package_num = topology_physical_package_id(cpu);
1175                if (package_num + 1 > num_sockets) {
1176                        num_sockets = package_num + 1;
1177
1178                        if (num_sockets > 4) {
1179                                cpuidle_state_table = ivt_cstates_8s;
1180                                return;
1181                        }
1182                }
1183        }
1184
1185        if (num_sockets > 2)
1186                cpuidle_state_table = ivt_cstates_4s;
1187
1188        /* else, 1 and 2 socket systems use default ivt_cstates */
1189}
1190
/*
 * Nanoseconds per count for each value of the 3-bit unit field
 * (bits 12:10) of an IRTL (Interrupt Response Time Limit) MSR.
 * The last two encodings map to 0, so they translate to 0 usec.
 * Read-only lookup table, hence const.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 *
 * @irtl: raw MSR value; bits 9:0 hold the count, bits 12:10 select the
 *        unit from irtl_ns_units[].
 *
 * Returns count * unit converted from nsec to usec (truncating), or 0
 * when @irtl is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1209/*
1210 * bxt_idle_state_table_update(void)
1211 *
1212 * On BXT, we trust the IRTL to show the definitive maximum latency
1213 * We use the same value for target_residency.
1214 */
1215static void bxt_idle_state_table_update(void)
1216{
1217        unsigned long long msr;
1218        unsigned int usec;
1219
1220        rdmsrl(MSR_PKGC6_IRTL, msr);
1221        usec = irtl_2_usec(msr);
1222        if (usec) {
1223                bxt_cstates[2].exit_latency = usec;
1224                bxt_cstates[2].target_residency = usec;
1225        }
1226
1227        rdmsrl(MSR_PKGC7_IRTL, msr);
1228        usec = irtl_2_usec(msr);
1229        if (usec) {
1230                bxt_cstates[3].exit_latency = usec;
1231                bxt_cstates[3].target_residency = usec;
1232        }
1233
1234        rdmsrl(MSR_PKGC8_IRTL, msr);
1235        usec = irtl_2_usec(msr);
1236        if (usec) {
1237                bxt_cstates[4].exit_latency = usec;
1238                bxt_cstates[4].target_residency = usec;
1239        }
1240
1241        rdmsrl(MSR_PKGC9_IRTL, msr);
1242        usec = irtl_2_usec(msr);
1243        if (usec) {
1244                bxt_cstates[5].exit_latency = usec;
1245                bxt_cstates[5].target_residency = usec;
1246        }
1247
1248        rdmsrl(MSR_PKGC10_IRTL, msr);
1249        usec = irtl_2_usec(msr);
1250        if (usec) {
1251                bxt_cstates[6].exit_latency = usec;
1252                bxt_cstates[6].target_residency = usec;
1253        }
1254
1255}
1256/*
1257 * sklh_idle_state_table_update(void)
1258 *
1259 * On SKL-H (model 0x5e) disable C8 and C9 if:
1260 * C10 is enabled and SGX disabled
1261 */
1262static void sklh_idle_state_table_update(void)
1263{
1264        unsigned long long msr;
1265        unsigned int eax, ebx, ecx, edx;
1266
1267
1268        /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1269        if (max_cstate <= 7)
1270                return;
1271
1272        /* if PC10 not present in CPUID.MWAIT.EDX */
1273        if ((mwait_substates & (0xF << 28)) == 0)
1274                return;
1275
1276        rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1277
1278        /* PC10 is not enabled in PKG C-state limit */
1279        if ((msr & 0xF) != 8)
1280                return;
1281
1282        ecx = 0;
1283        cpuid(7, &eax, &ebx, &ecx, &edx);
1284
1285        /* if SGX is present */
1286        if (ebx & (1 << 2)) {
1287
1288                rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1289
1290                /* if SGX is enabled */
1291                if (msr & (1 << 18))
1292                        return;
1293        }
1294
1295        skl_cstates[5].disabled = 1;    /* C8-SKL */
1296        skl_cstates[6].disabled = 1;    /* C9-SKL */
1297}
1298/*
1299 * intel_idle_state_table_update()
1300 *
1301 * Update the default state_table for this CPU-id
1302 */
1303
1304static void intel_idle_state_table_update(void)
1305{
1306        switch (boot_cpu_data.x86_model) {
1307
1308        case INTEL_FAM6_IVYBRIDGE_X:
1309                ivt_idle_state_table_update();
1310                break;
1311        case INTEL_FAM6_ATOM_GOLDMONT:
1312                bxt_idle_state_table_update();
1313                break;
1314        case INTEL_FAM6_SKYLAKE_DESKTOP:
1315                sklh_idle_state_table_update();
1316                break;
1317        }
1318}
1319
1320/*
1321 * intel_idle_cpuidle_driver_init()
1322 * allocate, initialize cpuidle_states
1323 */
1324static void __init intel_idle_cpuidle_driver_init(void)
1325{
1326        int cstate;
1327        struct cpuidle_driver *drv = &intel_idle_driver;
1328
1329        intel_idle_state_table_update();
1330
1331        drv->state_count = 1;
1332
1333        for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1334                int num_substates, mwait_hint, mwait_cstate;
1335
1336                if ((cpuidle_state_table[cstate].enter == NULL) &&
1337                    (cpuidle_state_table[cstate].enter_freeze == NULL))
1338                        break;
1339
1340                if (cstate + 1 > max_cstate) {
1341                        printk(PREFIX "max_cstate %d reached\n",
1342                                max_cstate);
1343                        break;
1344                }
1345
1346                mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1347                mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1348
1349                /* number of sub-states for this state in CPUID.MWAIT */
1350                num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1351                                        & MWAIT_SUBSTATE_MASK;
1352
1353                /* if NO sub-states for this state in CPUID, skip it */
1354                if (num_substates == 0)
1355                        continue;
1356
1357                /* if state marked as disabled, skip it */
1358                if (cpuidle_state_table[cstate].disabled != 0) {
1359                        pr_debug(PREFIX "state %s is disabled",
1360                                cpuidle_state_table[cstate].name);
1361                        continue;
1362                }
1363
1364
1365                if (((mwait_cstate + 1) > 2) &&
1366                        !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1367                        mark_tsc_unstable("TSC halts in idle"
1368                                        " states deeper than C2");
1369
1370                drv->states[drv->state_count] = /* structure copy */
1371                        cpuidle_state_table[cstate];
1372
1373                drv->state_count += 1;
1374        }
1375
1376        if (icpu->byt_auto_demotion_disable_flag) {
1377                wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1378                wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1379        }
1380}
1381
1382
1383/*
1384 * intel_idle_cpu_init()
1385 * allocate, initialize, register cpuidle_devices
1386 * @cpu: cpu/core to initialize
1387 */
1388static int intel_idle_cpu_init(unsigned int cpu)
1389{
1390        struct cpuidle_device *dev;
1391
1392        dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1393        dev->cpu = cpu;
1394
1395        if (cpuidle_register_device(dev)) {
1396                pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
1397                return -EIO;
1398        }
1399
1400        if (icpu->auto_demotion_disable_flags)
1401                auto_demotion_disable();
1402
1403        if (icpu->disable_promotion_to_c1e)
1404                c1e_promotion_disable();
1405
1406        return 0;
1407}
1408
1409static int intel_idle_cpu_online(unsigned int cpu)
1410{
1411        struct cpuidle_device *dev;
1412
1413        if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1414                __setup_broadcast_timer(true);
1415
1416        /*
1417         * Some systems can hotplug a cpu at runtime after
1418         * the kernel has booted, we have to initialize the
1419         * driver in this case
1420         */
1421        dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1422        if (!dev->registered)
1423                return intel_idle_cpu_init(cpu);
1424
1425        return 0;
1426}
1427
1428static int __init intel_idle_init(void)
1429{
1430        int retval;
1431
1432        /* Do not load intel_idle at all for now if idle= is passed */
1433        if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1434                return -ENODEV;
1435
1436        retval = intel_idle_probe();
1437        if (retval)
1438                return retval;
1439
1440        intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1441        if (intel_idle_cpuidle_devices == NULL)
1442                return -ENOMEM;
1443
1444        intel_idle_cpuidle_driver_init();
1445        retval = cpuidle_register_driver(&intel_idle_driver);
1446        if (retval) {
1447                struct cpuidle_driver *drv = cpuidle_get_driver();
1448                printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
1449                        drv ? drv->name : "none");
1450                goto init_driver_fail;
1451        }
1452
1453        if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1454                lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1455
1456        retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1457                                   intel_idle_cpu_online, NULL);
1458        if (retval < 0)
1459                goto hp_setup_fail;
1460
1461        pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
1462                lapic_timer_reliable_states);
1463
1464        return 0;
1465
1466hp_setup_fail:
1467        intel_idle_cpuidle_devices_uninit();
1468        cpuidle_unregister_driver(&intel_idle_driver);
1469init_driver_fail:
1470        free_percpu(intel_idle_cpuidle_devices);
1471        return retval;
1472
1473}
1474device_initcall(intel_idle_init);
1475
1476/*
1477 * We are not really modular, but we used to support that.  Meaning we also
1478 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1479 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1480 * is the easiest way (currently) to continue doing that.
1481 */
1482module_param(max_cstate, int, 0444);
1483