linux/drivers/acpi/processor_idle.c
/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *                      - Added processor hotplug support
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *                      - Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>        /* need_resched() */
#include <linux/pm_qos_params.h>
#include <linux/clockchips.h>
#include <linux/cpuidle.h>
#include <linux/irqflags.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>
#include <asm/processor.h>

#define PREFIX "ACPI: "

#define ACPI_PROCESSOR_CLASS            "processor"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER       "power"
#define PM_TIMER_TICK_NS                (1000000000ULL/PM_TIMER_FREQUENCY)
#define C2_OVERHEAD                     1       /* 1us */
#define C3_OVERHEAD                     1       /* 1us */
#define PM_TIMER_TICKS_TO_US(p)         (((p) * 1000)/(PM_TIMER_FREQUENCY/1000))

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

static unsigned int latency_factor __read_mostly = 2;
module_param(latency_factor, uint, 0644);
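
/*
 * latency_factor feeds cpuidle's state setup below: a C-state's
 * target_residency is set to (exit latency * latency_factor), so with the
 * default factor of 2 a state with 100us exit latency is only picked for
 * expected idle periods of at least 200us.
 */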

static s64 us_to_pm_timer_ticks(s64 t)
{
        return div64_u64(t * PM_TIMER_FREQUENCY, 1000000);
}
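
/*
 * Worked example for the conversions above, taking PM_TIMER_FREQUENCY to be
 * the 3.579545 MHz ACPI PM timer rate (3579545 Hz):
 *
 *   us_to_pm_timer_ticks(100) = 100 * 3579545 / 1000000 ~= 357 ticks
 *   PM_TIMER_TICKS_TO_US(357) = 357 * 1000 / 3579       ~= 99 us
 *   PM_TIMER_TICK_NS          = 1000000000 / 3579545    ~= 279 ns
 */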
/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(const struct dmi_system_id *id)
{
        if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
                return 0;

        printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
               " Override with \"processor.max_cstate=%d\"\n", id->ident,
               (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

        max_cstate = (long)id->driver_data;

        return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
        { set_max_cstate, "Clevo 5600D", {
          DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
          DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
         (void *)2},
        {},
};


/*
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched()) {
                safe_halt();
                local_irq_disable();
        }
        current_thread_info()->status |= TS_POLLING;
}
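
/*
 * A note on the ordering above: on x86, safe_halt() executes "sti; hlt",
 * re-enabling interrupts and halting in one uninterruptible sequence, so a
 * wakeup interrupt arriving after the need_resched() check cannot be lost
 * between the test and the halt.
 */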

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cx)
{
        struct acpi_processor_power *pwr = &pr->power;
        u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

        if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
                return;

        if (boot_cpu_has(X86_FEATURE_AMDC1E))
                type = ACPI_STATE_C1;

        /*
         * Check if one of the previous states has already marked the
         * lapic unstable
         */
        if (pwr->timer_broadcast_on_state < state)
                return;

        if (cx->type >= type)
                pr->power.timer_broadcast_on_state = state;
}

static void lapic_timer_propagate_broadcast(void *arg)
{
        struct acpi_processor *pr = (struct acpi_processor *) arg;
        unsigned long reason;

        reason = pr->power.timer_broadcast_on_state < INT_MAX ?
                CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

        clockevents_notify(reason, &pr->id);
}

/* Power(C) State timer broadcast control */
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
{
        int state = cx - pr->power.states;

        if (state >= pr->power.timer_broadcast_on_state) {
                unsigned long reason;

                reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
                        CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
                clockevents_notify(reason, &pr->id);
        }
}
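
/*
 * To summarize the broadcast handling above: lapic_timer_check_state()
 * records at init time the shallowest C-state in which the local APIC
 * timer must be assumed to stop, lapic_timer_propagate_broadcast()
 * switches the CPU to (or away from) the broadcast clockevent device
 * once, and lapic_timer_state_broadcast() signals each individual entry
 * into and exit from such a state so the broadcast framework can take
 * over timer duty while the lapic timer is stopped.
 */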

#else

static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cstate) { }
static void lapic_timer_propagate_broadcast(void *arg) { }
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
{
}

#endif

/*
 * Suspend / resume control
 */
static int acpi_idle_suspend;
static u32 saved_bm_rld;
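
/*
 * BM_RLD is a bit in the chipset's PM1 control register; firmware may
 * clear it across a suspend/resume cycle, so it is captured before
 * suspend and rewritten on resume only if the value changed.
 */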

static void acpi_idle_bm_rld_save(void)
{
        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
}
static void acpi_idle_bm_rld_restore(void)
{
        u32 resumed_bm_rld;

        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);

        if (resumed_bm_rld != saved_bm_rld)
                acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
}

int acpi_processor_suspend(struct acpi_device * device, pm_message_t state)
{
        if (acpi_idle_suspend == 1)
                return 0;

        acpi_idle_bm_rld_save();
        acpi_idle_suspend = 1;
        return 0;
}

int acpi_processor_resume(struct acpi_device * device)
{
        if (acpi_idle_suspend == 0)
                return 0;

        acpi_idle_bm_rld_restore();
        acpi_idle_suspend = 0;
        return 0;
}

#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86)
static void tsc_check_state(int state)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
        case X86_VENDOR_INTEL:
                /*
                 * AMD Fam10h TSC will tick in all
                 * C/P/S0/S1 states when this bit is set.
                 */
                if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        return;

                /*FALL THROUGH*/
        default:
                /* TSC could halt in idle, so notify users */
                if (state > ACPI_STATE_C1)
                        mark_tsc_unstable("TSC halts in idle");
        }
}
#else
static void tsc_check_state(int state) { return; }
#endif

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

        if (!pr)
                return -EINVAL;

        if (!pr->pblk)
                return -ENODEV;

        /* if info is obtained from pblk/fadt, type equals state */
        pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
        pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
        /*
         * Check for P_LVL2_UP flag before entering C2 and above on
         * an SMP system.
         */
        if ((num_online_cpus() > 1) &&
            !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                return -ENODEV;
#endif

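        /*
         * Per the ACPI spec, the processor block (P_BLK) starts with the
         * four-byte P_CNT register, followed by the one-byte P_LVL2 and
         * P_LVL3 command registers at offsets 4 and 5 -- hence the
         * pblk + 4 / pblk + 5 arithmetic below.
         */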
        /* determine C2 and C3 address from pblk */
        pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
        pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

        /* determine latencies from FADT */
        pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
        pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                          "lvl2[0x%08x] lvl3[0x%08x]\n",
                          pr->power.states[ACPI_STATE_C2].address,
                          pr->power.states[ACPI_STATE_C3].address));

        return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
        if (!pr->power.states[ACPI_STATE_C1].valid) {
                /* set the first C-State to C1 */
                /* all processors need to support C1 */
                pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
                pr->power.states[ACPI_STATE_C1].valid = 1;
                pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
        }
        /* the C0 state only exists as a filler in our array */
        pr->power.states[ACPI_STATE_C0].valid = 1;
        return 0;
}
static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
        acpi_status status = 0;
        acpi_integer count;
        int current_count;
        int i;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *cst;


        if (nocst)
                return -ENODEV;

        current_count = 0;

        status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
        if (ACPI_FAILURE(status)) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
                return -ENODEV;
        }
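
        /*
         * For reference, _CST returns a package of the form:
         *
         *   Package { Count, Package { Register, Type, Latency, Power }, ... }
         *
         * where element 0 gives the number of C-state sub-packages and each
         * sub-package describes one C-state: a Register buffer naming the
         * entry method, an integer type (1 = C1, 2 = C2, 3 = C3), the
         * worst-case entry latency in microseconds, and the typical power
         * consumption in milliwatts.  The loop below parses exactly this
         * layout.
         */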

        cst = buffer.pointer;

        /* There must be at least 2 elements */
        if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
                printk(KERN_ERR PREFIX "not enough elements in _CST\n");
                status = -EFAULT;
                goto end;
        }

        count = cst->package.elements[0].integer.value;

        /* Validate number of power states. */
        if (count < 1 || count != cst->package.count - 1) {
                printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
                status = -EFAULT;
                goto end;
        }

        /* Tell driver that at least _CST is supported. */
        pr->flags.has_cst = 1;

        for (i = 1; i <= count; i++) {
                union acpi_object *element;
                union acpi_object *obj;
                struct acpi_power_register *reg;
                struct acpi_processor_cx cx;

                memset(&cx, 0, sizeof(cx));

                element = &(cst->package.elements[i]);
                if (element->type != ACPI_TYPE_PACKAGE)
                        continue;

                if (element->package.count != 4)
                        continue;

                obj = &(element->package.elements[0]);

                if (obj->type != ACPI_TYPE_BUFFER)
                        continue;

                reg = (struct acpi_power_register *)obj->buffer.pointer;

                if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
                    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
                        continue;

                /* There should be an easy way to extract an integer... */
                obj = &(element->package.elements[1]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.type = obj->integer.value;
                /*
                 * Some buggy BIOSes won't list C1 in _CST -
                 * Let acpi_processor_get_power_info_default() handle them later
                 */
                if (i == 1 && cx.type != ACPI_STATE_C1)
                        current_count++;

                cx.address = reg->address;
                cx.index = current_count + 1;

                cx.entry_method = ACPI_CSTATE_SYSTEMIO;
                if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
                        if (acpi_processor_ffh_cstate_probe
                                        (pr->id, &cx, reg) == 0) {
                                cx.entry_method = ACPI_CSTATE_FFH;
                        } else if (cx.type == ACPI_STATE_C1) {
                                /*
                                 * C1 is a special case where FIXED_HARDWARE
                                 * can be handled in a non-MWAIT way as well.
                                 * In that case, save this _CST entry info.
                                 * Otherwise, ignore this info and continue.
                                 */
                                cx.entry_method = ACPI_CSTATE_HALT;
                                snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
                        } else {
                                continue;
                        }
                        if (cx.type == ACPI_STATE_C1 &&
                                        (idle_halt || idle_nomwait)) {
                                /*
                                 * In most cases the C1 space_id obtained from
                                 * the _CST object is FIXED_HARDWARE access
                                 * mode.  But with the "idle=halt" option, the
                                 * entry_method should be changed from
                                 * CSTATE_FFH to CSTATE_HALT; with
                                 * "idle=nomwait", the C1 entry_method should
                                 * likewise be CSTATE_HALT.
                                 */
                                cx.entry_method = ACPI_CSTATE_HALT;
                                snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
                        }
                } else {
                        snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
                                 cx.address);
                }

                if (cx.type == ACPI_STATE_C1) {
                        cx.valid = 1;
                }

                obj = &(element->package.elements[2]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.latency = obj->integer.value;

                obj = &(element->package.elements[3]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.power = obj->integer.value;

                current_count++;
                memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

                /*
                 * We support a total of ACPI_PROCESSOR_MAX_POWER - 1 states
                 * (indices 1 through ACPI_PROCESSOR_MAX_POWER - 1)
                 */
                if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
                        printk(KERN_WARNING
                               "Limiting number of power states to max (%d)\n",
                               ACPI_PROCESSOR_MAX_POWER);
                        printk(KERN_WARNING
                               "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
                        break;
                }
        }

        ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
                          current_count));

        /* Validate number of power states discovered */
        if (current_count < 2)
                status = -EFAULT;

      end:
        kfree(buffer.pointer);

        return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

        if (!cx->address)
                return;

        /*
         * C2 latency must be less than or equal to 100
         * microseconds.
         */
        else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "latency too large [%d]\n", cx->latency));
                return;
        }

        /*
         * Otherwise we've met all of our C2 requirements.
         * Normalize the C2 latency to expedite policy
         */
        cx->valid = 1;

        cx->latency_ticks = cx->latency;

        return;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
                                           struct acpi_processor_cx *cx)
{
        static int bm_check_flag = -1;
        static int bm_control_flag = -1;


        if (!cx->address)
                return;

        /*
         * C3 latency must be less than or equal to 1000
         * microseconds.
         */
        else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "latency too large [%d]\n", cx->latency));
                return;
        }

        /*
         * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
         * DMA transfers are used by any ISA device to avoid livelock.
         * Note that we could disable Type-F DMA (as recommended by
         * the erratum), but this is known to disrupt certain ISA
         * devices, thus we take the conservative approach.
         */
        else if (errata.piix4.fdma) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "C3 not supported on PIIX4 with Type-F DMA\n"));
                return;
        }

        /* All the logic here assumes flags.bm_check is the same across all CPUs */
        if (bm_check_flag == -1) {
                /* Determine whether bm_check is needed based on CPU  */
                acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
                bm_check_flag = pr->flags.bm_check;
                bm_control_flag = pr->flags.bm_control;
        } else {
                pr->flags.bm_check = bm_check_flag;
                pr->flags.bm_control = bm_control_flag;
        }

        if (pr->flags.bm_check) {
                if (!pr->flags.bm_control) {
                        if (pr->flags.has_cst != 1) {
                                /* bus mastering control is necessary */
                                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                        "C3 support requires BM control\n"));
                                return;
                        } else {
                                /* Here we enter C3 without bus mastering */
                                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                        "C3 support without BM control\n"));
                        }
                }
        } else {
                /*
                 * WBINVD should be set in the FADT for the C3 state to be
                 * supported when bm_check is not required.
                 */
                if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
                        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                          "Cache invalidation should work properly"
                                          " for C3 to be enabled on SMP systems\n"));
                        return;
                }
        }

        /*
         * Otherwise we've met all of our C3 requirements.
         * Normalize the C3 latency to expedite policy.  Enable
         * checking of bus mastering status (bm_check) so we can
         * use this in our C3 policy
         */
        cx->valid = 1;

        cx->latency_ticks = cx->latency;
        /*
         * On older chipsets, BM_RLD needs to be set
         * in order for Bus Master activity to wake the
         * system from C3.  Newer chipsets handle DMA
         * during C3 automatically and BM_RLD is a NOP.
         * In either case, the proper way to
         * handle BM_RLD is to set it and leave it set.
         */
        acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);

        return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
        unsigned int i;
        unsigned int working = 0;

        pr->power.timer_broadcast_on_state = INT_MAX;

        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
                struct acpi_processor_cx *cx = &pr->power.states[i];

                switch (cx->type) {
                case ACPI_STATE_C1:
                        cx->valid = 1;
                        break;

                case ACPI_STATE_C2:
                        acpi_processor_power_verify_c2(cx);
                        break;

                case ACPI_STATE_C3:
                        acpi_processor_power_verify_c3(pr, cx);
                        break;
                }
                if (!cx->valid)
                        continue;

                lapic_timer_check_state(i, pr, cx);
                tsc_check_state(cx->type);
                working++;
        }

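        /*
         * Presumably the broadcast on/off switch has to be performed on the
         * CPU whose lapic timer is affected, so the propagation is pushed to
         * that CPU via smp_call_function_single() rather than invoked
         * directly.
         */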
        smp_call_function_single(pr->id, lapic_timer_propagate_broadcast,
                                 pr, 1);

        return (working);
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
        unsigned int i;
        int result;


        /* NOTE: the idle thread may not be running while calling
         * this function */

        /* Zero initialize all the C-states info. */
        memset(pr->power.states, 0, sizeof(pr->power.states));

        result = acpi_processor_get_power_info_cst(pr);
        if (result == -ENODEV)
                result = acpi_processor_get_power_info_fadt(pr);

        if (result)
                return result;

        acpi_processor_get_power_info_default(pr);

        pr->power.count = acpi_processor_power_verify(pr);

        /*
         * if one state of type C2 or C3 is available, mark this
         * CPU as being "idle manageable"
         */
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                if (pr->power.states[i].valid) {
                        pr->power.count = i;
                        if (pr->power.states[i].type >= ACPI_STATE_C2)
                                pr->flags.power = 1;
                }
        }

        return 0;
}

#ifdef CONFIG_ACPI_PROCFS
static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
        struct acpi_processor *pr = seq->private;
        unsigned int i;


        if (!pr)
                goto end;

        seq_printf(seq, "active state:            C%zd\n"
                   "max_cstate:              C%d\n"
                   "maximum allowed latency: %d usec\n",
                   pr->power.state ? pr->power.state - pr->power.states : 0,
                   max_cstate, pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));

        seq_puts(seq, "states:\n");

        for (i = 1; i <= pr->power.count; i++) {
                seq_printf(seq, "   %cC%d:                  ",
                           (&pr->power.states[i] ==
                            pr->power.state ? '*' : ' '), i);

                if (!pr->power.states[i].valid) {
                        seq_puts(seq, "<not supported>\n");
                        continue;
                }

                switch (pr->power.states[i].type) {
                case ACPI_STATE_C1:
                        seq_printf(seq, "type[C1] ");
                        break;
                case ACPI_STATE_C2:
                        seq_printf(seq, "type[C2] ");
                        break;
                case ACPI_STATE_C3:
                        seq_printf(seq, "type[C3] ");
                        break;
                default:
                        seq_printf(seq, "type[--] ");
                        break;
                }

                if (pr->power.states[i].promotion.state)
                        seq_printf(seq, "promotion[C%zd] ",
                                   (pr->power.states[i].promotion.state -
                                    pr->power.states));
                else
                        seq_puts(seq, "promotion[--] ");

                if (pr->power.states[i].demotion.state)
                        seq_printf(seq, "demotion[C%zd] ",
                                   (pr->power.states[i].demotion.state -
                                    pr->power.states));
                else
                        seq_puts(seq, "demotion[--] ");

                seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
                           pr->power.states[i].latency,
                           pr->power.states[i].usage,
                           (unsigned long long)pr->power.states[i].time);
        }

      end:
        return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
        return single_open(file, acpi_processor_power_seq_show,
                           PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
        .owner = THIS_MODULE,
        .open = acpi_processor_power_open_fs,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};
#endif

/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
        u32 bm_status = 0;

        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
        if (bm_status)
                acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
        /*
         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
         * the true state of bus mastering activity, forcing us to
         * manually check the BMIDEA bit of each IDE channel.
         */
        else if (errata.piix4.bmisx) {
                if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
                    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
                        bm_status = 1;
        }
        return bm_status;
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 *
 * Caller disables interrupts before the call and enables them after return.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
        /* Don't trace irqs off for idle */
        stop_critical_timings();
        if (cx->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cx);
        } else if (cx->entry_method == ACPI_CSTATE_HALT) {
                acpi_safe_halt();
        } else {
                int unused;
                /* IO port based C-state */
                inb(cx->address);
                /* Dummy wait op - must do something useless after P_LVL2 read
                   because chipsets cannot guarantee that STPCLK# signal
                   gets asserted in time to freeze execution properly. */
                unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
        }
        start_critical_timings();
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
 * @state: the state data
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
                              struct cpuidle_state *state)
{
        ktime_t kt1, kt2;
        s64 idle_time;
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);

        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        local_irq_disable();

        /* Do not access any ACPI IO ports in suspend path */
        if (acpi_idle_suspend) {
                local_irq_enable();
                cpu_relax();
                return 0;
        }

        lapic_timer_state_broadcast(pr, cx, 1);
        kt1 = ktime_get_real();
        acpi_idle_do_entry(cx);
        kt2 = ktime_get_real();
        idle_time = ktime_to_us(ktime_sub(kt2, kt1));

        local_irq_enable();
        cx->usage++;
        lapic_timer_state_broadcast(pr, cx, 0);

        return idle_time;
}

/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                                  struct cpuidle_state *state)
{
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        ktime_t kt1, kt2;
        s64 idle_time;
        s64 sleep_ticks = 0;

        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        if (acpi_idle_suspend)
                return acpi_idle_enter_c1(dev, state);

        local_irq_disable();
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we test
         * NEED_RESCHED:
         */
        smp_mb();

        if (unlikely(need_resched())) {
                current_thread_info()->status |= TS_POLLING;
                local_irq_enable();
                return 0;
        }

        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
        lapic_timer_state_broadcast(pr, cx, 1);

        if (cx->type == ACPI_STATE_C3)
                ACPI_FLUSH_CPU_CACHE();

        kt1 = ktime_get_real();
        /* Tell the scheduler that we are going deep-idle: */
        sched_clock_idle_sleep_event();
        acpi_idle_do_entry(cx);
        kt2 = ktime_get_real();
        idle_time = ktime_to_us(ktime_sub(kt2, kt1));

        sleep_ticks = us_to_pm_timer_ticks(idle_time);

        /* Tell the scheduler how much we idled: */
        sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;

        cx->usage++;

        lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += sleep_ticks;
        return idle_time;
}
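
/*
 * c3_cpu_count tracks how many CPUs are currently in C3 so that bus
 * master arbitration is only disabled (ARB_DIS set) once the last online
 * CPU enters C3, and is re-enabled as soon as any CPU leaves it;
 * c3_lock serializes the count and the ARB_DIS register accesses.
 */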
static int c3_cpu_count;
static DEFINE_SPINLOCK(c3_lock);

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
                              struct cpuidle_state *state)
{
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        ktime_t kt1, kt2;
        s64 idle_time;
        s64 sleep_ticks = 0;


        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        if (acpi_idle_suspend)
                return acpi_idle_enter_c1(dev, state);

        if (acpi_idle_bm_check()) {
                if (dev->safe_state) {
                        dev->last_state = dev->safe_state;
                        return dev->safe_state->enter(dev, dev->safe_state);
                } else {
                        local_irq_disable();
                        acpi_safe_halt();
                        local_irq_enable();
                        return 0;
                }
        }

        local_irq_disable();
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we test
         * NEED_RESCHED:
         */
        smp_mb();

        if (unlikely(need_resched())) {
                current_thread_info()->status |= TS_POLLING;
                local_irq_enable();
                return 0;
        }

        acpi_unlazy_tlb(smp_processor_id());

        /* Tell the scheduler that we are going deep-idle: */
        sched_clock_idle_sleep_event();
        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
        lapic_timer_state_broadcast(pr, cx, 1);

        kt1 = ktime_get_real();
        /*
         * disable bus master
         * bm_check implies we need ARB_DIS
         * !bm_check implies we need cache flush
         * bm_control implies whether we can do ARB_DIS
         *
         * That leaves a case where bm_check is set and bm_control is
         * not set. In that case we cannot do much, we enter C3
         * without doing anything.
         */
        if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
                c3_cpu_count++;
                /* Disable bus master arbitration when all CPUs are in C3 */
                if (c3_cpu_count == num_online_cpus())
                        acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
                spin_unlock(&c3_lock);
        } else if (!pr->flags.bm_check) {
                ACPI_FLUSH_CPU_CACHE();
        }

        acpi_idle_do_entry(cx);

        /* Re-enable bus master arbitration */
        if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
                acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
                c3_cpu_count--;
                spin_unlock(&c3_lock);
        }
        kt2 = ktime_get_real();
        idle_time = ktime_to_us(ktime_sub(kt2, kt1));

        sleep_ticks = us_to_pm_timer_ticks(idle_time);
        /* Tell the scheduler how much we idled: */
        sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;

        cx->usage++;

        lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += sleep_ticks;
        return idle_time;
}

struct cpuidle_driver acpi_idle_driver = {
        .name =         "acpi_idle",
        .owner =        THIS_MODULE,
};

/**
 * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
{
        int i, count = CPUIDLE_DRIVER_STATE_START;
        struct acpi_processor_cx *cx;
        struct cpuidle_state *state;
        struct cpuidle_device *dev = &pr->power.dev;
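
        /*
         * count starts at CPUIDLE_DRIVER_STATE_START rather than 0: on
         * architectures that provide a polling idle loop, cpuidle reserves
         * state slot 0 for it, so ACPI C-states are filled in from slot 1.
         */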

        if (!pr->flags.power_setup_done)
                return -EINVAL;

        if (pr->flags.power == 0) {
                return -EINVAL;
        }

        dev->cpu = pr->id;
        for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
                dev->states[i].name[0] = '\0';
                dev->states[i].desc[0] = '\0';
        }

        if (max_cstate == 0)
                max_cstate = 1;

        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
                cx = &pr->power.states[i];
                state = &dev->states[count];

                if (!cx->valid)
                        continue;

#ifdef CONFIG_HOTPLUG_CPU
                if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
                    !pr->flags.has_cst &&
                    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                        continue;
#endif
                cpuidle_set_statedata(state, cx);

                snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
                strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
                state->exit_latency = cx->latency;
                state->target_residency = cx->latency * latency_factor;
                state->power_usage = cx->power;

                state->flags = 0;
                switch (cx->type) {
                case ACPI_STATE_C1:
                        state->flags |= CPUIDLE_FLAG_SHALLOW;
                        if (cx->entry_method == ACPI_CSTATE_FFH)
                                state->flags |= CPUIDLE_FLAG_TIME_VALID;

                        state->enter = acpi_idle_enter_c1;
                        dev->safe_state = state;
                        break;

                case ACPI_STATE_C2:
                        state->flags |= CPUIDLE_FLAG_BALANCED;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->enter = acpi_idle_enter_simple;
                        dev->safe_state = state;
                        break;

                case ACPI_STATE_C3:
                        state->flags |= CPUIDLE_FLAG_DEEP;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->flags |= CPUIDLE_FLAG_CHECK_BM;
                        state->enter = pr->flags.bm_check ?
                                        acpi_idle_enter_bm :
                                        acpi_idle_enter_simple;
                        break;
                }

                count++;
                if (count == CPUIDLE_STATE_MAX)
                        break;
        }

        dev->state_count = count;

        if (!count)
                return -EINVAL;

        return 0;
}
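
/*
 * Invoked from the processor driver's notify handler when the platform
 * signals that the set of available C-states has changed (e.g. on an
 * AC <-> battery transition): the cpuidle device is paused,
 * re-populated from _CST and re-enabled.
 */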
int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
        int ret = 0;

        if (boot_option_idle_override)
                return 0;

        if (!pr)
                return -EINVAL;

        if (nocst) {
                return -ENODEV;
        }

        if (!pr->flags.power_setup_done)
                return -ENODEV;

        cpuidle_pause_and_lock();
        cpuidle_disable_device(&pr->power.dev);
        acpi_processor_get_power_info(pr);
        if (pr->flags.power) {
                acpi_processor_setup_cpuidle(pr);
                ret = cpuidle_enable_device(&pr->power.dev);
        }
        cpuidle_resume_and_unlock();

        return ret;
}

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
                              struct acpi_device *device)
{
        acpi_status status = 0;
        static int first_run;
#ifdef CONFIG_ACPI_PROCFS
        struct proc_dir_entry *entry = NULL;
#endif

        if (boot_option_idle_override)
                return 0;

        if (!first_run) {
                if (idle_halt) {
                        /*
                         * When the "idle=halt" boot option is used, halt is
                         * used for CPU idle and C2/C3 are meaningless, so
                         * max_cstate is forced to one.
                         */
                        max_cstate = 1;
                }
                dmi_check_system(processor_power_dmi_table);
                max_cstate = acpi_processor_cstate_check(max_cstate);
                if (max_cstate < ACPI_C_STATES_MAX)
                        printk(KERN_NOTICE
                               "ACPI: processor limited to max C-state %d\n",
                               max_cstate);
                first_run++;
        }

        if (!pr)
                return -EINVAL;

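        /*
         * acpi_gbl_FADT.cst_control is the value the OS writes to the FADT
         * SMI command port to tell the firmware that it supports _CST; on
         * some BIOSes the full set of C-states is only exposed after this
         * handshake.
         */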
        if (acpi_gbl_FADT.cst_control && !nocst) {
                status =
                    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
                if (ACPI_FAILURE(status)) {
                        ACPI_EXCEPTION((AE_INFO, status,
                                        "Notifying BIOS of _CST ability failed"));
                }
        }

        acpi_processor_get_power_info(pr);
        pr->flags.power_setup_done = 1;

        /*
         * Install the idle handler if processor power management is supported.
         * Note that the previously set idle handler will be used on
         * platforms that only support C1.
         */
        if (pr->flags.power) {
                acpi_processor_setup_cpuidle(pr);
                if (cpuidle_register_device(&pr->power.dev))
                        return -EIO;
        }
#ifdef CONFIG_ACPI_PROCFS
        /* 'power' [R] */
        entry = proc_create_data(ACPI_PROCESSOR_FILE_POWER,
                                 S_IRUGO, acpi_device_dir(device),
                                 &acpi_processor_power_fops,
                                 acpi_driver_data(device));
        if (!entry)
                return -EIO;
#endif
        return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
                              struct acpi_device *device)
{
        if (boot_option_idle_override)
                return 0;

        cpuidle_unregister_device(&pr->power.dev);
        pr->flags.power_setup_done = 0;

#ifdef CONFIG_ACPI_PROCFS
        if (acpi_device_dir(device))
                remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
                                  acpi_device_dir(device));
#endif

        return 0;
}