linux/drivers/cpuidle/cpuidle.c
/*
 * cpuidle.c - core cpuidle infrastructure
 *
 * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *               Shaohua Li <shaohua.li@intel.com>
 *               Adam Belay <abelay@novell.com>
 *
 * This code is licenced under the GPL.
 */

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/pm_qos_params.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <trace/events/power.h>

#include "cpuidle.h"

DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);

DEFINE_MUTEX(cpuidle_lock);
LIST_HEAD(cpuidle_detected_devices);
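/* the idle handler that was in place before cpuidle installed its own */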
static void (*pm_idle_old)(void);

static int enabled_devices;

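/*
 * cpuidle_kick_cpus - force all CPUs out of whatever idle routine they are
 * currently in, so that a change to pm_idle takes effect.  Architectures
 * that provide cpu_idle_wait() use it here; SMP builds without it fail at
 * compile time, and on UP there is nothing to do.
 */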
#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
static void cpuidle_kick_cpus(void)
{
        cpu_idle_wait();
}
#elif defined(CONFIG_SMP)
# error "Arch needs cpu_idle_wait() equivalent here"
#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT && !CONFIG_SMP */
static void cpuidle_kick_cpus(void) {}
#endif

static int __cpuidle_register_device(struct cpuidle_device *dev);

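/*
 * Overview of the idle entry path below: the device's ->prepare hook runs
 * first, the current governor then selects a state, that state's ->enter
 * callback is invoked, the residency statistics are updated, and finally
 * the governor's ->reflect hook is told how the idle period went.
 */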
/**
 * cpuidle_idle_call - the main idle loop
 *
 * NOTE: no locks or semaphores should be used here
 */
static void cpuidle_idle_call(void)
{
        struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
        struct cpuidle_state *target_state;
        int next_state;

        /* check if the device is ready */
        if (!dev || !dev->enabled) {
                if (pm_idle_old)
                        pm_idle_old();
                else
#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
                        default_idle();
#else
                        local_irq_enable();
#endif
                return;
        }

#if 0
        /* shows regressions, re-enable for 2.6.29 */
        /*
         * run any timers that can be run now, at this point
         * before calculating the idle duration etc.
         */
        hrtimer_peek_ahead_timers();
#endif

        /*
         * Call the device's prepare function before calling the
         * governor's select function.  ->prepare gives the device's
         * cpuidle driver a chance to update any dynamic information
         * of its cpuidle states for the current idle period, e.g.
         * state availability, latencies, residencies, etc.
         */
        if (dev->prepare)
                dev->prepare(dev);

        /* ask the governor for the next state */
        next_state = cpuidle_curr_governor->select(dev);
        if (need_resched()) {
                local_irq_enable();
                return;
        }

        target_state = &dev->states[next_state];

        /* enter the state and update stats */
        dev->last_state = target_state;

        trace_power_start(POWER_CSTATE, next_state, dev->cpu);
        trace_cpu_idle(next_state, dev->cpu);

        dev->last_residency = target_state->enter(dev, target_state);

        trace_power_end(dev->cpu);
        trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);

        if (dev->last_state)
                target_state = dev->last_state;

        target_state->time += (unsigned long long)dev->last_residency;
        target_state->usage++;

        /* give the governor an opportunity to reflect on the outcome */
        if (cpuidle_curr_governor->reflect)
                cpuidle_curr_governor->reflect(dev);
}

/**
 * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
 */
void cpuidle_install_idle_handler(void)
{
        if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
                /* Make sure all changes finished before we switch to new idle */
                smp_wmb();
                pm_idle = cpuidle_idle_call;
        }
}

/**
 * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
 */
void cpuidle_uninstall_idle_handler(void)
{
        if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
                pm_idle = pm_idle_old;
                cpuidle_kick_cpus();
        }
}

/**
 * cpuidle_pause_and_lock - temporarily disables CPUIDLE
 */
void cpuidle_pause_and_lock(void)
{
        mutex_lock(&cpuidle_lock);
        cpuidle_uninstall_idle_handler();
}

EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);

/**
 * cpuidle_resume_and_unlock - resumes CPUIDLE operation
 */
void cpuidle_resume_and_unlock(void)
{
        cpuidle_install_idle_handler();
        mutex_unlock(&cpuidle_lock);
}

EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);

#ifdef CONFIG_ARCH_HAS_CPU_RELAX
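/*
 * poll_idle - fallback "state 0" handler: stay fully awake and spin on
 * need_resched() with interrupts enabled, returning the time spent
 * polling in microseconds.
 */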
static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
{
        ktime_t t1, t2;
        s64 diff;
        int ret;

        t1 = ktime_get();
        local_irq_enable();
        while (!need_resched())
                cpu_relax();

        t2 = ktime_get();
        diff = ktime_to_us(ktime_sub(t2, t1));
        if (diff > INT_MAX)
                diff = INT_MAX;

        ret = (int) diff;
        return ret;
}

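/*
 * poll_idle_init - install the polling loop above as state 0 of the
 * device: zero exit latency, zero target residency, and a power_usage of
 * -1 (the highest value of any state, see __cpuidle_register_device()).
 */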
static void poll_idle_init(struct cpuidle_device *dev)
{
        struct cpuidle_state *state = &dev->states[0];

        cpuidle_set_statedata(state, NULL);

        snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
        snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
        state->exit_latency = 0;
        state->target_residency = 0;
        state->power_usage = -1;
        state->flags = 0;
        state->enter = poll_idle;
}
#else
static void poll_idle_init(struct cpuidle_device *dev) {}
#endif /* CONFIG_ARCH_HAS_CPU_RELAX */

/**
 * cpuidle_enable_device - enables idle PM for a CPU
 * @dev: the CPU
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 */
int cpuidle_enable_device(struct cpuidle_device *dev)
{
        int ret, i;

        if (dev->enabled)
                return 0;
        if (!cpuidle_get_driver() || !cpuidle_curr_governor)
                return -EIO;
        if (!dev->state_count)
                return -EINVAL;

        if (dev->registered == 0) {
                ret = __cpuidle_register_device(dev);
                if (ret)
                        return ret;
        }

        poll_idle_init(dev);

        if ((ret = cpuidle_add_state_sysfs(dev)))
                return ret;

        if (cpuidle_curr_governor->enable &&
            (ret = cpuidle_curr_governor->enable(dev)))
                goto fail_sysfs;

        for (i = 0; i < dev->state_count; i++) {
                dev->states[i].usage = 0;
                dev->states[i].time = 0;
        }
        dev->last_residency = 0;
        dev->last_state = NULL;

        smp_wmb();

        dev->enabled = 1;

        enabled_devices++;
        return 0;

fail_sysfs:
        cpuidle_remove_state_sysfs(dev);

        return ret;
}

EXPORT_SYMBOL_GPL(cpuidle_enable_device);

/**
 * cpuidle_disable_device - disables idle PM for a CPU
 * @dev: the CPU
 *
 * This function must be called between cpuidle_pause_and_lock and
 * cpuidle_resume_and_unlock when used externally.
 */
void cpuidle_disable_device(struct cpuidle_device *dev)
{
        if (!dev->enabled)
                return;
        if (!cpuidle_get_driver() || !cpuidle_curr_governor)
                return;

        dev->enabled = 0;

        if (cpuidle_curr_governor->disable)
                cpuidle_curr_governor->disable(dev);

        cpuidle_remove_state_sysfs(dev);
        enabled_devices--;
}

EXPORT_SYMBOL_GPL(cpuidle_disable_device);

/**
 * __cpuidle_register_device - internal register function called before register
 * and enable routines
 * @dev: the cpu
 *
 * cpuidle_lock mutex must be held before this is called
 */
static int __cpuidle_register_device(struct cpuidle_device *dev)
{
        int ret;
        struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
        struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();

        if (!sys_dev)
                return -EINVAL;
        if (!try_module_get(cpuidle_driver->owner))
                return -EINVAL;

        init_completion(&dev->kobj_unregister);

        /*
         * cpuidle driver should set the dev->power_specified bit
         * before registering the device if the driver provides
         * power_usage numbers.
         *
         * For those devices whose ->power_specified is not set,
         * we fill in power_usage with decreasing values as the
         * cpuidle code has an implicit assumption that state Cn
         * uses less power than C(n-1).
         *
         * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
         * a power value of -1.  So we use -2, -3, etc., for other
         * c-states.
         */
        if (!dev->power_specified) {
                int i;
                for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++)
                        dev->states[i].power_usage = -1 - i;
        }

        per_cpu(cpuidle_devices, dev->cpu) = dev;
        list_add(&dev->device_list, &cpuidle_detected_devices);
        if ((ret = cpuidle_add_sysfs(sys_dev))) {
                module_put(cpuidle_driver->owner);
                return ret;
        }

        dev->registered = 1;
        return 0;
}

/**
 * cpuidle_register_device - registers a CPU's idle PM feature
 * @dev: the cpu
 */
int cpuidle_register_device(struct cpuidle_device *dev)
{
        int ret;

        mutex_lock(&cpuidle_lock);

        if ((ret = __cpuidle_register_device(dev))) {
                mutex_unlock(&cpuidle_lock);
                return ret;
        }

        cpuidle_enable_device(dev);
        cpuidle_install_idle_handler();

        mutex_unlock(&cpuidle_lock);

        return 0;

}

EXPORT_SYMBOL_GPL(cpuidle_register_device);
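
/*
 * Illustrative sketch (not part of this file) of how a cpuidle driver is
 * expected to use the registration API above; my_idle_driver, my_idle_dev
 * and my_enter_c1 are hypothetical names:
 *
 *   static struct cpuidle_driver my_idle_driver = {
 *           .name  = "my_idle",
 *           .owner = THIS_MODULE,
 *   };
 *
 *   // ->enter returns the time actually spent in the state, in microseconds
 *   static int my_enter_c1(struct cpuidle_device *dev,
 *                          struct cpuidle_state *state);
 *
 *   cpuidle_register_driver(&my_idle_driver);
 *   for_each_online_cpu(cpu) {
 *           dev = &per_cpu(my_idle_dev, cpu);
 *           dev->cpu = cpu;
 *           dev->state_count = 2;   // state 0 is reserved for POLL on
 *                                   // CONFIG_ARCH_HAS_CPU_RELAX archs
 *           dev->states[1].enter = my_enter_c1;
 *           snprintf(dev->states[1].name, CPUIDLE_NAME_LEN, "C1");
 *           dev->states[1].exit_latency = 1;        // worst-case exit, us
 *           dev->states[1].target_residency = 10;   // break-even time, us
 *           cpuidle_register_device(dev);
 *   }
 */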

/**
 * cpuidle_unregister_device - unregisters a CPU's idle PM feature
 * @dev: the cpu
 */
void cpuidle_unregister_device(struct cpuidle_device *dev)
{
        struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
        struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();

        if (dev->registered == 0)
                return;

        cpuidle_pause_and_lock();

        cpuidle_disable_device(dev);

        cpuidle_remove_sysfs(sys_dev);
        list_del(&dev->device_list);
        wait_for_completion(&dev->kobj_unregister);
        per_cpu(cpuidle_devices, dev->cpu) = NULL;

        cpuidle_resume_and_unlock();

        module_put(cpuidle_driver->owner);
}

EXPORT_SYMBOL_GPL(cpuidle_unregister_device);

#ifdef CONFIG_SMP

static void smp_callback(void *v)
{
        /* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.  This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int cpuidle_latency_notify(struct notifier_block *b,
                unsigned long l, void *v)
{
        smp_call_function(smp_callback, NULL, 1);
        return NOTIFY_OK;
}

static struct notifier_block cpuidle_latency_notifier = {
        .notifier_call = cpuidle_latency_notify,
};

static inline void latency_notifier_init(struct notifier_block *n)
{
        pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
}

#else /* CONFIG_SMP */

#define latency_notifier_init(x) do { } while (0)

#endif /* CONFIG_SMP */

/**
 * cpuidle_init - core initializer
 */
static int __init cpuidle_init(void)
{
        int ret;

        pm_idle_old = pm_idle;

        ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
        if (ret)
                return ret;

        latency_notifier_init(&cpuidle_latency_notifier);

        return 0;
}

core_initcall(cpuidle_init);