linux/drivers/cpufreq/cpufreq.c
<<
>>
Prefs
   1/*
   2 *  linux/drivers/cpufreq/cpufreq.c
   3 *
   4 *  Copyright (C) 2001 Russell King
   5 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
   6 *
   7 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
   8 *      Added handling for CPU hotplug
   9 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
  10 *      Fix handling for CPU hotplug -- affected CPUs
  11 *
  12 * This program is free software; you can redistribute it and/or modify
  13 * it under the terms of the GNU General Public License version 2 as
  14 * published by the Free Software Foundation.
  15 *
  16 */
  17
  18#include <linux/kernel.h>
  19#include <linux/module.h>
  20#include <linux/init.h>
  21#include <linux/notifier.h>
  22#include <linux/cpufreq.h>
  23#include <linux/delay.h>
  24#include <linux/interrupt.h>
  25#include <linux/spinlock.h>
  26#include <linux/device.h>
  27#include <linux/slab.h>
  28#include <linux/cpu.h>
  29#include <linux/completion.h>
  30#include <linux/mutex.h>
  31#include <linux/syscore_ops.h>
  32
  33#include <trace/events/power.h>
  34
  35/**
  36 * The "cpufreq driver" - the arch- or hardware-dependent low
  37 * level driver of CPUFreq support, and its spinlock. This lock
  38 * also protects the cpufreq_cpu_data array.
  39 */
  40static struct cpufreq_driver *cpufreq_driver;
  41static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
  42#ifdef CONFIG_HOTPLUG_CPU
  43/* This one keeps track of the previously set governor of a removed CPU */
  44static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
  45#endif
  46static DEFINE_SPINLOCK(cpufreq_driver_lock);
  47
  48/*
  49 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
  50 * all cpufreq/hotplug/workqueue/etc related lock issues.
  51 *
  52 * The rules for this semaphore:
  53 * - Any routine that wants to read from the policy structure will
  54 *   do a down_read on this semaphore.
  55 * - Any routine that will write to the policy structure and/or may take away
  56 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
  57 *   mode before doing so.
  58 *
  59 * Additional rules:
  60 * - All holders of the lock should check to make sure that the CPU they
  61 *   are concerned with are online after they get the lock.
  62 * - Governor routines that can be called in cpufreq hotplug path should not
  63 *   take this sem as top level hotplug notifier handler takes this.
  64 * - Lock should not be held across
  65 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
  66 */
  67static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
  68static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
  69
  70#define lock_policy_rwsem(mode, cpu)                                    \
  71static int lock_policy_rwsem_##mode                                     \
  72(int cpu)                                                               \
  73{                                                                       \
  74        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
  75        BUG_ON(policy_cpu == -1);                                       \
  76        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
  77        if (unlikely(!cpu_online(cpu))) {                               \
  78                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
  79                return -1;                                              \
  80        }                                                               \
  81                                                                        \
  82        return 0;                                                       \
  83}
  84
  85lock_policy_rwsem(read, cpu);
  86
  87lock_policy_rwsem(write, cpu);
  88
  89static void unlock_policy_rwsem_read(int cpu)
  90{
  91        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
  92        BUG_ON(policy_cpu == -1);
  93        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
  94}
  95
  96static void unlock_policy_rwsem_write(int cpu)
  97{
  98        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
  99        BUG_ON(policy_cpu == -1);
 100        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
 101}
 102
 103
 /* Forward declarations of helpers used before their definition */
 static unsigned int __cpufreq_get(unsigned int cpu);
 static void handle_update(struct work_struct *work);
 static int __cpufreq_governor(struct cpufreq_policy *policy,
                               unsigned int event);
 109
 110/**
 111 * Two notifier lists: the "policy" list is involved in the
 112 * validation process for a new CPU frequency policy; the
 113 * "transition" list for kernel code that needs to handle
 114 * changes to devices when the CPU clock speed changes.
 115 * The mutex locks both lists.
 116 */
 117static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
 118static struct srcu_notifier_head cpufreq_transition_notifier_list;
 119
 120static bool init_cpufreq_transition_notifier_list_called;
 121static int __init init_cpufreq_transition_notifier_list(void)
 122{
 123        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
 124        init_cpufreq_transition_notifier_list_called = true;
 125        return 0;
 126}
 127pure_initcall(init_cpufreq_transition_notifier_list);
 128
 /*
  * List of governors, walked by __find_governor().  cpufreq_parse_governor()
  * holds cpufreq_governor_mutex around its lookups.
  */
 static DEFINE_MUTEX(cpufreq_governor_mutex);
 static LIST_HEAD(cpufreq_governor_list);
 131
 132struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 133{
 134        struct cpufreq_policy *data;
 135        unsigned long flags;
 136
 137        if (cpu >= nr_cpu_ids)
 138                goto err_out;
 139
 140        /* get the cpufreq driver */
 141        spin_lock_irqsave(&cpufreq_driver_lock, flags);
 142
 143        if (!cpufreq_driver)
 144                goto err_out_unlock;
 145
 146        if (!try_module_get(cpufreq_driver->owner))
 147                goto err_out_unlock;
 148
 149
 150        /* get the CPU */
 151        data = per_cpu(cpufreq_cpu_data, cpu);
 152
 153        if (!data)
 154                goto err_out_put_module;
 155
 156        if (!kobject_get(&data->kobj))
 157                goto err_out_put_module;
 158
 159        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 160        return data;
 161
 162err_out_put_module:
 163        module_put(cpufreq_driver->owner);
 164err_out_unlock:
 165        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 166err_out:
 167        return NULL;
 168}
 169EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
 170
 171
 172void cpufreq_cpu_put(struct cpufreq_policy *data)
 173{
 174        kobject_put(&data->kobj);
 175        module_put(cpufreq_driver->owner);
 176}
 177EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 178
 179
 180/*********************************************************************
 181 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 182 *********************************************************************/
 183
 184/**
 185 * adjust_jiffies - adjust the system "loops_per_jiffy"
 186 *
 187 * This function alters the system "loops_per_jiffy" for the clock
 188 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 189 * systems as each CPU might be scaled differently. So, use the arch
 190 * per-CPU loops_per_jiffy value wherever possible.
 191 */
 192#ifndef CONFIG_SMP
 193static unsigned long l_p_j_ref;
 194static unsigned int  l_p_j_ref_freq;
 195
 196static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 197{
 198        if (ci->flags & CPUFREQ_CONST_LOOPS)
 199                return;
 200
 201        if (!l_p_j_ref_freq) {
 202                l_p_j_ref = loops_per_jiffy;
 203                l_p_j_ref_freq = ci->old;
 204                pr_debug("saving %lu as reference value for loops_per_jiffy; "
 205                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
 206        }
 207        if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
 208            (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
 209            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
 210                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
 211                                                                ci->new);
 212                pr_debug("scaling loops_per_jiffy to %lu "
 213                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
 214        }
 215}
 216#else
 217static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 218{
 219        return;
 220}
 221#endif
 222
 223
 224/**
 225 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 226 * on frequency transition.
 227 *
 228 * This function calls the transition notifiers and the "adjust_jiffies"
 229 * function. It is called twice on all CPU frequency changes that have
 230 * external effects.
 231 */
 232void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 233{
 234        struct cpufreq_policy *policy;
 235
 236        BUG_ON(irqs_disabled());
 237
 238        freqs->flags = cpufreq_driver->flags;
 239        pr_debug("notification %u of frequency transition to %u kHz\n",
 240                state, freqs->new);
 241
 242        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
 243        switch (state) {
 244
 245        case CPUFREQ_PRECHANGE:
 246                /* detect if the driver reported a value as "old frequency"
 247                 * which is not equal to what the cpufreq core thinks is
 248                 * "old frequency".
 249                 */
 250                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
 251                        if ((policy) && (policy->cpu == freqs->cpu) &&
 252                            (policy->cur) && (policy->cur != freqs->old)) {
 253                                pr_debug("Warning: CPU frequency is"
 254                                        " %u, cpufreq assumed %u kHz.\n",
 255                                        freqs->old, policy->cur);
 256                                freqs->old = policy->cur;
 257                        }
 258                }
 259                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 260                                CPUFREQ_PRECHANGE, freqs);
 261                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
 262                break;
 263
 264        case CPUFREQ_POSTCHANGE:
 265                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
 266                pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
 267                        (unsigned long)freqs->cpu);
 268                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 269                trace_cpu_frequency(freqs->new, freqs->cpu);
 270                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 271                                CPUFREQ_POSTCHANGE, freqs);
 272                if (likely(policy) && likely(policy->cpu == freqs->cpu))
 273                        policy->cur = freqs->new;
 274                break;
 275        }
 276}
 277EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
 278
 279
 280
 281/*********************************************************************
 282 *                          SYSFS INTERFACE                          *
 283 *********************************************************************/
 284
 285static struct cpufreq_governor *__find_governor(const char *str_governor)
 286{
 287        struct cpufreq_governor *t;
 288
 289        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
 290                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
 291                        return t;
 292
 293        return NULL;
 294}
 295
 296/**
 297 * cpufreq_parse_governor - parse a governor string
 298 */
 299static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
 300                                struct cpufreq_governor **governor)
 301{
 302        int err = -EINVAL;
 303
 304        if (!cpufreq_driver)
 305                goto out;
 306
 307        if (cpufreq_driver->setpolicy) {
 308                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
 309                        *policy = CPUFREQ_POLICY_PERFORMANCE;
 310                        err = 0;
 311                } else if (!strnicmp(str_governor, "powersave",
 312                                                CPUFREQ_NAME_LEN)) {
 313                        *policy = CPUFREQ_POLICY_POWERSAVE;
 314                        err = 0;
 315                }
 316        } else if (cpufreq_driver->target) {
 317                struct cpufreq_governor *t;
 318
 319                mutex_lock(&cpufreq_governor_mutex);
 320
 321                t = __find_governor(str_governor);
 322
 323                if (t == NULL) {
 324                        int ret;
 325
 326                        mutex_unlock(&cpufreq_governor_mutex);
 327                        ret = request_module("cpufreq_%s", str_governor);
 328                        mutex_lock(&cpufreq_governor_mutex);
 329
 330                        if (ret == 0)
 331                                t = __find_governor(str_governor);
 332                }
 333
 334                if (t != NULL) {
 335                        *governor = t;
 336                        err = 0;
 337                }
 338
 339                mutex_unlock(&cpufreq_governor_mutex);
 340        }
 341out:
 342        return err;
 343}
 344
 345
 346/**
 347 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 348 * print out cpufreq information
 349 *
 350 * Write out information from cpufreq_driver->policy[cpu]; object must be
 351 * "unsigned int".
 352 */
 353
 354#define show_one(file_name, object)                     \
 355static ssize_t show_##file_name                         \
 356(struct cpufreq_policy *policy, char *buf)              \
 357{                                                       \
 358        return sprintf(buf, "%u\n", policy->object);    \
 359}
 360
 361show_one(cpuinfo_min_freq, cpuinfo.min_freq);
 362show_one(cpuinfo_max_freq, cpuinfo.max_freq);
 363show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
 364show_one(scaling_min_freq, min);
 365show_one(scaling_max_freq, max);
 366show_one(scaling_cur_freq, cur);
 367
 368static int __cpufreq_set_policy(struct cpufreq_policy *data,
 369                                struct cpufreq_policy *policy);
 370
 371/**
 372 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 373 */
 374#define store_one(file_name, object)                    \
 375static ssize_t store_##file_name                                        \
 376(struct cpufreq_policy *policy, const char *buf, size_t count)          \
 377{                                                                       \
 378        unsigned int ret = -EINVAL;                                     \
 379        struct cpufreq_policy new_policy;                               \
 380                                                                        \
 381        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
 382        if (ret)                                                        \
 383                return -EINVAL;                                         \
 384                                                                        \
 385        ret = sscanf(buf, "%u", &new_policy.object);                    \
 386        if (ret != 1)                                                   \
 387                return -EINVAL;                                         \
 388                                                                        \
 389        ret = __cpufreq_set_policy(policy, &new_policy);                \
 390        policy->user_policy.object = policy->object;                    \
 391                                                                        \
 392        return ret ? ret : count;                                       \
 393}
 394
 395store_one(scaling_min_freq, min);
 396store_one(scaling_max_freq, max);
 397
 398/**
 399 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 400 */
 401static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
 402                                        char *buf)
 403{
 404        unsigned int cur_freq = __cpufreq_get(policy->cpu);
 405        if (!cur_freq)
 406                return sprintf(buf, "<unknown>");
 407        return sprintf(buf, "%u\n", cur_freq);
 408}
 409
 410
 411/**
 412 * show_scaling_governor - show the current policy for the specified CPU
 413 */
 414static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 415{
 416        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
 417                return sprintf(buf, "powersave\n");
 418        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
 419                return sprintf(buf, "performance\n");
 420        else if (policy->governor)
 421                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
 422                                policy->governor->name);
 423        return -EINVAL;
 424}
 425
 426
 427/**
 428 * store_scaling_governor - store policy for the specified CPU
 429 */
 430static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 431                                        const char *buf, size_t count)
 432{
 433        unsigned int ret = -EINVAL;
 434        char    str_governor[16];
 435        struct cpufreq_policy new_policy;
 436
 437        ret = cpufreq_get_policy(&new_policy, policy->cpu);
 438        if (ret)
 439                return ret;
 440
 441        ret = sscanf(buf, "%15s", str_governor);
 442        if (ret != 1)
 443                return -EINVAL;
 444
 445        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
 446                                                &new_policy.governor))
 447                return -EINVAL;
 448
 449        /* Do not use cpufreq_set_policy here or the user_policy.max
 450           will be wrongly overridden */
 451        ret = __cpufreq_set_policy(policy, &new_policy);
 452
 453        policy->user_policy.policy = policy->policy;
 454        policy->user_policy.governor = policy->governor;
 455
 456        if (ret)
 457                return ret;
 458        else
 459                return count;
 460}
 461
 462/**
 463 * show_scaling_driver - show the cpufreq driver currently loaded
 464 */
 465static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
 466{
 467        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
 468}
 469
 470/**
 471 * show_scaling_available_governors - show the available CPUfreq governors
 472 */
 473static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
 474                                                char *buf)
 475{
 476        ssize_t i = 0;
 477        struct cpufreq_governor *t;
 478
 479        if (!cpufreq_driver->target) {
 480                i += sprintf(buf, "performance powersave");
 481                goto out;
 482        }
 483
 484        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
 485                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
 486                    - (CPUFREQ_NAME_LEN + 2)))
 487                        goto out;
 488                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
 489        }
 490out:
 491        i += sprintf(&buf[i], "\n");
 492        return i;
 493}
 494
 495static ssize_t show_cpus(const struct cpumask *mask, char *buf)
 496{
 497        ssize_t i = 0;
 498        unsigned int cpu;
 499
 500        for_each_cpu(cpu, mask) {
 501                if (i)
 502                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
 503                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
 504                if (i >= (PAGE_SIZE - 5))
 505                        break;
 506        }
 507        i += sprintf(&buf[i], "\n");
 508        return i;
 509}
 510
 511/**
 512 * show_related_cpus - show the CPUs affected by each transition even if
 513 * hw coordination is in use
 514 */
 515static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
 516{
 517        if (cpumask_empty(policy->related_cpus))
 518                return show_cpus(policy->cpus, buf);
 519        return show_cpus(policy->related_cpus, buf);
 520}
 521
 522/**
 523 * show_affected_cpus - show the CPUs affected by each transition
 524 */
 525static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
 526{
 527        return show_cpus(policy->cpus, buf);
 528}
 529
 530static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
 531                                        const char *buf, size_t count)
 532{
 533        unsigned int freq = 0;
 534        unsigned int ret;
 535
 536        if (!policy->governor || !policy->governor->store_setspeed)
 537                return -EINVAL;
 538
 539        ret = sscanf(buf, "%u", &freq);
 540        if (ret != 1)
 541                return -EINVAL;
 542
 543        policy->governor->store_setspeed(policy, freq);
 544
 545        return count;
 546}
 547
 548static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
 549{
 550        if (!policy->governor || !policy->governor->show_setspeed)
 551                return sprintf(buf, "<unsupported>\n");
 552
 553        return policy->governor->show_setspeed(policy, buf);
 554}
 555
 556/**
 557 * show_scaling_driver - show the current cpufreq HW/BIOS limitation
 558 */
 559static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
 560{
 561        unsigned int limit;
 562        int ret;
 563        if (cpufreq_driver->bios_limit) {
 564                ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
 565                if (!ret)
 566                        return sprintf(buf, "%u\n", limit);
 567        }
 568        return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
 569}
 570
 571cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
 572cpufreq_freq_attr_ro(cpuinfo_min_freq);
 573cpufreq_freq_attr_ro(cpuinfo_max_freq);
 574cpufreq_freq_attr_ro(cpuinfo_transition_latency);
 575cpufreq_freq_attr_ro(scaling_available_governors);
 576cpufreq_freq_attr_ro(scaling_driver);
 577cpufreq_freq_attr_ro(scaling_cur_freq);
 578cpufreq_freq_attr_ro(bios_limit);
 579cpufreq_freq_attr_ro(related_cpus);
 580cpufreq_freq_attr_ro(affected_cpus);
 581cpufreq_freq_attr_rw(scaling_min_freq);
 582cpufreq_freq_attr_rw(scaling_max_freq);
 583cpufreq_freq_attr_rw(scaling_governor);
 584cpufreq_freq_attr_rw(scaling_setspeed);
 585
 586static struct attribute *default_attrs[] = {
 587        &cpuinfo_min_freq.attr,
 588        &cpuinfo_max_freq.attr,
 589        &cpuinfo_transition_latency.attr,
 590        &scaling_min_freq.attr,
 591        &scaling_max_freq.attr,
 592        &affected_cpus.attr,
 593        &related_cpus.attr,
 594        &scaling_governor.attr,
 595        &scaling_driver.attr,
 596        &scaling_available_governors.attr,
 597        &scaling_setspeed.attr,
 598        NULL
 599};
 600
 /* Exported kobject pointer; it is only declared here, not assigned */
 struct kobject *cpufreq_global_kobject;
 EXPORT_SYMBOL(cpufreq_global_kobject);

 /* Map an embedded attribute / kobject back to its containing structure */
 #define to_attr(a) container_of(a, struct freq_attr, attr)
 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
 606
 607static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 608{
 609        struct cpufreq_policy *policy = to_policy(kobj);
 610        struct freq_attr *fattr = to_attr(attr);
 611        ssize_t ret = -EINVAL;
 612        policy = cpufreq_cpu_get(policy->cpu);
 613        if (!policy)
 614                goto no_policy;
 615
 616        if (lock_policy_rwsem_read(policy->cpu) < 0)
 617                goto fail;
 618
 619        if (fattr->show)
 620                ret = fattr->show(policy, buf);
 621        else
 622                ret = -EIO;
 623
 624        unlock_policy_rwsem_read(policy->cpu);
 625fail:
 626        cpufreq_cpu_put(policy);
 627no_policy:
 628        return ret;
 629}
 630
 631static ssize_t store(struct kobject *kobj, struct attribute *attr,
 632                     const char *buf, size_t count)
 633{
 634        struct cpufreq_policy *policy = to_policy(kobj);
 635        struct freq_attr *fattr = to_attr(attr);
 636        ssize_t ret = -EINVAL;
 637        policy = cpufreq_cpu_get(policy->cpu);
 638        if (!policy)
 639                goto no_policy;
 640
 641        if (lock_policy_rwsem_write(policy->cpu) < 0)
 642                goto fail;
 643
 644        if (fattr->store)
 645                ret = fattr->store(policy, buf, count);
 646        else
 647                ret = -EIO;
 648
 649        unlock_policy_rwsem_write(policy->cpu);
 650fail:
 651        cpufreq_cpu_put(policy);
 652no_policy:
 653        return ret;
 654}
 655
 656static void cpufreq_sysfs_release(struct kobject *kobj)
 657{
 658        struct cpufreq_policy *policy = to_policy(kobj);
 659        pr_debug("last reference is dropped\n");
 660        complete(&policy->kobj_unregister);
 661}
 662
 663static const struct sysfs_ops sysfs_ops = {
 664        .show   = show,
 665        .store  = store,
 666};
 667
 668static struct kobj_type ktype_cpufreq = {
 669        .sysfs_ops      = &sysfs_ops,
 670        .default_attrs  = default_attrs,
 671        .release        = cpufreq_sysfs_release,
 672};
 673
 /*
  * cpufreq_add_dev_policy - attach @cpu to an already managed sibling policy
  * @cpu: CPU being added
  * @policy: freshly initialised policy of @cpu
  * @sys_dev: sysdev of @cpu; receives the "cpufreq" symlink
  *
  * With CONFIG_HOTPLUG_CPU, the governor remembered for @cpu in
  * cpufreq_cpu_governor is restored into @policy first, if it can be found.
  *
  * Then the other CPUs in policy->cpus are checked.  At the first one that
  * already has a policy, @cpu is re-pointed at that policy (policy_cpu,
  * cpufreq_cpu_data, cpus mask), a "cpufreq" symlink to the managed policy's
  * kobject is created under @sys_dev, and the driver's ->exit() is called on
  * @policy because only the CPU owning the kobject needed ->init().
  *
  * The function unlocks and re-locks the policy rwsem of @cpu in write mode,
  * so the caller is assumed to hold it on entry (NOTE(review): confirm
  * against the caller).
  *
  * Returns:
  *   Negative: Failure
  *   0:        Success
  *   Positive: When we have a managed CPU and the sysfs got symlinked
  *
  * Without CONFIG_SMP this is a no-op that returns 0.
  */
 static int cpufreq_add_dev_policy(unsigned int cpu,
                                   struct cpufreq_policy *policy,
                                   struct sys_device *sys_dev)
 {
         int ret = 0;
 #ifdef CONFIG_SMP
         unsigned long irq_flags;
         unsigned int other;
 #ifdef CONFIG_HOTPLUG_CPU
         struct cpufreq_governor *saved_gov;

         saved_gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
         if (saved_gov) {
                 policy->governor = saved_gov;
                 pr_debug("Restoring governor %s for cpu %d\n",
                        policy->governor->name, cpu);
         }
 #endif

         for_each_cpu(other, policy->cpus) {
                 struct cpufreq_policy *managed_policy;

                 if (other == cpu)
                         continue;

                 /*
                  * Check for existing affected CPUs.
                  * They may not be aware of it due to CPU Hotplug.
                  * cpufreq_cpu_put is called when the device is removed
                  * in __cpufreq_remove_dev()
                  */
                 managed_policy = cpufreq_cpu_get(other);
                 if (likely(!managed_policy))
                         continue;

                 /* Set proper policy_cpu: switch over to the owner's rwsem */
                 unlock_policy_rwsem_write(cpu);
                 per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

                 if (lock_policy_rwsem_write(cpu) < 0) {
                         /* Should not go through policy unlock path */
                         if (cpufreq_driver->exit)
                                 cpufreq_driver->exit(policy);
                         cpufreq_cpu_put(managed_policy);
                         return -EBUSY;
                 }

                 spin_lock_irqsave(&cpufreq_driver_lock, irq_flags);
                 cpumask_copy(managed_policy->cpus, policy->cpus);
                 per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
                 spin_unlock_irqrestore(&cpufreq_driver_lock, irq_flags);

                 pr_debug("CPU already managed, adding link\n");
                 ret = sysfs_create_link(&sys_dev->kobj,
                                         &managed_policy->kobj,
                                         "cpufreq");
                 if (ret)
                         cpufreq_cpu_put(managed_policy);

                 /*
                  * We only needed to be added to the mask.
                  * Call driver->exit() because only the cpu parent of
                  * the kobj needed to call init().
                  */
                 if (cpufreq_driver->exit)
                         cpufreq_driver->exit(policy);

                 /* 1 tells the caller that the sysfs entry got symlinked */
                 return ret ? ret : 1;
         }
 #endif
         return ret;
 }
 753
 754
 755/* symlink affected CPUs */
 756static int cpufreq_add_dev_symlink(unsigned int cpu,
 757                                   struct cpufreq_policy *policy)
 758{
 759        unsigned int j;
 760        int ret = 0;
 761
 762        for_each_cpu(j, policy->cpus) {
 763                struct cpufreq_policy *managed_policy;
 764                struct sys_device *cpu_sys_dev;
 765
 766                if (j == cpu)
 767                        continue;
 768                if (!cpu_online(j))
 769                        continue;
 770
 771                pr_debug("CPU %u already managed, adding link\n", j);
 772                managed_policy = cpufreq_cpu_get(cpu);
 773                cpu_sys_dev = get_cpu_sysdev(j);
 774                ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
 775                                        "cpufreq");
 776                if (ret) {
 777                        cpufreq_cpu_put(managed_policy);
 778                        return ret;
 779                }
 780        }
 781        return ret;
 782}
 783
 784static int cpufreq_add_dev_interface(unsigned int cpu,
 785                                     struct cpufreq_policy *policy,
 786                                     struct sys_device *sys_dev)
 787{
 788        struct cpufreq_policy new_policy;
 789        struct freq_attr **drv_attr;
 790        unsigned long flags;
 791        int ret = 0;
 792        unsigned int j;
 793
 794        /* prepare interface data */
 795        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
 796                                   &sys_dev->kobj, "cpufreq");
 797        if (ret)
 798                return ret;
 799
 800        /* set up files for this cpu device */
 801        drv_attr = cpufreq_driver->attr;
 802        while ((drv_attr) && (*drv_attr)) {
 803                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
 804                if (ret)
 805                        goto err_out_kobj_put;
 806                drv_attr++;
 807        }
 808        if (cpufreq_driver->get) {
 809                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
 810                if (ret)
 811                        goto err_out_kobj_put;
 812        }
 813        if (cpufreq_driver->target) {
 814                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
 815                if (ret)
 816                        goto err_out_kobj_put;
 817        }
 818        if (cpufreq_driver->bios_limit) {
 819                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
 820                if (ret)
 821                        goto err_out_kobj_put;
 822        }
 823
 824        spin_lock_irqsave(&cpufreq_driver_lock, flags);
 825        for_each_cpu(j, policy->cpus) {
 826                if (!cpu_online(j))
 827                        continue;
 828                per_cpu(cpufreq_cpu_data, j) = policy;
 829                per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
 830        }
 831        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 832
 833        ret = cpufreq_add_dev_symlink(cpu, policy);
 834        if (ret)
 835                goto err_out_kobj_put;
 836
 837        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
 838        /* assure that the starting sequence is run in __cpufreq_set_policy */
 839        policy->governor = NULL;
 840
 841        /* set default policy */
 842        ret = __cpufreq_set_policy(policy, &new_policy);
 843        policy->user_policy.policy = policy->policy;
 844        policy->user_policy.governor = policy->governor;
 845
 846        if (ret) {
 847                pr_debug("setting policy failed\n");
 848                if (cpufreq_driver->exit)
 849                        cpufreq_driver->exit(policy);
 850        }
 851        return ret;
 852
 853err_out_kobj_put:
 854        kobject_put(&policy->kobj);
 855        wait_for_completion(&policy->kobj_unregister);
 856        return ret;
 857}
 858
 859
 860/**
 861 * cpufreq_add_dev - add a CPU device
 862 *
 863 * Adds the cpufreq interface for a CPU device.
 864 *
 865 * The Oracle says: try running cpufreq registration/unregistration concurrently
 866 * with with cpu hotplugging and all hell will break loose. Tried to clean this
 867 * mess up, but more thorough testing is needed. - Mathieu
 868 */
 869static int cpufreq_add_dev(struct sys_device *sys_dev)
 870{
 871        unsigned int cpu = sys_dev->id;
 872        int ret = 0, found = 0;
 873        struct cpufreq_policy *policy;
 874        unsigned long flags;
 875        unsigned int j;
 876#ifdef CONFIG_HOTPLUG_CPU
 877        int sibling;
 878#endif
 879
 880        if (cpu_is_offline(cpu))
 881                return 0;
 882
 883        pr_debug("adding CPU %u\n", cpu);
 884
 885#ifdef CONFIG_SMP
 886        /* check whether a different CPU already registered this
 887         * CPU because it is in the same boat. */
 888        policy = cpufreq_cpu_get(cpu);
 889        if (unlikely(policy)) {
 890                cpufreq_cpu_put(policy);
 891                return 0;
 892        }
 893#endif
 894
 895        if (!try_module_get(cpufreq_driver->owner)) {
 896                ret = -EINVAL;
 897                goto module_out;
 898        }
 899
 900        ret = -ENOMEM;
 901        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
 902        if (!policy)
 903                goto nomem_out;
 904
 905        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
 906                goto err_free_policy;
 907
 908        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
 909                goto err_free_cpumask;
 910
 911        policy->cpu = cpu;
 912        cpumask_copy(policy->cpus, cpumask_of(cpu));
 913
 914        /* Initially set CPU itself as the policy_cpu */
 915        per_cpu(cpufreq_policy_cpu, cpu) = cpu;
 916        ret = (lock_policy_rwsem_write(cpu) < 0);
 917        WARN_ON(ret);
 918
 919        init_completion(&policy->kobj_unregister);
 920        INIT_WORK(&policy->update, handle_update);
 921
 922        /* Set governor before ->init, so that driver could check it */
 923#ifdef CONFIG_HOTPLUG_CPU
 924        for_each_online_cpu(sibling) {
 925                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
 926                if (cp && cp->governor &&
 927                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
 928                        policy->governor = cp->governor;
 929                        found = 1;
 930                        break;
 931                }
 932        }
 933#endif
 934        if (!found)
 935                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 936        /* call driver. From then on the cpufreq must be able
 937         * to accept all calls to ->verify and ->setpolicy for this CPU
 938         */
 939        ret = cpufreq_driver->init(policy);
 940        if (ret) {
 941                pr_debug("initialization failed\n");
 942                goto err_unlock_policy;
 943        }
 944        policy->user_policy.min = policy->min;
 945        policy->user_policy.max = policy->max;
 946
 947        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 948                                     CPUFREQ_START, policy);
 949
 950        ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
 951        if (ret) {
 952                if (ret > 0)
 953                        /* This is a managed cpu, symlink created,
 954                           exit with 0 */
 955                        ret = 0;
 956                goto err_unlock_policy;
 957        }
 958
 959        ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
 960        if (ret)
 961                goto err_out_unregister;
 962
 963        unlock_policy_rwsem_write(cpu);
 964
 965        kobject_uevent(&policy->kobj, KOBJ_ADD);
 966        module_put(cpufreq_driver->owner);
 967        pr_debug("initialization complete\n");
 968
 969        return 0;
 970
 971
 972err_out_unregister:
 973        spin_lock_irqsave(&cpufreq_driver_lock, flags);
 974        for_each_cpu(j, policy->cpus)
 975                per_cpu(cpufreq_cpu_data, j) = NULL;
 976        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 977
 978        kobject_put(&policy->kobj);
 979        wait_for_completion(&policy->kobj_unregister);
 980
 981err_unlock_policy:
 982        unlock_policy_rwsem_write(cpu);
 983        free_cpumask_var(policy->related_cpus);
 984err_free_cpumask:
 985        free_cpumask_var(policy->cpus);
 986err_free_policy:
 987        kfree(policy);
 988nomem_out:
 989        module_put(cpufreq_driver->owner);
 990module_out:
 991        return ret;
 992}
 993
 994
 995/**
 996 * __cpufreq_remove_dev - remove a CPU device
 997 *
 998 * Removes the cpufreq interface for a CPU device.
 999 * Caller should already have policy_rwsem in write mode for this CPU.
1000 * This routine frees the rwsem before returning.
1001 */
1002static int __cpufreq_remove_dev(struct sys_device *sys_dev)
1003{
1004        unsigned int cpu = sys_dev->id;
1005        unsigned long flags;
1006        struct cpufreq_policy *data;
1007        struct kobject *kobj;
1008        struct completion *cmp;
1009#ifdef CONFIG_SMP
1010        struct sys_device *cpu_sys_dev;
1011        unsigned int j;
1012#endif
1013
1014        pr_debug("unregistering CPU %u\n", cpu);
1015
1016        spin_lock_irqsave(&cpufreq_driver_lock, flags);
1017        data = per_cpu(cpufreq_cpu_data, cpu);
1018
1019        if (!data) {
1020                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1021                unlock_policy_rwsem_write(cpu);
1022                return -EINVAL;
1023        }
1024        per_cpu(cpufreq_cpu_data, cpu) = NULL;
1025
1026
1027#ifdef CONFIG_SMP
1028        /* if this isn't the CPU which is the parent of the kobj, we
1029         * only need to unlink, put and exit
1030         */
1031        if (unlikely(cpu != data->cpu)) {
1032                pr_debug("removing link\n");
1033                cpumask_clear_cpu(cpu, data->cpus);
1034                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1035                kobj = &sys_dev->kobj;
1036                cpufreq_cpu_put(data);
1037                unlock_policy_rwsem_write(cpu);
1038                sysfs_remove_link(kobj, "cpufreq");
1039                return 0;
1040        }
1041#endif
1042
1043#ifdef CONFIG_SMP
1044
1045#ifdef CONFIG_HOTPLUG_CPU
1046        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
1047                        CPUFREQ_NAME_LEN);
1048#endif
1049
1050        /* if we have other CPUs still registered, we need to unlink them,
1051         * or else wait_for_completion below will lock up. Clean the
1052         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1053         * the sysfs links afterwards.
1054         */
1055        if (unlikely(cpumask_weight(data->cpus) > 1)) {
1056                for_each_cpu(j, data->cpus) {
1057                        if (j == cpu)
1058                                continue;
1059                        per_cpu(cpufreq_cpu_data, j) = NULL;
1060                }
1061        }
1062
1063        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1064
1065        if (unlikely(cpumask_weight(data->cpus) > 1)) {
1066                for_each_cpu(j, data->cpus) {
1067                        if (j == cpu)
1068                                continue;
1069                        pr_debug("removing link for cpu %u\n", j);
1070#ifdef CONFIG_HOTPLUG_CPU
1071                        strncpy(per_cpu(cpufreq_cpu_governor, j),
1072                                data->governor->name, CPUFREQ_NAME_LEN);
1073#endif
1074                        cpu_sys_dev = get_cpu_sysdev(j);
1075                        kobj = &cpu_sys_dev->kobj;
1076                        unlock_policy_rwsem_write(cpu);
1077                        sysfs_remove_link(kobj, "cpufreq");
1078                        lock_policy_rwsem_write(cpu);
1079                        cpufreq_cpu_put(data);
1080                }
1081        }
1082#else
1083        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1084#endif
1085
1086        if (cpufreq_driver->target)
1087                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1088
1089        kobj = &data->kobj;
1090        cmp = &data->kobj_unregister;
1091        unlock_policy_rwsem_write(cpu);
1092        kobject_put(kobj);
1093
1094        /* we need to make sure that the underlying kobj is actually
1095         * not referenced anymore by anybody before we proceed with
1096         * unloading.
1097         */
1098        pr_debug("waiting for dropping of refcount\n");
1099        wait_for_completion(cmp);
1100        pr_debug("wait complete\n");
1101
1102        lock_policy_rwsem_write(cpu);
1103        if (cpufreq_driver->exit)
1104                cpufreq_driver->exit(data);
1105        unlock_policy_rwsem_write(cpu);
1106
1107#ifdef CONFIG_HOTPLUG_CPU
1108        /* when the CPU which is the parent of the kobj is hotplugged
1109         * offline, check for siblings, and create cpufreq sysfs interface
1110         * and symlinks
1111         */
1112        if (unlikely(cpumask_weight(data->cpus) > 1)) {
1113                /* first sibling now owns the new sysfs dir */
1114                cpumask_clear_cpu(cpu, data->cpus);
1115                cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));
1116
1117                /* finally remove our own symlink */
1118                lock_policy_rwsem_write(cpu);
1119                __cpufreq_remove_dev(sys_dev);
1120        }
1121#endif
1122
1123        free_cpumask_var(data->related_cpus);
1124        free_cpumask_var(data->cpus);
1125        kfree(data);
1126
1127        return 0;
1128}
1129
1130
1131static int cpufreq_remove_dev(struct sys_device *sys_dev)
1132{
1133        unsigned int cpu = sys_dev->id;
1134        int retval;
1135
1136        if (cpu_is_offline(cpu))
1137                return 0;
1138
1139        if (unlikely(lock_policy_rwsem_write(cpu)))
1140                BUG();
1141
1142        retval = __cpufreq_remove_dev(sys_dev);
1143        return retval;
1144}
1145
1146
1147static void handle_update(struct work_struct *work)
1148{
1149        struct cpufreq_policy *policy =
1150                container_of(work, struct cpufreq_policy, update);
1151        unsigned int cpu = policy->cpu;
1152        pr_debug("handle_update for cpu %u called\n", cpu);
1153        cpufreq_update_policy(cpu);
1154}
1155
1156/**
1157 *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1158 *      @cpu: cpu number
1159 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1160 *      @new_freq: CPU frequency the CPU actually runs at
1161 *
1162 *      We adjust to current frequency first, and need to clean up later.
1163 *      So either call to cpufreq_update_policy() or schedule handle_update()).
1164 */
1165static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1166                                unsigned int new_freq)
1167{
1168        struct cpufreq_freqs freqs;
1169
1170        pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1171               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1172
1173        freqs.cpu = cpu;
1174        freqs.old = old_freq;
1175        freqs.new = new_freq;
1176        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1177        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1178}
1179
1180
1181/**
1182 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1183 * @cpu: CPU number
1184 *
1185 * This is the last known freq, without actually getting it from the driver.
1186 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1187 */
1188unsigned int cpufreq_quick_get(unsigned int cpu)
1189{
1190        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1191        unsigned int ret_freq = 0;
1192
1193        if (policy) {
1194                ret_freq = policy->cur;
1195                cpufreq_cpu_put(policy);
1196        }
1197
1198        return ret_freq;
1199}
1200EXPORT_SYMBOL(cpufreq_quick_get);
1201
1202/**
1203 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1204 * @cpu: CPU number
1205 *
1206 * Just return the max possible frequency for a given CPU.
1207 */
1208unsigned int cpufreq_quick_get_max(unsigned int cpu)
1209{
1210        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1211        unsigned int ret_freq = 0;
1212
1213        if (policy) {
1214                ret_freq = policy->max;
1215                cpufreq_cpu_put(policy);
1216        }
1217
1218        return ret_freq;
1219}
1220EXPORT_SYMBOL(cpufreq_quick_get_max);
1221
1222
1223static unsigned int __cpufreq_get(unsigned int cpu)
1224{
1225        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1226        unsigned int ret_freq = 0;
1227
1228        if (!cpufreq_driver->get)
1229                return ret_freq;
1230
1231        ret_freq = cpufreq_driver->get(cpu);
1232
1233        if (ret_freq && policy->cur &&
1234                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1235                /* verify no discrepancy between actual and
1236                                        saved value exists */
1237                if (unlikely(ret_freq != policy->cur)) {
1238                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1239                        schedule_work(&policy->update);
1240                }
1241        }
1242
1243        return ret_freq;
1244}
1245
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Reads the frequency through __cpufreq_get() with the policy rwsem held
 * for reading.  Returns 0 if @cpu has no policy or the rwsem could not be
 * taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq = 0;

	if (!policy)
		return 0;

	if (likely(!lock_policy_rwsem_read(cpu))) {
		freq = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(policy);
	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1273
1274static struct sysdev_driver cpufreq_sysdev_driver = {
1275        .add            = cpufreq_add_dev,
1276        .remove         = cpufreq_remove_dev,
1277};
1278
1279
1280/**
1281 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1282 *
1283 * This function is only executed for the boot processor.  The other CPUs
1284 * have been put offline by means of CPU hotplug.
1285 */
1286static int cpufreq_bp_suspend(void)
1287{
1288        int ret = 0;
1289
1290        int cpu = smp_processor_id();
1291        struct cpufreq_policy *cpu_policy;
1292
1293        pr_debug("suspending cpu %u\n", cpu);
1294
1295        /* If there's no policy for the boot CPU, we have nothing to do. */
1296        cpu_policy = cpufreq_cpu_get(cpu);
1297        if (!cpu_policy)
1298                return 0;
1299
1300        if (cpufreq_driver->suspend) {
1301                ret = cpufreq_driver->suspend(cpu_policy);
1302                if (ret)
1303                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1304                                        "step on CPU %u\n", cpu_policy->cpu);
1305        }
1306
1307        cpufreq_cpu_put(cpu_policy);
1308        return ret;
1309}
1310
1311/**
1312 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1313 *
1314 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1315 *      2.) schedule call cpufreq_update_policy() ASAP as interrupts are
1316 *          restored. It will verify that the current freq is in sync with
1317 *          what we believe it to be. This is a bit later than when it
1318 *          should be, but nonethteless it's better than calling
1319 *          cpufreq_driver->get() here which might re-enable interrupts...
1320 *
1321 * This function is only executed for the boot CPU.  The other CPUs have not
1322 * been turned on yet.
1323 */
1324static void cpufreq_bp_resume(void)
1325{
1326        int ret = 0;
1327
1328        int cpu = smp_processor_id();
1329        struct cpufreq_policy *cpu_policy;
1330
1331        pr_debug("resuming cpu %u\n", cpu);
1332
1333        /* If there's no policy for the boot CPU, we have nothing to do. */
1334        cpu_policy = cpufreq_cpu_get(cpu);
1335        if (!cpu_policy)
1336                return;
1337
1338        if (cpufreq_driver->resume) {
1339                ret = cpufreq_driver->resume(cpu_policy);
1340                if (ret) {
1341                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
1342                                        "step on CPU %u\n", cpu_policy->cpu);
1343                        goto fail;
1344                }
1345        }
1346
1347        schedule_work(&cpu_policy->update);
1348
1349fail:
1350        cpufreq_cpu_put(cpu_policy);
1351}
1352
1353static struct syscore_ops cpufreq_syscore_ops = {
1354        .suspend        = cpufreq_bp_suspend,
1355        .resume         = cpufreq_bp_resume,
1356};
1357
1358
1359/*********************************************************************
1360 *                     NOTIFIER LISTS INTERFACE                      *
1361 *********************************************************************/
1362
1363/**
1364 *      cpufreq_register_notifier - register a driver with cpufreq
1365 *      @nb: notifier function to register
1366 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1367 *
1368 *      Add a driver to one of two lists: either a list of drivers that
1369 *      are notified about clock rate changes (once before and once after
1370 *      the transition), or a list of drivers that are notified about
1371 *      changes in cpufreq policy.
1372 *
1373 *      This function may sleep, and has the same return conditions as
1374 *      blocking_notifier_chain_register.
1375 */
1376int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1377{
1378        int ret;
1379
1380        WARN_ON(!init_cpufreq_transition_notifier_list_called);
1381
1382        switch (list) {
1383        case CPUFREQ_TRANSITION_NOTIFIER:
1384                ret = srcu_notifier_chain_register(
1385                                &cpufreq_transition_notifier_list, nb);
1386                break;
1387        case CPUFREQ_POLICY_NOTIFIER:
1388                ret = blocking_notifier_chain_register(
1389                                &cpufreq_policy_notifier_list, nb);
1390                break;
1391        default:
1392                ret = -EINVAL;
1393        }
1394
1395        return ret;
1396}
1397EXPORT_SYMBOL(cpufreq_register_notifier);
1398
1399
1400/**
1401 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1402 *      @nb: notifier block to be unregistered
1403 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1404 *
1405 *      Remove a driver from the CPU frequency notifier list.
1406 *
1407 *      This function may sleep, and has the same return conditions as
1408 *      blocking_notifier_chain_unregister.
1409 */
1410int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1411{
1412        int ret;
1413
1414        switch (list) {
1415        case CPUFREQ_TRANSITION_NOTIFIER:
1416                ret = srcu_notifier_chain_unregister(
1417                                &cpufreq_transition_notifier_list, nb);
1418                break;
1419        case CPUFREQ_POLICY_NOTIFIER:
1420                ret = blocking_notifier_chain_unregister(
1421                                &cpufreq_policy_notifier_list, nb);
1422                break;
1423        default:
1424                ret = -EINVAL;
1425        }
1426
1427        return ret;
1428}
1429EXPORT_SYMBOL(cpufreq_unregister_notifier);
1430
1431
1432/*********************************************************************
1433 *                              GOVERNORS                            *
1434 *********************************************************************/
1435
1436
1437int __cpufreq_driver_target(struct cpufreq_policy *policy,
1438                            unsigned int target_freq,
1439                            unsigned int relation)
1440{
1441        int retval = -EINVAL;
1442
1443        pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1444                target_freq, relation);
1445        if (cpu_online(policy->cpu) && cpufreq_driver->target)
1446                retval = cpufreq_driver->target(policy, target_freq, relation);
1447
1448        return retval;
1449}
1450EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1451
1452int cpufreq_driver_target(struct cpufreq_policy *policy,
1453                          unsigned int target_freq,
1454                          unsigned int relation)
1455{
1456        int ret = -EINVAL;
1457
1458        policy = cpufreq_cpu_get(policy->cpu);
1459        if (!policy)
1460                goto no_policy;
1461
1462        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1463                goto fail;
1464
1465        ret = __cpufreq_driver_target(policy, target_freq, relation);
1466
1467        unlock_policy_rwsem_write(policy->cpu);
1468
1469fail:
1470        cpufreq_cpu_put(policy);
1471no_policy:
1472        return ret;
1473}
1474EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1475
1476int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1477{
1478        int ret = 0;
1479
1480        policy = cpufreq_cpu_get(policy->cpu);
1481        if (!policy)
1482                return -EINVAL;
1483
1484        if (cpu_online(cpu) && cpufreq_driver->getavg)
1485                ret = cpufreq_driver->getavg(policy, cpu);
1486
1487        cpufreq_cpu_put(policy);
1488        return ret;
1489}
1490EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1491
1492/*
1493 * when "event" is CPUFREQ_GOV_LIMITS
1494 */
1495
1496static int __cpufreq_governor(struct cpufreq_policy *policy,
1497                                        unsigned int event)
1498{
1499        int ret;
1500
1501        /* Only must be defined when default governor is known to have latency
1502           restrictions, like e.g. conservative or ondemand.
1503           That this is the case is already ensured in Kconfig
1504        */
1505#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1506        struct cpufreq_governor *gov = &cpufreq_gov_performance;
1507#else
1508        struct cpufreq_governor *gov = NULL;
1509#endif
1510
1511        if (policy->governor->max_transition_latency &&
1512            policy->cpuinfo.transition_latency >
1513            policy->governor->max_transition_latency) {
1514                if (!gov)
1515                        return -EINVAL;
1516                else {
1517                        printk(KERN_WARNING "%s governor failed, too long"
1518                               " transition latency of HW, fallback"
1519                               " to %s governor\n",
1520                               policy->governor->name,
1521                               gov->name);
1522                        policy->governor = gov;
1523                }
1524        }
1525
1526        if (!try_module_get(policy->governor->owner))
1527                return -EINVAL;
1528
1529        pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1530                                                policy->cpu, event);
1531        ret = policy->governor->governor(policy, event);
1532
1533        /* we keep one module reference alive for
1534                        each CPU governed by this CPU */
1535        if ((event != CPUFREQ_GOV_START) || ret)
1536                module_put(policy->governor->owner);
1537        if ((event == CPUFREQ_GOV_STOP) && !ret)
1538                module_put(policy->governor->owner);
1539
1540        return ret;
1541}
1542
1543
1544int cpufreq_register_governor(struct cpufreq_governor *governor)
1545{
1546        int err;
1547
1548        if (!governor)
1549                return -EINVAL;
1550
1551        mutex_lock(&cpufreq_governor_mutex);
1552
1553        err = -EBUSY;
1554        if (__find_governor(governor->name) == NULL) {
1555                err = 0;
1556                list_add(&governor->governor_list, &cpufreq_governor_list);
1557        }
1558
1559        mutex_unlock(&cpufreq_governor_mutex);
1560        return err;
1561}
1562EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1563
1564
1565void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1566{
1567#ifdef CONFIG_HOTPLUG_CPU
1568        int cpu;
1569#endif
1570
1571        if (!governor)
1572                return;
1573
1574#ifdef CONFIG_HOTPLUG_CPU
1575        for_each_present_cpu(cpu) {
1576                if (cpu_online(cpu))
1577                        continue;
1578                if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1579                        strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1580        }
1581#endif
1582
1583        mutex_lock(&cpufreq_governor_mutex);
1584        list_del(&governor->governor_list);
1585        mutex_unlock(&cpufreq_governor_mutex);
1586        return;
1587}
1588EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1589
1590
1591
1592/*********************************************************************
1593 *                          POLICY INTERFACE                         *
1594 *********************************************************************/
1595
1596/**
1597 * cpufreq_get_policy - get the current cpufreq_policy
1598 * @policy: struct cpufreq_policy into which the current cpufreq_policy
1599 *      is written
1600 *
1601 * Reads the current cpufreq policy.
1602 */
1603int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1604{
1605        struct cpufreq_policy *cpu_policy;
1606        if (!policy)
1607                return -EINVAL;
1608
1609        cpu_policy = cpufreq_cpu_get(cpu);
1610        if (!cpu_policy)
1611                return -EINVAL;
1612
1613        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1614
1615        cpufreq_cpu_put(cpu_policy);
1616        return 0;
1617}
1618EXPORT_SYMBOL(cpufreq_get_policy);
1619
1620
1621/*
1622 * data   : current policy.
1623 * policy : policy to be set.
1624 */
1625static int __cpufreq_set_policy(struct cpufreq_policy *data,
1626                                struct cpufreq_policy *policy)
1627{
1628        int ret = 0;
1629
1630        pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1631                policy->min, policy->max);
1632
1633        memcpy(&policy->cpuinfo, &data->cpuinfo,
1634                                sizeof(struct cpufreq_cpuinfo));
1635
1636        if (policy->min > data->max || policy->max < data->min) {
1637                ret = -EINVAL;
1638                goto error_out;
1639        }
1640
1641        /* verify the cpu speed can be set within this limit */
1642        ret = cpufreq_driver->verify(policy);
1643        if (ret)
1644                goto error_out;
1645
1646        /* adjust if necessary - all reasons */
1647        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1648                        CPUFREQ_ADJUST, policy);
1649
1650        /* adjust if necessary - hardware incompatibility*/
1651        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1652                        CPUFREQ_INCOMPATIBLE, policy);
1653
1654        /* verify the cpu speed can be set within this limit,
1655           which might be different to the first one */
1656        ret = cpufreq_driver->verify(policy);
1657        if (ret)
1658                goto error_out;
1659
1660        /* notification of the new policy */
1661        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1662                        CPUFREQ_NOTIFY, policy);
1663
1664        data->min = policy->min;
1665        data->max = policy->max;
1666
1667        pr_debug("new min and max freqs are %u - %u kHz\n",
1668                                        data->min, data->max);
1669
1670        if (cpufreq_driver->setpolicy) {
1671                data->policy = policy->policy;
1672                pr_debug("setting range\n");
1673                ret = cpufreq_driver->setpolicy(policy);
1674        } else {
1675                if (policy->governor != data->governor) {
1676                        /* save old, working values */
1677                        struct cpufreq_governor *old_gov = data->governor;
1678
1679                        pr_debug("governor switch\n");
1680
1681                        /* end old governor */
1682                        if (data->governor)
1683                                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1684
1685                        /* start new governor */
1686                        data->governor = policy->governor;
1687                        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1688                                /* new governor failed, so re-start old one */
1689                                pr_debug("starting governor %s failed\n",
1690                                                        data->governor->name);
1691                                if (old_gov) {
1692                                        data->governor = old_gov;
1693                                        __cpufreq_governor(data,
1694                                                           CPUFREQ_GOV_START);
1695                                }
1696                                ret = -EINVAL;
1697                                goto error_out;
1698                        }
1699                        /* might be a policy change, too, so fall through */
1700                }
1701                pr_debug("governor: change or update limits\n");
1702                __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1703        }
1704
1705error_out:
1706        return ret;
1707}
1708
1709/**
1710 *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1711 *      @cpu: CPU which shall be re-evaluated
1712 *
1713 *      Useful for policy notifiers which have different necessities
1714 *      at different times.
1715 */
1716int cpufreq_update_policy(unsigned int cpu)
1717{
1718        struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1719        struct cpufreq_policy policy;
1720        int ret;
1721
1722        if (!data) {
1723                ret = -ENODEV;
1724                goto no_policy;
1725        }
1726
1727        if (unlikely(lock_policy_rwsem_write(cpu))) {
1728                ret = -EINVAL;
1729                goto fail;
1730        }
1731
1732        pr_debug("updating policy for CPU %u\n", cpu);
1733        memcpy(&policy, data, sizeof(struct cpufreq_policy));
1734        policy.min = data->user_policy.min;
1735        policy.max = data->user_policy.max;
1736        policy.policy = data->user_policy.policy;
1737        policy.governor = data->user_policy.governor;
1738
1739        /* BIOS might change freq behind our back
1740          -> ask driver for current freq and notify governors about a change */
1741        if (cpufreq_driver->get) {
1742                policy.cur = cpufreq_driver->get(cpu);
1743                if (!data->cur) {
1744                        pr_debug("Driver did not initialize current freq");
1745                        data->cur = policy.cur;
1746                } else {
1747                        if (data->cur != policy.cur)
1748                                cpufreq_out_of_sync(cpu, data->cur,
1749                                                                policy.cur);
1750                }
1751        }
1752
1753        ret = __cpufreq_set_policy(data, &policy);
1754
1755        unlock_policy_rwsem_write(cpu);
1756
1757fail:
1758        cpufreq_cpu_put(data);
1759no_policy:
1760        return ret;
1761}
1762EXPORT_SYMBOL(cpufreq_update_policy);
1763
1764static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1765                                        unsigned long action, void *hcpu)
1766{
1767        unsigned int cpu = (unsigned long)hcpu;
1768        struct sys_device *sys_dev;
1769
1770        sys_dev = get_cpu_sysdev(cpu);
1771        if (sys_dev) {
1772                switch (action) {
1773                case CPU_ONLINE:
1774                case CPU_ONLINE_FROZEN:
1775                        cpufreq_add_dev(sys_dev);
1776                        break;
1777                case CPU_DOWN_PREPARE:
1778                case CPU_DOWN_PREPARE_FROZEN:
1779                        if (unlikely(lock_policy_rwsem_write(cpu)))
1780                                BUG();
1781
1782                        __cpufreq_remove_dev(sys_dev);
1783                        break;
1784                case CPU_DOWN_FAILED:
1785                case CPU_DOWN_FAILED_FROZEN:
1786                        cpufreq_add_dev(sys_dev);
1787                        break;
1788                }
1789        }
1790        return NOTIFY_OK;
1791}
1792
1793static struct notifier_block __refdata cpufreq_cpu_notifier = {
1794    .notifier_call = cpufreq_cpu_callback,
1795};
1796
1797/*********************************************************************
1798 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1799 *********************************************************************/
1800
1801/**
1802 * cpufreq_register_driver - register a CPU Frequency driver
1803 * @driver_data: A struct cpufreq_driver containing the values#
1804 * submitted by the CPU Frequency driver.
1805 *
1806 *   Registers a CPU Frequency driver to this core code. This code
1807 * returns zero on success, -EBUSY when another driver got here first
1808 * (and isn't unregistered in the meantime).
1809 *
1810 */
1811int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1812{
1813        unsigned long flags;
1814        int ret;
1815
1816        if (!driver_data || !driver_data->verify || !driver_data->init ||
1817            ((!driver_data->setpolicy) && (!driver_data->target)))
1818                return -EINVAL;
1819
1820        pr_debug("trying to register driver %s\n", driver_data->name);
1821
1822        if (driver_data->setpolicy)
1823                driver_data->flags |= CPUFREQ_CONST_LOOPS;
1824
1825        spin_lock_irqsave(&cpufreq_driver_lock, flags);
1826        if (cpufreq_driver) {
1827                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1828                return -EBUSY;
1829        }
1830        cpufreq_driver = driver_data;
1831        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1832
1833        ret = sysdev_driver_register(&cpu_sysdev_class,
1834                                        &cpufreq_sysdev_driver);
1835        if (ret)
1836                goto err_null_driver;
1837
1838        if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1839                int i;
1840                ret = -ENODEV;
1841
1842                /* check for at least one working CPU */
1843                for (i = 0; i < nr_cpu_ids; i++)
1844                        if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1845                                ret = 0;
1846                                break;
1847                        }
1848
1849                /* if all ->init() calls failed, unregister */
1850                if (ret) {
1851                        pr_debug("no CPU initialized for driver %s\n",
1852                                                        driver_data->name);
1853                        goto err_sysdev_unreg;
1854                }
1855        }
1856
1857        register_hotcpu_notifier(&cpufreq_cpu_notifier);
1858        pr_debug("driver %s up and running\n", driver_data->name);
1859
1860        return 0;
1861err_sysdev_unreg:
1862        sysdev_driver_unregister(&cpu_sysdev_class,
1863                        &cpufreq_sysdev_driver);
1864err_null_driver:
1865        spin_lock_irqsave(&cpufreq_driver_lock, flags);
1866        cpufreq_driver = NULL;
1867        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1868        return ret;
1869}
1870EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1871
1872
1873/**
1874 * cpufreq_unregister_driver - unregister the current CPUFreq driver
1875 *
1876 *    Unregister the current CPUFreq driver. Only call this if you have
1877 * the right to do so, i.e. if you have succeeded in initialising before!
1878 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1879 * currently not initialised.
1880 */
1881int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1882{
1883        unsigned long flags;
1884
1885        if (!cpufreq_driver || (driver != cpufreq_driver))
1886                return -EINVAL;
1887
1888        pr_debug("unregistering driver %s\n", driver->name);
1889
1890        sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1891        unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1892
1893        spin_lock_irqsave(&cpufreq_driver_lock, flags);
1894        cpufreq_driver = NULL;
1895        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1896
1897        return 0;
1898}
1899EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1900
1901static int __init cpufreq_core_init(void)
1902{
1903        int cpu;
1904
1905        for_each_possible_cpu(cpu) {
1906                per_cpu(cpufreq_policy_cpu, cpu) = -1;
1907                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1908        }
1909
1910        cpufreq_global_kobject = kobject_create_and_add("cpufreq",
1911                                                &cpu_sysdev_class.kset.kobj);
1912        BUG_ON(!cpufreq_global_kobject);
1913        register_syscore_ops(&cpufreq_syscore_ops);
1914
1915        return 0;
1916}
1917core_initcall(cpufreq_core_init);
1918