/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names short and simple.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL         (10)
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define DEF_SAMPLING_DOWN_FACTOR                (1)
#define MAX_SAMPLING_DOWN_FACTOR                (100000)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (3)
#define MICRO_FREQUENCY_UP_THRESHOLD            (95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE         (10000)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling interval is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL), this governor will not work.
 * All times here are in us (microseconds).
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)
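
/*
 * Worked example (illustrative numbers, not from any particular driver):
 * a driver reporting a transition latency of 10,000 ns is converted to
 * 10 us in cpufreq_governor_dbs(); the default sampling rate then becomes
 * 10 us * LATENCY_MULTIPLIER = 10,000 us, i.e. one sample every 10 ms,
 * subject to the min_sampling_rate floor computed at module init.
 */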

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
        .name                   = "ondemand",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_iowait;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        struct cpufreq_frequency_table *freq_table;
        unsigned int freq_lo;
        unsigned int freq_lo_jiffies;
        unsigned int freq_hi_jiffies;
        unsigned int rate_mult;
        int cpu;
        unsigned int sample_type:1;
        /*
         * percpu mutex that serializes governor limit change with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);
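
/*
 * Locking summary: dbs_mutex serializes governor start/stop (and hence
 * dbs_enable and creation/removal of the global sysfs group), while each
 * CPU's timer_mutex serializes its sampling work against limit changes
 * delivered via CPUFREQ_GOV_LIMITS.
 */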

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int down_differential;
        unsigned int ignore_nice;
        unsigned int sampling_down_factor;
        unsigned int powersave_bias;
        unsigned int io_is_busy;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
        .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
        .ignore_nice = 0,
        .powersave_bias = 0,
};

static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = jiffies_to_usecs(cur_wall_time);

        return jiffies_to_usecs(idle_time);
}
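
/*
 * Note: idle time is derived here as wall time minus the summed busy
 * states; the idle and iowait states are deliberately not part of
 * busy_time, so both count as idle in this fallback path.
 */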

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, NULL);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);
        else
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}

static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}
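
/*
 * get_cpu_idle_time_us()/get_cpu_iowait_time_us() return -1ULL when NO_HZ
 * idle accounting is not available in the running configuration; the two
 * helpers above then fall back to the coarser jiffy-based bookkeeping or
 * report zero iowait, respectively. Note that idle time as used by this
 * governor includes iowait; it is subtracted again in dbs_check_cpu() when
 * io_is_busy is set.
 */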

/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
                                          unsigned int freq_next,
                                          unsigned int relation)
{
        unsigned int freq_req, freq_reduc, freq_avg;
        unsigned int freq_hi, freq_lo;
        unsigned int index = 0;
        unsigned int jiffies_total, jiffies_hi, jiffies_lo;
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
                                                   policy->cpu);

        if (!dbs_info->freq_table) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_next;
        }

        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
                        relation, &index);
        freq_req = dbs_info->freq_table[index].frequency;
        freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
        freq_avg = freq_req - freq_reduc;

        /* Find freq bounds for freq_avg in freq_table */
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_H, &index);
        freq_lo = dbs_info->freq_table[index].frequency;
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_L, &index);
        freq_hi = dbs_info->freq_table[index].frequency;

        /* Find out how long we have to be in hi and lo freqs */
        if (freq_hi == freq_lo) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_lo;
        }
        jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
        jiffies_hi += ((freq_hi - freq_lo) / 2);
        jiffies_hi /= (freq_hi - freq_lo);
        jiffies_lo = jiffies_total - jiffies_hi;
        dbs_info->freq_lo = freq_lo;
        dbs_info->freq_lo_jiffies = jiffies_lo;
        dbs_info->freq_hi_jiffies = jiffies_hi;
        return freq_hi;
}
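
/*
 * Worked example (illustrative numbers only): with powersave_bias = 100
 * (i.e. 10%, since the tunable is in units of 0.1%) and freq_req =
 * 2,000,000 kHz, freq_reduc = 200,000 and freq_avg = 1,800,000 kHz. If the
 * table brackets that with freq_lo = 1,600,000 and freq_hi = 2,000,000 and
 * the sampling period is 10 jiffies, jiffies_hi = (200,000 * 10 + 200,000)
 * / 400,000 = 5 with integer division, so the CPU spends 5 jiffies at
 * freq_hi and 5 at freq_lo, averaging close to freq_avg.
 */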

static void ondemand_powersave_bias_init_cpu(int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
        dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
        int i;
        for_each_online_cpu(i) {
                ondemand_powersave_bias_init_cpu(i);
        }
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct kobject *kobj, struct attribute *attr, char *buf)               \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(io_is_busy, io_is_busy);
show_one(up_threshold, up_threshold);
show_one(sampling_down_factor, sampling_down_factor);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);
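
/*
 * For illustration, show_one(sampling_rate, sampling_rate) above expands
 * to roughly:
 *
 *   static ssize_t show_sampling_rate(struct kobject *kobj,
 *                                     struct attribute *attr, char *buf)
 *   {
 *           return sprintf(buf, "%u\n", dbs_tuners_ins.sampling_rate);
 *   }
 *
 * i.e. one read accessor per tunable, later wired up through the
 * define_one_global_rw()/define_one_global_ro() helpers.
 */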

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;
        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        return count;
}

static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
                                const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;
        dbs_tuners_ins.io_is_busy = !!input;
        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }
        dbs_tuners_ins.up_threshold = input;
        return count;
}

static ssize_t store_sampling_down_factor(struct kobject *a,
                        struct attribute *b, const char *buf, size_t count)
{
        unsigned int input, j;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
                return -EINVAL;
        dbs_tuners_ins.sampling_down_factor = input;

        /* Reset down sampling multiplier in case it was active */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->rate_mult = 1;
        }
        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice =
                                        kcpustat_cpu(j).cpustat[CPUTIME_NICE];
        }
        return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 1000)
                input = 1000;

        dbs_tuners_ins.powersave_bias = input;
        ondemand_powersave_bias_init();
        return count;
}
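
/*
 * powersave_bias is in units of 0.1%: writes are clamped to [0, 1000], so
 * e.g. a value of 100 biases chosen targets 10% below the requested
 * frequency (see powersave_bias_target() above).
 */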

define_one_global_rw(sampling_rate);
define_one_global_rw(io_is_busy);
define_one_global_rw(up_threshold);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &sampling_down_factor.attr,
        &ignore_nice_load.attr,
        &powersave_bias.attr,
        &io_is_busy.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};
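
/*
 * The group is created against cpufreq_global_kobject (see
 * cpufreq_governor_dbs() below), so with sysfs mounted in the usual place
 * (an assumption about the running system) the tunables appear as e.g.
 * /sys/devices/system/cpu/cpufreq/ondemand/up_threshold, and
 *   echo 95 > .../ondemand/up_threshold
 * raises the load level at which the governor jumps to the maximum
 * frequency.
 */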

/************************** sysfs end ************************/

static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
{
        if (dbs_tuners_ins.powersave_bias)
                freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
        else if (p->cur == p->max)
                return;

        __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
                        CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
}

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int max_load_freq;

        struct cpufreq_policy *policy;
        unsigned int j;

        this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate we check whether current idle time is less
         * than 20% (default). If it is, we try to increase the frequency.
         * Every sampling_rate we also look for the lowest frequency which
         * can sustain the load while keeping idle time over 30%. If such
         * a frequency exists, we try to decrease to this frequency.
         *
         * Any frequency increase takes it to the maximum frequency.
         * Frequency reduction happens at minimum steps of
         * 5% (default) of current frequency.
         */

        /* Get Absolute Load - in terms of freq */
        max_load_freq = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
                unsigned int idle_time, wall_time, iowait_time;
                unsigned int load, load_freq;
                int freq_avg;

                j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
                cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

                wall_time = (unsigned int)
                        (cur_wall_time - j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int)
                        (cur_idle_time - j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                iowait_time = (unsigned int)
                        (cur_iowait_time - j_dbs_info->prev_cpu_iowait);
                j_dbs_info->prev_cpu_iowait = cur_iowait_time;

                if (dbs_tuners_ins.ignore_nice) {
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                         j_dbs_info->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies for 32 bit sys
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice =
                                        kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                /*
                 * For the purpose of ondemand, waiting for disk IO is an
                 * indication that you're performance critical, and not that
                 * the system is actually idle. So subtract the iowait time
                 * from the cpu idle time.
                 */

                if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
                        idle_time -= iowait_time;

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                freq_avg = __cpufreq_driver_getavg(policy, j);
                if (freq_avg <= 0)
                        freq_avg = policy->cur;

                load_freq = load * freq_avg;
                if (load_freq > max_load_freq)
                        max_load_freq = load_freq;
        }

        /* Check for frequency increase */
        if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                /* If switching to max speed, apply sampling_down_factor */
                if (policy->cur < policy->max)
                        this_dbs_info->rate_mult =
                                dbs_tuners_ins.sampling_down_factor;
                dbs_freq_increase(policy, policy->max);
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we aim 10 points (down_differential) under the threshold.
         */
        if (max_load_freq <
            (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
             policy->cur) {
                unsigned int freq_next;
                freq_next = max_load_freq /
                                (dbs_tuners_ins.up_threshold -
                                 dbs_tuners_ins.down_differential);

                /* No longer fully busy, reset rate_mult */
                this_dbs_info->rate_mult = 1;

                if (freq_next < policy->min)
                        freq_next = policy->min;

                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                } else {
                        int freq = powersave_bias_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
        }
}
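
/*
 * Worked example (illustrative): with up_threshold = 80,
 * down_differential = 10 and policy->cur = 1,000,000 kHz, a measured load
 * of 85% at freq_avg = 1,000,000 gives max_load_freq = 85,000,000 >
 * 80,000,000, so the governor jumps to policy->max. A 50% load instead
 * passes the decrease test (50,000,000 < 70,000,000) and freq_next =
 * 50,000,000 / 70 = 714,285 kHz, rounded to a table frequency with
 * CPUFREQ_RELATION_L.
 */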

static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;
        int sample_type = dbs_info->sample_type;

        int delay;

        mutex_lock(&dbs_info->timer_mutex);

        /* Common NORMAL_SAMPLE setup */
        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        if (!dbs_tuners_ins.powersave_bias ||
            sample_type == DBS_NORMAL_SAMPLE) {
                dbs_check_cpu(dbs_info);
                if (dbs_info->freq_lo) {
                        /* Setup timer for SUB_SAMPLE */
                        dbs_info->sample_type = DBS_SUB_SAMPLE;
                        delay = dbs_info->freq_hi_jiffies;
                } else {
                        /* We want all CPUs to do sampling nearly on
                         * same jiffy
                         */
                        delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
                                * dbs_info->rate_mult);

                        if (num_online_cpus() > 1)
                                delay -= jiffies % delay;
                }
        } else {
                __cpufreq_driver_target(dbs_info->cur_policy,
                        dbs_info->freq_lo, CPUFREQ_RELATION_H);
                delay = dbs_info->freq_lo_jiffies;
        }
        schedule_delayed_work_on(cpu, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}
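
/*
 * With powersave_bias enabled and freq_lo set, each sampling period is
 * split in two: a NORMAL_SAMPLE pass runs at freq_hi for freq_hi_jiffies,
 * then the SUB_SAMPLE pass above drops to freq_lo for freq_lo_jiffies, so
 * the time-averaged frequency approximates the biased target computed in
 * powersave_bias_target().
 */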

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        if (num_online_cpus() > 1)
                delay -= jiffies % delay;

        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
        schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}
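
/*
 * The "delay -= jiffies % delay" trick aligns expirations to a multiple of
 * the sampling interval. Illustration: with delay = 10 jiffies and
 * jiffies % 10 == 3, the first expiry is shortened to 7 jiffies, so every
 * CPU's timer subsequently fires near the same jiffy boundary and samples
 * can batch together.
 */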

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        cancel_delayed_work_sync(&dbs_info->work);
}

/*
 * Not all CPUs want IO time to be accounted as busy; this depends on how
 * efficient idling at a higher frequency/voltage is.
 * Pavel Machek says this is not so for various generations of AMD and old
 * Intel systems.
 * Mike Chan (android.com) claims this is also not true for ARM.
 * Because of this, whitelist specific known (series) of CPUs by default, and
 * leave all others up to the user.
 */
static int should_io_be_busy(void)
{
#if defined(CONFIG_X86)
        /*
         * For Intel, Core 2 (model 15) and later have an efficient idle.
         */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model >= 15)
                return 1;
#endif
        return 0;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                dbs_enable++;
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice)
                                j_dbs_info->prev_cpu_nice =
                                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                }
                this_dbs_info->cpu = cpu;
                this_dbs_info->rate_mult = 1;
                ondemand_powersave_bias_init_cpu(cpu);
                /*
                 * Perform the one-time setup (sysfs group and default
                 * sampling rate) when this governor is used for the
                 * first time.
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);
                        dbs_tuners_ins.io_is_busy = should_io_be_busy();
                }
                mutex_unlock(&dbs_mutex);

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_timer_init(this_dbs_info);
                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                mutex_destroy(&this_dbs_info->timer_mutex);
                dbs_enable--;
                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&this_dbs_info->timer_mutex);
                break;
        }
        return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
        u64 idle_time;
        int cpu = get_cpu();

        idle_time = get_cpu_idle_time_us(cpu, NULL);
        put_cpu();
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
                dbs_tuners_ins.down_differential =
                                        MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In the nohz/micro accounting case we set the minimum
                 * sampling rate to a fixed, HZ-independent (and very low)
                 * value. The deferrable timer may skip some samples while
                 * the CPU is idle/sleeping, as intended.
                 */
                min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                /* For correct statistics, we need 10 ticks for each measure */
                min_sampling_rate =
                        MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
        }

        return cpufreq_register_governor(&cpufreq_gov_ondemand);
}
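
/*
 * Worked example for the fallback path: at HZ=1000 ten ticks are
 * 10,000 us, so min_sampling_rate = 2 * 10,000 = 20,000 us (20 ms); at
 * HZ=100 the same formula yields 200,000 us. With NO_HZ micro accounting
 * the floor is instead the fixed MICRO_FREQUENCY_MIN_SAMPLE_RATE of
 * 10,000 us.
 */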

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_ondemand);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
        "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);