/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL         (10)
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (3)
#define MICRO_FREQUENCY_UP_THRESHOLD            (95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE         (10000)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

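/*
 * The thresholds above are percentages of CPU load; the MICRO_* variants
 * are used when NO_HZ micro idle-time accounting is available (see
 * cpufreq_gov_dbs_init below). Sample rates are in microseconds.
 */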
/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the
 * transition latency of the processor. The governor will work on any
 * processor with transition latency <= 10 ms, using an appropriate
 * sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
        .name                   = "ondemand",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        struct cpufreq_frequency_table *freq_table;
        unsigned int freq_lo;
        unsigned int freq_lo_jiffies;
        unsigned int freq_hi_jiffies;
        int cpu;
        unsigned int sample_type:1;
        /*
         * percpu mutex that serializes governor limit change with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct  *kondemand_wq;

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int down_differential;
        unsigned int ignore_nice;
        unsigned int powersave_bias;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
        .ignore_nice = 0,
        .powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
                                                        cputime64_t *wall)
{
        cputime64_t idle_time;
        cputime64_t cur_wall_time;
        cputime64_t busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
        busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
                        kstat_cpu(cpu).cpustat.system);

        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

        idle_time = cputime64_sub(cur_wall_time, busy_time);
        if (wall)
                *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

        return (cputime64_t)jiffies_to_usecs(idle_time);
}

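/*
 * Prefer the NO_HZ micro idle-time accounting; when get_cpu_idle_time_us()
 * cannot provide a value it returns -1ULL, and we fall back to the
 * jiffy-based estimate above.
 */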
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, wall);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);

        return idle_time;
}

/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
                                          unsigned int freq_next,
                                          unsigned int relation)
{
        unsigned int freq_req, freq_reduc, freq_avg;
        unsigned int freq_hi, freq_lo;
        unsigned int index = 0;
        unsigned int jiffies_total, jiffies_hi, jiffies_lo;
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
                                                   policy->cpu);

        if (!dbs_info->freq_table) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_next;
        }

        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
                        relation, &index);
        freq_req = dbs_info->freq_table[index].frequency;
        freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
        freq_avg = freq_req - freq_reduc;

        /* Find freq bounds for freq_avg in freq_table */
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_H, &index);
        freq_lo = dbs_info->freq_table[index].frequency;
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_L, &index);
        freq_hi = dbs_info->freq_table[index].frequency;

        /* Find out how long we have to be in hi and lo freqs */
        if (freq_hi == freq_lo) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_lo;
        }
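        /*
         * Split the sampling period so that the time-weighted average of
         * freq_hi and freq_lo comes out close to the requested freq_avg:
         * jiffies_hi / jiffies_total ~= (freq_avg - freq_lo) / (freq_hi - freq_lo),
         * with the division rounded to the nearest jiffy.
         */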
        jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
        jiffies_hi += ((freq_hi - freq_lo) / 2);
        jiffies_hi /= (freq_hi - freq_lo);
        jiffies_lo = jiffies_total - jiffies_hi;
        dbs_info->freq_lo = freq_lo;
        dbs_info->freq_lo_jiffies = jiffies_lo;
        dbs_info->freq_hi_jiffies = jiffies_hi;
        return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
        dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
        int i;
        for_each_online_cpu(i) {
                ondemand_powersave_bias_init_cpu(i);
        }
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
               "sysfs file is deprecated - used by: %s\n", current->comm);
        return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

#define define_one_ro(_name)            \
static struct global_attr _name =       \
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct kobject *kobj, struct attribute *attr, char *buf)               \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)                                      \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)                                         \
static ssize_t show_##file_name##_old                                   \
(struct cpufreq_policy *unused, char *buf)                              \
{                                                                       \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");      \
        return show_##file_name(NULL, NULL, buf);                       \
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

#define define_one_ro_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0444, show_##_name##_old, NULL)

define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
define_one_ro_old(sampling_rate_max_old, sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
        }
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 1000)
                input = 1000;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.powersave_bias = input;
        ondemand_powersave_bias_init();
        mutex_unlock(&dbs_mutex);

        return count;
}

#define define_one_rw(_name) \
static struct global_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
        &powersave_bias.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)                                        \
static ssize_t store_##file_name##_old                                  \
(struct cpufreq_policy *unused, const char *buf, size_t count)          \
{                                                                       \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");      \
        return store_##file_name(NULL, NULL, buf, count);               \
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

#define define_one_rw_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)

define_one_rw_old(sampling_rate_old, sampling_rate);
define_one_rw_old(up_threshold_old, up_threshold);
define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
define_one_rw_old(powersave_bias_old, powersave_bias);

static struct attribute *dbs_attributes_old[] = {
        &sampling_rate_max_old.attr,
        &sampling_rate_min_old.attr,
        &sampling_rate_old.attr,
        &up_threshold_old.attr,
        &ignore_nice_load_old.attr,
        &powersave_bias_old.attr,
        NULL
};

static struct attribute_group dbs_attr_group_old = {
        .attrs = dbs_attributes_old,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int max_load_freq;

        struct cpufreq_policy *policy;
        unsigned int j;

        this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate, we check, if current idle time is less
         * than 20% (default), then we try to increase frequency.
         * Every sampling_rate, we look for the lowest frequency which can
         * sustain the load while keeping idle time over 30%. If such a
         * frequency exists, we try to decrease to this frequency.
         *
         * Any frequency increase takes it to the maximum frequency.
         * Frequency reduction goes directly to the computed target
         * frequency rather than stepping down gradually.
         */

        /* Get Absolute Load - in terms of freq */
        max_load_freq = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load, load_freq;
                int freq_avg;

                j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

                wall_time = (unsigned int) cputime64_sub(cur_wall_time,
                                j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int) cputime64_sub(cur_idle_time,
                                j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                if (dbs_tuners_ins.ignore_nice) {
                        cputime64_t cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
                                         j_dbs_info->prev_cpu_nice);
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies for 32 bit sys
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

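                /*
                 * Scale the load by the average frequency seen over the
                 * last sample, so the comparison against policy->cur
                 * below works in absolute (frequency-weighted) terms.
                 */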
                freq_avg = __cpufreq_driver_getavg(policy, j);
                if (freq_avg <= 0)
                        freq_avg = policy->cur;

                load_freq = load * freq_avg;
                if (load_freq > max_load_freq)
                        max_load_freq = load_freq;
        }

        /* Check for frequency increase */
        if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                /* if we are already at full speed then break out early */
                if (!dbs_tuners_ins.powersave_bias) {
                        if (policy->cur == policy->max)
                                return;

                        __cpufreq_driver_target(policy, policy->max,
                                CPUFREQ_RELATION_H);
                } else {
                        int freq = powersave_bias_target(policy, policy->max,
                                        CPUFREQ_RELATION_H);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we aim 10 points (down_differential) under the threshold.
         */
        if (max_load_freq <
            (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
             policy->cur) {
                unsigned int freq_next;
                freq_next = max_load_freq /
                                (dbs_tuners_ins.up_threshold -
                                 dbs_tuners_ins.down_differential);

                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                } else {
                        int freq = powersave_bias_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
        }
}

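/*
 * Worker for the per-CPU sampling timer. With powersave_bias enabled, a
 * normal sample that picked freq_hi re-arms itself after freq_hi_jiffies
 * as a DBS_SUB_SAMPLE, which then drops the CPU to freq_lo until the
 * following normal sample.
 */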
static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;
        int sample_type = dbs_info->sample_type;

        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        delay -= jiffies % delay;
        mutex_lock(&dbs_info->timer_mutex);

        /* Common NORMAL_SAMPLE setup */
        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        if (!dbs_tuners_ins.powersave_bias ||
            sample_type == DBS_NORMAL_SAMPLE) {
                dbs_check_cpu(dbs_info);
                if (dbs_info->freq_lo) {
                        /* Setup timer for SUB_SAMPLE */
                        dbs_info->sample_type = DBS_SUB_SAMPLE;
                        delay = dbs_info->freq_hi_jiffies;
                }
        } else {
                __cpufreq_driver_target(dbs_info->cur_policy,
                        dbs_info->freq_lo, CPUFREQ_RELATION_H);
        }
        queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}

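/*
 * Kick off sampling for one CPU. The delayed work is deferrable, so an
 * idle CPU is not forced awake just to take a sample.
 */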
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        delay -= jiffies % delay;

        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
        queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
                delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        cancel_delayed_work_sync(&dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
                if (rc) {
                        mutex_unlock(&dbs_mutex);
                        return rc;
                }

                dbs_enable++;
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice) {
                                j_dbs_info->prev_cpu_nice =
                                                kstat_cpu(j).cpustat.nice;
                        }
                }
                this_dbs_info->cpu = cpu;
                ondemand_powersave_bias_init_cpu(cpu);
                /*
                 * Set up the global sysfs group and the sampling rate when
                 * this governor is used for the first time.
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);
                }
                mutex_unlock(&dbs_mutex);

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_timer_init(this_dbs_info);
                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
                mutex_destroy(&this_dbs_info->timer_mutex);
                dbs_enable--;
                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&this_dbs_info->timer_mutex);
                break;
        }
        return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
        int err;
        cputime64_t wall;
        u64 idle_time;
        int cpu = get_cpu();

        idle_time = get_cpu_idle_time_us(cpu, &wall);
        put_cpu();
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
                dbs_tuners_ins.down_differential =
                                        MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In the NO_HZ/micro accounting case we set the minimum
                 * sampling rate to a fixed, very low value that does not
                 * depend on HZ. The deferrable timer may skip samples
                 * while the CPU is idle, which is the intended behaviour.
                 */
                min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                /* For correct statistics, we need 10 ticks for each measure */
                min_sampling_rate =
                        MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
        }

        kondemand_wq = create_workqueue("kondemand");
        if (!kondemand_wq) {
                printk(KERN_ERR "Creation of kondemand failed\n");
                return -EFAULT;
        }
        err = cpufreq_register_governor(&cpufreq_gov_ondemand);
        if (err)
                destroy_workqueue(kondemand_wq);

        return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_ondemand);
        destroy_workqueue(kondemand_wq);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
        "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);