linux/kernel/sched/cpufreq_schedutil.c
<<
>>
Prefs
   1/*
   2 * CPUFreq governor based on scheduler-provided CPU utilization data.
   3 *
   4 * Copyright (C) 2016, Intel Corporation
   5 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11
  12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13
  14#include <linux/cpufreq.h>
  15#include <linux/slab.h>
  16#include <trace/events/power.h>
  17
  18#include "sched.h"
  19
  20struct sugov_tunables {
  21        struct gov_attr_set attr_set;
  22        unsigned int rate_limit_us;
  23};
  24
  25struct sugov_policy {
  26        struct cpufreq_policy *policy;
  27
  28        struct sugov_tunables *tunables;
  29        struct list_head tunables_hook;
  30
  31        raw_spinlock_t update_lock;  /* For shared policies */
  32        u64 last_freq_update_time;
  33        s64 freq_update_delay_ns;
  34        unsigned int next_freq;
  35
  36        /* The next fields are only needed if fast switch cannot be used. */
  37        struct irq_work irq_work;
  38        struct work_struct work;
  39        struct mutex work_lock;
  40        bool work_in_progress;
  41
  42        bool need_freq_update;
  43};
  44
  45struct sugov_cpu {
  46        struct update_util_data update_util;
  47        struct sugov_policy *sg_policy;
  48
  49        unsigned int cached_raw_freq;
  50        unsigned long iowait_boost;
  51        unsigned long iowait_boost_max;
  52        u64 last_update;
  53
  54        /* The fields below are only needed when sharing a policy. */
  55        unsigned long util;
  56        unsigned long max;
  57        unsigned int flags;
  58};
  59
  60static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
  61
  62/************************ Governor internals ***********************/
  63
  64static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
  65{
  66        s64 delta_ns;
  67
  68        if (sg_policy->work_in_progress)
  69                return false;
  70
  71        if (unlikely(sg_policy->need_freq_update)) {
  72                sg_policy->need_freq_update = false;
  73                /*
  74                 * This happens when limits change, so forget the previous
  75                 * next_freq value and force an update.
  76                 */
  77                sg_policy->next_freq = UINT_MAX;
  78                return true;
  79        }
  80
  81        delta_ns = time - sg_policy->last_freq_update_time;
  82        return delta_ns >= sg_policy->freq_update_delay_ns;
  83}
  84
  85static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
  86                                unsigned int next_freq)
  87{
  88        struct cpufreq_policy *policy = sg_policy->policy;
  89
  90        sg_policy->last_freq_update_time = time;
  91
  92        if (policy->fast_switch_enabled) {
  93                if (sg_policy->next_freq == next_freq) {
  94                        trace_cpu_frequency(policy->cur, smp_processor_id());
  95                        return;
  96                }
  97                sg_policy->next_freq = next_freq;
  98                next_freq = cpufreq_driver_fast_switch(policy, next_freq);
  99                if (next_freq == CPUFREQ_ENTRY_INVALID)
 100                        return;
 101
 102                policy->cur = next_freq;
 103                trace_cpu_frequency(next_freq, smp_processor_id());
 104        } else if (sg_policy->next_freq != next_freq) {
 105                sg_policy->next_freq = next_freq;
 106                sg_policy->work_in_progress = true;
 107                irq_work_queue(&sg_policy->irq_work);
 108        }
 109}
 110
 111/**
 112 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 113 * @sg_cpu: schedutil cpu object to compute the new frequency for.
 114 * @util: Current CPU utilization.
 115 * @max: CPU capacity.
 116 *
 117 * If the utilization is frequency-invariant, choose the new frequency to be
 118 * proportional to it, that is
 119 *
 120 * next_freq = C * max_freq * util / max
 121 *
 122 * Otherwise, approximate the would-be frequency-invariant utilization by
 123 * util_raw * (curr_freq / max_freq) which leads to
 124 *
 125 * next_freq = C * curr_freq * util_raw / max
 126 *
 127 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 128 *
 129 * The lowest driver-supported frequency which is equal or greater than the raw
 130 * next_freq (as calculated above) is returned, subject to policy min/max and
 131 * cpufreq driver limitations.
 132 */
 133static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
 134                                  unsigned long max)
 135{
 136        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 137        struct cpufreq_policy *policy = sg_policy->policy;
 138        unsigned int freq = arch_scale_freq_invariant() ?
 139                                policy->cpuinfo.max_freq : policy->cur;
 140
 141        freq = (freq + (freq >> 2)) * util / max;
 142
 143        if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
 144                return sg_policy->next_freq;
 145        sg_cpu->cached_raw_freq = freq;
 146        return cpufreq_driver_resolve_freq(policy, freq);
 147}
 148
 149static void sugov_get_util(unsigned long *util, unsigned long *max)
 150{
 151        struct rq *rq = this_rq();
 152        unsigned long cfs_max;
 153
 154        cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
 155
 156        *util = min(rq->cfs.avg.util_avg, cfs_max);
 157        *max = cfs_max;
 158}
 159
 160static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 161                                   unsigned int flags)
 162{
 163        if (flags & SCHED_CPUFREQ_IOWAIT) {
 164                sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
 165        } else if (sg_cpu->iowait_boost) {
 166                s64 delta_ns = time - sg_cpu->last_update;
 167
 168                /* Clear iowait_boost if the CPU apprears to have been idle. */
 169                if (delta_ns > TICK_NSEC)
 170                        sg_cpu->iowait_boost = 0;
 171        }
 172}
 173
 174static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 175                               unsigned long *max)
 176{
 177        unsigned long boost_util = sg_cpu->iowait_boost;
 178        unsigned long boost_max = sg_cpu->iowait_boost_max;
 179
 180        if (!boost_util)
 181                return;
 182
 183        if (*util * boost_max < *max * boost_util) {
 184                *util = boost_util;
 185                *max = boost_max;
 186        }
 187        sg_cpu->iowait_boost >>= 1;
 188}
 189
 190static void sugov_update_single(struct update_util_data *hook, u64 time,
 191                                unsigned int flags)
 192{
 193        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 194        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 195        struct cpufreq_policy *policy = sg_policy->policy;
 196        unsigned long util, max;
 197        unsigned int next_f;
 198
 199        sugov_set_iowait_boost(sg_cpu, time, flags);
 200        sg_cpu->last_update = time;
 201
 202        if (!sugov_should_update_freq(sg_policy, time))
 203                return;
 204
 205        if (flags & SCHED_CPUFREQ_RT_DL) {
 206                next_f = policy->cpuinfo.max_freq;
 207        } else {
 208                sugov_get_util(&util, &max);
 209                sugov_iowait_boost(sg_cpu, &util, &max);
 210                next_f = get_next_freq(sg_cpu, util, max);
 211        }
 212        sugov_update_commit(sg_policy, time, next_f);
 213}
 214
 215static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 216                                           unsigned long util, unsigned long max,
 217                                           unsigned int flags)
 218{
 219        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 220        struct cpufreq_policy *policy = sg_policy->policy;
 221        unsigned int max_f = policy->cpuinfo.max_freq;
 222        u64 last_freq_update_time = sg_policy->last_freq_update_time;
 223        unsigned int j;
 224
 225        if (flags & SCHED_CPUFREQ_RT_DL)
 226                return max_f;
 227
 228        sugov_iowait_boost(sg_cpu, &util, &max);
 229
 230        for_each_cpu(j, policy->cpus) {
 231                struct sugov_cpu *j_sg_cpu;
 232                unsigned long j_util, j_max;
 233                s64 delta_ns;
 234
 235                if (j == smp_processor_id())
 236                        continue;
 237
 238                j_sg_cpu = &per_cpu(sugov_cpu, j);
 239                /*
 240                 * If the CPU utilization was last updated before the previous
 241                 * frequency update and the time elapsed between the last update
 242                 * of the CPU utilization and the last frequency update is long
 243                 * enough, don't take the CPU into account as it probably is
 244                 * idle now (and clear iowait_boost for it).
 245                 */
 246                delta_ns = last_freq_update_time - j_sg_cpu->last_update;
 247                if (delta_ns > TICK_NSEC) {
 248                        j_sg_cpu->iowait_boost = 0;
 249                        continue;
 250                }
 251                if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
 252                        return max_f;
 253
 254                j_util = j_sg_cpu->util;
 255                j_max = j_sg_cpu->max;
 256                if (j_util * max > j_max * util) {
 257                        util = j_util;
 258                        max = j_max;
 259                }
 260
 261                sugov_iowait_boost(j_sg_cpu, &util, &max);
 262        }
 263
 264        return get_next_freq(sg_cpu, util, max);
 265}
 266
 267static void sugov_update_shared(struct update_util_data *hook, u64 time,
 268                                unsigned int flags)
 269{
 270        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 271        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 272        unsigned long util, max;
 273        unsigned int next_f;
 274
 275        sugov_get_util(&util, &max);
 276
 277        raw_spin_lock(&sg_policy->update_lock);
 278
 279        sg_cpu->util = util;
 280        sg_cpu->max = max;
 281        sg_cpu->flags = flags;
 282
 283        sugov_set_iowait_boost(sg_cpu, time, flags);
 284        sg_cpu->last_update = time;
 285
 286        if (sugov_should_update_freq(sg_policy, time)) {
 287                next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
 288                sugov_update_commit(sg_policy, time, next_f);
 289        }
 290
 291        raw_spin_unlock(&sg_policy->update_lock);
 292}
 293
 294static void sugov_work(struct work_struct *work)
 295{
 296        struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
 297
 298        mutex_lock(&sg_policy->work_lock);
 299        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
 300                                CPUFREQ_RELATION_L);
 301        mutex_unlock(&sg_policy->work_lock);
 302
 303        sg_policy->work_in_progress = false;
 304}
 305
 306static void sugov_irq_work(struct irq_work *irq_work)
 307{
 308        struct sugov_policy *sg_policy;
 309
 310        sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 311        schedule_work_on(smp_processor_id(), &sg_policy->work);
 312}
 313
 314/************************** sysfs interface ************************/
 315
 316static struct sugov_tunables *global_tunables;
 317static DEFINE_MUTEX(global_tunables_lock);
 318
 319static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
 320{
 321        return container_of(attr_set, struct sugov_tunables, attr_set);
 322}
 323
 324static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 325{
 326        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 327
 328        return sprintf(buf, "%u\n", tunables->rate_limit_us);
 329}
 330
 331static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
 332                                   size_t count)
 333{
 334        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 335        struct sugov_policy *sg_policy;
 336        unsigned int rate_limit_us;
 337
 338        if (kstrtouint(buf, 10, &rate_limit_us))
 339                return -EINVAL;
 340
 341        tunables->rate_limit_us = rate_limit_us;
 342
 343        list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
 344                sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
 345
 346        return count;
 347}
 348
 349static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
 350
 351static struct attribute *sugov_attributes[] = {
 352        &rate_limit_us.attr,
 353        NULL
 354};
 355
 356static struct kobj_type sugov_tunables_ktype = {
 357        .default_attrs = sugov_attributes,
 358        .sysfs_ops = &governor_sysfs_ops,
 359};
 360
 361/********************** cpufreq governor interface *********************/
 362
 363static struct cpufreq_governor schedutil_gov;
 364
 365static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
 366{
 367        struct sugov_policy *sg_policy;
 368
 369        sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
 370        if (!sg_policy)
 371                return NULL;
 372
 373        sg_policy->policy = policy;
 374        init_irq_work(&sg_policy->irq_work, sugov_irq_work);
 375        INIT_WORK(&sg_policy->work, sugov_work);
 376        mutex_init(&sg_policy->work_lock);
 377        raw_spin_lock_init(&sg_policy->update_lock);
 378        return sg_policy;
 379}
 380
 381static void sugov_policy_free(struct sugov_policy *sg_policy)
 382{
 383        mutex_destroy(&sg_policy->work_lock);
 384        kfree(sg_policy);
 385}
 386
 387static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
 388{
 389        struct sugov_tunables *tunables;
 390
 391        tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
 392        if (tunables) {
 393                gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
 394                if (!have_governor_per_policy())
 395                        global_tunables = tunables;
 396        }
 397        return tunables;
 398}
 399
 400static void sugov_tunables_free(struct sugov_tunables *tunables)
 401{
 402        if (!have_governor_per_policy())
 403                global_tunables = NULL;
 404
 405        kfree(tunables);
 406}
 407
 408static int sugov_init(struct cpufreq_policy *policy)
 409{
 410        struct sugov_policy *sg_policy;
 411        struct sugov_tunables *tunables;
 412        unsigned int lat;
 413        int ret = 0;
 414
 415        /* State should be equivalent to EXIT */
 416        if (policy->governor_data)
 417                return -EBUSY;
 418
 419        sg_policy = sugov_policy_alloc(policy);
 420        if (!sg_policy)
 421                return -ENOMEM;
 422
 423        mutex_lock(&global_tunables_lock);
 424
 425        if (global_tunables) {
 426                if (WARN_ON(have_governor_per_policy())) {
 427                        ret = -EINVAL;
 428                        goto free_sg_policy;
 429                }
 430                policy->governor_data = sg_policy;
 431                sg_policy->tunables = global_tunables;
 432
 433                gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
 434                goto out;
 435        }
 436
 437        tunables = sugov_tunables_alloc(sg_policy);
 438        if (!tunables) {
 439                ret = -ENOMEM;
 440                goto free_sg_policy;
 441        }
 442
 443        tunables->rate_limit_us = LATENCY_MULTIPLIER;
 444        lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
 445        if (lat)
 446                tunables->rate_limit_us *= lat;
 447
 448        policy->governor_data = sg_policy;
 449        sg_policy->tunables = tunables;
 450
 451        ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
 452                                   get_governor_parent_kobj(policy), "%s",
 453                                   schedutil_gov.name);
 454        if (ret)
 455                goto fail;
 456
 457 out:
 458        mutex_unlock(&global_tunables_lock);
 459
 460        cpufreq_enable_fast_switch(policy);
 461        return 0;
 462
 463 fail:
 464        policy->governor_data = NULL;
 465        sugov_tunables_free(tunables);
 466
 467 free_sg_policy:
 468        mutex_unlock(&global_tunables_lock);
 469
 470        sugov_policy_free(sg_policy);
 471        pr_err("initialization failed (error %d)\n", ret);
 472        return ret;
 473}
 474
 475static void sugov_exit(struct cpufreq_policy *policy)
 476{
 477        struct sugov_policy *sg_policy = policy->governor_data;
 478        struct sugov_tunables *tunables = sg_policy->tunables;
 479        unsigned int count;
 480
 481        cpufreq_disable_fast_switch(policy);
 482
 483        mutex_lock(&global_tunables_lock);
 484
 485        count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
 486        policy->governor_data = NULL;
 487        if (!count)
 488                sugov_tunables_free(tunables);
 489
 490        mutex_unlock(&global_tunables_lock);
 491
 492        sugov_policy_free(sg_policy);
 493}
 494
 495static int sugov_start(struct cpufreq_policy *policy)
 496{
 497        struct sugov_policy *sg_policy = policy->governor_data;
 498        unsigned int cpu;
 499
 500        sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
 501        sg_policy->last_freq_update_time = 0;
 502        sg_policy->next_freq = UINT_MAX;
 503        sg_policy->work_in_progress = false;
 504        sg_policy->need_freq_update = false;
 505
 506        for_each_cpu(cpu, policy->cpus) {
 507                struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 508
 509                sg_cpu->sg_policy = sg_policy;
 510                if (policy_is_shared(policy)) {
 511                        sg_cpu->util = 0;
 512                        sg_cpu->max = 0;
 513                        sg_cpu->flags = SCHED_CPUFREQ_RT;
 514                        sg_cpu->last_update = 0;
 515                        sg_cpu->cached_raw_freq = 0;
 516                        sg_cpu->iowait_boost = 0;
 517                        sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 518                        cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 519                                                     sugov_update_shared);
 520                } else {
 521                        cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 522                                                     sugov_update_single);
 523                }
 524        }
 525        return 0;
 526}
 527
 528static void sugov_stop(struct cpufreq_policy *policy)
 529{
 530        struct sugov_policy *sg_policy = policy->governor_data;
 531        unsigned int cpu;
 532
 533        for_each_cpu(cpu, policy->cpus)
 534                cpufreq_remove_update_util_hook(cpu);
 535
 536        synchronize_sched();
 537
 538        irq_work_sync(&sg_policy->irq_work);
 539        cancel_work_sync(&sg_policy->work);
 540}
 541
 542static void sugov_limits(struct cpufreq_policy *policy)
 543{
 544        struct sugov_policy *sg_policy = policy->governor_data;
 545
 546        if (!policy->fast_switch_enabled) {
 547                mutex_lock(&sg_policy->work_lock);
 548                cpufreq_policy_apply_limits(policy);
 549                mutex_unlock(&sg_policy->work_lock);
 550        }
 551
 552        sg_policy->need_freq_update = true;
 553}
 554
 555static struct cpufreq_governor schedutil_gov = {
 556        .name = "schedutil",
 557        .owner = THIS_MODULE,
 558        .init = sugov_init,
 559        .exit = sugov_exit,
 560        .start = sugov_start,
 561        .stop = sugov_stop,
 562        .limits = sugov_limits,
 563};
 564
 565#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 566struct cpufreq_governor *cpufreq_default_governor(void)
 567{
 568        return &schedutil_gov;
 569}
 570#endif
 571
 572static int __init sugov_register(void)
 573{
 574        return cpufreq_register_governor(&schedutil_gov);
 575}
 576fs_initcall(sugov_register);
 577