linux/kernel/power/energy_model.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Energy Model of devices
   4 *
   5 * Copyright (c) 2018-2020, Arm ltd.
   6 * Written by: Quentin Perret, Arm ltd.
   7 * Improvements provided by: Lukasz Luba, Arm ltd.
   8 */
   9
  10#define pr_fmt(fmt) "energy_model: " fmt
  11
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
  18
/*
 * Mutex serializing the registration and unregistration of performance
 * domains. Being a sleeping lock, it also lets the callbacks defined by
 * drivers sleep while a registration is in progress.
 */
static DEFINE_MUTEX(em_pd_mutex);
  24
  25static bool _is_cpu_device(struct device *dev)
  26{
  27        return (dev->bus == &cpu_subsys);
  28}
  29
  30#ifdef CONFIG_DEBUG_FS
/* Top-level "energy_model" debugfs directory; holds one directory per device */
static struct dentry *rootdir;
  32
  33static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
  34{
  35        struct dentry *d;
  36        char name[24];
  37
  38        snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
  39
  40        /* Create per-ps directory */
  41        d = debugfs_create_dir(name, pd);
  42        debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
  43        debugfs_create_ulong("power", 0444, d, &ps->power);
  44        debugfs_create_ulong("cost", 0444, d, &ps->cost);
  45}
  46
  47static int em_debug_cpus_show(struct seq_file *s, void *unused)
  48{
  49        seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
  50
  51        return 0;
  52}
  53DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
  54
  55static void em_debug_create_pd(struct device *dev)
  56{
  57        struct dentry *d;
  58        int i;
  59
  60        /* Create the directory of the performance domain */
  61        d = debugfs_create_dir(dev_name(dev), rootdir);
  62
  63        if (_is_cpu_device(dev))
  64                debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
  65                                    &em_debug_cpus_fops);
  66
  67        /* Create a sub-directory for each performance state */
  68        for (i = 0; i < dev->em_pd->nr_perf_states; i++)
  69                em_debug_create_ps(&dev->em_pd->table[i], d);
  70
  71}
  72
  73static void em_debug_remove_pd(struct device *dev)
  74{
  75        struct dentry *debug_dir;
  76
  77        debug_dir = debugfs_lookup(dev_name(dev), rootdir);
  78        debugfs_remove_recursive(debug_dir);
  79}
  80
  81static int __init em_debug_init(void)
  82{
  83        /* Create /sys/kernel/debug/energy_model directory */
  84        rootdir = debugfs_create_dir("energy_model", NULL);
  85
  86        return 0;
  87}
  88core_initcall(em_debug_init);
  89#else /* CONFIG_DEBUG_FS */
  90static void em_debug_create_pd(struct device *dev) {}
  91static void em_debug_remove_pd(struct device *dev) {}
  92#endif
  93
  94static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
  95                                int nr_states, struct em_data_callback *cb)
  96{
  97        unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
  98        unsigned long power, freq, prev_freq = 0;
  99        struct em_perf_state *table;
 100        int i, ret;
 101        u64 fmax;
 102
 103        table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
 104        if (!table)
 105                return -ENOMEM;
 106
 107        /* Build the list of performance states for this performance domain */
 108        for (i = 0, freq = 0; i < nr_states; i++, freq++) {
 109                /*
 110                 * active_power() is a driver callback which ceils 'freq' to
 111                 * lowest performance state of 'dev' above 'freq' and updates
 112                 * 'power' and 'freq' accordingly.
 113                 */
 114                ret = cb->active_power(&power, &freq, dev);
 115                if (ret) {
 116                        dev_err(dev, "EM: invalid perf. state: %d\n",
 117                                ret);
 118                        goto free_ps_table;
 119                }
 120
 121                /*
 122                 * We expect the driver callback to increase the frequency for
 123                 * higher performance states.
 124                 */
 125                if (freq <= prev_freq) {
 126                        dev_err(dev, "EM: non-increasing freq: %lu\n",
 127                                freq);
 128                        goto free_ps_table;
 129                }
 130
 131                /*
 132                 * The power returned by active_state() is expected to be
 133                 * positive, in milli-watts and to fit into 16 bits.
 134                 */
 135                if (!power || power > EM_MAX_POWER) {
 136                        dev_err(dev, "EM: invalid power: %lu\n",
 137                                power);
 138                        goto free_ps_table;
 139                }
 140
 141                table[i].power = power;
 142                table[i].frequency = prev_freq = freq;
 143
 144                /*
 145                 * The hertz/watts efficiency ratio should decrease as the
 146                 * frequency grows on sane platforms. But this isn't always
 147                 * true in practice so warn the user if a higher OPP is more
 148                 * power efficient than a lower one.
 149                 */
 150                opp_eff = freq / power;
 151                if (opp_eff >= prev_opp_eff)
 152                        dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n",
 153                                        i, i - 1);
 154                prev_opp_eff = opp_eff;
 155        }
 156
 157        /* Compute the cost of each performance state. */
 158        fmax = (u64) table[nr_states - 1].frequency;
 159        for (i = 0; i < nr_states; i++) {
 160                table[i].cost = div64_u64(fmax * table[i].power,
 161                                          table[i].frequency);
 162        }
 163
 164        pd->table = table;
 165        pd->nr_perf_states = nr_states;
 166
 167        return 0;
 168
 169free_ps_table:
 170        kfree(table);
 171        return -EINVAL;
 172}
 173
 174static int em_create_pd(struct device *dev, int nr_states,
 175                        struct em_data_callback *cb, cpumask_t *cpus)
 176{
 177        struct em_perf_domain *pd;
 178        struct device *cpu_dev;
 179        int cpu, ret;
 180
 181        if (_is_cpu_device(dev)) {
 182                pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
 183                if (!pd)
 184                        return -ENOMEM;
 185
 186                cpumask_copy(em_span_cpus(pd), cpus);
 187        } else {
 188                pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 189                if (!pd)
 190                        return -ENOMEM;
 191        }
 192
 193        ret = em_create_perf_table(dev, pd, nr_states, cb);
 194        if (ret) {
 195                kfree(pd);
 196                return ret;
 197        }
 198
 199        if (_is_cpu_device(dev))
 200                for_each_cpu(cpu, cpus) {
 201                        cpu_dev = get_cpu_device(cpu);
 202                        cpu_dev->em_pd = pd;
 203                }
 204
 205        dev->em_pd = pd;
 206
 207        return 0;
 208}
 209
 210/**
 211 * em_pd_get() - Return the performance domain for a device
 212 * @dev : Device to find the performance domain for
 213 *
 214 * Returns the performance domain to which @dev belongs, or NULL if it doesn't
 215 * exist.
 216 */
 217struct em_perf_domain *em_pd_get(struct device *dev)
 218{
 219        if (IS_ERR_OR_NULL(dev))
 220                return NULL;
 221
 222        return dev->em_pd;
 223}
 224EXPORT_SYMBOL_GPL(em_pd_get);
 225
 226/**
 227 * em_cpu_get() - Return the performance domain for a CPU
 228 * @cpu : CPU to find the performance domain for
 229 *
 230 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
 231 * exist.
 232 */
 233struct em_perf_domain *em_cpu_get(int cpu)
 234{
 235        struct device *cpu_dev;
 236
 237        cpu_dev = get_cpu_device(cpu);
 238        if (!cpu_dev)
 239                return NULL;
 240
 241        return em_pd_get(cpu_dev);
 242}
 243EXPORT_SYMBOL_GPL(em_cpu_get);
 244
 245/**
 246 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
 247 * @dev         : Device for which the EM is to register
 248 * @nr_states   : Number of performance states to register
 249 * @cb          : Callback functions providing the data of the Energy Model
 250 * @cpus        : Pointer to cpumask_t, which in case of a CPU device is
 251 *              obligatory. It can be taken from i.e. 'policy->cpus'. For other
 252 *              type of devices this should be set to NULL.
 253 *
 254 * Create Energy Model tables for a performance domain using the callbacks
 255 * defined in cb.
 256 *
 257 * If multiple clients register the same performance domain, all but the first
 258 * registration will be ignored.
 259 *
 260 * Return 0 on success
 261 */
 262int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 263                                struct em_data_callback *cb, cpumask_t *cpus)
 264{
 265        unsigned long cap, prev_cap = 0;
 266        int cpu, ret;
 267
 268        if (!dev || !nr_states || !cb)
 269                return -EINVAL;
 270
 271        /*
 272         * Use a mutex to serialize the registration of performance domains and
 273         * let the driver-defined callback functions sleep.
 274         */
 275        mutex_lock(&em_pd_mutex);
 276
 277        if (dev->em_pd) {
 278                ret = -EEXIST;
 279                goto unlock;
 280        }
 281
 282        if (_is_cpu_device(dev)) {
 283                if (!cpus) {
 284                        dev_err(dev, "EM: invalid CPU mask\n");
 285                        ret = -EINVAL;
 286                        goto unlock;
 287                }
 288
 289                for_each_cpu(cpu, cpus) {
 290                        if (em_cpu_get(cpu)) {
 291                                dev_err(dev, "EM: exists for CPU%d\n", cpu);
 292                                ret = -EEXIST;
 293                                goto unlock;
 294                        }
 295                        /*
 296                         * All CPUs of a domain must have the same
 297                         * micro-architecture since they all share the same
 298                         * table.
 299                         */
 300                        cap = arch_scale_cpu_capacity(cpu);
 301                        if (prev_cap && prev_cap != cap) {
 302                                dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
 303                                        cpumask_pr_args(cpus));
 304
 305                                ret = -EINVAL;
 306                                goto unlock;
 307                        }
 308                        prev_cap = cap;
 309                }
 310        }
 311
 312        ret = em_create_pd(dev, nr_states, cb, cpus);
 313        if (ret)
 314                goto unlock;
 315
 316        em_debug_create_pd(dev);
 317        dev_info(dev, "EM: created perf domain\n");
 318
 319unlock:
 320        mutex_unlock(&em_pd_mutex);
 321        return ret;
 322}
 323EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
 324
 325/**
 326 * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
 327 * @dev         : Device for which the EM is registered
 328 *
 329 * Unregister the EM for the specified @dev (but not a CPU device).
 330 */
 331void em_dev_unregister_perf_domain(struct device *dev)
 332{
 333        if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
 334                return;
 335
 336        if (_is_cpu_device(dev))
 337                return;
 338
 339        /*
 340         * The mutex separates all register/unregister requests and protects
 341         * from potential clean-up/setup issues in the debugfs directories.
 342         * The debugfs directory name is the same as device's name.
 343         */
 344        mutex_lock(&em_pd_mutex);
 345        em_debug_remove_pd(dev);
 346
 347        kfree(dev->em_pd->table);
 348        kfree(dev->em_pd);
 349        dev->em_pd = NULL;
 350        mutex_unlock(&em_pd_mutex);
 351}
 352EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
 353