linux/kernel/power/energy_model.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Energy Model of CPUs
   4 *
   5 * Copyright (c) 2018, Arm ltd.
   6 * Written by: Quentin Perret, Arm ltd.
   7 */
   8
   9#define pr_fmt(fmt) "energy_model: " fmt
  10
  11#include <linux/cpu.h>
  12#include <linux/cpumask.h>
  13#include <linux/debugfs.h>
  14#include <linux/energy_model.h>
  15#include <linux/sched/topology.h>
  16#include <linux/slab.h>
  17
  18/* Mapping of each CPU to the performance domain to which it belongs. */
  19static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
  20
  21/*
  22 * Mutex serializing the registrations of performance domains and letting
  23 * callbacks defined by drivers sleep.
  24 */
  25static DEFINE_MUTEX(em_pd_mutex);
  26
  27#ifdef CONFIG_DEBUG_FS
  28static struct dentry *rootdir;
  29
  30static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
  31{
  32        struct dentry *d;
  33        char name[24];
  34
  35        snprintf(name, sizeof(name), "cs:%lu", cs->frequency);
  36
  37        /* Create per-cs directory */
  38        d = debugfs_create_dir(name, pd);
  39        debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
  40        debugfs_create_ulong("power", 0444, d, &cs->power);
  41        debugfs_create_ulong("cost", 0444, d, &cs->cost);
  42}
  43
  44static int em_debug_cpus_show(struct seq_file *s, void *unused)
  45{
  46        seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
  47
  48        return 0;
  49}
  50DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
  51
  52static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
  53{
  54        struct dentry *d;
  55        char name[8];
  56        int i;
  57
  58        snprintf(name, sizeof(name), "pd%d", cpu);
  59
  60        /* Create the directory of the performance domain */
  61        d = debugfs_create_dir(name, rootdir);
  62
  63        debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);
  64
  65        /* Create a sub-directory for each capacity state */
  66        for (i = 0; i < pd->nr_cap_states; i++)
  67                em_debug_create_cs(&pd->table[i], d);
  68}
  69
  70static int __init em_debug_init(void)
  71{
  72        /* Create /sys/kernel/debug/energy_model directory */
  73        rootdir = debugfs_create_dir("energy_model", NULL);
  74
  75        return 0;
  76}
  77core_initcall(em_debug_init);
  78#else /* CONFIG_DEBUG_FS */
  79static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {}
  80#endif
  81static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
  82                                                struct em_data_callback *cb)
  83{
  84        unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
  85        unsigned long power, freq, prev_freq = 0;
  86        int i, ret, cpu = cpumask_first(span);
  87        struct em_cap_state *table;
  88        struct em_perf_domain *pd;
  89        u64 fmax;
  90
  91        if (!cb->active_power)
  92                return NULL;
  93
  94        pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
  95        if (!pd)
  96                return NULL;
  97
  98        table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
  99        if (!table)
 100                goto free_pd;
 101
 102        /* Build the list of capacity states for this performance domain */
 103        for (i = 0, freq = 0; i < nr_states; i++, freq++) {
 104                /*
 105                 * active_power() is a driver callback which ceils 'freq' to
 106                 * lowest capacity state of 'cpu' above 'freq' and updates
 107                 * 'power' and 'freq' accordingly.
 108                 */
 109                ret = cb->active_power(&power, &freq, cpu);
 110                if (ret) {
 111                        pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
 112                        goto free_cs_table;
 113                }
 114
 115                /*
 116                 * We expect the driver callback to increase the frequency for
 117                 * higher capacity states.
 118                 */
 119                if (freq <= prev_freq) {
 120                        pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq);
 121                        goto free_cs_table;
 122                }
 123
 124                /*
 125                 * The power returned by active_state() is expected to be
 126                 * positive, in milli-watts and to fit into 16 bits.
 127                 */
 128                if (!power || power > EM_CPU_MAX_POWER) {
 129                        pr_err("pd%d: invalid power: %lu\n", cpu, power);
 130                        goto free_cs_table;
 131                }
 132
 133                table[i].power = power;
 134                table[i].frequency = prev_freq = freq;
 135
 136                /*
 137                 * The hertz/watts efficiency ratio should decrease as the
 138                 * frequency grows on sane platforms. But this isn't always
 139                 * true in practice so warn the user if a higher OPP is more
 140                 * power efficient than a lower one.
 141                 */
 142                opp_eff = freq / power;
 143                if (opp_eff >= prev_opp_eff)
 144                        pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n",
 145                                        cpu, i, i - 1);
 146                prev_opp_eff = opp_eff;
 147        }
 148
 149        /* Compute the cost of each capacity_state. */
 150        fmax = (u64) table[nr_states - 1].frequency;
 151        for (i = 0; i < nr_states; i++) {
 152                table[i].cost = div64_u64(fmax * table[i].power,
 153                                          table[i].frequency);
 154        }
 155
 156        pd->table = table;
 157        pd->nr_cap_states = nr_states;
 158        cpumask_copy(to_cpumask(pd->cpus), span);
 159
 160        em_debug_create_pd(pd, cpu);
 161
 162        return pd;
 163
 164free_cs_table:
 165        kfree(table);
 166free_pd:
 167        kfree(pd);
 168
 169        return NULL;
 170}
 171
 172/**
 173 * em_cpu_get() - Return the performance domain for a CPU
 174 * @cpu : CPU to find the performance domain for
 175 *
 176 * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't
 177 * exist.
 178 */
 179struct em_perf_domain *em_cpu_get(int cpu)
 180{
 181        return READ_ONCE(per_cpu(em_data, cpu));
 182}
 183EXPORT_SYMBOL_GPL(em_cpu_get);
 184
 185/**
 186 * em_register_perf_domain() - Register the Energy Model of a performance domain
 187 * @span        : Mask of CPUs in the performance domain
 188 * @nr_states   : Number of capacity states to register
 189 * @cb          : Callback functions providing the data of the Energy Model
 190 *
 191 * Create Energy Model tables for a performance domain using the callbacks
 192 * defined in cb.
 193 *
 194 * If multiple clients register the same performance domain, all but the first
 195 * registration will be ignored.
 196 *
 197 * Return 0 on success
 198 */
 199int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
 200                                                struct em_data_callback *cb)
 201{
 202        unsigned long cap, prev_cap = 0;
 203        struct em_perf_domain *pd;
 204        int cpu, ret = 0;
 205
 206        if (!span || !nr_states || !cb)
 207                return -EINVAL;
 208
 209        /*
 210         * Use a mutex to serialize the registration of performance domains and
 211         * let the driver-defined callback functions sleep.
 212         */
 213        mutex_lock(&em_pd_mutex);
 214
 215        for_each_cpu(cpu, span) {
 216                /* Make sure we don't register again an existing domain. */
 217                if (READ_ONCE(per_cpu(em_data, cpu))) {
 218                        ret = -EEXIST;
 219                        goto unlock;
 220                }
 221
 222                /*
 223                 * All CPUs of a domain must have the same micro-architecture
 224                 * since they all share the same table.
 225                 */
 226                cap = arch_scale_cpu_capacity(NULL, cpu);
 227                if (prev_cap && prev_cap != cap) {
 228                        pr_err("CPUs of %*pbl must have the same capacity\n",
 229                                                        cpumask_pr_args(span));
 230                        ret = -EINVAL;
 231                        goto unlock;
 232                }
 233                prev_cap = cap;
 234        }
 235
 236        /* Create the performance domain and add it to the Energy Model. */
 237        pd = em_create_pd(span, nr_states, cb);
 238        if (!pd) {
 239                ret = -EINVAL;
 240                goto unlock;
 241        }
 242
 243        for_each_cpu(cpu, span) {
 244                /*
 245                 * The per-cpu array can be read concurrently from em_cpu_get().
 246                 * The barrier enforces the ordering needed to make sure readers
 247                 * can only access well formed em_perf_domain structs.
 248                 */
 249                smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
 250        }
 251
 252        pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
 253unlock:
 254        mutex_unlock(&em_pd_mutex);
 255
 256        return ret;
 257}
 258EXPORT_SYMBOL_GPL(em_register_perf_domain);
 259