linux/arch/x86/kernel/cpu/intel_epb.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Intel Performance and Energy Bias Hint support.
   4 *
   5 * Copyright (C) 2019 Intel Corporation
   6 *
   7 * Author:
   8 *      Rafael J. Wysocki <rafael.j.wysocki@intel.com>
   9 */
  10
  11#include <linux/cpuhotplug.h>
  12#include <linux/cpu.h>
  13#include <linux/device.h>
  14#include <linux/kernel.h>
  15#include <linux/string.h>
  16#include <linux/syscore_ops.h>
  17#include <linux/pm.h>
  18
  19#include <asm/cpufeature.h>
  20#include <asm/msr.h>
  21
  22/**
  23 * DOC: overview
  24 *
  25 * The Performance and Energy Bias Hint (EPB) allows software to specify its
  26 * preference with respect to the power-performance tradeoffs present in the
  27 * processor.  Generally, the EPB is expected to be set by user space (directly
  28 * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
  29 * two reasons for the kernel to update it.
  30 *
  31 * First, there are systems where the platform firmware resets the EPB during
  32 * system-wide transitions from sleep states back into the working state
  33 * effectively causing the previous EPB updates by user space to be lost.
  34 * Thus the kernel needs to save the current EPB values for all CPUs during
  35 * system-wide transitions to sleep states and restore them on the way back to
  36 * the working state.  That can be achieved by saving EPB for secondary CPUs
  37 * when they are taken offline during transitions into system sleep states and
  38 * for the boot CPU in a syscore suspend operation, so that it can be restored
  39 * for the boot CPU in a syscore resume operation and for the other CPUs when
  40 * they are brought back online.  However, CPUs that are already offline when
  41 * a system-wide PM transition is started are not taken offline again, but their
  42 * EPB values may still be reset by the platform firmware during the transition,
  43 * so in fact it is necessary to save the EPB of any CPU taken offline and to
  44 * restore it when the given CPU goes back online at all times.
  45 *
  46 * Second, on many systems the initial EPB value coming from the platform
  47 * firmware is 0 ('performance') and at least on some of them that is because
  48 * the platform firmware does not initialize EPB at all with the assumption that
  49 * the OS will do that anyway.  That sometimes is problematic, as it may cause
  50 * the system battery to drain too fast, for example, so it is better to adjust
  51 * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
  52 * kernel changes it to 6 ('normal').
  53 */
  54
  55static DEFINE_PER_CPU(u8, saved_epb);
  56
  57#define EPB_MASK        0x0fULL
  58#define EPB_SAVED       0x10ULL
  59#define MAX_EPB         EPB_MASK
  60
  61static int intel_epb_save(void)
  62{
  63        u64 epb;
  64
  65        rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
  66        /*
  67         * Ensure that saved_epb will always be nonzero after this write even if
  68         * the EPB value read from the MSR is 0.
  69         */
  70        this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
  71
  72        return 0;
  73}
  74
  75static void intel_epb_restore(void)
  76{
  77        u64 val = this_cpu_read(saved_epb);
  78        u64 epb;
  79
  80        rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
  81        if (val) {
  82                val &= EPB_MASK;
  83        } else {
  84                /*
  85                 * Because intel_epb_save() has not run for the current CPU yet,
  86                 * it is going online for the first time, so if its EPB value is
  87                 * 0 ('performance') at this point, assume that it has not been
  88                 * initialized by the platform firmware and set it to 6
  89                 * ('normal').
  90                 */
  91                val = epb & EPB_MASK;
  92                if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
  93                        val = ENERGY_PERF_BIAS_NORMAL;
  94                        pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
  95                }
  96        }
  97        wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
  98}
  99
 100static struct syscore_ops intel_epb_syscore_ops = {
 101        .suspend = intel_epb_save,
 102        .resume = intel_epb_restore,
 103};
 104
 105static const char * const energy_perf_strings[] = {
 106        "performance",
 107        "balance-performance",
 108        "normal",
 109        "balance-power",
 110        "power"
 111};
 112static const u8 energ_perf_values[] = {
 113        ENERGY_PERF_BIAS_PERFORMANCE,
 114        ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
 115        ENERGY_PERF_BIAS_NORMAL,
 116        ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
 117        ENERGY_PERF_BIAS_POWERSAVE
 118};
 119
 120static ssize_t energy_perf_bias_show(struct device *dev,
 121                                     struct device_attribute *attr,
 122                                     char *buf)
 123{
 124        unsigned int cpu = dev->id;
 125        u64 epb;
 126        int ret;
 127
 128        ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
 129        if (ret < 0)
 130                return ret;
 131
 132        return sprintf(buf, "%llu\n", epb);
 133}
 134
 135static ssize_t energy_perf_bias_store(struct device *dev,
 136                                      struct device_attribute *attr,
 137                                      const char *buf, size_t count)
 138{
 139        unsigned int cpu = dev->id;
 140        u64 epb, val;
 141        int ret;
 142
 143        ret = __sysfs_match_string(energy_perf_strings,
 144                                   ARRAY_SIZE(energy_perf_strings), buf);
 145        if (ret >= 0)
 146                val = energ_perf_values[ret];
 147        else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
 148                return -EINVAL;
 149
 150        ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
 151        if (ret < 0)
 152                return ret;
 153
 154        ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
 155                            (epb & ~EPB_MASK) | val);
 156        if (ret < 0)
 157                return ret;
 158
 159        return count;
 160}
 161
 162static DEVICE_ATTR_RW(energy_perf_bias);
 163
 164static struct attribute *intel_epb_attrs[] = {
 165        &dev_attr_energy_perf_bias.attr,
 166        NULL
 167};
 168
 169static const struct attribute_group intel_epb_attr_group = {
 170        .name = power_group_name,
 171        .attrs =  intel_epb_attrs
 172};
 173
 174static int intel_epb_online(unsigned int cpu)
 175{
 176        struct device *cpu_dev = get_cpu_device(cpu);
 177
 178        intel_epb_restore();
 179        if (!cpuhp_tasks_frozen)
 180                sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
 181
 182        return 0;
 183}
 184
 185static int intel_epb_offline(unsigned int cpu)
 186{
 187        struct device *cpu_dev = get_cpu_device(cpu);
 188
 189        if (!cpuhp_tasks_frozen)
 190                sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
 191
 192        intel_epb_save();
 193        return 0;
 194}
 195
 196static __init int intel_epb_init(void)
 197{
 198        int ret;
 199
 200        if (!boot_cpu_has(X86_FEATURE_EPB))
 201                return -ENODEV;
 202
 203        ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
 204                                "x86/intel/epb:online", intel_epb_online,
 205                                intel_epb_offline);
 206        if (ret < 0)
 207                goto err_out_online;
 208
 209        register_syscore_ops(&intel_epb_syscore_ops);
 210        return 0;
 211
 212err_out_online:
 213        cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
 214        return ret;
 215}
 216subsys_initcall(intel_epb_init);
 217