linux/drivers/cpufreq/powernv-cpufreq.c
<<
>>
Prefs
   1/*
   2 * POWERNV cpufreq driver for the IBM POWER processors
   3 *
   4 * (C) Copyright IBM 2014
   5 *
   6 * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License as published by
  10 * the Free Software Foundation; either version 2, or (at your option)
  11 * any later version.
  12 *
  13 * This program is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 * GNU General Public License for more details.
  17 *
  18 */
  19
  20#define pr_fmt(fmt)     "powernv-cpufreq: " fmt
  21
  22#include <linux/kernel.h>
  23#include <linux/sysfs.h>
  24#include <linux/cpumask.h>
  25#include <linux/module.h>
  26#include <linux/cpufreq.h>
  27#include <linux/smp.h>
  28#include <linux/of.h>
  29#include <linux/reboot.h>
  30
  31#include <asm/cputhreads.h>
  32#include <asm/firmware.h>
  33#include <asm/reg.h>
  34#include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
  35
  /* Number of usable entries in powernv_freqs[] (end marker excluded). */
  #define POWERNV_MAX_PSTATES     256

  /* PMSR status bits tested by powernv_cpufreq_throttle_check(). */
  #define PMSR_PSAFE_ENABLE       (1UL << 30)
  #define PMSR_SPR_EM_DISABLE     (1UL << 31)

  /*
   * PMSR field extractors: Pmax and the local pstate, one byte each.
   * The argument is parenthesized so that an expression argument such as
   * PMSR_MAX(a | b) shifts the whole value rather than only its last
   * operand.
   */
  #define PMSR_MAX(x)             (((x) >> 32) & 0xFF)
  #define PMSR_LP(x)              (((x) >> 48) & 0xFF)
  41
  42static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
  43static bool rebooting, throttled;
  44
  /*
   * Pstate limits recorded by init_powernv_pstates().
   *
   * Note: The pstate ids form a run of contiguous integers. The smallest
   * id is kept in .min and the largest in .max. The nominal pstate, kept
   * in .nominal, is the highest non-turbo pstate in this platform.
   * .nr_pstates is the number of entries stored in the frequency table.
   */
  struct powernv_pstate_info {
          int min;
          int max;
          int nominal;
          int nr_pstates;
  };

  static struct powernv_pstate_info powernv_pstate_info;
  59
  60/*
  61 * Initialize the freq table based on data obtained
  62 * from the firmware passed via device-tree
  63 */
  64static int init_powernv_pstates(void)
  65{
  66        struct device_node *power_mgt;
  67        int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0;
  68        const __be32 *pstate_ids, *pstate_freqs;
  69        u32 len_ids, len_freqs;
  70
  71        power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
  72        if (!power_mgt) {
  73                pr_warn("power-mgt node not found\n");
  74                return -ENODEV;
  75        }
  76
  77        if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
  78                pr_warn("ibm,pstate-min node not found\n");
  79                return -ENODEV;
  80        }
  81
  82        if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
  83                pr_warn("ibm,pstate-max node not found\n");
  84                return -ENODEV;
  85        }
  86
  87        if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
  88                                 &pstate_nominal)) {
  89                pr_warn("ibm,pstate-nominal not found\n");
  90                return -ENODEV;
  91        }
  92        pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
  93                pstate_nominal, pstate_max);
  94
  95        pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
  96        if (!pstate_ids) {
  97                pr_warn("ibm,pstate-ids not found\n");
  98                return -ENODEV;
  99        }
 100
 101        pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
 102                                      &len_freqs);
 103        if (!pstate_freqs) {
 104                pr_warn("ibm,pstate-frequencies-mhz not found\n");
 105                return -ENODEV;
 106        }
 107
 108        if (len_ids != len_freqs) {
 109                pr_warn("Entries in ibm,pstate-ids and "
 110                        "ibm,pstate-frequencies-mhz does not match\n");
 111        }
 112
 113        nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
 114        if (!nr_pstates) {
 115                pr_warn("No PStates found\n");
 116                return -ENODEV;
 117        }
 118
 119        pr_debug("NR PStates %d\n", nr_pstates);
 120        for (i = 0; i < nr_pstates; i++) {
 121                u32 id = be32_to_cpu(pstate_ids[i]);
 122                u32 freq = be32_to_cpu(pstate_freqs[i]);
 123
 124                pr_debug("PState id %d freq %d MHz\n", id, freq);
 125                powernv_freqs[i].frequency = freq * 1000; /* kHz */
 126                powernv_freqs[i].driver_data = id;
 127        }
 128        /* End of list marker entry */
 129        powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
 130
 131        powernv_pstate_info.min = pstate_min;
 132        powernv_pstate_info.max = pstate_max;
 133        powernv_pstate_info.nominal = pstate_nominal;
 134        powernv_pstate_info.nr_pstates = nr_pstates;
 135
 136        return 0;
 137}
 138
 139/* Returns the CPU frequency corresponding to the pstate_id. */
 140static unsigned int pstate_id_to_freq(int pstate_id)
 141{
 142        int i;
 143
 144        i = powernv_pstate_info.max - pstate_id;
 145        if (i >= powernv_pstate_info.nr_pstates || i < 0) {
 146                pr_warn("PState id %d outside of PState table, "
 147                        "reporting nominal id %d instead\n",
 148                        pstate_id, powernv_pstate_info.nominal);
 149                i = powernv_pstate_info.max - powernv_pstate_info.nominal;
 150        }
 151
 152        return powernv_freqs[i].frequency;
 153}
 154
 155/*
 156 * cpuinfo_nominal_freq_show - Show the nominal CPU frequency as indicated by
 157 * the firmware
 158 */
 159static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
 160                                        char *buf)
 161{
 162        return sprintf(buf, "%u\n",
 163                pstate_id_to_freq(powernv_pstate_info.nominal));
 164}
 165
 166struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
 167        __ATTR_RO(cpuinfo_nominal_freq);
 168
 169static struct freq_attr *powernv_cpu_freq_attr[] = {
 170        &cpufreq_freq_attr_scaling_available_freqs,
 171        &cpufreq_freq_attr_cpuinfo_nominal_freq,
 172        NULL,
 173};
 174
 175/* Helper routines */
 176
 177/* Access helpers to power mgt SPR */
 178
 179static inline unsigned long get_pmspr(unsigned long sprn)
 180{
 181        switch (sprn) {
 182        case SPRN_PMCR:
 183                return mfspr(SPRN_PMCR);
 184
 185        case SPRN_PMICR:
 186                return mfspr(SPRN_PMICR);
 187
 188        case SPRN_PMSR:
 189                return mfspr(SPRN_PMSR);
 190        }
 191        BUG();
 192}
 193
 194static inline void set_pmspr(unsigned long sprn, unsigned long val)
 195{
 196        switch (sprn) {
 197        case SPRN_PMCR:
 198                mtspr(SPRN_PMCR, val);
 199                return;
 200
 201        case SPRN_PMICR:
 202                mtspr(SPRN_PMICR, val);
 203                return;
 204        }
 205        BUG();
 206}
 207
 /*
  * Argument block exchanged with the functions this driver runs through
  * smp_call_function to query/update pstates on a remote CPU.
  */
 struct powernv_smp_call_data {
         /* Filled in by powernv_read_cpu_freq() with the frequency found. */
         unsigned int freq;
         /* Pstate to program (set_pstate) or the pstate that was read back. */
         int pstate_id;
 };
 216
 217/*
 218 * powernv_read_cpu_freq: Reads the current frequency on this CPU.
 219 *
 220 * Called via smp_call_function.
 221 *
 222 * Note: The caller of the smp_call_function should pass an argument of
 223 * the type 'struct powernv_smp_call_data *' along with this function.
 224 *
 225 * The current frequency on this CPU will be returned via
 226 * ((struct powernv_smp_call_data *)arg)->freq;
 227 */
 228static void powernv_read_cpu_freq(void *arg)
 229{
 230        unsigned long pmspr_val;
 231        s8 local_pstate_id;
 232        struct powernv_smp_call_data *freq_data = arg;
 233
 234        pmspr_val = get_pmspr(SPRN_PMSR);
 235
 236        /*
 237         * The local pstate id corresponds bits 48..55 in the PMSR.
 238         * Note: Watch out for the sign!
 239         */
 240        local_pstate_id = (pmspr_val >> 48) & 0xFF;
 241        freq_data->pstate_id = local_pstate_id;
 242        freq_data->freq = pstate_id_to_freq(freq_data->pstate_id);
 243
 244        pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n",
 245                raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
 246                freq_data->freq);
 247}
 248
 249/*
 250 * powernv_cpufreq_get: Returns the CPU frequency as reported by the
 251 * firmware for CPU 'cpu'. This value is reported through the sysfs
 252 * file cpuinfo_cur_freq.
 253 */
 254static unsigned int powernv_cpufreq_get(unsigned int cpu)
 255{
 256        struct powernv_smp_call_data freq_data;
 257
 258        smp_call_function_any(cpu_sibling_mask(cpu), powernv_read_cpu_freq,
 259                        &freq_data, 1);
 260
 261        return freq_data.freq;
 262}
 263
 264/*
 265 * set_pstate: Sets the pstate on this CPU.
 266 *
 267 * This is called via an smp_call_function.
 268 *
 269 * The caller must ensure that freq_data is of the type
 270 * (struct powernv_smp_call_data *) and the pstate_id which needs to be set
 271 * on this CPU should be present in freq_data->pstate_id.
 272 */
 273static void set_pstate(void *freq_data)
 274{
 275        unsigned long val;
 276        unsigned long pstate_ul =
 277                ((struct powernv_smp_call_data *) freq_data)->pstate_id;
 278
 279        val = get_pmspr(SPRN_PMCR);
 280        val = val & 0x0000FFFFFFFFFFFFULL;
 281
 282        pstate_ul = pstate_ul & 0xFF;
 283
 284        /* Set both global(bits 56..63) and local(bits 48..55) PStates */
 285        val = val | (pstate_ul << 56) | (pstate_ul << 48);
 286
 287        pr_debug("Setting cpu %d pmcr to %016lX\n",
 288                        raw_smp_processor_id(), val);
 289        set_pmspr(SPRN_PMCR, val);
 290}
 291
 292/*
 293 * get_nominal_index: Returns the index corresponding to the nominal
 294 * pstate in the cpufreq table
 295 */
 296static inline unsigned int get_nominal_index(void)
 297{
 298        return powernv_pstate_info.max - powernv_pstate_info.nominal;
 299}
 300
 301static void powernv_cpufreq_throttle_check(unsigned int cpu)
 302{
 303        unsigned long pmsr;
 304        int pmsr_pmax, pmsr_lp;
 305
 306        pmsr = get_pmspr(SPRN_PMSR);
 307
 308        /* Check for Pmax Capping */
 309        pmsr_pmax = (s8)PMSR_MAX(pmsr);
 310        if (pmsr_pmax != powernv_pstate_info.max) {
 311                throttled = true;
 312                pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax);
 313                pr_info("Max allowed Pstate is capped\n");
 314        }
 315
 316        /*
 317         * Check for Psafe by reading LocalPstate
 318         * or check if Psafe_mode_active is set in PMSR.
 319         */
 320        pmsr_lp = (s8)PMSR_LP(pmsr);
 321        if ((pmsr_lp < powernv_pstate_info.min) ||
 322                                (pmsr & PMSR_PSAFE_ENABLE)) {
 323                throttled = true;
 324                pr_info("Pstate set to safe frequency\n");
 325        }
 326
 327        /* Check if SPR_EM_DISABLE is set in PMSR */
 328        if (pmsr & PMSR_SPR_EM_DISABLE) {
 329                throttled = true;
 330                pr_info("Frequency Control disabled from OS\n");
 331        }
 332
 333        if (throttled) {
 334                pr_info("PMSR = %16lx\n", pmsr);
 335                pr_crit("CPU Frequency could be throttled\n");
 336        }
 337}
 338
 339/*
 340 * powernv_cpufreq_target_index: Sets the frequency corresponding to
 341 * the cpufreq table entry indexed by new_index on the cpus in the
 342 * mask policy->cpus
 343 */
 344static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 345                                        unsigned int new_index)
 346{
 347        struct powernv_smp_call_data freq_data;
 348
 349        if (unlikely(rebooting) && new_index != get_nominal_index())
 350                return 0;
 351
 352        if (!throttled)
 353                powernv_cpufreq_throttle_check(smp_processor_id());
 354
 355        freq_data.pstate_id = powernv_freqs[new_index].driver_data;
 356
 357        /*
 358         * Use smp_call_function to send IPI and execute the
 359         * mtspr on target CPU.  We could do that without IPI
 360         * if current CPU is within policy->cpus (core)
 361         */
 362        smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
 363
 364        return 0;
 365}
 366
 367static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 368{
 369        int base, i;
 370
 371        base = cpu_first_thread_sibling(policy->cpu);
 372
 373        for (i = 0; i < threads_per_core; i++)
 374                cpumask_set_cpu(base + i, policy->cpus);
 375
 376        return cpufreq_table_validate_and_show(policy, powernv_freqs);
 377}
 378
 379static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
 380                                unsigned long action, void *unused)
 381{
 382        int cpu;
 383        struct cpufreq_policy cpu_policy;
 384
 385        rebooting = true;
 386        for_each_online_cpu(cpu) {
 387                cpufreq_get_policy(&cpu_policy, cpu);
 388                powernv_cpufreq_target_index(&cpu_policy, get_nominal_index());
 389        }
 390
 391        return NOTIFY_DONE;
 392}
 393
 394static struct notifier_block powernv_cpufreq_reboot_nb = {
 395        .notifier_call = powernv_cpufreq_reboot_notifier,
 396};
 397
 398static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 399{
 400        struct powernv_smp_call_data freq_data;
 401
 402        freq_data.pstate_id = powernv_pstate_info.min;
 403        smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
 404}
 405
 406static struct cpufreq_driver powernv_cpufreq_driver = {
 407        .name           = "powernv-cpufreq",
 408        .flags          = CPUFREQ_CONST_LOOPS,
 409        .init           = powernv_cpufreq_cpu_init,
 410        .verify         = cpufreq_generic_frequency_table_verify,
 411        .target_index   = powernv_cpufreq_target_index,
 412        .get            = powernv_cpufreq_get,
 413        .stop_cpu       = powernv_cpufreq_stop_cpu,
 414        .attr           = powernv_cpu_freq_attr,
 415};
 416
 417static int __init powernv_cpufreq_init(void)
 418{
 419        int rc = 0;
 420
 421        /* Don't probe on pseries (guest) platforms */
 422        if (!firmware_has_feature(FW_FEATURE_OPALv3))
 423                return -ENODEV;
 424
 425        /* Discover pstates from device tree and init */
 426        rc = init_powernv_pstates();
 427        if (rc) {
 428                pr_info("powernv-cpufreq disabled. System does not support PState control\n");
 429                return rc;
 430        }
 431
 432        register_reboot_notifier(&powernv_cpufreq_reboot_nb);
 433        return cpufreq_register_driver(&powernv_cpufreq_driver);
 434}
 435module_init(powernv_cpufreq_init);
 436
 437static void __exit powernv_cpufreq_exit(void)
 438{
 439        unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
 440        cpufreq_unregister_driver(&powernv_cpufreq_driver);
 441}
 442module_exit(powernv_cpufreq_exit);
 443
 444MODULE_LICENSE("GPL");
 445MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");
 446