linux/drivers/cpuidle/cpuidle-powernv.c
<<
>>
Prefs
   1/*
   2 *  cpuidle-powernv - idle state cpuidle driver.
   3 *  Adapted from drivers/cpuidle/cpuidle-pseries
   4 *
   5 */
   6
   7#include <linux/kernel.h>
   8#include <linux/module.h>
   9#include <linux/init.h>
  10#include <linux/moduleparam.h>
  11#include <linux/cpuidle.h>
  12#include <linux/cpu.h>
  13#include <linux/notifier.h>
  14#include <linux/clockchips.h>
  15#include <linux/of.h>
  16#include <linux/slab.h>
  17
  18#include <asm/machdep.h>
  19#include <asm/firmware.h>
  20#include <asm/opal.h>
  21#include <asm/runlatch.h>
  22
  /*
   * Upper bound, in nanoseconds, on the exit latency of a device-tree idle
   * state: powernv_add_idle_states() skips any state that exceeds it.
   */
  23#define POWERNV_THRESHOLD_LATENCY_NS 200000
  24
  /*
   * cpuidle driver descriptor for this platform. Its states[] array and
   * state_count are filled in by powernv_cpuidle_driver_init() before the
   * driver is handed to cpuidle_register().
   */
  25struct cpuidle_driver powernv_idle_driver = {
  26        .name             = "powernv_idle",
  27        .owner            = THIS_MODULE,
  28};
  29
  /* Number of entries of cpuidle_state_table to scan; set by powernv_idle_probe(). */
  30static int max_idle_state;
  /* Candidate state table; powernv_idle_probe() points it at powernv_states. */
  31static struct cpuidle_state *cpuidle_state_table;
  32
  /*
   * Value handed to power9_idle_stop() by stop_loop(), indexed by the cpuidle
   * state index; filled in by powernv_add_idle_states().
   */
  33static u64 stop_psscr_table[CPUIDLE_STATE_MAX];
  34
  /*
   * Snooze polling budget, in the same units as get_tb(); computed in
   * powernv_idle_probe() from the target residency of the state after snooze.
   */
  35static u64 snooze_timeout;
  /* True once snooze_timeout is valid, i.e. a state deeper than snooze exists. */
  36static bool snooze_timeout_en;
  37
  38static int snooze_loop(struct cpuidle_device *dev,
  39                        struct cpuidle_driver *drv,
  40                        int index)
  41{
  42        u64 snooze_exit_time;
  43
  44        local_irq_enable();
  45        set_thread_flag(TIF_POLLING_NRFLAG);
  46
  47        snooze_exit_time = get_tb() + snooze_timeout;
  48        ppc64_runlatch_off();
  49        while (!need_resched()) {
  50                HMT_low();
  51                HMT_very_low();
  52                if (snooze_timeout_en && get_tb() > snooze_exit_time)
  53                        break;
  54        }
  55
  56        HMT_medium();
  57        ppc64_runlatch_on();
  58        clear_thread_flag(TIF_POLLING_NRFLAG);
  59        smp_mb();
  60        return index;
  61}
  62
  /*
   * nap_loop() - enter the idle state provided by power7_idle().
   *
   * The runlatch is turned off around the idle call and back on afterwards.
   * Returns the index of the state that was entered.
   */
  static int nap_loop(struct cpuidle_device *dev,
                      struct cpuidle_driver *drv,
                      int index)
  {
          ppc64_runlatch_off();
          power7_idle();
          ppc64_runlatch_on();

          return index;
  }
  72
  /* Fastsleep is registered only when the broadcast tick can run in oneshot mode */
  #ifdef CONFIG_TICK_ONESHOT
  /*
   * fastsleep_loop() - enter the idle state provided by power7_sleep().
   *
   * Does nothing before the system has reached SYSTEM_RUNNING. Otherwise
   * LPCR_PECE1 is cleared in LPCR for the duration of the sleep and the
   * previous LPCR value is written back on wakeup.
   *
   * Returns the index of the state that was requested.
   */
  static int fastsleep_loop(struct cpuidle_device *dev,
                            struct cpuidle_driver *drv,
                            int index)
  {
          const unsigned long saved_lpcr = mfspr(SPRN_LPCR);

          if (unlikely(system_state < SYSTEM_RUNNING))
                  return index;

          /*
           * The timer has been offloaded, so the decrementer must not bring
           * the thread out of powersave: mask LPCR_PECE1 while sleeping.
           */
          mtspr(SPRN_LPCR, saved_lpcr & ~LPCR_PECE1);
          power7_sleep();

          mtspr(SPRN_LPCR, saved_lpcr);

          return index;
  }
  #endif
  99
 100static int stop_loop(struct cpuidle_device *dev,
 101                     struct cpuidle_driver *drv,
 102                     int index)
 103{
 104        ppc64_runlatch_off();
 105        power9_idle_stop(stop_psscr_table[index]);
 106        ppc64_runlatch_on();
 107        return index;
 108}
 109
 110/*
 111 * States for dedicated partition case.
 112 */
 /*
  * Only slot 0 (snooze, the polling state) is defined statically. Further
  * slots are filled in at probe time by powernv_add_idle_states().
  */
 113static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
 114        { /* Snooze */
 115                .name = "snooze",
 116                .desc = "snooze",
 117                .exit_latency = 0,
 118                .target_residency = 0,
 119                .enter = snooze_loop },
 120};
 121
 122static int powernv_cpuidle_cpu_online(unsigned int cpu)
 123{
 124        struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
 125
 126        if (dev && cpuidle_get_driver()) {
 127                cpuidle_pause_and_lock();
 128                cpuidle_enable_device(dev);
 129                cpuidle_resume_and_unlock();
 130        }
 131        return 0;
 132}
 133
 134static int powernv_cpuidle_cpu_dead(unsigned int cpu)
 135{
 136        struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
 137
 138        if (dev && cpuidle_get_driver()) {
 139                cpuidle_pause_and_lock();
 140                cpuidle_disable_device(dev);
 141                cpuidle_resume_and_unlock();
 142        }
 143        return 0;
 144}
 145
 146/*
 147 * powernv_cpuidle_driver_init()
 148 */
 149static int powernv_cpuidle_driver_init(void)
 150{
 151        int idle_state;
 152        struct cpuidle_driver *drv = &powernv_idle_driver;
 153
 154        drv->state_count = 0;
 155
 156        for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
 157                /* Is the state not enabled? */
 158                if (cpuidle_state_table[idle_state].enter == NULL)
 159                        continue;
 160
 161                drv->states[drv->state_count] = /* structure copy */
 162                        cpuidle_state_table[idle_state];
 163
 164                drv->state_count += 1;
 165        }
 166
 167        return 0;
 168}
 169
 170static int powernv_add_idle_states(void)
 171{
 172        struct device_node *power_mgt;
 173        int nr_idle_states = 1; /* Snooze */
 174        int dt_idle_states;
 175        u32 latency_ns[CPUIDLE_STATE_MAX];
 176        u32 residency_ns[CPUIDLE_STATE_MAX];
 177        u32 flags[CPUIDLE_STATE_MAX];
 178        u64 psscr_val[CPUIDLE_STATE_MAX];
 179        const char *names[CPUIDLE_STATE_MAX];
 180        int i, rc;
 181
 182        /* Currently we have snooze statically defined */
 183
 184        power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
 185        if (!power_mgt) {
 186                pr_warn("opal: PowerMgmt Node not found\n");
 187                goto out;
 188        }
 189
 190        /* Read values of any property to determine the num of idle states */
 191        dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags");
 192        if (dt_idle_states < 0) {
 193                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
 194                goto out;
 195        }
 196
 197        /*
 198         * Since snooze is used as first idle state, max idle states allowed is
 199         * CPUIDLE_STATE_MAX -1
 200         */
 201        if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
 202                pr_warn("cpuidle-powernv: discovered idle states more than allowed");
 203                dt_idle_states = CPUIDLE_STATE_MAX - 1;
 204        }
 205
 206        if (of_property_read_u32_array(power_mgt,
 207                        "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
 208                pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n");
 209                goto out;
 210        }
 211
 212        if (of_property_read_u32_array(power_mgt,
 213                "ibm,cpu-idle-state-latencies-ns", latency_ns,
 214                dt_idle_states)) {
 215                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
 216                goto out;
 217        }
 218        if (of_property_read_string_array(power_mgt,
 219                "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) {
 220                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
 221                goto out;
 222        }
 223
 224        /*
 225         * If the idle states use stop instruction, probe for psscr values
 226         * which are necessary to specify required stop level.
 227         */
 228        if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP))
 229                if (of_property_read_u64_array(power_mgt,
 230                    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
 231                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
 232                        goto out;
 233                }
 234
 235        rc = of_property_read_u32_array(power_mgt,
 236                "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
 237
 238        for (i = 0; i < dt_idle_states; i++) {
 239                /*
 240                 * If an idle state has exit latency beyond
 241                 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
 242                 * in cpu-idle.
 243                 */
 244                if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
 245                        continue;
 246
 247                /*
 248                 * Cpuidle accepts exit_latency and target_residency in us.
 249                 * Use default target_residency values if f/w does not expose it.
 250                 */
 251                if (flags[i] & OPAL_PM_NAP_ENABLED) {
 252                        /* Add NAP state */
 253                        strcpy(powernv_states[nr_idle_states].name, "Nap");
 254                        strcpy(powernv_states[nr_idle_states].desc, "Nap");
 255                        powernv_states[nr_idle_states].flags = 0;
 256                        powernv_states[nr_idle_states].target_residency = 100;
 257                        powernv_states[nr_idle_states].enter = nap_loop;
 258                } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
 259                                !(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
 260                        strncpy(powernv_states[nr_idle_states].name,
 261                                names[i], CPUIDLE_NAME_LEN);
 262                        strncpy(powernv_states[nr_idle_states].desc,
 263                                names[i], CPUIDLE_NAME_LEN);
 264                        powernv_states[nr_idle_states].flags = 0;
 265
 266                        powernv_states[nr_idle_states].enter = stop_loop;
 267                        stop_psscr_table[nr_idle_states] = psscr_val[i];
 268                }
 269
 270                /*
 271                 * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
 272                 * within this config dependency check.
 273                 */
 274#ifdef CONFIG_TICK_ONESHOT
 275                if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
 276                        flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
 277                        /* Add FASTSLEEP state */
 278                        strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 279                        strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
 280                        powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
 281                        powernv_states[nr_idle_states].target_residency = 300000;
 282                        powernv_states[nr_idle_states].enter = fastsleep_loop;
 283                } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
 284                                (flags[i] & OPAL_PM_TIMEBASE_STOP)) {
 285                        strncpy(powernv_states[nr_idle_states].name,
 286                                names[i], CPUIDLE_NAME_LEN);
 287                        strncpy(powernv_states[nr_idle_states].desc,
 288                                names[i], CPUIDLE_NAME_LEN);
 289
 290                        powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
 291                        powernv_states[nr_idle_states].enter = stop_loop;
 292                        stop_psscr_table[nr_idle_states] = psscr_val[i];
 293                }
 294#endif
 295                powernv_states[nr_idle_states].exit_latency =
 296                                ((unsigned int)latency_ns[i]) / 1000;
 297
 298                if (!rc) {
 299                        powernv_states[nr_idle_states].target_residency =
 300                                ((unsigned int)residency_ns[i]) / 1000;
 301                }
 302
 303                nr_idle_states++;
 304        }
 305out:
 306        return nr_idle_states;
 307}
 308
 309/*
 310 * powernv_idle_probe()
 311 * Choose state table for shared versus dedicated partition
 312 */
 313static int powernv_idle_probe(void)
 314{
 315        if (cpuidle_disable != IDLE_NO_OVERRIDE)
 316                return -ENODEV;
 317
 318        if (firmware_has_feature(FW_FEATURE_OPAL)) {
 319                cpuidle_state_table = powernv_states;
 320                /* Device tree can indicate more idle states */
 321                max_idle_state = powernv_add_idle_states();
 322                if (max_idle_state > 1) {
 323                        snooze_timeout_en = true;
 324                        snooze_timeout = powernv_states[1].target_residency *
 325                                         tb_ticks_per_usec;
 326                }
 327        } else
 328                return -ENODEV;
 329
 330        return 0;
 331}
 332
 333static int __init powernv_processor_idle_init(void)
 334{
 335        int retval;
 336
 337        retval = powernv_idle_probe();
 338        if (retval)
 339                return retval;
 340
 341        powernv_cpuidle_driver_init();
 342        retval = cpuidle_register(&powernv_idle_driver, NULL);
 343        if (retval) {
 344                printk(KERN_DEBUG "Registration of powernv driver failed.\n");
 345                return retval;
 346        }
 347
 348        retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 349                                           "cpuidle/powernv:online",
 350                                           powernv_cpuidle_cpu_online, NULL);
 351        WARN_ON(retval < 0);
 352        retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
 353                                           "cpuidle/powernv:dead", NULL,
 354                                           powernv_cpuidle_cpu_dead);
 355        WARN_ON(retval < 0);
 356        printk(KERN_DEBUG "powernv_idle_driver registered\n");
 357        return 0;
 358}
 359
 360device_initcall(powernv_processor_idle_init);
 361