linux/drivers/cpuidle/cpuidle-powernv.c
<<
>>
Prefs
   1/*
   2 *  cpuidle-powernv - idle state cpuidle driver.
   3 *  Adapted from drivers/cpuidle/cpuidle-pseries
   4 *
   5 */
   6
   7#include <linux/kernel.h>
   8#include <linux/module.h>
   9#include <linux/init.h>
  10#include <linux/moduleparam.h>
  11#include <linux/cpuidle.h>
  12#include <linux/cpu.h>
  13#include <linux/notifier.h>
  14#include <linux/clockchips.h>
  15#include <linux/of.h>
  16#include <linux/slab.h>
  17
  18#include <asm/machdep.h>
  19#include <asm/firmware.h>
  20#include <asm/opal.h>
  21#include <asm/runlatch.h>
  22
  23#define POWERNV_THRESHOLD_LATENCY_NS 200000
  24
  25struct cpuidle_driver powernv_idle_driver = {
  26        .name             = "powernv_idle",
  27        .owner            = THIS_MODULE,
  28};
  29
  30static int max_idle_state;
  31static struct cpuidle_state *cpuidle_state_table;
  32
  33static u64 stop_psscr_table[CPUIDLE_STATE_MAX];
  34
  35static u64 snooze_timeout;
  36static bool snooze_timeout_en;
  37
  38static int snooze_loop(struct cpuidle_device *dev,
  39                        struct cpuidle_driver *drv,
  40                        int index)
  41{
  42        u64 snooze_exit_time;
  43
  44        local_irq_enable();
  45        set_thread_flag(TIF_POLLING_NRFLAG);
  46
  47        snooze_exit_time = get_tb() + snooze_timeout;
  48        ppc64_runlatch_off();
  49        while (!need_resched()) {
  50                HMT_low();
  51                HMT_very_low();
  52                if (snooze_timeout_en && get_tb() > snooze_exit_time)
  53                        break;
  54        }
  55
  56        HMT_medium();
  57        ppc64_runlatch_on();
  58        clear_thread_flag(TIF_POLLING_NRFLAG);
  59        smp_mb();
  60        return index;
  61}
  62
  /*
   * nap_loop() - enter the idle state via power7_idle().
   *
   * The call is bracketed by ppc64_runlatch_off()/ppc64_runlatch_on().
   * Returns the index it was called with.
   */
  static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv,
                      int index)
  {
          ppc64_runlatch_off();
          power7_idle();
          ppc64_runlatch_on();

          return index;
  }
  72
  73/* Register for fastsleep only in oneshot mode of broadcast */
  74#ifdef CONFIG_TICK_ONESHOT
  75static int fastsleep_loop(struct cpuidle_device *dev,
  76                                struct cpuidle_driver *drv,
  77                                int index)
  78{
  79        unsigned long old_lpcr = mfspr(SPRN_LPCR);
  80        unsigned long new_lpcr;
  81
  82        if (unlikely(system_state < SYSTEM_RUNNING))
  83                return index;
  84
  85        new_lpcr = old_lpcr;
  86        /* Do not exit powersave upon decrementer as we've setup the timer
  87         * offload.
  88         */
  89        new_lpcr &= ~LPCR_PECE1;
  90
  91        mtspr(SPRN_LPCR, new_lpcr);
  92        power7_sleep();
  93
  94        mtspr(SPRN_LPCR, old_lpcr);
  95
  96        return index;
  97}
  98#endif
  99
 100static int stop_loop(struct cpuidle_device *dev,
 101                     struct cpuidle_driver *drv,
 102                     int index)
 103{
 104        ppc64_runlatch_off();
 105        power9_idle_stop(stop_psscr_table[index]);
 106        ppc64_runlatch_on();
 107        return index;
 108}
 109
 110/*
 111 * States for dedicated partition case.
 112 */
 113static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
 114        { /* Snooze */
 115                .name = "snooze",
 116                .desc = "snooze",
 117                .exit_latency = 0,
 118                .target_residency = 0,
 119                .enter = snooze_loop },
 120};
 121
 122static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
 123                        unsigned long action, void *hcpu)
 124{
 125        int hotcpu = (unsigned long)hcpu;
 126        struct cpuidle_device *dev =
 127                                per_cpu(cpuidle_devices, hotcpu);
 128
 129        if (dev && cpuidle_get_driver()) {
 130                switch (action) {
 131                case CPU_ONLINE:
 132                case CPU_ONLINE_FROZEN:
 133                        cpuidle_pause_and_lock();
 134                        cpuidle_enable_device(dev);
 135                        cpuidle_resume_and_unlock();
 136                        break;
 137
 138                case CPU_DEAD:
 139                case CPU_DEAD_FROZEN:
 140                        cpuidle_pause_and_lock();
 141                        cpuidle_disable_device(dev);
 142                        cpuidle_resume_and_unlock();
 143                        break;
 144
 145                default:
 146                        return NOTIFY_DONE;
 147                }
 148        }
 149        return NOTIFY_OK;
 150}
 151
 152static struct notifier_block setup_hotplug_notifier = {
 153        .notifier_call = powernv_cpuidle_add_cpu_notifier,
 154};
 155
 156/*
 157 * powernv_cpuidle_driver_init()
 158 */
 159static int powernv_cpuidle_driver_init(void)
 160{
 161        int idle_state;
 162        struct cpuidle_driver *drv = &powernv_idle_driver;
 163
 164        drv->state_count = 0;
 165
 166        for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
 167                /* Is the state not enabled? */
 168                if (cpuidle_state_table[idle_state].enter == NULL)
 169                        continue;
 170
 171                drv->states[drv->state_count] = /* structure copy */
 172                        cpuidle_state_table[idle_state];
 173
 174                drv->state_count += 1;
 175        }
 176
 177        return 0;
 178}
 179
 180static int powernv_add_idle_states(void)
 181{
 182        struct device_node *power_mgt;
 183        int nr_idle_states = 1; /* Snooze */
 184        int dt_idle_states;
 185        u32 latency_ns[CPUIDLE_STATE_MAX];
 186        u32 residency_ns[CPUIDLE_STATE_MAX];
 187        u32 flags[CPUIDLE_STATE_MAX];
 188        u64 psscr_val[CPUIDLE_STATE_MAX];
 189        const char *names[CPUIDLE_STATE_MAX];
 190        int i, rc;
 191
 192        /* Currently we have snooze statically defined */
 193
 194        power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
 195        if (!power_mgt) {
 196                pr_warn("opal: PowerMgmt Node not found\n");
 197                goto out;
 198        }
 199
 200        /* Read values of any property to determine the num of idle states */
 201        dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags");
 202        if (dt_idle_states < 0) {
 203                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
 204                goto out;
 205        }
 206
 207        /*
 208         * Since snooze is used as first idle state, max idle states allowed is
 209         * CPUIDLE_STATE_MAX -1
 210         */
 211        if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
 212                pr_warn("cpuidle-powernv: discovered idle states more than allowed");
 213                dt_idle_states = CPUIDLE_STATE_MAX - 1;
 214        }
 215
 216        if (of_property_read_u32_array(power_mgt,
 217                        "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
 218                pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n");
 219                goto out;
 220        }
 221
 222        if (of_property_read_u32_array(power_mgt,
 223                "ibm,cpu-idle-state-latencies-ns", latency_ns,
 224                dt_idle_states)) {
 225                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
 226                goto out;
 227        }
 228        if (of_property_read_string_array(power_mgt,
 229                "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) {
 230                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
 231                goto out;
 232        }
 233
 234        /*
 235         * If the idle states use stop instruction, probe for psscr values
 236         * which are necessary to specify required stop level.
 237         */
 238        if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP))
 239                if (of_property_read_u64_array(power_mgt,
 240                    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
 241                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
 242                        goto out;
 243                }
 244
 245        rc = of_property_read_u32_array(power_mgt,
 246                "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
 247
 248        for (i = 0; i < dt_idle_states; i++) {
 249                /*
 250                 * If an idle state has exit latency beyond
 251                 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
 252                 * in cpu-idle.
 253                 */
 254                if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
 255                        continue;
 256
 257                /*
 258                 * Cpuidle accepts exit_latency and target_residency in us.
 259                 * Use default target_residency values if f/w does not expose it.
 260                 */
 261                if (flags[i] & OPAL_PM_NAP_ENABLED) {
 262                        /* Add NAP state */
 263                        strcpy(powernv_states[nr_idle_states].name, "Nap");
 264                        strcpy(powernv_states[nr_idle_states].desc, "Nap");
 265                        powernv_states[nr_idle_states].flags = 0;
 266                        powernv_states[nr_idle_states].target_residency = 100;
 267                        powernv_states[nr_idle_states].enter = nap_loop;
 268                } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
 269                                !(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
 270                        strncpy(powernv_states[nr_idle_states].name,
 271                                names[i], CPUIDLE_NAME_LEN);
 272                        strncpy(powernv_states[nr_idle_states].desc,
 273                                names[i], CPUIDLE_NAME_LEN);
 274                        powernv_states[nr_idle_states].flags = 0;
 275
 276                        powernv_states[nr_idle_states].enter = stop_loop;
 277                        stop_psscr_table[nr_idle_states] = psscr_val[i];
 278                }
 279
 280                /*
 281                 * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
 282                 * within this config dependency check.
 283                 */
 284#ifdef CONFIG_TICK_ONESHOT
 285                if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
 286                        flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
 287                        /* Add FASTSLEEP state */
 288                        strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 289                        strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
 290                        powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
 291                        powernv_states[nr_idle_states].target_residency = 300000;
 292                        powernv_states[nr_idle_states].enter = fastsleep_loop;
 293                } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
 294                                (flags[i] & OPAL_PM_TIMEBASE_STOP)) {
 295                        strncpy(powernv_states[nr_idle_states].name,
 296                                names[i], CPUIDLE_NAME_LEN);
 297                        strncpy(powernv_states[nr_idle_states].desc,
 298                                names[i], CPUIDLE_NAME_LEN);
 299
 300                        powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
 301                        powernv_states[nr_idle_states].enter = stop_loop;
 302                        stop_psscr_table[nr_idle_states] = psscr_val[i];
 303                }
 304#endif
 305                powernv_states[nr_idle_states].exit_latency =
 306                                ((unsigned int)latency_ns[i]) / 1000;
 307
 308                if (!rc) {
 309                        powernv_states[nr_idle_states].target_residency =
 310                                ((unsigned int)residency_ns[i]) / 1000;
 311                }
 312
 313                nr_idle_states++;
 314        }
 315out:
 316        return nr_idle_states;
 317}
 318
 319/*
 320 * powernv_idle_probe()
 321 * Choose state table for shared versus dedicated partition
 322 */
 323static int powernv_idle_probe(void)
 324{
 325        if (cpuidle_disable != IDLE_NO_OVERRIDE)
 326                return -ENODEV;
 327
 328        if (firmware_has_feature(FW_FEATURE_OPAL)) {
 329                cpuidle_state_table = powernv_states;
 330                /* Device tree can indicate more idle states */
 331                max_idle_state = powernv_add_idle_states();
 332                if (max_idle_state > 1) {
 333                        snooze_timeout_en = true;
 334                        snooze_timeout = powernv_states[1].target_residency *
 335                                         tb_ticks_per_usec;
 336                }
 337        } else
 338                return -ENODEV;
 339
 340        return 0;
 341}
 342
 343static int __init powernv_processor_idle_init(void)
 344{
 345        int retval;
 346
 347        retval = powernv_idle_probe();
 348        if (retval)
 349                return retval;
 350
 351        powernv_cpuidle_driver_init();
 352        retval = cpuidle_register(&powernv_idle_driver, NULL);
 353        if (retval) {
 354                printk(KERN_DEBUG "Registration of powernv driver failed.\n");
 355                return retval;
 356        }
 357
 358        register_cpu_notifier(&setup_hotplug_notifier);
 359        printk(KERN_DEBUG "powernv_idle_driver registered\n");
 360        return 0;
 361}
 362
 363device_initcall(powernv_processor_idle_init);
 364